source: trunk/oarutils/oar-parexec @ 121

Last change on this file since 121 was 121, checked in by g7moreau, 9 years ago
  • Add log version trace. Begin at 1.
  • Begin doc for log version 2 (future)
#!/usr/bin/perl
#
# 2011/11/27 Gabriel Moreau

use strict;

use Getopt::Long();
use Pod::Usage;
use Coro;
use Coro::Semaphore;
use Coro::Signal;
use Coro::Channel;
use Coro::Handle;
use AnyEvent;            # provides the AE::now / AE::now_update timer API used below
use IO::File;
use POSIX qw( WNOHANG WEXITSTATUS );
use Cwd qw( getcwd );

my $file;
my $dir;
my $cmd;
my $logtrace;
my $verbose;
my $job_np         = 1;
my $nodefile       = $ENV{OAR_NODE_FILE} || '';
my $masterio;
my $switchio;
my $help;
my $oarsh          = 'oarsh -q -T';
my $sig_transmit;
my $sig_checkpoint = 'USR2';
my $job_launch_brake = 1; # one second brake between two job launches

Getopt::Long::GetOptions(
   'file=s'     => \$file,
   'dir=s'      => \$dir,
   'cmd=s'      => \$cmd,
   'logtrace=s' => \$logtrace,
   'verbose'    => \$verbose,
   'help'       => \$help,
   'oarsh=s'    => \$oarsh,
   'jobnp=i'    => \$job_np,
   'nodefile=s' => \$nodefile,
   'masterio=s' => \$masterio,
   'switchio'   => \$switchio,
   'transmit'   => \$sig_transmit,
   'kill=s'     => \$sig_checkpoint,
   ) || pod2usage(-verbose => 0);
pod2usage(-verbose => 2) if $help;
pod2usage(-verbose => 2) if not (
 (-e "$file")
 or (-d "$dir" and $cmd ne '')
 );

my $oar_version = `oarsub -V | awk '{print \$4}'`;
chomp $oar_version;

# on re-run, keep trace of jobs already done
my %state;
my $log_h = IO::File->new();
if (-e "$logtrace") {
   $log_h->open("< $logtrace")
      or die "error: can't read log file: $!";
   while (<$log_h>) {
      $state{$1} = 'start' if m/^start\s+job\s+([^\s]+)\s/;
      $state{$1} = 'end'   if m/^end\s+job\s+([^\s]+)\s/;
      }
   $log_h->close();
   }
if ($logtrace) {
   $log_h->open(">> $logtrace")
      or die "error: can't append log file $logtrace: $!";
   $log_h->autoflush;
   $log_h = unblock $log_h;
   }

# write log format version
$log_h->print("log version 1\n") if $logtrace;
print("log version 1\n") if $verbose;

# jobs to run
my @job = ();
if (-e "$file") {
   my $job_num = 0;
   open(JOB_LIST, '<', "$file") or die "error: can't open job file $file: $!";
   while (my $job_cmd = <JOB_LIST>) {
      chomp $job_cmd;
      next if $job_cmd =~ m/^#/;
      next if $job_cmd =~ m/^\s*$/;
      $job_num++;
      my ($job_name) = $job_cmd =~ m/#.*?\bname=(\S+?)\b/i;
      $job_name ||= $job_num;
      push @job, {
         name   => $job_name,
         cmd    => "$job_cmd",
         num    => $job_num,
         };
      }
   close JOB_LIST;
   }
else {
   my $job_num = 0;
   opendir(DIR, $dir) or die "error: can't open folder $dir: $!";
   while (my $item = readdir(DIR)) {
      next if $item =~ m/^\./;
      next if $item =~ m/:/;
      next if $item =~ m/\.old$/;
      next if $item =~ m/\.sav$/;
      next if $item =~ m/\.bak$/;
      next if $item =~ m/\.no$/;
      next unless (-d "$dir/$item");
      $job_num++;
      push @job, {
         name   => $item,
         cmd    => "cd $dir/$item/; $cmd",
         num    => $job_num,
         };
      }
   closedir DIR;
   }

# job names must be unique, otherwise fall back to the job numbers
{
   my %seen = ();
   my $count_unique_name = grep { ! $seen{ $_->{name} }++ } @job;
   if ($count_unique_name != scalar(@job)) {
      $_->{name} = $_->{num} for @job;
      }
   }

# ressources available
my @ressources = ();
open(NODE_FILE, '<', "$nodefile")
   or die "error: can't open node file $nodefile: $!";
while (<NODE_FILE>) {
   chomp;
   next if m/^#/;
   next if m/^\s*$/;
   push @ressources, $_;
   }
close NODE_FILE;

my $ressource_size = scalar(@ressources);
die "error: not enough ressources jobnp $job_np > ressources $ressource_size"
   if $job_np > $ressource_size;

my $current_dir = getcwd();

my $stderr = $ENV{OAR_STDERR} || '';
$stderr =~ s/\.stderr$//;
$stderr = $masterio if $masterio;
my $stdout = $ENV{OAR_STDOUT} || '';
$stdout =~ s/\.stdout$//;
$stdout = $masterio if $masterio;

my $finished = new Coro::Signal;
my $job_todo = new Coro::Semaphore 0;
my $job_name_maxlen;
for (@job) {
   $job_todo->up;
   $job_name_maxlen = length($_->{name}) if length($_->{name}) > $job_name_maxlen;
   }

# slice of ressources for parallel job
my $ressources = new Coro::Channel;
for my $slot (1 .. int($ressource_size / $job_np)) {
   $ressources->put(
      join(',',
         @ressources[ (($slot - 1) * $job_np) .. (($slot * $job_np) - 1) ])
         );
   }

my %scheduled = ();

# OAR checkpoint and default signal SIGUSR2
my $oar_checkpoint = new Coro::Semaphore 0;
my $notify         = new Coro::Signal;
$SIG{$sig_checkpoint} = sub {
   print "warning: received checkpoint at "
      . time
      . ", no new job, just finishing running jobs\n"
      if $verbose;
   $oar_checkpoint->up();
   $notify->send if $sig_transmit;
   };

# asynchronous block: transmit the checkpoint signal to the running sub-jobs
async {
   while () {
      $notify->wait;

      for my $job_pid (keys %scheduled) {
         my $job_name     = $scheduled{$job_pid}->{name};
         my $job_pidfile  = $scheduled{$job_pid}->{pidfile};
         my $node_connect = $scheduled{$job_pid}->{node_connect};

         my $fh = IO::File->new();
         $fh->open("| $oarsh $node_connect >/dev/null 2>&1")
            or die "error: can't notify subjob: $!";

         $fh->autoflush;
         $fh = unblock $fh;

         $fh->print("kill -$sig_checkpoint \$(cat $job_pidfile)\n");
         $fh->print("exit\n");

         print "warning: transmitting signal $sig_checkpoint"
            . " to job $job_name on node $node_connect.\n"
            if $verbose;

         close $fh;
         cede;
         }
      }
   }

# asynchronous block: start jobs
async {
   my $timer;
   JOB:
   for my $job (@job) {
      my $job_name   = $job->{name};
      my $job_cmd    = $job->{cmd};

      # has the job already been run?
      if (exists $state{$job_name}) {
         if ($state{$job_name} eq 'start') {
            print "warning: job $job_name was not clearly finished, relaunching...\n"
               if $verbose;
            }
         elsif ($state{$job_name} eq 'end') {
            delete $state{$job_name}; # free memory
            $job_todo->down;
            print "warning: job $job_name already run\n" if $verbose;
            cede;
            next JOB;
            }
         }

      # brake: do not launch the jobs too fast
      # equivalent to sleep $job_launch_brake, without blocking the other coroutines
      $timer = AE::now + $job_launch_brake;
      while ( AE::now < $timer ) {
         # force update of AE time
         AE::now_update;
         cede;
         }

      # take a job ressource slice
      my $job_ressource = $ressources->get;

      # do not launch new jobs once OAR checkpointing has begun
      last JOB if $oar_checkpoint->count() > 0;

      my ($node_connect) = split ',', $job_ressource;
      my $fh = IO::File->new();
      my $job_pid = $fh->open("| $oarsh $node_connect >/dev/null 2>&1")
         or die "error: can't start subjob: $!";

      $fh->autoflush;
      $fh = unblock $fh;

      my $msg = sprintf "start job %${job_name_maxlen}s / %5i at %s oar job %i on node %s\n",
         $job_name, $job_pid, time, $ENV{OAR_JOB_ID}, $job_ressource;
      $log_h->print($msg) if $logtrace;
      print($msg) if $verbose;

      my ($job_stdout, $job_stderr);
      $job_stdout = ">  $stdout-$job_name.stdout" if $stdout ne '' and $switchio;
      $job_stderr = "2> $stderr-$job_name.stderr" if $stderr ne '' and $switchio;

      my $job_nodefile   = "/tmp/oar-parexec-$ENV{LOGNAME}-$ENV{OAR_JOB_ID}-$job_name";
      my $job_pidfile    = "/tmp/oar-parexec-$ENV{LOGNAME}-$ENV{OAR_JOB_ID}-$job_name.pid";
      my $job_statusfile = "/tmp/oar-parexec-$ENV{LOGNAME}-$ENV{OAR_JOB_ID}-$job_name.status";

      $scheduled{$job_pid} = {
         fh           => $fh,
         node_connect => $node_connect,
         ressource    => $job_ressource,
         name         => $job_name,
         pidfile      => $job_pidfile,
         };

      # set the job environment, run it and clean up
      if ($job_np > 1) {
         $fh->print("printf \""
               . join('\n', split(',', $job_ressource,))
               . "\" > $job_nodefile\n");
         $fh->print("OAR_NODE_FILE=$job_nodefile\n");
         $fh->print("OAR_NP=$job_np\n");
         $fh->print("export OAR_NODE_FILE\n");
         $fh->print("export OAR_NP\n");
         $fh->print("unset OAR_MSG_NODEFILE\n");
         }

      $fh->print("cd $current_dir\n");

      if ($sig_transmit) {
         $fh->print("trap 'jobs -p|xargs -r ps -o pid --no-headers --ppid|xargs -r kill -$sig_checkpoint' $sig_checkpoint\n");
         $fh->print("echo \$\$ > $job_pidfile\n");
         }

      $fh->print("echo 0 > $job_statusfile\n");
      $fh->print("(\n");
      $fh->print("$job_cmd\n");
      $fh->print(") $job_stdout $job_stderr || echo \$? > $job_statusfile \&\n");
      $fh->print("while [ \$(jobs -p | wc -l) -gt 0 ]\n");
      $fh->print("do\n");
      $fh->print("   wait\n");
      $fh->print("done\n");

      $fh->print("OAR_SUBJOB_RETCODE=\$(cat $job_statusfile)\n");
      $fh->print("rm -f $job_statusfile\n");
      $fh->print("rm -f $job_pidfile\n")  if $sig_transmit;
      $fh->print("rm -f $job_nodefile\n") if $job_np > 1;
      $fh->print("exit \$OAR_SUBJOB_RETCODE\n");
      cede;
      }
   }

# asynchronous block: collect finished jobs
async {
   while () {
      for my $job_pid (keys %scheduled) {
         # non-blocking PID test
         if (waitpid($job_pid, WNOHANG)) {
            # get the return status code
            my $job_retcode0 = $? >> 8;
            #print "ERROR0 $job_pid $job_retcode0\n" if $job_retcode0;

            my $msg = sprintf "end   job %${job_name_maxlen}s / %5i at %s oar job %i on node %s\n",
               $scheduled{$job_pid}->{name},
               $job_pid, time, $ENV{OAR_JOB_ID}, $scheduled{$job_pid}->{ressource};

            # job ended in error
            $msg =~ s/^end\s+job/error:$job_retcode0 job/
               if $job_retcode0 > 0 and $job_retcode0 != 99;

            # job not finished, just suspended if a checkpoint signal was received
            $msg =~ s/^end\s+job/suspend job/
               if $sig_transmit and $oar_checkpoint->count() > 0;

            $log_h->print($msg) if $logtrace;
            print($msg) if $verbose;
            close $scheduled{$job_pid}->{fh};
            # release the ressource slice for another job
            $ressources->put($scheduled{$job_pid}->{ressource});
            $job_todo->down;
            delete $scheduled{$job_pid};
            }
         cede;
         }

      # checkpointing: just finish the running jobs and quit
      $finished->send if $oar_checkpoint->count() > 0 and scalar(keys(%scheduled)) == 0;

      $finished->send if $job_todo->count() == 0;
      cede;
      }
   }

cede;

# all jobs have been done
$finished->wait;

# close log trace file
$log_h->close() if $logtrace;

# when checkpointed, exit 99 so that an idempotent OAR job is resubmitted (not needed with OAR 2.4)
exit 99 if (($oar_checkpoint->count() > 0) and ($oar_version !~ m/^2\.4/));


__END__

=head1 NAME

oar-parexec - parallel execution of many small, short or long jobs

=head1 SYNOPSIS

 oar-parexec --file filecommand \
    [--logtrace tracefile] [--verbose] \
    [--jobnp integer] [--nodefile filenode] [--oarsh command] \
    [--switchio] [--masterio basefileio] \
    [--kill signal] [--transmit]

 oar-parexec --dir foldertoiterate --cmd commandtolaunch \
    [--logtrace tracefile] [--verbose] \
    [--jobnp integer] [--nodefile filenode] [--oarsh command] \
    [--switchio] [--masterio basefileio] \
    [--kill signal] [--transmit]

 oar-parexec --help

=head1 DESCRIPTION

C<oar-parexec> can execute a lot of small, short or long jobs in parallel inside a cluster.
The number of jobs running at the same time cannot exceed the number of cores defined in the node file.
C<oar-parexec> is easier to use inside an OAR job environment,
which automatically defines these strategic parameters...
However, it can be used outside OAR.

The option C<--file>, or the pair C<--dir> and C<--cmd>, are the only mandatory parameters.

Small jobs will be launched in the same folder as the master job.
Two environment variables are defined for each small job,
and only in the case of parallel small jobs (option C<--jobnp> > 1).

 OAR_NODE_FILE - file that lists the nodes for parallel computing
 OAR_NP        - number of processors allocated

The file defined by OAR_NODE_FILE is created in /tmp
on the node before launching the small job,
and this file is deleted after the job completes.
C<oar-parexec> is a simple script:
OAR_NODE_FILE will not be deleted if the master job crashes.

OAR defines other variables that are equivalent to OAR_NODE_FILE:
OAR_NODEFILE, OAR_FILE_NODES, OAR_RESOURCE_FILE...
You can still use the original OAR resource file in your script
through these variables if you need it.

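For example, a parallel small job script could look like the sketch below;
the solver name and the exact C<mpirun> options are only an illustration
and depend on your MPI implementation:

 #!/bin/bash
 # sketch of a parallel small job using the variables set by oar-parexec
 echo "running on $OAR_NP cores:"
 cat $OAR_NODE_FILE
 mpirun -np $OAR_NP -machinefile $OAR_NODE_FILE ./my_solver
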
When used with long jobs,
activate the option C<--transmit> to forward the OAR checkpoint signal
and suspend the small jobs before the walltime cut!

=head1 OPTIONS

=over 12

=item B<-f|--file filecommand>

Name of the file which contains the job list.
For the JOB_NAME definition,
the first valid job in the list gets the number 1 and so on...

It's possible to fix the name inside a comment on the job line.
For example:

 $HOME/test/subjob1.sh # name=subjob1

The key C<name> is case insensitive;
the associated value cannot contain a space...

The command can be any shell command.
It's possible to change folder,
or to launch an asynchronous job in parallel,
but one command must block and not be launched asynchronously (with & or coproc).
Example:

 cd ./test; ./subjob1.sh
 cd ./test; nice -18 du -sk ./ & ./subjob1.sh

The commands C<du -sk ./> and C<./subjob1.sh> will run in parallel on the same ressource...
It's better if C<du -sk ./> is faster than C<./subjob1.sh>!
Do not abuse this!

=item B<-d|--dir foldertoiterate>

The command C<--cmd> will be launched in every sub-folder of this master folder.
Files in this folder will be ignored.
Sub-folders whose name begins with F<.>
or ends with F<.old>, F<.sav>, F<.bak> or F<.no> will also be ignored...

The JOB_NAME is simply the sub-folder name.

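As an illustration, with a (hypothetical) layout like this one,
three jobs named C<test1>, C<test2> and C<test3> are created,
while the other entries are skipped:

 subjob.d/
    test1/
    test2/
    test2.old/    <- skipped (.old suffix)
    test3/
    notes.txt     <- skipped (not a folder)
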
=item B<-c|--cmd commandtolaunch>

Command (and its arguments) that will be launched in every sub-folder
of the folder given by C<--dir>.
As for the option C<--file>, the command can be any valid shell command
but one command must block.

=item B<-l|--logtrace tracefile>

File which logs and traces running jobs.
When re-running the same master command (after a crash for example),
only jobs that are not marked as done will be run again.
Be careful: jobs marked as running (started but not finished) will be run again.
Tracing is based on the JOB_NAME across multiple runs.

This option is very useful in case of a crash,
but also for checkpointing and idempotent OAR jobs.

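The trace file is plain text, one line per event, in the log format
described at the end of this document; the names, PIDs and timestamps
below are only illustrative:

 log version 1
 start job subjob01 / 12345 at 1450482228 oar job 71725 on node cl7n001
 end   job subjob01 / 12345 at 1450482556 oar job 71725 on node cl7n001
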
=item B<-v|--verbose>

=item B<-j|--jobnp integer>

Number of processors to allocate to each small job.
1 by default.

=item B<-n|--nodefile filenode>

Name of the file that lists all the nodes where jobs can be launched.
By default, it's defined automatically by OAR via
the environment variable C<OAR_NODE_FILE>.

For example, if you want to use 6 cores on your cluster node,
you need to put the node hostname 6 times in this file,
one per line...
It's a very common file for MPI processes!

=item B<-o|--oarsh command>

Command used to launch a shell on a node.
By default

 oarsh -q -T

Change it to C<ssh> if you are not using an OAR cluster...

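For example, outside OAR you could combine it with an explicit node file
(both file names below are hypothetical):

 oar-parexec -f ./subjob.list.txt -n ./node.list.txt -o ssh
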
=item B<-s|--switchio>

Each small job will have its own STDOUT and STDERR output files,
based on the master OAR job output with the C<JOB_NAME> inside
(or based on C<basefileio> if the option C<masterio> is used).
Example:

 OAR.151524.stdout -> OAR.151524-JOB_NAME.stdout

where 151524 here is the master C<OAR_JOB_ID>
and C<JOB_NAME> is the small job name.

=item B<-m|--masterio basefileio>

The C<basefileio> will be used in place of the environment variables
C<OAR_STDOUT> and C<OAR_STDERR> (without extension) to build the base name of the small job standard output
(only used when the option C<switchio> is activated).

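For example, with a hypothetical base name F<./result/run1>,
each small job writes to F<./result/run1-JOB_NAME.stdout>
and F<./result/run1-JOB_NAME.stderr>:

 oar-parexec -f ./subjob.list.txt -s -m ./result/run1
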
=item B<-k|--kill signal>

Signal to listen for in order to make a clean stop of the current C<oar-parexec> process.
By default, the USR2 signal is used (see C<kill -l> for a list of possible signals).

=item B<-t|--transmit>

Resend the caught signal to the sub-jobs when receiving it.
By default, no signal is transmitted to child processes.

It's only valuable for long sub-jobs that can,
in return, perform a clean restart by themselves.

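A long sub-job able to do that could, for example, trap the transmitted signal,
save its state and exit cleanly; the file names and the C<save_state> function
below are purely illustrative:

 #!/bin/bash
 # sketch of a long sub-job that checkpoints itself on USR2
 save_state() { cp state.tmp state.checkpoint; exit 0; }
 trap save_state USR2
 while [ ! -e state.done ]
 do
    ./one_more_iteration   # updates state.tmp
 done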

=item B<-h|--help>

=back


=head1 EXAMPLE

=head2 Simple list of sequential jobs

The job command file (option C<--file>) can contain:

 - empty lines
 - comment lines beginning with #
 - valid shell commands (possibly followed by a comment)

Example where each F<$HOME/test/subjobNN.sh> is an executable shell script:

 $HOME/test/subjob01.sh  # name=subjob01
 $HOME/test/subjob02.sh  # name=subjob02
 $HOME/test/subjob03.sh  # name=subjob03
 $HOME/test/subjob04.sh  # name=subjob04
 ...
 $HOME/test/subjob38.sh  # name=subjob38
 $HOME/test/subjob39.sh  # name=subjob39
 $HOME/test/subjob40.sh  # name=subjob40

These jobs could be launched with:

 oarsub -n test -l /core=6,walltime=04:00:00 \
   "oar-parexec -f ./subjob.list.txt"

=head2 Folder jobs

In a folder F<subjob.d>, create sub-folders with your data inside: F<test1>, F<test2>...
The same command will be executed in every sub-folder.
C<oar-parexec> changes the current directory to the sub-folder before launching it.

A very simple job could be:

 oarsub -n test -l /core=6,walltime=04:00:00 \
   "oar-parexec -d ./subjob.d -c 'sleep 10; env'"

The command C<env> will be executed in every folder F<test1>, F<test2>... after a 10 s pause.

Sometimes it's simpler to use a command file list,
sometimes jobs by folder with the same command are more relevant.

=head2 Parallel jobs

You need to give the number of cores each small job needs with the option C<--jobnp>.
If your job is built on OpenMP or MPI,
you can use the OAR_NP and OAR_NODE_FILE variables to configure them.
On an OAR cluster, you need to use C<oarsh> or a wrapper like C<oar-envsh>
instead of C<ssh> for connections between nodes.

Example with parallel small jobs on 2 cores:

 oarsub -n test -l /core=6,walltime=04:00:00 \
   "oar-parexec -j 2 -f ./subjob.list.txt"

=head2 Tracing and master crash

If the master node crashes after hours of computation, is everything lost?
No: with the option C<--logtrace>,
it's possible to remember previous results
and not re-run those jobs on the second and following runs.

 oarsub -n test -l /core=6,walltime=04:00:00 \
   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"

After a crash or an C<oardel> command,
you can then re-run the same command, which will finish executing the jobs in the list:

 oarsub -n test -l /core=6,walltime=04:00:00 \
   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"

C<logtrace> files are just plain text files.
We use the extension '.log' because these files are automatically
excluded from our backup system!

=head2 Checkpointing and Idempotent

C<oar-parexec> is compatible with the OAR checkpointing.
If you have 2000 small jobs that need 55 h to be done on 6 cores,
you can cut this into small parts.

For this example, we suppose that each small job needs about 10 min...
So we send a checkpoint 12 min before the end of the process
to let C<oar-parexec> finish the jobs already started.
After being checkpointed, C<oar-parexec> does not start any new small job.

 oarsub -t idempotent -n test \
   -l /core=6,walltime=04:00:00 \
   --checkpoint 720 \
   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"
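
As a rough check of these numbers (assuming about 10 min per small job):

 2000 jobs x 10 min            ~ 20000 min of work
 20000 min / 6 cores           ~  3333 min  ~ 55 h
 55 h / 3 h 48 of launch time  ~    15 OAR submissions of 4 h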

After 3 h 48 min, the OAR job stops launching new small jobs.
When all running small jobs are finished, it exits.
But as the OAR job is of type C<idempotent>,
OAR will re-submit it as long as not all small jobs have been executed...

This way, we give other users a chance to use the cluster!

In this last example, we use a moldable OAR job with idempotent
to reserve many cores for a short time or a few cores for a long time:

 oarsub -t idempotent -n test \
   -l /core=50,walltime=01:05:00 \
   -l /core=6,walltime=04:00:00 \
   --checkpoint 720 \
   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"

=head2 Signal, recursion and long jobs

By default, OAR uses the USR2 signal for checkpointing.
It's possible to change this with the option C<--kill>.

When used with long small jobs, waiting for them to finish at checkpoint time could take too long...
More than the walltime!
The option C<--transmit> can be used to checkpoint the small jobs themselves!
These long small jobs will then stop cleanly and will be restarted next time.

In the C<logtrace> file, these small jobs will have the status suspend.
They will be launched with the same command line at the next OAR run.

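In the version 1 log format, such a suspended job leaves lines like these
(the values are only illustrative):

 log version 1
 start job case01 / 12345 at 1450482228 oar job 71725 on node cl7n001
 suspend job case01 / 12345 at 1450482556 oar job 71725 on node cl7n001
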
Example: if you have 50 small jobs that each need 72 h on 1 core,
you can cut this into 24 h parts.

For this example, we suppose that each long job loop needs about 20 min...
So we send a checkpoint 30 min before the end of the process
to let C<oar-parexec> suspend the jobs already started.
After being checkpointed, C<oar-parexec> does not start any new small job.

 oarsub -t idempotent -n test \
   -l /core=6,walltime=24:00:00 \
   --checkpoint 1800 \
   --transmit \
   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"

After 23 h 30 min, the OAR job stops launching new small jobs.
When all running small jobs are suspended, it exits.
But as the OAR job is of type C<idempotent>,
OAR will re-submit it as long as not all small jobs are finished...

=head2 Log format

=over

=item B<Version 2>

 log version 2
 start   subjob  1 pid 101468 at 1450482228 oarjob 71725 onnode cl7n001
 end     subjob  1 pid 101468 at 1450482556 oarjob 71725 onnode cl7n001 duration 657 status 0
 error   subjob  1 pid 101468 at 1450482556 oarjob 71725 onnode cl7n001 duration 657 status 0
 suspend subjob  1 pid 101468 at 1450482556 oarjob 71725 onnode cl7n001 duration 657 status 0

=item B<Version 1>

 log version 1
 start job 1 / 101468 at 1450482228 oar job 71725 on node cl7n001
 end   job 1 / 101468 at 1450482556 oar job 71725 on node cl7n001

=back

=head1 SEE ALSO

oar-dispatch, mpilauncher,
oarsh, oar-envsh, ssh


=head1 AUTHORS

Written by Gabriel Moreau, Grenoble - France


=head1 LICENSE AND COPYRIGHT

GPL version 2 or later and Perl equivalent

Copyright (C) 2011-2017 Gabriel Moreau / LEGI - CNRS UMR 5519 - France