source: trunk/oarutils/oar-parexec @ 121

Last change on this file since 121 was 121, checked in by g7moreau, 9 years ago
  • Add log version trace. Begin at 1.
  • Begin doc for log version 2 (future)
#!/usr/bin/perl
#
# 2011/11/27 Gabriel Moreau

use strict;

use Getopt::Long();
use Pod::Usage;
use Coro;
use Coro::Semaphore;
use Coro::Signal;
use Coro::Channel;
use Coro::Handle;
use AnyEvent;            # provides the AE::now / AE::now_update timer API used below
use IO::File;
use POSIX qw( WNOHANG WEXITSTATUS );
use Cwd qw( getcwd );

my $file;
my $dir;
my $cmd;
my $logtrace;
my $verbose;
my $job_np         = 1;
my $nodefile       = $ENV{OAR_NODE_FILE} || '';
my $masterio;
my $switchio;
my $help;
my $oarsh          = 'oarsh -q -T';
my $sig_transmit;
my $sig_checkpoint = 'USR2';
my $job_launch_brake = 1; # one second brake between two job launches

Getopt::Long::GetOptions(
   'file=s'     => \$file,
   'dir=s'      => \$dir,
   'cmd=s'      => \$cmd,
   'logtrace=s' => \$logtrace,
   'verbose'    => \$verbose,
   'help'       => \$help,
   'oarsh=s'    => \$oarsh,
   'jobnp=i'    => \$job_np,
   'nodefile=s' => \$nodefile,
   'masterio=s' => \$masterio,
   'switchio'   => \$switchio,
   'transmit'   => \$sig_transmit,
   'kill=s'     => \$sig_checkpoint,
   ) || pod2usage(-verbose => 0);
pod2usage(-verbose => 2) if $help;
pod2usage(-verbose => 2) if not (
 (-e "$file")
 or (-d "$dir" and $cmd ne '')
 );

my $oar_version = `oarsub -V | awk '{print \$4}'`;
chomp $oar_version;

# on re-run, keep trace of jobs already done
my %state;
my $log_h = IO::File->new();
if (-e "$logtrace") {
   $log_h->open("< $logtrace")
      or die "error: can't read log file: $!";
   while (<$log_h>) {
      $state{$1} = 'start' if m/^start\s+job\s+([^\s]+)\s/;
      $state{$1} = 'end'   if m/^end\s+job\s+([^\s]+)\s/;
      }
   $log_h->close();
   }
if ($logtrace) {
   $log_h->open(">> $logtrace")
      or die "error: can't append log file $logtrace: $!";
   $log_h->autoflush;
   $log_h = unblock $log_h;
   }

# write log format version
$log_h->print("log version 1\n") if $logtrace;
print("log version 1\n") if $verbose;

# jobs to run
my @job = ();
if (-e "$file") {
   my $job_num = 0;
   open(JOB_LIST, '<', "$file") or die "error: can't open job file $file: $!";
   while (my $job_cmd = <JOB_LIST>) {
      chomp $job_cmd;
      next if $job_cmd =~ m/^#/;
      next if $job_cmd =~ m/^\s*$/;
      $job_num++;
      my ($job_name) = $job_cmd =~ m/#.*?\bname=(\S+?)\b/i;
      $job_name ||= $job_num;
      push @job, {
         name   => $job_name,
         cmd    => "$job_cmd",
         num    => $job_num,
         };
      }
   close JOB_LIST;
   }
else {
   my $job_num = 0;
   opendir(DIR, $dir) or die "error: can't open folder $dir: $!";
   while (my $item = readdir(DIR)) {
      next if $item =~ m/^\./;
      next if $item =~ m/:/;
      next if $item =~ m/\.old$/;
      next if $item =~ m/\.sav$/;
      next if $item =~ m/\.bak$/;
      next if $item =~ m/\.no$/;
      next unless (-d "$dir/$item");
      $job_num++;
      push @job, {
         name   => $item,
         cmd    => "cd $dir/$item/; $cmd",
         num    => $job_num,
         };
      }
   closedir DIR;
   }

# job names must be unique, otherwise fall back to the job numbers
{
   my %seen = ();
   my $count_unique_name = grep { ! $seen{ $_->{name} }++ } @job;
   if ($count_unique_name != scalar(@job)) {
      $_->{name} = $_->{num} for @job;
      }
   }

# ressources available
my @ressources = ();
open(NODE_FILE, '<', "$nodefile")
   or die "error: can't open node file $nodefile: $!";
while (<NODE_FILE>) {
   chomp;
   next if m/^#/;
   next if m/^\s*$/;
   push @ressources, $_;
   }
close NODE_FILE;

my $ressource_size = scalar(@ressources);
die "error: not enough ressources jobnp $job_np > ressources $ressource_size"
   if $job_np > $ressource_size;

my $current_dir = getcwd();

my $stderr = $ENV{OAR_STDERR} || '';
$stderr =~ s/\.stderr$//;
$stderr = $masterio if $masterio;
my $stdout = $ENV{OAR_STDOUT} || '';
$stdout =~ s/\.stdout$//;
$stdout = $masterio if $masterio;

my $finished = new Coro::Signal;
my $job_todo = new Coro::Semaphore 0;
my $job_name_maxlen;
for (@job) {
   $job_todo->up;
   $job_name_maxlen = length($_->{name}) if length($_->{name}) > $job_name_maxlen;
   }

# slice of ressources for parallel job
my $ressources = new Coro::Channel;
for my $slot (1 .. int($ressource_size / $job_np)) {
   $ressources->put(
      join(',',
         @ressources[ (($slot - 1) * $job_np) .. (($slot * $job_np) - 1) ])
         );
   }

my %scheduled = ();

# OAR checkpoint and default signal SIGUSR2
my $oar_checkpoint = new Coro::Semaphore 0;
my $notify         = new Coro::Signal;
$SIG{$sig_checkpoint} = sub {
   print "warning: received checkpoint at "
      . time
      . ", no new job, just finishing running jobs\n"
      if $verbose;
   $oar_checkpoint->up();
   $notify->send if $sig_transmit;
   };

# asynchronous block: transmit the checkpoint signal to the running sub-jobs
async {
   while () {
      $notify->wait;

      for my $job_pid (keys %scheduled) {
         my $job_name     = $scheduled{$job_pid}->{name};
         my $job_pidfile  = $scheduled{$job_pid}->{pidfile};
         my $node_connect = $scheduled{$job_pid}->{node_connect};

         my $fh = IO::File->new();
         $fh->open("| $oarsh $node_connect >/dev/null 2>&1")
            or die "error: can't notify subjob: $!";

         $fh->autoflush;
         $fh = unblock $fh;

         $fh->print("kill -$sig_checkpoint \$(cat $job_pidfile)\n");
         $fh->print("exit\n");

         print "warning: transmitting signal $sig_checkpoint"
            . " to job $job_name on node $node_connect.\n"
            if $verbose;

         close $fh;
         cede;
         }
      }
   }

# asynchronous block: start jobs
async {
   my $timer;
   JOB:
   for my $job (@job) {
      my $job_name   = $job->{name};
      my $job_cmd    = $job->{cmd};

      # has the job already been run?
      if (exists $state{$job_name}) {
         if ($state{$job_name} eq 'start') {
            print "warning: job $job_name was not clearly finished, relaunching...\n"
               if $verbose;
            }
         elsif ($state{$job_name} eq 'end') {
            delete $state{$job_name}; # free memory
            $job_todo->down;
            print "warning: job $job_name already run\n" if $verbose;
            cede;
            next JOB;
            }
         }

      # brake: do not launch the jobs too fast
      # equivalent to sleep $job_launch_brake, without blocking the other coroutines
      $timer = AE::now + $job_launch_brake;
      while ( AE::now < $timer ) {
         # force update of AE time
         AE::now_update;
         cede;
         }

      # take a job ressource slice
      my $job_ressource = $ressources->get;

      # do not launch new jobs once OAR checkpointing has begun
      last JOB if $oar_checkpoint->count() > 0;

      my ($node_connect) = split ',', $job_ressource;
      my $fh = IO::File->new();
      my $job_pid = $fh->open("| $oarsh $node_connect >/dev/null 2>&1")
         or die "error: can't start subjob: $!";

      $fh->autoflush;
      $fh = unblock $fh;

      my $msg = sprintf "start job %${job_name_maxlen}s / %5i at %s oar job %i on node %s\n",
         $job_name, $job_pid, time, $ENV{OAR_JOB_ID}, $job_ressource;
      $log_h->print($msg) if $logtrace;
      print($msg) if $verbose;

      my ($job_stdout, $job_stderr);
      $job_stdout = ">  $stdout-$job_name.stdout" if $stdout ne '' and $switchio;
      $job_stderr = "2> $stderr-$job_name.stderr" if $stderr ne '' and $switchio;

      my $job_nodefile   = "/tmp/oar-parexec-$ENV{LOGNAME}-$ENV{OAR_JOB_ID}-$job_name";
      my $job_pidfile    = "/tmp/oar-parexec-$ENV{LOGNAME}-$ENV{OAR_JOB_ID}-$job_name.pid";
      my $job_statusfile = "/tmp/oar-parexec-$ENV{LOGNAME}-$ENV{OAR_JOB_ID}-$job_name.status";

      $scheduled{$job_pid} = {
         fh           => $fh,
         node_connect => $node_connect,
         ressource    => $job_ressource,
         name         => $job_name,
         pidfile      => $job_pidfile,
         };

      # set the job environment, run it and clean up
      if ($job_np > 1) {
         $fh->print("printf \""
               . join('\n', split(',', $job_ressource,))
               . "\" > $job_nodefile\n");
         $fh->print("OAR_NODE_FILE=$job_nodefile\n");
         $fh->print("OAR_NP=$job_np\n");
         $fh->print("export OAR_NODE_FILE\n");
         $fh->print("export OAR_NP\n");
         $fh->print("unset OAR_MSG_NODEFILE\n");
         }

      $fh->print("cd $current_dir\n");

      if ($sig_transmit) {
         $fh->print("trap 'jobs -p|xargs -r ps -o pid --no-headers --ppid|xargs -r kill -$sig_checkpoint' $sig_checkpoint\n");
         $fh->print("echo \$\$ > $job_pidfile\n");
         }

      $fh->print("echo 0 > $job_statusfile\n");
      $fh->print("(\n");
      $fh->print("$job_cmd\n");
      $fh->print(") $job_stdout $job_stderr || echo \$? > $job_statusfile \&\n");
      $fh->print("while [ \$(jobs -p | wc -l) -gt 0 ]\n");
      $fh->print("do\n");
      $fh->print("   wait\n");
      $fh->print("done\n");

      $fh->print("OAR_SUBJOB_RETCODE=\$(cat $job_statusfile)\n");
      $fh->print("rm -f $job_statusfile\n");
      $fh->print("rm -f $job_pidfile\n")  if $sig_transmit;
      $fh->print("rm -f $job_nodefile\n") if $job_np > 1;
      $fh->print("exit \$OAR_SUBJOB_RETCODE\n");
      cede;
      }
   }

# asynchronous block: collect finished jobs
async {
   while () {
      for my $job_pid (keys %scheduled) {
         # non-blocking PID test
         if (waitpid($job_pid, WNOHANG)) {
            # get the return status code
            my $job_retcode0 = $? >> 8;
            #print "ERROR0 $job_pid $job_retcode0\n" if $job_retcode0;

            my $msg = sprintf "end   job %${job_name_maxlen}s / %5i at %s oar job %i on node %s\n",
               $scheduled{$job_pid}->{name},
               $job_pid, time, $ENV{OAR_JOB_ID}, $scheduled{$job_pid}->{ressource};

            # job ended in error
            $msg =~ s/^end\s+job/error:$job_retcode0 job/
               if $job_retcode0 > 0 and $job_retcode0 != 99;

            # job not finished, just suspended if a checkpoint signal was received
            $msg =~ s/^end\s+job/suspend job/
               if $sig_transmit and $oar_checkpoint->count() > 0;

            $log_h->print($msg) if $logtrace;
            print($msg) if $verbose;
            close $scheduled{$job_pid}->{fh};
            # release the ressource slice for another job
            $ressources->put($scheduled{$job_pid}->{ressource});
            $job_todo->down;
            delete $scheduled{$job_pid};
            }
         cede;
         }

      # checkpointing: just finish the running jobs and quit
      $finished->send if $oar_checkpoint->count() > 0 and scalar(keys(%scheduled)) == 0;

      $finished->send if $job_todo->count() == 0;
      cede;
      }
   }

cede;

# all jobs have been done
$finished->wait;

# close log trace file
$log_h->close() if $logtrace;

# when checkpointed, exit 99 so that an idempotent OAR job is resubmitted (not needed with OAR 2.4)
exit 99 if (($oar_checkpoint->count() > 0) and ($oar_version !~ m/^2\.4/));


__END__

=head1 NAME

oar-parexec - parallel execution of many small, short or long jobs

=head1 SYNOPSIS

 oar-parexec --file filecommand \
    [--logtrace tracefile] [--verbose] \
    [--jobnp integer] [--nodefile filenode] [--oarsh command] \
    [--switchio] [--masterio basefileio] \
    [--kill signal] [--transmit]

 oar-parexec --dir foldertoiterate --cmd commandtolaunch \
    [--logtrace tracefile] [--verbose] \
    [--jobnp integer] [--nodefile filenode] [--oarsh command] \
    [--switchio] [--masterio basefileio] \
    [--kill signal] [--transmit]

 oar-parexec --help

=head1 DESCRIPTION

C<oar-parexec> can execute a lot of small, short or long jobs in parallel inside a cluster.
The number of jobs running at the same time cannot exceed the number of cores defined in the node file.
C<oar-parexec> is easier to use inside an OAR job environment,
which automatically defines these strategic parameters...
However, it can be used outside OAR.

The option C<--file>, or the pair C<--dir> and C<--cmd>, are the only mandatory parameters.

Small jobs will be launched in the same folder as the master job.
Two environment variables are defined for each small job,
and only in the case of parallel small jobs (option C<--jobnp> > 1).

 OAR_NODE_FILE - file that lists the nodes for parallel computing
 OAR_NP        - number of processors allocated

The file defined by OAR_NODE_FILE is created in /tmp
on the node before launching the small job,
and this file is deleted after the job completes.
C<oar-parexec> is a simple script:
OAR_NODE_FILE will not be deleted if the master job crashes.

OAR defines other variables that are equivalent to OAR_NODE_FILE:
OAR_NODEFILE, OAR_FILE_NODES, OAR_RESOURCE_FILE...
You can still use the original OAR resource file in your script
through these variables if you need it.

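For example, a parallel small job script could look like the sketch below;
the solver name and the exact C<mpirun> options are only an illustration
and depend on your MPI implementation:

 #!/bin/bash
 # sketch of a parallel small job using the variables set by oar-parexec
 echo "running on $OAR_NP cores:"
 cat $OAR_NODE_FILE
 mpirun -np $OAR_NP -machinefile $OAR_NODE_FILE ./my_solver
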
When used with long jobs,
activate the option C<--transmit> to forward the OAR checkpoint signal
and suspend the small jobs before the walltime cut!

=head1 OPTIONS

=over 12

=item B<-f|--file filecommand>

Name of the file which contains the job list.
For the JOB_NAME definition,
the first valid job in the list gets the number 1 and so on...

It's possible to fix the name inside a comment on the job line.
For example:

 $HOME/test/subjob1.sh # name=subjob1

The key C<name> is case insensitive;
the associated value cannot contain a space...

The command can be any shell command.
It's possible to change folder,
or to launch an asynchronous job in parallel,
but one command must block and not be launched asynchronously (with & or coproc).
Example:

 cd ./test; ./subjob1.sh
 cd ./test; nice -18 du -sk ./ & ./subjob1.sh

The commands C<du -sk ./> and C<./subjob1.sh> will run in parallel on the same ressource...
It's better if C<du -sk ./> is faster than C<./subjob1.sh>!
Do not abuse this!

=item B<-d|--dir foldertoiterate>

The command C<--cmd> will be launched in every sub-folder of this master folder.
Files in this folder will be ignored.
Sub-folders whose name begins with F<.>
or ends with F<.old>, F<.sav>, F<.bak> or F<.no> will also be ignored...

The JOB_NAME is simply the sub-folder name.

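As an illustration, with a (hypothetical) layout like this one,
three jobs named C<test1>, C<test2> and C<test3> are created,
while the other entries are skipped:

 subjob.d/
    test1/
    test2/
    test2.old/    <- skipped (.old suffix)
    test3/
    notes.txt     <- skipped (not a folder)
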
=item B<-c|--cmd commandtolaunch>

Command (and its arguments) that will be launched in every sub-folder
of the folder given by C<--dir>.
As for the option C<--file>, the command can be any valid shell command
but one command must block.

=item B<-l|--logtrace tracefile>

File which logs and traces running jobs.
When re-running the same master command (after a crash for example),
only jobs that are not marked as done will be run again.
Be careful: jobs marked as running (started but not finished) will be run again.
Tracing is based on the JOB_NAME across multiple runs.

This option is very useful in case of a crash,
but also for checkpointing and idempotent OAR jobs.

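The trace file is plain text, one line per event, in the log format
described at the end of this document; the names, PIDs and timestamps
below are only illustrative:

 log version 1
 start job subjob01 / 12345 at 1450482228 oar job 71725 on node cl7n001
 end   job subjob01 / 12345 at 1450482556 oar job 71725 on node cl7n001
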
=item B<-v|--verbose>

=item B<-j|--jobnp integer>

Number of processors to allocate to each small job.
1 by default.

=item B<-n|--nodefile filenode>

Name of the file that lists all the nodes where jobs can be launched.
By default, it's defined automatically by OAR via
the environment variable C<OAR_NODE_FILE>.

For example, if you want to use 6 cores on your cluster node,
you need to put the node hostname 6 times in this file,
one per line...
It's a very common file for MPI processes!

=item B<-o|--oarsh command>

Command used to launch a shell on a node.
By default

 oarsh -q -T

Change it to C<ssh> if you are not using an OAR cluster...

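For example, outside OAR you could combine it with an explicit node file
(both file names below are hypothetical):

 oar-parexec -f ./subjob.list.txt -n ./node.list.txt -o ssh
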
=item B<-s|--switchio>

Each small job will have its own STDOUT and STDERR output files,
based on the master OAR job output with the C<JOB_NAME> inside
(or based on C<basefileio> if the option C<masterio> is used).
Example:

 OAR.151524.stdout -> OAR.151524-JOB_NAME.stdout

where 151524 here is the master C<OAR_JOB_ID>
and C<JOB_NAME> is the small job name.

=item B<-m|--masterio basefileio>

The C<basefileio> will be used in place of the environment variables
C<OAR_STDOUT> and C<OAR_STDERR> (without extension) to build the base name of the small job standard output
(only used when the option C<switchio> is activated).

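For example, with a hypothetical base name F<./result/run1>,
each small job writes to F<./result/run1-JOB_NAME.stdout>
and F<./result/run1-JOB_NAME.stderr>:

 oar-parexec -f ./subjob.list.txt -s -m ./result/run1
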
=item B<-k|--kill signal>

Signal to listen for in order to make a clean stop of the current C<oar-parexec> process.
By default, the USR2 signal is used (see C<kill -l> for a list of possible signals).

=item B<-t|--transmit>

Resend the caught signal to the sub-jobs when receiving it.
By default, no signal is transmitted to child processes.

It's only valuable for long sub-jobs that can,
in return, perform a clean restart by themselves.

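A long sub-job able to do that could, for example, trap the transmitted signal,
save its state and exit cleanly; the file names and the C<save_state> function
below are purely illustrative:

 #!/bin/bash
 # sketch of a long sub-job that checkpoints itself on USR2
 save_state() { cp state.tmp state.checkpoint; exit 0; }
 trap save_state USR2
 while [ ! -e state.done ]
 do
    ./one_more_iteration   # updates state.tmp
 done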

=item B<-h|--help>

=back


=head1 EXAMPLE

=head2 Simple list of sequential jobs

The job command file (option C<--file>) can contain:

 - empty lines
 - comment lines beginning with #
 - valid shell commands (possibly followed by a comment)

Example where each F<$HOME/test/subjobNN.sh> is an executable shell script:

 $HOME/test/subjob01.sh  # name=subjob01
 $HOME/test/subjob02.sh  # name=subjob02
 $HOME/test/subjob03.sh  # name=subjob03
 $HOME/test/subjob04.sh  # name=subjob04
 ...
 $HOME/test/subjob38.sh  # name=subjob38
 $HOME/test/subjob39.sh  # name=subjob39
 $HOME/test/subjob40.sh  # name=subjob40

These jobs could be launched with:

 oarsub -n test -l /core=6,walltime=04:00:00 \
   "oar-parexec -f ./subjob.list.txt"

=head2 Folder jobs

In a folder F<subjob.d>, create sub-folders with your data inside: F<test1>, F<test2>...
The same command will be executed in every sub-folder.
C<oar-parexec> changes the current directory to the sub-folder before launching it.

A very simple job could be:

 oarsub -n test -l /core=6,walltime=04:00:00 \
   "oar-parexec -d ./subjob.d -c 'sleep 10; env'"

The command C<env> will be executed in every folder F<test1>, F<test2>... after a 10 s pause.

Sometimes it's simpler to use a command file list,
sometimes jobs by folder with the same command are more relevant.

=head2 Parallel jobs

You need to give the number of cores each small job needs with the option C<--jobnp>.
If your job is built on OpenMP or MPI,
you can use the OAR_NP and OAR_NODE_FILE variables to configure them.
On an OAR cluster, you need to use C<oarsh> or a wrapper like C<oar-envsh>
instead of C<ssh> for connections between nodes.

Example with parallel small jobs on 2 cores:

 oarsub -n test -l /core=6,walltime=04:00:00 \
   "oar-parexec -j 2 -f ./subjob.list.txt"

=head2 Tracing and master crash

If the master node crashes after hours of computation, is everything lost?
No: with the option C<--logtrace>,
it's possible to remember previous results
and not re-run those jobs on the second and following runs.

 oarsub -n test -l /core=6,walltime=04:00:00 \
   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"

After a crash or an C<oardel> command,
you can then re-run the same command, which will finish executing the jobs in the list:

 oarsub -n test -l /core=6,walltime=04:00:00 \
   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"

C<logtrace> files are just plain text files.
We use the extension '.log' because these files are automatically
excluded from our backup system!

=head2 Checkpointing and Idempotent

C<oar-parexec> is compatible with the OAR checkpointing.
If you have 2000 small jobs that need 55 h to be done on 6 cores,
you can cut this into small parts.

For this example, we suppose that each small job needs about 10 min...
So we send a checkpoint 12 min before the end of the process
to let C<oar-parexec> finish the jobs already started.
After being checkpointed, C<oar-parexec> does not start any new small job.

 oarsub -t idempotent -n test \
   -l /core=6,walltime=04:00:00 \
   --checkpoint 720 \
   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"
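
As a rough check of these numbers (assuming about 10 min per small job):

 2000 jobs x 10 min            ~ 20000 min of work
 20000 min / 6 cores           ~  3333 min  ~ 55 h
 55 h / 3 h 48 of launch time  ~    15 OAR submissions of 4 h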

After 3 h 48 min, the OAR job stops launching new small jobs.
When all running small jobs are finished, it exits.
But as the OAR job is of type C<idempotent>,
OAR will re-submit it as long as not all small jobs have been executed...

This way, we give other users a chance to use the cluster!

In this last example, we use a moldable OAR job with idempotent
to reserve many cores for a short time or a few cores for a long time:

 oarsub -t idempotent -n test \
   -l /core=50,walltime=01:05:00 \
   -l /core=6,walltime=04:00:00 \
   --checkpoint 720 \
   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"

=head2 Signal, recursion and long jobs

By default, OAR uses the USR2 signal for checkpointing.
It's possible to change this with the option C<--kill>.

When used with long small jobs, waiting for them to finish at checkpoint time could take too long...
More than the walltime!
The option C<--transmit> can be used to checkpoint the small jobs themselves!
These long small jobs will then stop cleanly and will be restarted next time.

In the C<logtrace> file, these small jobs will have the status suspend.
They will be launched with the same command line at the next OAR run.

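In the version 1 log format, such a suspended job leaves lines like these
(the values are only illustrative):

 log version 1
 start job case01 / 12345 at 1450482228 oar job 71725 on node cl7n001
 suspend job case01 / 12345 at 1450482556 oar job 71725 on node cl7n001
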
Example: if you have 50 small jobs that each need 72 h on 1 core,
you can cut this into 24 h parts.

For this example, we suppose that each long job loop needs about 20 min...
So we send a checkpoint 30 min before the end of the process
to let C<oar-parexec> suspend the jobs already started.
After being checkpointed, C<oar-parexec> does not start any new small job.

 oarsub -t idempotent -n test \
   -l /core=6,walltime=24:00:00 \
   --checkpoint 1800 \
   --transmit \
   "oar-parexec -f ./subjob.list.txt -l ./subjob.list.log"

After 23 h 30 min, the OAR job stops launching new small jobs.
When all running small jobs are suspended, it exits.
But as the OAR job is of type C<idempotent>,
OAR will re-submit it as long as not all small jobs are finished...

=head2 Log format

=over

=item B<Version 2>

 log version 2
 start   subjob  1 pid 101468 at 1450482228 oarjob 71725 onnode cl7n001
 end     subjob  1 pid 101468 at 1450482556 oarjob 71725 onnode cl7n001 duration 657 status 0
 error   subjob  1 pid 101468 at 1450482556 oarjob 71725 onnode cl7n001 duration 657 status 0
 suspend subjob  1 pid 101468 at 1450482556 oarjob 71725 onnode cl7n001 duration 657 status 0

=item B<Version 1>

 log version 1
 start job 1 / 101468 at 1450482228 oar job 71725 on node cl7n001
 end   job 1 / 101468 at 1450482556 oar job 71725 on node cl7n001

=back

=head1 SEE ALSO

oar-dispatch, mpilauncher,
oarsh, oar-envsh, ssh


=head1 AUTHORS

Written by Gabriel Moreau, Grenoble - France


=head1 LICENSE AND COPYRIGHT

GPL version 2 or later and Perl equivalent

Copyright (C) 2011-2017 Gabriel Moreau / LEGI - CNRS UMR 5519 - France