This showcase describes several usage scenarios for submitting, monitoring and controlling jobs using the GridWay metascheduler. It shows how to use the GridWay CLI, DRMAA API, and remote BES interface, as well as an alternative way of monitoring jobs and resources through a Web interface.
$ grid-proxy-init
$ cat jt
EXECUTABLE = /bin/ls
$ gwsubmit -t jt
$ gwhost HID PRI OS ARCH MHZ %CPU MEM(F/T) DISK(F/T) NODES(U/F/T) LRMS HOSTNAME 0 1 Linux2.6.32.27- x86_64 2533 400 2007/2007 71G/71G 0/2/2 jobmanager-sge gt5-ige.drg.lrz.de 1 1 Linux2.6.18-238 x86_64 1995 600 2007/2007 927G/927G 0/44/44 jobmanager-pbs udo-gt01.grid.tu-dortmund.de 2 1 Linux2.6.18-194 x86_64 2993 800 2011/2011 3670M/3670M 0/8/8 jobmanager-fork ve.nikhef.nl 3 1 Linux2.6.18-194 x86_64 1600 100 1024/1024 40G/40G 0/1/1 jobmanager-fork gt1.epcc.ed.ac.uk 4 1 Linux2.6.18-194 x86_64 2328 400 512/512 95G/95G 0/1/1 jobmanager-pbs gt01.ige.psnc.pl 5 1 Linux2.6.32-36 x86_64 2000 400 2009/2009 4250G/4250G 0/4/4 jobmanager-fork gt-ige.utcluj.ro
$ gwhost 0 HID PRI OS ARCH MHZ %CPU MEM(F/T) DISK(F/T) NODES(U/F/T) LRMS HOSTNAME 0 1 Linux2.6.32.27- x86_64 2533 400 2007/2007 71G/71G 0/2/2 jobmanager-sge gt5-ige.drg.lrz.de QUEUENAME SL(F/T) WALLT CPUT COUNT MAXR MAXQ STATUS DISPATCH PRIORITY all.q 2/2 0 0 2 2 2 1 Batch Inte
$ gwhost -m 0 HID QNAME RANK PRI SLOTS HOSTNAME 0 all.q 0 1 2 gt5-ige.drg.lrz.de 1 dgiseq 0 1 44 udo-gt01.grid.tu-dortmund.de 1 dgipar 0 1 44 udo-gt01.grid.tu-dortmund.de 2 default 0 1 8 ve.nikhef.nl 3 default 0 1 1 gt1.epcc.ed.ac.uk 4 globus 0 1 1 gt01.ige.psnc.pl 5 batch 0 1 4 gt-ige.utcluj.ro
$ gwps USER JID DM EM START END EXEC XFER EXIT NAME HOST user:0 0 done ---- 10:38:24 10:39:04 0:00:21 0:00:08 0 jt gt5-ige.drg.lrz.de/jobmanager-sge user:0 1 done ---- 10:46:05 10:46:39 0:00:11 0:00:08 0 jt udo-gt01.grid.tu-dortmund.de/jobmanager-pbs user:0 2 wrap actv 10:48:39 --:--:-- 0:00:39 0:00:03 -- jt gt5-ige.drg.lrz.de/jobmanager-sge
$ gwhistory 0 HID START END PROLOG WRAPPER EPILOG MIGR REASON QUEUE HOST 0 10:38:35 10:39:04 0:00:03 0:00:21 0:00:05 0:00:00 ---- all.q gt5-ige.drg.lrz.de/jobmanager-sge
$ ls -l total 8 -rw-r--r-- 1 user user 21 2011-05-04 16:47 jt -rw-r--r-- 1 user user 0 2011-05-05 10:39 stderr.0 -rw-r--r-- 1 user user 72 2011-05-05 10:39 stdout.0 $ cat stdout.0 job.env stderr.execution stderr.wrapper stdout.execution stdout.wrapper
$ gcc -O3 pi.c -o pi
EXECUTABLE = pi
ARGUMENTS = $(TASK_ID) $(TOTAL_TASKS) 100000
STDOUT_FILE = stdout_file.$(TASK_ID)
STDERR_FILE = stderr_file.$(TASK_ID)
RANK = CPU_MHZ
$ gwsubmit -v -t pi.jt -n 4 ARRAY ID: 0 TASK JOB 0 3 1 4 2 5 3 6
$ gwwait -v -A 0 0 : 0 1 : 0 2 : 0 3 : 0
stdout_file.0 stdout_file.1 stdout_file.2 stdout_file.3
$ awk 'BEGIN {sum=0} {sum+=$1} END {printf "Pi is %0.12g\n", sum}' stdout_file.* Pi is 3.1415926536
int done = 0, n, myid, numprocs, i; double PI25DT = 3.141592653589793238462643; double mypi, pi, h, sum, x; double startwtime = 0.0, endwtime; int namelen; char processor_name[MPI_MAX_PROCESSOR_NAME]; MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
MPI_Get_processor_name(processor_name,&namelen); printf("Process %d on %s\n", myid, processor_name); n = 100000000; startwtime = MPI_Wtime(); h = 1.0 / (double) n; sum = 0.0; for (i = myid + 1; i <= n; i += numprocs) { x = h * ((double)i - 0.5); sum += 4.0 / (1.0 + x*x); } mypi = h * sum;
if (myid == 0) {
}
return 0;
$ mpicc -O3 mpi.c -o mpi
EXECUTABLE = mpi
STDOUT_FILE = stdout.${JOB_ID}
STDERR_FILE = stderr.${JOB_ID}
RANK = CPU_MHZ
TYPE = "mpi"
NP = 2
$ gwsubmit -t mpi.jt
$ cat A.jt
EXECUTABLE=/bin/echo
ARGUMENTS="$RANDOM"
STDOUT_FILE=out.A
$ cat B.jt
EXECUTABLE=sum
ARGUMENTS=out.A 1
INPUT_FILES=out.A
STDOUT_FILE=out.B
$ cat C.jt
EXECUTABLE=sum
ARGUMENTS=out.A 1
INPUT_FILES=out.A
STDOUT_FILE=out.C
$ cat D.jt
EXECUTABLE=sum
ARGUMENTS=out.B out.C
INPUT_FILES=out.B, out.C
STDOUT_FILE=out.D
$ gwsubmit -v -t A.jt JOB ID: 5
$ gwsubmit -v -t B.jt -d "5" JOB ID: 6 $ gwsubmit -v -t C.jt -d "5" JOB ID: 7
$ gwsubmit -t D.jt -d "6 7"
#include <stdio.h>#include <string.h> drmaa_job_template_t *jt; drmaa_attr_values_t *rusage; char error[DRMAA_ERROR_STRING_BUFFER]; char job_id[DRMAA_JOBNAME_BUFFER];char job_id_out[DRMAA_JOBNAME_BUFFER]; char attr_value[DRMAA_ATTR_BUFFER]; const char *args[3] = {"-l", "-a", NULL};int stat; /* ---- Init DRMAA ---- */ drmaa_init (NULL, error, DRMAA_ERROR_STRING_BUFFER-1);/* ---- Create the job template ---- *//* ---- Run a single job ---- */ /* ---- Wait until job execution ends ---- */ /* ---- Get values from DRMAA string vector (start time, end time, etc.) ---- */ while (drmaa_get_next_attr_value(rusage, attr_value, DRMAA_ATTR_BUFFER-1) != DRMAA_ERRNO_NO_MORE_ELEMENTS) /* ---- Destroy objects ---- */ drmaa_release_attr_values(rusage);
$ gcc -L $GW_LOCATION/lib -I $GW_LOCATION/include -o ls_drmaa ls_drmaa.c -ldrmaa
$ ./ls_drmaa
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "drmaa2.h"
#include <unistd.h>
/*
 * DRMAA2 example: runs "/bin/ls -l -a /tmp" as a single job in the job
 * session "mysession", waits for it to terminate and prints the job
 * information reported by the DRMAA2 library.
 *
 * Returns EXIT_SUCCESS when the job was submitted and its information was
 * retrieved, EXIT_FAILURE when any step could not be completed.
 */
int main(int argc, char *argv[])
{
    /* Sized with the same constant that is handed to getcwd() below, so the
     * length passed can never exceed the real buffer size. */
    char cwd[DRMAA2_ATTR_BUFFER];
    drmaa2_string jid;
    drmaa2_jstate jstate;
    drmaa2_string substate = NULL;
    drmaa2_string statestr;
    drmaa2_jsession js = NULL;
    drmaa2_jtemplate jt = NULL;
    drmaa2_j job = NULL;
    drmaa2_jinfo jinfo = NULL;
    int rc = EXIT_FAILURE;

    (void)argc;
    (void)argv;

    printf("==== Create a job session with given session name.\n");
    js = drmaa2_create_jsession("mysession", NULL);
    if (js == NULL) {
        fprintf(stderr, "Could not create the job session.\n");
        goto cleanup;
    }

    printf("==== Creating the job template.\n");
    jt = drmaa2_jtemplate_create();
    if (jt == NULL) {
        fprintf(stderr, "Could not create the job template.\n");
        goto cleanup;
    }

    jt->jobName = strdup("ht");
    jt->remoteCommand = strdup("/bin/ls");

    /* The job runs in the directory this program was started from. */
    if (getcwd(cwd, sizeof cwd) == NULL) {
        perror("getcwd");
        goto cleanup;
    }
    jt->workingDirectory = strdup(cwd);

    jt->args = drmaa2_list_create(DRMAA2_STRINGLIST, DRMAA2_UNSET_CALLBACK);
    drmaa2_list_add(jt->args, "-l");
    drmaa2_list_add(jt->args, "-a");
    drmaa2_list_add(jt->args, "/tmp");

    /* Output files carry the job id placeholder as suffix. */
    jt->outputPath = strdup("stdout."DRMAA2_GW_JOB_ID);
    jt->errorPath = strdup("stderr."DRMAA2_GW_JOB_ID);

    printf("==== Submitting the job.\n");
    job = drmaa2_jsession_run_job(js, jt);
    if (job == NULL) {
        fprintf(stderr, "Could not submit the job.\n");
        goto cleanup;
    }
    jid = drmaa2_j_get_id(job);

    /* NOTE(review): kept from the original -- it is not visible here whether
     * drmaa2_j_get_state() fills this buffer or replaces the pointer, so the
     * buffer is still provided and deliberately not freed. TODO confirm. */
    substate = malloc(50);
    if (substate == NULL) {
        fprintf(stderr, "Out of memory.\n");
        goto cleanup;
    }

    jstate = drmaa2_j_get_state(job, &substate);
    statestr = drmaa2_gw_strstatus(jstate);
    printf(" Job %s released.\n", jid);
    printf(" Job DRMAA2 state is: %s\n", statestr);
    printf(" Job Gridway substate is: %s\n", substate);

    printf(" Wait for job %s to finish.\n", jid);
    drmaa2_j_wait_terminated(job, DRMAA2_INFINITE_TIME);

    /* drmaa2_j_get_info() hands back the structure itself; allocating one
     * beforehand (as the original did) only leaked that allocation. */
    jinfo = drmaa2_j_get_info(job);
    if (jinfo == NULL) {
        fprintf(stderr, "Could not retrieve the job information.\n");
        goto cleanup;
    }

    printf(" Info about the job %s\n", jid);
    printf("\tjob->jobId=%s\n", jinfo->jobId);
    printf("\tjob->exitStatus=%d\n", jinfo->exitStatus);
    printf("\tjob->queueName=%s\n", jinfo->queueName);
    printf("\tjob->wallclockTime=%lld\n", (long long)jinfo->wallclockTime);
    printf("\tjob->cpuTime=%lld\n", (long long)jinfo->cpuTime);
    printf("\tjob->submissionTime=%lld\n", (long long)jinfo->submissionTime);
    printf("\tjob->dispatchTime=%lld\n", (long long)jinfo->dispatchTime);
    printf("\tjob->finishTime=%lld\n", (long long)jinfo->finishTime);
    rc = EXIT_SUCCESS;

cleanup:
    if (jinfo != NULL) {
        drmaa2_jinfo_free(&jinfo);
    }

    printf("==== Destroying job template and job session.\n");
    if (jt != NULL) {
        drmaa2_jtemplate_free(&jt);
    }
    if (js != NULL) {
        drmaa2_destroy_jsession("mysession");
        drmaa2_jsession_free(&js);
    }

    printf("==== Exiting now.\n");
    return rc;
}
$ gcc -L $GW_LOCATION/lib -I $GW_LOCATION/include -o ls_drmaa2 ls_drmaa2.c -ldrmaa2
$ ./ls_drmaa2
<jsdl:JobDefinition xmlns:jsdl="http://schemas.ggf.org/jsdl/2005/11/jsdl" xmlns:jsdl-posix="http://schemas.ggf.org/jsdl/2005/11/jsdl-posix" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> <jsdl:JobDescription> <jsdl:JobIdentification> <jsdl-posix:Error>stderr</jsdl-posix:Error> </jsdl-posix:POSIXApplication> </jsdl:Application> <jsdl:DataStaging> </jsdl:Target> </jsdl:DataStaging> <jsdl:DataStaging></jsdl:DataStaging> </jsdl:JobDescription></jsdl:JobDefinition>
$ ./gridsam.sh BESCreateActivity -s "https://gridway.fdi.ucm.es:8443/gridsam/services/bes" -j ls.jsdl > job_id
$ cat job_id
$ watch -n 5 ./gridsam.sh BESGetActivityStatuses -s "https://gridway.fdi.ucm.es:8443/gridsam/services/bes" -file job_id
|


