The attachment this time.
Steve
On Fri, 14 Mar 2003, Steve Traylen wrote:
> Presently the working directory for jobs where the sandbox
> is fetched to is in /home/wpsix001
>
> Since it is the pbs job that does this fetching this
> can be done on the WNs local disk instead.
>
> Obviously this makes a lot of sense if all your nodes are
> identical so you have just as much space on the WNs to
> share amongst 1 or 2 jobs rather than every user.
>
> Also network traffic is reduced. Since doing this here, the
> CE's hard disk light is no longer constantly red.
>
> You probably do not want to do this if you are short
> of space on the WN's disk, of course.
>
> The file to be replaced is
> /opt/globus/libexec/globus-script-pbs-submit
> on the CE.
>
> This is the script that builds a wrapper script for the incoming
> job and pipes that into PBS.
>
> I have attached the replacement file.
> The extra lines in there are:
>
> #SMT next line
> grami_directory=/tmp/$USER$grami_directory
>
> and
>
> #SMT next 3 lines
> echo "if [ ! -d $grami_directory ]; then" >> $PBS_JOB_SCRIPT
> echo " /bin/mkdir -p $grami_directory" >> $PBS_JOB_SCRIPT
> echo "fi" >> $PBS_JOB_SCRIPT
>
> The default tmpwatch period for /tmp is 240 hours, and
> that must not be shorter than your wall_max_times for
> any queue.
>
> Steve
>
>
>
>
>
>
> Steve
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
> --
> Steve Traylen
> [log in to unmask]
> http://www.gridpp.ac.uk/
>
--
Steve Traylen
[log in to unmask]
http://www.gridpp.ac.uk/
#! /bin/sh -f
#
# Globus Job Manager PBS interface script for submitting a job
#
# This script builds a shell job script which is supplied as input
# to the PBS qsub command. The script is built based on information
# obtained from a file passed as the script's argument. This file
# contains a list of environment variables which are set by way
# of "sourcing" the file from this script. The environment variables
# set as a result of this action are then used to characterize the
# user's job request. Once the job script has been submitted the
# PBS job id is appended to the file passed as an argument to this
# script to be used by other scripts at a later time.
# The temporary job script is created for the submission and then removed
# at the end of this script.
# Pull in the Globus shell environment. globus_source and ${libexecdir} are
# not defined in this file; presumably the initializer provides them
# (NOTE(review): confirm against globus-script-initializer).
. "${GLOBUS_LOCATION}/libexec/globus-script-initializer"

# Helper libraries used by this script: protocol constants, shell tools and
# job-manager tools.
globus_source "${libexecdir}/globus-gram-protocol-constants.sh"
globus_source "${libexecdir}/globus-sh-tools.sh"
globus_source "${libexecdir}/globus-gram-job-manager-tools.sh"
# External commands. Each one may be overridden through the environment
# variable named in its expansion; when that variable is unset the bare
# command name is used and resolved through PATH.
awk="${GLOBUS_SH_AWK-awk}"
rm="${GLOBUS_SH_RM-rm}"
mpirun="${GLOBUS_GRAM_JOB_MANAGER_MPIRUN-mpirun}"
qstat="${GLOBUS_GRAM_JOB_MANAGER_QSTAT-qstat}"
qsub="${GLOBUS_GRAM_JOB_MANAGER_QSUB-qsub}"

# File name to be used for the temporary job script (made unique with the
# PID of this shell), plus the files that capture qsub's stdout and stderr.
PBS_JOB_SCRIPT="${local_tmpdir}/pbs_job_script.$$"
PBS_JOB_ERR="${PBS_JOB_SCRIPT}.err"
PBS_JOB_OUT="${PBS_JOB_SCRIPT}.out"

# When true, the cluster variants (machinefile / rsh / ctrans lines) are
# generated further down in this script.
is_cluster=false

# First positional parameter: the argument file describing the job.
arg_file="$1"
# M&E: keep a private trace of this invocation (the argument file now, the
# generated job script later) for future reference.
# NOTE(review): the trace uses a predictable name under /tmp and nothing
# visible in this script removes it.
MaEdate=`date +%y%m%d%H%M%S`
MandElog=/tmp/globus-script-pbs-submit.log.$MaEdate.$$
echo "============ $arg_file ============" > "$MandElog"

# Check for the argument file. If it does not exist then return with an
# error immediately.
# The expansion is quoted on purpose: unquoted, an empty value reduces the
# test to `[ ! -f ]`, which is false, so a missing argument was accepted.
# The check also runs before the copy below so that `cat` is never started
# without a file name (it would then read stdin).
if [ ! -f "$arg_file" ] ; then
    echo GRAM_SCRIPT_ERROR:$GLOBUS_GRAM_PROTOCOL_ERROR_BAD_SCRIPT_ARG_FILE
    exit 1
fi

# Append the argument file's contents to the trace.
cat "$arg_file" >> "$MandElog"
# Load the job description: sourcing the argument file sets the variables
# that define all the job arguments (the grami_* names used below).
. "$arg_file"

# M&E: move the job's working directory under /tmp (a local dir, for the
# moment). This is meant to be replaced by a localization script.
grami_directory="/tmp${grami_directory}"
echo "=== grami_directory is now ${grami_directory}" >> "$MandElog"
# Select the debug writer. Debug output is off when the log file is
# /dev/null. An unset or empty log file name is treated the same way, so
# that the `>> $grami_logfile` redirections used throughout this script
# always have a valid target.
if [ -z "$grami_logfile" ] ; then
    grami_logfile=/dev/null
fi
if [ "$grami_logfile" = "/dev/null" ] ; then
    DEBUG_ECHO=:
else
    DEBUG_ECHO=echo
fi

# Write initial debug information to logfile
$DEBUG_ECHO in gram_script_pbs_submit >> "$grami_logfile"
$DEBUG_ECHO "" >> "$grami_logfile"
$DEBUG_ECHO ============================================ >> "$grami_logfile"
$DEBUG_ECHO "JM_SCRIPT: ====argument file contents====" >> "$grami_logfile"
if [ "$DEBUG_ECHO" = "echo" ] ; then
    cat "$arg_file" >> "$grami_logfile"
fi
$DEBUG_ECHO "JM_SCRIPT: ====argument file contents====" >> "$grami_logfile"
# This blank line previously had no redirection and went to stdout, the
# stream on which this script prints its GRAM_SCRIPT_* result lines.
$DEBUG_ECHO "" >> "$grami_logfile"
# The following several lines of code can be used to perform 2
# additional error checks prior to job submission. The first check is
# for the existence of the directory which the user requested
# be the working directory. If it does not exist the script
# returns an error and the job is not submitted. The second check
# is for existence of the file requested by the user to be used for
# stdin. If the file does not exist the script returns an error and
# the job is not submitted.
#
# These checks are only valid if performed on the file system to be used
# by the host on which the job will run. But, this file system may not
# be shared with host from which the job is submitted. Therefore, the
# check does not make sense. If however the host from which the job
# will be submitted (i.e. the host running the globus gatekeeper)
# shares file systems with all the hosts which may potentially
# run the job these checks can be used. In order to have the job
# manager perform these checks the following 2 sections of code
# should *not* be commented out.
# Check for existence of directory
# Check for non supported parameters here. That is, if any of the RSL
# parameters which PBS can not support have been requested return an error.
$DEBUG_ECHO testing for unsupported parameters >> $grami_logfile
$DEBUG_ECHO testing for queue attribute specification >> $grami_logfile
if [ -n "${grami_queue}" ] ; then
    $DEBUG_ECHO "testing for existance of PBS queue [$grami_queue]" >> $grami_logfile
    # The listing printed by qstat is captured only to keep it off stdout;
    # the exit status alone decides whether the queue exists.
    if status=`${qstat} -Q $grami_queue` ; then
        $DEBUG_ECHO "PBS queue [$grami_queue] found" >> $grami_logfile
    else
        # The message must be quoted: it contains a `;`, which unquoted
        # ended the command early, sending the first half to stdout and
        # running "exiting" as a command.
        $DEBUG_ECHO "PBS queue [$grami_queue] DOES NOT exist; exiting with exit code 1" >> $grami_logfile
        echo GRAM_SCRIPT_ERROR:$GLOBUS_GRAM_PROTOCOL_ERROR_INVALID_QUEUE
        exit 1
    fi
else
    $DEBUG_ECHO no queue attribute specified >> $grami_logfile
fi
# Determine job request type
#
#   grami_job_type       pbs_jobtype
#   ----------------     -----------
#   0 = mpi        --->  mpi
#   1 = single     --->  single
#   2 = multiple   --->  multiple
#   3 = condor     --->  ERROR (not supported by this script)
#
# Any other value (including an empty one) is reported as an invalid
# jobtype. Both error cases print a GRAM_SCRIPT_ERROR line and exit 1.
$DEBUG_ECHO "JM_SCRIPT: testing jobtype" >> $grami_logfile
case "$grami_job_type" in
    0)
        pbs_jobtype="mpi"
        ;;
    1)
        pbs_jobtype="single"
        ;;
    2)
        pbs_jobtype="multiple"
        ;;
    3)
        # The redirection belongs on the same line as the message; wrapped
        # onto its own line it left the message going to stdout.
        $DEBUG_ECHO "JM_SCRIPT: ERROR: jobtype parameter not supported " >> $grami_logfile
        echo "GRAM_SCRIPT_ERROR:$GLOBUS_GRAM_PROTOCOL_ERROR_JOBTYPE_NOT_SUPPORTED"
        exit 1
        ;;
    *)
        $DEBUG_ECHO "JM_SCRIPT: error: invalid jobtype parameter" >> $grami_logfile
        echo GRAM_SCRIPT_ERROR:$GLOBUS_GRAM_PROTOCOL_ERROR_INVALID_JOBTYPE
        exit 1
        ;;
esac
# Per-process CPU time limit (minutes).
# max_cpu_time wins when it is non-zero; otherwise max_time (to be
# deprecated in 1.2) is used; when both are zero cpu_time is 0, meaning the
# queue default applies.
$DEBUG_ECHO testing for per process cpu time limit >> $grami_logfile
if [ "$grami_max_cpu_time" -eq 0 ] ; then
    if [ "$grami_max_time" -eq 0 ] ; then
        cpu_time=0
        cpu_note="No per process cpu time specified, using [queue default] per process cpu time"
    else
        cpu_time="$grami_max_time"
        cpu_note="using max_time, [$cpu_time] minutes, for max per process cpu time"
    fi
else
    cpu_time="$grami_max_cpu_time"
    cpu_note="using max_cpu_time, [$cpu_time] minutes, for max per process cpu time"
fi
$DEBUG_ECHO "$cpu_note" >> $grami_logfile

# Wall time limit (minutes). All processes run at the same time, so this is
# the time needed for the longest process to complete; 0 means queue default.
$DEBUG_ECHO testing for process wall time limit >> $grami_logfile
if [ "$grami_max_wall_time" -eq 0 ] ; then
    wall_time=0
    wall_note="No process wall time specified, using [queue default] process wall time"
else
    wall_time="$grami_max_wall_time"
    wall_note="using [$wall_time] minutes for max wall time"
fi
$DEBUG_ECHO "$wall_note" >> $grami_logfile
# Start building job script
$DEBUG_ECHO starting to build PBS job script >> $grami_logfile

# Everything echoed inside the braces becomes the head of the job script:
# the file is truncated once and the "#PBS" directives are written in order.
{
    echo "# PBS batch job script built by Globus job manager"
    echo ""

    # Optional queue and project (account) selection.
    if [ -n "${grami_queue}" ] ; then
        echo "#PBS -q $grami_queue"
    fi
    if [ -n "${grami_project}" ] ; then
        echo "#PBS -A $grami_project"
    fi

    # CPU limits are written as "<minutes>:00". For jobtype "multiple" the
    # total is the per-process limit times the process count.
    if [ $cpu_time -ne 0 ] ; then
        if [ $pbs_jobtype = "multiple" ] ; then
            total_cput=$(expr "$cpu_time" \* "$grami_count")
        else
            total_cput=$cpu_time
        fi
        echo "#PBS -l pcput=$cpu_time:00"
        echo "#PBS -l cput=$total_cput:00"
    fi

    if [ $wall_time -ne 0 ] ; then
        echo "#PBS -l walltime=$wall_time:00"
    fi

    # User must specify per executable memory. If job type is multiple we
    # run multiple instances of the executable and need to multiply the
    # memory size by the process count.
    if [ "${grami_max_memory}" -ne 0 ] ; then
        if [ $pbs_jobtype = "multiple" ] ; then
            max_memory=$(expr "$grami_max_memory" \* "$grami_count")
        else
            max_memory=$grami_max_memory
        fi
        echo "#PBS -l mem=${max_memory}mb"
    fi

    echo "#PBS -o $grami_stdout"
    echo "#PBS -e $grami_stderr"
    echo "#PBS -l ncpus=$grami_count"
} > $PBS_JOB_SCRIPT

# Uncomment the following lines if your pbs allows node count specification
#if [ $grami_host_count -ne 0 ] ; then
# echo "#PBS -l nodes=$grami_host_count" >> $PBS_JOB_SCRIPT
#fi
# Check for environment variables
#
# grami_env holds variable names and values as alternating, separate words.
# They are joined here into the "name=value," list used for "#PBS -v".
# A trailing name without a value is appended as the bare name.
#
# NOTE(review): eval re-parses text that comes from the argument file, and
# `set --` replaces this script's positional parameters.
new_grami_env=""
if [ -n "${grami_env}" ] ; then
    eval set -- ${grami_env}
    while [ "$#" -gt 0 ] ; do
        # variable name
        new_grami_env="${new_grami_env}$1"
        shift
        # its value, when one follows
        if [ "$#" -gt 0 ] ; then
            new_grami_env="${new_grami_env}=$1,"
            shift
        fi
    done
fi
echo "#PBS -v $new_grami_env" >> $PBS_JOB_SCRIPT

# Program arguments: re-parsed the same way, then flattened into one
# space-separated string.
new_grami_args=""
if [ -n "${grami_args}" ] ; then
    eval set -- ${grami_args}
    new_grami_args="$*"
fi
# Determine directory to be used as working directory
echo "" >> $PBS_JOB_SCRIPT
echo "# Changing to directory as requested by user" >> $PBS_JOB_SCRIPT

# M&E Create the local directory if needed.
# The working directory is the original path with /tmp prepended, so its
# parent directories usually do not exist on the node yet; mkdir needs -p
# to create the whole chain (plain mkdir fails on a missing parent).
echo "" >> $PBS_JOB_SCRIPT
echo "if [ ! -d $grami_directory ]; then" >> $PBS_JOB_SCRIPT
echo " /bin/mkdir -p $grami_directory" >> $PBS_JOB_SCRIPT
echo "fi" >> $PBS_JOB_SCRIPT
echo "" >> $PBS_JOB_SCRIPT
echo "cd $grami_directory" >> $PBS_JOB_SCRIPT
# Determining job request type
#
# Appends the command line(s) that actually run the user's program:
#   mpi      - one mpirun invocation (with the PBS node file as machinefile
#              when is_cluster is true)
#   multiple - grami_count background copies followed by "wait" (started
#              through rsh on every host of the node file when is_cluster)
#   other    - a single plain invocation
echo "" >> $PBS_JOB_SCRIPT
echo "# Executing job as requested by user" >> $PBS_JOB_SCRIPT
echo "" >> $PBS_JOB_SCRIPT
if [ "$pbs_jobtype" = "mpi" ] ; then
    # The last segment of each echo below had lost its opening quote. The
    # file still parsed, but the `else` was swallowed into a string, so a
    # non-cluster mpi job got no command line at all and the cluster one
    # tried to read stdin from a garbage file name.
    if ${is_cluster} ; then
        echo "${mpirun} -np $grami_count "\
             "-machinefile \$PBS_NODEFILE "\
             "$grami_program $new_grami_args "\
             " < $grami_stdin" >> $PBS_JOB_SCRIPT
    else
        echo "${mpirun} -np $grami_count "\
             "$grami_program $new_grami_args "\
             " < $grami_stdin" >> $PBS_JOB_SCRIPT
    fi
elif [ "$pbs_jobtype" = "multiple" ] ; then
    if ${is_cluster} ; then
        # Single-quoted text is copied verbatim into the job script and is
        # expanded only when the job runs.
        # NOTE(review): pbs_new_grami_env is not assigned anywhere in this
        # script, so it expands to nothing here.
        echo 'counter=1' >> $PBS_JOB_SCRIPT
        echo 'hosts=`cat $PBS_NODEFILE`' >> $PBS_JOB_SCRIPT
        echo 'for host in $hosts; do' >> $PBS_JOB_SCRIPT
        echo ' /usr/bin/rsh $host "'"cd $grami_directory; $pbs_new_grami_env $grami_program $new_grami_args < $grami_stdin "'"&' >> $PBS_JOB_SCRIPT
        echo ' counter=`expr $counter + 1`' >> $PBS_JOB_SCRIPT
        echo 'done' >> $PBS_JOB_SCRIPT
        echo 'wait' >> $PBS_JOB_SCRIPT
    else
        counter=0
        while [ "$counter" -lt "$grami_count" ] ; do
            echo "$grami_program $new_grami_args < $grami_stdin &" \
                >> $PBS_JOB_SCRIPT
            counter=`expr $counter + 1`
        done
        echo "wait" >> $PBS_JOB_SCRIPT
    fi
else
    echo "$grami_program $new_grami_args < $grami_stdin" >> $PBS_JOB_SCRIPT
fi
# Cluster mode only: append the two "#ctrans" comment lines naming the
# working directory (nfs_in first, then nfs_out).
if ${is_cluster} ; then
    for ctrans_dir in nfs_in nfs_out ; do
        echo "#ctrans ${ctrans_dir} ${grami_directory}" >> $PBS_JOB_SCRIPT
    done
fi
$DEBUG_ECHO PBS job script successfully built >> $grami_logfile
$DEBUG_ECHO submitting PBS job script >> $grami_logfile

# M&E Save job script for future reference
echo "====== $PBS_JOB_SCRIPT ======" >> $MandElog
cat $PBS_JOB_SCRIPT >> $MandElog

# Execute qsub command: the job script is fed on stdin, and qsub's stdout
# and stderr are captured in separate files.
if ${qsub} < $PBS_JOB_SCRIPT 1>$PBS_JOB_OUT 2>$PBS_JOB_ERR ; then
    # The job id is the first two dot-separated fields of what qsub printed.
    # The awk selected at the top of the script (GLOBUS_SH_AWK override) is
    # used here, as for the other configurable tools; plain awk is the
    # fallback.
    job_id=`${awk:-awk} '{split($0,field,".");print field[1]"."field[2]}' $PBS_JOB_OUT`
    if [ "${job_id}" = "" ] ; then
        # No exit here: execution continues with the failure handling that
        # follows this if/else.
        $DEBUG_ECHO "job *NOT* submitted successfully!" >> $grami_logfile
        $DEBUG_ECHO "failed getting the pbs jobid for the job!" >> $grami_logfile
        echo "GRAM_SCRIPT_ERROR:$GLOBUS_GRAM_PROTOCOL_ERROR_GETTING_JOBID"
    else
        # Record the id in the argument file for use by other scripts later.
        echo "grami_job_id=$job_id" >> $arg_file
        echo "GRAM_SCRIPT_JOB_ID:$job_id"
        $DEBUG_ECHO "job submitted successfully!" >> $grami_logfile
        $DEBUG_ECHO "returning job state: 1" >> $grami_logfile
        # NOTE(review): the job id line is printed a second time here; kept
        # as is because the reader of this output is not visible from here.
        echo "GRAM_SCRIPT_JOB_ID:"$job_id
        echo "GRAM_SCRIPT_SUCCESS:"$GLOBUS_GRAM_PROTOCOL_JOB_STATE_PENDING
        # Remove temporary job script file
        ${rm} -f $PBS_JOB_SCRIPT $PBS_JOB_OUT $PBS_JOB_ERR
        $DEBUG_ECHO "exiting gram_script_pbs_submit\n\n" >> $grami_logfile
        exit 0
    fi
else
    # NOTE(review): exits with status 0 and leaves the temporary files in
    # place when qsub itself fails; the failure is reported only through
    # the GRAM_SCRIPT_ERROR line. Confirm this is intended.
    $DEBUG_ECHO "job *NOT* submitted successfully!" >> $grami_logfile
    echo "GRAM_SCRIPT_ERROR:$GLOBUS_GRAM_PROTOCOL_ERROR_JOB_EXECUTION_FAILED"
    exit 0
fi
# We got problems if we get this far: every other path above has already
# exited, so this is reached only when qsub succeeded but no job id could
# be extracted from its output.

# Append whatever the scheduler wrote on stderr to the job's stderr file.
echo "" >> "${grami_stderr}"
echo "-------- output from scheduler submission --------" >> "${grami_stderr}"
cat "${PBS_JOB_ERR}" >> "${grami_stderr}"
echo "--------------------------------------------------" >> "${grami_stderr}"
echo "" >> "${grami_stderr}"

# Remove temporary job script file (-f, as in the success path, so that a
# file that was never created does not produce an error).
${rm} -f "$PBS_JOB_SCRIPT" "$PBS_JOB_OUT" "$PBS_JOB_ERR"

$DEBUG_ECHO "exiting gram_script_pbs_submit" >> $grami_logfile
$DEBUG_ECHO "" >> $grami_logfile
$DEBUG_ECHO "" >> $grami_logfile
exit 1