Piotr Nyczyk wrote:
> Test jobs submitted at 9:10.
>
> Total sites: 59
> Sites OK: 47
>
>
> ------------------------------------------------------------------------
>
> Current test is 2004-07-09_09.10.14
>
> SITE cclcgceli01.in2p3.fr
> SITE ce1-gla.scotgrid.ac.uk
> SITE lcg2-ce.physik.rwth-aachen.de
> JOB SUBMISSION FAILED!!!
> - reason = Cannot read JobWrapper output, both from Condor and from Maradona.
>...
Piotr,
I was looking at a similar problem reported by Claude Charlot (CMS). It
seems that the problem I reported a couple of weeks ago is still there.
The job wrapper generated by the Resource Broker is not correct: it
lacks the host name the Maradona output needs to be transfered to.
While the job actually executes correctly in the worker node, the
ouotput sandbox is not correctly transfered back to the resource broker:
the globus-url-command siply fails due to a syntax error.
At the time of my first report, David Smith had spotted a configuration
problem in the RB (related to the hostname). This time the problem is
observed with two different RBs:
lxn1188.cern.ch (used by dteam monitoring jobs)
lxn1185.cern.ch (used by Claude for his CMS jobs)
Please find attached the job wrapper generated by the RB. The line
number 7 lacks the name of the host. The other file is the standard
error of the job as seen by our LRMS.
What is surprising is that we had not observed problems with these two
RBs in the past. Has something changed with them? Do other sites observe
a similar problem?
Regards,
--
________________________________________________________________________
Fabio Hernandez
Grid Computing Team Leader
IN2P3/CNRS Computing Centre - Lyon (FRANCE)
http://cc.in2p3.fr
Tel. +33 4.78.93.08.80 | Fax. +33 4.72.69.41.70 | e-mail: [log in to unmask]
ERROR: can not parse destURL "gsiftp:///var/edgwl/SandboxDir/Au/https_3a_2f_2flxn1188.cern.ch_3a9000_2fAuzfCB-9ee6RRrxBtZNcpw/Maradona.output"
Syntax: globus-url-copy [-help | -usage] [-version[s]] [-vb] [-dbg] [-b | -a]
[-s <subject>] [-ds <subject>] [-ss <subject>]
[-tcp-bs <size>] [-bs <size>] [-p <parallelism>]
[-notpt] [-nodcau]
sourceURL destURL
Use -help to display full usage
#!/bin/sh
doExit()
{
stat=$1
globus-url-copy "file://${workdir}/${maradona}" "gsiftp:///var/edgwl/SandboxDir/Au/https_3a_2f_2flxn1188.cern.ch_3a9000_2fAuzfCB-9ee6RRrxBtZNcpw/Maradona.output"
cd ..
rm -rf ${newdir}
exit $stat
}
if [ -z "${EDG_WL_LOG_DESTINATION}" ]; then
export EDG_WL_LOG_DESTINATION="cclcgceli01.in2p3.fr"
fi
export EDG_WL_JOBID="https://lxn1188.cern.ch:9000/AuzfCB-9ee6RRrxBtZNcpw"
export EDG_WL_SEQUENCE_CODE="$1"
shift
if [ -z "${EDG_WL_LOCATION}" ]; then
export EDG_WL_LOCATION="${EDG_LOCATION:-/opt/edg}"
fi
newdir=https_3a_2f_2flxn1188.cern.ch_3a9000_2fAuzfCB-9ee6RRrxBtZNcpw
mkdir ${newdir}
cd ${newdir}
if [ ! -w . ]; then
echo "Working directory not writable"
EDG_WL_SEQUENCE_CODE=`$EDG_WL_LOCATION/bin/edg-wl-logev \
--jobid="$EDG_WL_JOBID" \
--source=LRMS \
--sequence="$EDG_WL_SEQUENCE_CODE"\
--event="Done"\
--reason="Working directory not writable!"\
--status_code=FAILED\
--exit_code=0\
|| echo $EDG_WL_SEQUENCE_CODE`
export EDG_WL_SEQUENCE_CODE
exit 1
fi
workdir="`pwd`"
export EDG_WL_RB_BROKERINFO="`pwd`/.BrokerInfo"
maradona=".maradona.https_3a_2f_2flxn1188.cern.ch_3a9000_2fAuzfCB-9ee6RRrxBtZNcpw.output"
touch "${maradona}"
if [ -z "${GLOBUS_LOCATION}" ]; then
echo "GLOBUS_LOCATION undefined"
echo "GLOBUS_LOCATION undefined" >> "${maradona}"
EDG_WL_SEQUENCE_CODE=`$EDG_WL_LOCATION/bin/edg-wl-logev \
--jobid="$EDG_WL_JOBID" \
--source=LRMS \
--sequence="$EDG_WL_SEQUENCE_CODE"\
--event="Done"\
--reason="GLOBUS_LOCATION undefined"\
--status_code=FAILED\
--exit_code=0\
|| echo $EDG_WL_SEQUENCE_CODE`
export EDG_WL_SEQUENCE_CODE
doExit 1
elif [ -r "${GLOBUS_LOCATION}/etc/globus-user-env.sh" ]; then
. ${GLOBUS_LOCATION}/etc/globus-user-env.sh
else
echo "${GLOBUS_LOCATION}/etc/globus-user-env.sh not found or unreadable"
echo "${GLOBUS_LOCATION}/etc/globus-user-env.sh not found or unreadable" >> "${maradona}"
EDG_WL_SEQUENCE_CODE=`$EDG_WL_LOCATION/bin/edg-wl-logev \
--jobid="$EDG_WL_JOBID" \
--source=LRMS \
--sequence="$EDG_WL_SEQUENCE_CODE"\
--event="Done"\
--reason="${GLOBUS_LOCATION}/etc/globus-user-env.sh not found or unreadable"\
--status_code=FAILED\
--exit_code=0\
|| echo $EDG_WL_SEQUENCE_CODE`
export EDG_WL_SEQUENCE_CODE
doExit 1
fi
umask 022
for f in; do
globus-url-copy gsiftp://lxn1188.cern.ch/var/edgwl/SandboxDir/Au/https_3a_2f_2flxn1188.cern.ch_3a9000_2fAuzfCB-9ee6RRrxBtZNcpw/input/${f} file://${workdir}/${f}
if [ $? != 0 ]; then
echo "Cannot download ${f} from gsiftp://lxn1188.cern.ch/var/edgwl/SandboxDir/Au/https_3a_2f_2flxn1188.cern.ch_3a9000_2fAuzfCB-9ee6RRrxBtZNcpw/input/"
echo "Cannot download ${f} from gsiftp://lxn1188.cern.ch/var/edgwl/SandboxDir/Au/https_3a_2f_2flxn1188.cern.ch_3a9000_2fAuzfCB-9ee6RRrxBtZNcpw/input/" >> "${maradona}"
EDG_WL_SEQUENCE_CODE=`$EDG_WL_LOCATION/bin/edg-wl-logev \
--jobid="$EDG_WL_JOBID" \
--source=LRMS \
--sequence="$EDG_WL_SEQUENCE_CODE"\
--event="Done"\
--reason="Cannot download ${f} from gsiftp://lxn1188.cern.ch/var/edgwl/SandboxDir/Au/https_3a_2f_2flxn1188.cern.ch_3a9000_2fAuzfCB-9ee6RRrxBtZNcpw/input/"\
--status_code=FAILED\
--exit_code=0\
|| echo $EDG_WL_SEQUENCE_CODE`
export EDG_WL_SEQUENCE_CODE
doExit 1
fi
done
if [ -e "/bin/echo" ]; then
chmod +x "/bin/echo" 2> /dev/null
else
echo "/bin/echo not found or unreadable"
echo "/bin/echo not found or unreadable" >> "${maradona}"
EDG_WL_SEQUENCE_CODE=`$EDG_WL_LOCATION/bin/edg-wl-logev \
--jobid="$EDG_WL_JOBID" \
--source=LRMS \
--sequence="$EDG_WL_SEQUENCE_CODE"\
--event="Done"\
--reason="/bin/echo not found or unreadable!"\
--status_code=FAILED\
--exit_code=0\
|| echo $EDG_WL_SEQUENCE_CODE`
export EDG_WL_SEQUENCE_CODE
doExit 1
fi
host=`hostname -f`
EDG_WL_SEQUENCE_CODE=`$EDG_WL_LOCATION/bin/edg-wl-logev \
--jobid="$EDG_WL_JOBID" \
--source=LRMS \
--sequence="$EDG_WL_SEQUENCE_CODE"\
--event="Running"\
--node=$host\
|| echo $EDG_WL_SEQUENCE_CODE`
export EDG_WL_SEQUENCE_CODE
"/bin/echo" [OK] $* > "std.out" 2> "std.err"
status=$?
echo "job exit status = ${status}"
echo "job exit status = ${status}" >> "${maradona}"
error=0
for f in "std.out" "std.err"; do
if [ -r "${f}" ]; then
output=`dirname $f`
if [ "x${output}" = "x." ]; then
ff=$f
else
ff=${f##*/}
fi
globus-url-copy file://${workdir}/${f} gsiftp://lxn1188.cern.ch/var/edgwl/SandboxDir/Au/https_3a_2f_2flxn1188.cern.ch_3a9000_2fAuzfCB-9ee6RRrxBtZNcpw/output/${ff}
if [ $? != 0 ]; then
echo "Cannot upload ${f} into gsiftp://lxn1188.cern.ch/var/edgwl/SandboxDir/Au/https_3a_2f_2flxn1188.cern.ch_3a9000_2fAuzfCB-9ee6RRrxBtZNcpw/output/"
echo "Cannot upload ${f} into gsiftp://lxn1188.cern.ch/var/edgwl/SandboxDir/Au/https_3a_2f_2flxn1188.cern.ch_3a9000_2fAuzfCB-9ee6RRrxBtZNcpw/output/" >> "${maradona}"
EDG_WL_SEQUENCE_CODE=`$EDG_WL_LOCATION/bin/edg-wl-logev \
--jobid="$EDG_WL_JOBID" \
--source=LRMS \
--sequence="$EDG_WL_SEQUENCE_CODE"\
--event="Done"\
--reason="Cannot upload ${f} into gsiftp://lxn1188.cern.ch/var/edgwl/SandboxDir/Au/https_3a_2f_2flxn1188.cern.ch_3a9000_2fAuzfCB-9ee6RRrxBtZNcpw/output/"\
--status_code=FAILED\
--exit_code=0\
|| echo $EDG_WL_SEQUENCE_CODE`
export EDG_WL_SEQUENCE_CODE
doExit 1
fi
fi
done
EDG_WL_SEQUENCE_CODE=`$EDG_WL_LOCATION/bin/edg-wl-logev \
--jobid="$EDG_WL_JOBID" \
--source=LRMS \
--sequence="$EDG_WL_SEQUENCE_CODE"\
--event="Done"\
--status_code=OK\
--exit_code=$status\
|| echo $EDG_WL_SEQUENCE_CODE`
export EDG_WL_SEQUENCE_CODE
doExit 0
|