Hi,
I've not seen this for a while: all jobs submitted to the RB
remain in the "Waiting" state forever.
It had apparently gone away with recent versions of the resource broker
code.
I've restarted every service there, making sure they really were dead first, but
have had no joy.
This is the LCG2_3_0 Resource Broker on SL3.
A typical job is shown below.
Steve
**********************************************************************
LOGGING INFORMATION:
Printing info for the Job : https://lcgrb01.gridpp.rl.ac.uk:9000/sReIPimKBNG7CtPkC9woxA
---
Event: RegJob
- host = lcgui01.gridpp.rl.ac.uk
- level = SYSTEM
- ns = lcgrb01.gridpp.rl.ac.uk:7772
- nsubjobs = 0
- priority = asynchronous
- seed = uLU0BArrdV98O41PLThJ5Q
- seqcode = UI=000001:NS=0000000000:WM=000000:BH=0000000000:JSS=000000:LM=000000:LRMS=000000:APP=000000
- source = UserInterface
- timestamp = Fri Mar 4 11:39:20 2005
- user = /C=UK/O=eScience/OU=CLRC/L=RAL/CN=steve traylen
- jdl =
[
requirements = ( RegExp(".*ac.uk.*",other.GlueCEUniqueID) ) && ( other.GlueCEStateStatus == "Production" );
RetryCount = 0;
edg_jobid = "https://lcgrb01.gridpp.rl.ac.uk:9000/sReIPimKBNG7CtPkC9woxA";
MyProxyServer = "lcgrbp01.gridpp.rl.ac.uk";
JobType = "normal";
Executable = "/bin/pwd";
StdOutput = "hello.out";
OutputSandbox = { "hello.out","hello.err" };
VirtualOrganisation = "dteam";
rank = -other.GlueCEStateEstimatedResponseTime;
Type = "job";
StdError = "hello.err";
DefaultRank = -other.GlueCEStateEstimatedResponseTime
]
---
Event: Transfer
- dest_host = lcgrb01.gridpp.rl.ac.uk
- dest_instance = lcgrb01.gridpp.rl.ac.uk:7772
- destination = NetworkServer
- host = lcgui01.gridpp.rl.ac.uk
- level = SYSTEM
- priority = asynchronous
- result = START
- seqcode = UI=000002:NS=0000000000:WM=000000:BH=0000000000:JSS=000000:LM=000000:LRMS=000000:APP=000000
- source = UserInterface
- timestamp = Fri Mar 4 11:39:21 2005
- user = /C=UK/O=eScience/OU=CLRC/L=RAL/CN=steve traylen
- job =
[
requirements = ( RegExp(".*ac.uk.*",other.GlueCEUniqueID) ) && ( other.GlueCEStateStatus == "Production" );
RetryCount = 0;
edg_jobid = "https://lcgrb01.gridpp.rl.ac.uk:9000/sReIPimKBNG7CtPkC9woxA";
MyProxyServer = "lcgrbp01.gridpp.rl.ac.uk";
JobType = "normal";
Executable = "/bin/pwd";
StdOutput = "hello.out";
OutputSandbox = { "hello.out","hello.err" };
VirtualOrganisation = "dteam";
rank = -other.GlueCEStateEstimatedResponseTime;
Type = "job";
StdError = "hello.err";
DefaultRank = -other.GlueCEStateEstimatedResponseTime
]
---
Event: Transfer
- dest_host = lcgrb01.gridpp.rl.ac.uk
- dest_instance = lcgrb01.gridpp.rl.ac.uk:7772
- destination = NetworkServer
- host = lcgui01.gridpp.rl.ac.uk
- level = SYSTEM
- priority = asynchronous
- result = OK
- seqcode = UI=000003:NS=0000000000:WM=000000:BH=0000000000:JSS=000000:LM=000000:LRMS=000000:APP=000000
- source = UserInterface
- timestamp = Fri Mar 4 11:39:23 2005
- user = /C=UK/O=eScience/OU=CLRC/L=RAL/CN=steve traylen
- job =
[
requirements = ( RegExp(".*ac.uk.*",other.GlueCEUniqueID) ) && ( other.GlueCEStateStatus == "Production" );
RetryCount = 0;
edg_jobid = "https://lcgrb01.gridpp.rl.ac.uk:9000/sReIPimKBNG7CtPkC9woxA";
MyProxyServer = "lcgrbp01.gridpp.rl.ac.uk";
JobType = "normal";
Executable = "/bin/pwd";
StdOutput = "hello.out";
OutputSandbox = { "hello.out","hello.err" };
LB_sequence_code = "UI=000003:NS=0000000000:WM=000000:BH=0000000000:JSS=000000:LM=000000:LRMS=000000:APP=000000";
VirtualOrganisation = "dteam";
rank = -other.GlueCEStateEstimatedResponseTime;
Type = "job";
StdError = "hello.err";
DefaultRank = -other.GlueCEStateEstimatedResponseTime
]
---
Event: Accepted
- from = UserInterface
- from_host = lcgrb01.gridpp.rl.ac.uk
- host = lcgrb01.gridpp.rl.ac.uk
- level = SYSTEM
- priority = asynchronous
- seqcode = UI=000003:NS=0000000001:WM=000000:BH=0000000000:JSS=000000:LM=000000:LRMS=000000:APP=000000
- source = NetworkServer
- src_instance = 7772
- timestamp = Fri Mar 4 11:39:23 2005
- user = /C=UK/O=eScience/OU=CLRC/L=RAL/CN=steve traylen
---
Event: EnQueued
- host = lcgrb01.gridpp.rl.ac.uk
- level = SYSTEM
- priority = asynchronous
- queue = /var/edgwl/workload_manager/input.fl
- result = OK
- seqcode = UI=000003:NS=0000000003:WM=000000:BH=0000000000:JSS=000000:LM=000000:LRMS=000000:APP=000000
- source = NetworkServer
- timestamp = Fri Mar 4 11:39:23 2005
- user = /C=UK/O=eScience/OU=CLRC/L=RAL/CN=steve traylen
- job =
[
requirements = ( RegExp(".*ac.uk.*",other.GlueCEUniqueID) ) && ( other.GlueCEStateStatus == "Production" );
RetryCount = 0;
edg_jobid = "https://lcgrb01.gridpp.rl.ac.uk:9000/sReIPimKBNG7CtPkC9woxA";
OutputSandboxPath = "/var/edgwl/SandboxDir/sR/https_3a_2f_2flcgrb01.gridpp.rl.ac.uk_3a9000_2fsReIPimKBNG7CtPkC9woxA/output";
MyProxyServer = "lcgrbp01.gridpp.rl.ac.uk";
JobType = "normal";
Executable = "/bin/pwd";
CertificateSubject = "/C=UK/O=eScience/OU=CLRC/L=RAL/CN=steve traylen";
X509UserProxy = "/opt/edg/var/spool/edg-wl-renewd/a7af537759be70772d5dc12460b7eb81.1";
StdOutput = "hello.out";
OutputSandbox = { "hello.out","hello.err" };
InputSandboxPath = "/var/edgwl/SandboxDir/sR/https_3a_2f_2flcgrb01.gridpp.rl.ac.uk_3a9000_2fsReIPimKBNG7CtPkC9woxA/input";
LB_sequence_code = "UI=000003:NS=0000000003:WM=000000:BH=0000000000:JSS=000000:LM=000000:LRMS=000000:APP=000000";
VirtualOrganisation = "dteam";
rank = -other.GlueCEStateEstimatedResponseTime;
Type = "job";
StdError = "hello.err";
DefaultRank = -other.GlueCEStateEstimatedResponseTime;
InputSandBox = { }
]
**********************************************************************
--
Steve Traylen
[log in to unmask]
http://www.gridpp.ac.uk/
|