Hi
The job was started at 0015.
The ps was done at 1345.
1 ? S 0:05 init
2 ? SW 0:00 [migration/0]
3 ? SW 0:00 [migration/1]
4 ? SW 0:00 [migration/2]
5 ? SW 0:00 [migration/3]
6 ? SW 0:00 [keventd]
7 ? SWN 0:00 [ksoftirqd/0]
8 ? SWN 0:00 [ksoftirqd/1]
9 ? SWN 0:00 [ksoftirqd/2]
10 ? SWN 0:00 [ksoftirqd/3]
13 ? SW 0:00 [bdflush]
11 ? SW 0:00 [kswapd]
12 ? SW 0:03 [kscand]
14 ? SW 0:01 [kupdated]
15 ? SW 0:00 [mdrecoveryd]
23 ? SW 0:00 [scsi_eh_0]
26 ? SW 0:02 [kjournald]
81 ? SW 0:00 [khubd]
1437 ? SW 0:00 [kjournald]
1838 ? S 0:00 syslogd -m 0
1842 ? S 0:00 klogd -x
1852 ? S 0:04 irqbalance
1869 ? S 0:00 portmap
1888 ? S 0:00 rpc.statd
2022 ? S 0:02 /usr/sbin/automount --timeout=60 /tmp_mnt file
/etc/auto.direct
2619 ? SW 0:00 [afs_rxlistener]
2622 ? SW 0:00 [afs_callback]
2623 ? SW 0:00 [afs_rxevent]
2625 ? SW 0:00 [afsd]
2627 ? SW 0:00 [afs_checkserver]
2630 ? SW 0:00 [afs_background]
2631 ? SW 0:00 [afs_background]
2633 ? SW 0:00 [afs_background]
2635 ? SW 0:00 [afs_cachetrim]
2651 ? S 0:00 /usr/sbin/sshd
2665 ? S 0:00 xinetd -stayalive -pidfile /var/run/xinetd.pid
2680 ? SL 0:01 ntpd -U ntp -p /var/run/ntpd.pid -g
2689 ? S 0:00 gpm -t imps2 -m /dev/input/mice
2698 ? S 0:00 crond
2730 ? S 0:00 xfs -droppriv -daemon
2747 ? S 0:00 /usr/sbin/atd
2766 tty1 S 0:00 /sbin/mingetty tty1
2767 tty2 S 0:00 /sbin/mingetty tty2
2768 tty3 S 0:00 /sbin/mingetty tty3
2769 tty4 S 0:00 /sbin/mingetty tty4
2770 tty5 S 0:00 /sbin/mingetty tty5
2771 tty6 S 0:00 /sbin/mingetty tty6
2772 ? S 0:00 /usr/bin/gdm-binary -nodaemon
2930 ? S 0:00 /usr/bin/gdm-binary -nodaemon
2931 ? S 1:30 /usr/X11R6/bin/X :0 -auth /var/gdm/:0.Xauth vt7
2940 ? S 0:49 /usr/bin/gdmgreeter
10457 ? S 2:00 /usr/sbin/pbs_mom -p
7649 ? S 0:00 -sh
7853 ? S 0:00 /bin/sh
/var/spool/pbs/mom_priv/jobs/5678.hephyg.SC
7857 ? S 0:00 /usr/bin/perl -w /tmp/bootstrap.cw7854
/home/dteam004/ hephygr.oeaw.ac.at
/home/dteam004/.globus/.gass_cache/local/md5/f1/7c1507eab8a728b3897b8faee0b5
96/md5/b9/28a7d9f2d8ba72d3e92f6b3dc7ab79/data
X509GPG:globus-cache-export.BV8458.gpg /dev/null
/home/dteam004/.globus/.gass_cache/local/md5/f1/7c1507eab8a728b3897b8faee0b5
96/md5/3b/2e51c2f82d28fd1e1aea45785db34d/data stdoutftp
/home/dteam004/.globus/.gass_cache/local/md5/f1/7c1507eab8a728b3897b8faee0b5
96/md5/52/de4c7badf3ec6e9aa1ee0f7ee0512d/data stderrftp
/home/dteam004/.lcgjm/globus-cache-export.BV8458
https://hephygr.oeaw.ac.at:20002/8153/1129076045/ /home/dteam004/ NONE
https://gdrb02.cern.ch:20024/var/edgwl/jobcontrol/submit/uS/JobWrapper.https
_3a_2f_2fgdrb02.cern.ch_3a9000_2fuS1p6-8eyBVm--QNRR9OqA.sh
UI=000003:NS=0000000003:WM=000004:BH=0000000000:JSS=000003:LM=000000:LRMS=00
0000:APP=000000
7863 ? S 0:00 /usr/bin/perl -w /tmp/bootstrap.cw7854
/home/dteam004/ hephygr.oeaw.ac.at
/home/dteam004/.globus/.gass_cache/local/md5/f1/7c1507eab8a728b3897b8faee0b5
96/md5/b9/28a7d9f2d8ba72d3e92f6b3dc7ab79/data
X509GPG:globus-cache-export.BV8458.gpg /dev/null
/home/dteam004/.globus/.gass_cache/local/md5/f1/7c1507eab8a728b3897b8faee0b5
96/md5/3b/2e51c2f82d28fd1e1aea45785db34d/data stdoutftp
/home/dteam004/.globus/.gass_cache/local/md5/f1/7c1507eab8a728b3897b8faee0b5
96/md5/52/de4c7badf3ec6e9aa1ee0f7ee0512d/data stderrftp
/home/dteam004/.lcgjm/globus-cache-export.BV8458
https://hephygr.oeaw.ac.at:20002/8153/1129076045/ /home/dteam004/ NONE
https://gdrb02.cern.ch:20024/var/edgwl/jobcontrol/submit/uS/JobWrapper.https
_3a_2f_2fgdrb02.cern.ch_3a9000_2fuS1p6-8eyBVm--QNRR9OqA.sh
UI=000003:NS=0000000003:WM=000004:BH=0000000000:JSS=000003:LM=000000:LRMS=00
0000:APP=000000
8116 ? S 0:00 /usr/bin/perl -w /tmp/bootstrap.cw7854
/home/dteam004/ hephygr.oeaw.ac.at
/home/dteam004/.globus/.gass_cache/local/md5/f1/7c1507eab8a728b3897b8faee0b5
96/md5/b9/28a7d9f2d8ba72d3e92f6b3dc7ab79/data
X509GPG:globus-cache-export.BV8458.gpg /dev/null
/home/dteam004/.globus/.gass_cache/local/md5/f1/7c1507eab8a728b3897b8faee0b5
96/md5/3b/2e51c2f82d28fd1e1aea45785db34d/data stdoutftp
/home/dteam004/.globus/.gass_cache/local/md5/f1/7c1507eab8a728b3897b8faee0b5
96/md5/52/de4c7badf3ec6e9aa1ee0f7ee0512d/data stderrftp
/home/dteam004/.lcgjm/globus-cache-export.BV8458
https://hephygr.oeaw.ac.at:20002/8153/1129076045/ /home/dteam004/ NONE
https://gdrb02.cern.ch:20024/var/edgwl/jobcontrol/submit/uS/JobWrapper.https
_3a_2f_2fgdrb02.cern.ch_3a9000_2fuS1p6-8eyBVm--QNRR9OqA.sh
UI=000003:NS=0000000003:WM=000004:BH=0000000000:JSS=000003:LM=000000:LRMS=00
0000:APP=000000
8327 ? S 0:00 bash
/home/dteam004/globus-tmp.zirbe.7857.0/globus-tmp.zirbe.7857.0/local/md5/f1/
7c1507eab8a728b3897b8faee0b596/md5/63/d42ca6acc8a7188b5f2b9e6926b5f8/data
UI=000003:NS=0000000003:WM=000004:BH=0000000000:JSS=000003:LM=000000:LRMS=00
0000:APP=000000
8395 ? S 0:00 bash
/home/dteam004/globus-tmp.zirbe.7857.0/globus-tmp.zirbe.7857.0/local/md5/f1/
7c1507eab8a728b3897b8faee0b596/md5/63/d42ca6acc8a7188b5f2b9e6926b5f8/data
UI=000003:NS=0000000003:WM=000004:BH=0000000000:JSS=000003:LM=000000:LRMS=00
0000:APP=000000
8396 ? S 0:00 /bin/bash ./testJob.sh
8397 ? SN 0:01 python2 /opt/lcg/bin/lcg-mon-wn -j
https://gdrb02.cern.ch:9000/uS1p6-8eyBVm--QNRR9OqA -p
/tmp/globus-tmp.zirbe.7857.0 -l 5678.hephygr.oeaw.ac.at -v dteam -r
gdrb02.cern.ch 7649 8396
8398 ? S 0:00 perl -e ? while (1) {? $time_left =
`grid-proxy-info -timeleft 2> /dev/null` || 0;? last if ($time_left <=
0);? sleep($time_left);? }?
kill(defined($ENV{"EDG_WL_NOSETPGRP"}) ? 9 : -9, 8396);? exit(1);?
8414 ? S 0:00 perl ./run-test sft-lcg-rm
8456 ? S 0:00 /bin/bash tests/sft-lcg-rm
9890 ? S 0:00 perl ./run-test sft-lcg-rm-cr
9892 ? S 0:00 /bin/bash tests/sft-lcg-rm-cr
9903 ? S 0:00 lcg-cr -v --vo dteam -d hephyse.oeaw.ac.at -l
lfn:sft-lcg-rm-cr-zirbe.0510120015
file:///home/dteam004/globus-tmp.zirbe.7857.0/WMS_zirbe_08327_https_3a_2f_2f
gdrb02.cern.ch_3a9000_2fuS1p6-8eyBVm--QNRR9OqA/sft-lcg-rm-cr.txt
20381 ? S 0:00 sshd: root@pts/0
20383 pts/0 S 0:00 -bash
21731 ? S 0:00 sshd: root@pts/1
21733 pts/1 S 0:00 -bash
21892 pts/1 R 0:00 ps -ax
[root@zirbe root]#
........
Regards
Gerhard
On 10/12/05 3:22 PM, "Piotr Nyczyk" <[log in to unmask]> wrote:
> Hi Guys,
>
> I was trying to figure out why the test job could hang, but I must
> admit that I was unable to reproduce the problem. Normally all tests
> are killed automatically after 15 minutes by the SIGALRM signal
> handler (the signal handler sends KILL signal to test process), and
> when I try to simulate hanging tests everything works fine for me.
>
> Could you please check the list of running processes on the WN when
> it happens next time? And, if possible, could you also note
> down the time when the job actually started to execute and when you
> checked the process table?
> This is the most obvious way we can investigate what is happening.
>
> Piotr
>
> On Oct 12, 2005, at 1:00 PM, Gerhard Walzel wrote:
>
>> Judit
>> I have exactly the same problem on site Hephy-Vienna,
>> just starting at 0015!
>> For the last few days I have simply removed the job to enable
>> SFT tests again...
>> Gerhard
>>
>>
>> On 10/12/05 11:59 AM, "NOVAK Judit" <[log in to unmask]> wrote:
>>
>>
>>> Hi Christos,
>>>
>>>
>>> In the site history I can see two Job Submission failures,
>>> both from last week. The last one ran into a timeout (while gstat
>>> reports many free CPUs -- is it all OK with the batch system?).
>>>
>>>
>>> Judit
>>>
>>>
>>>
>>>
>>> On k, okt 11, Filippidis christos wrote:
>>>
>>>> hi to all,
>>>>
>>>> i have the following problem:
>>>>
>>>> our site here at demokritos is passing the sft, but for the last week,
>>>> every day
>>>> when dteam002 "/c=ch/o=cern/ou=grid/cn=judit novak 0973" sends
>>>> an sft at
>>>> 18:00, the job never ends or it stops the next day, and the result
>>>> is CT or js
>>>>
>>>> at the same time, when i send an sft from this site:
>>>> https://monitoring.egee.man.poznan.pl/
>>>> everything is ok,
>>>>
>>>>
>>>> it is also strange that when judit novak sends an sft at
>>>> another period
>>>> of the day, for example in the morning, the sft is successful.
>>>>
>>>> do you have any ideas?
>>>>
>>>> thanks xristos
>>>>
>>>>
>>>> Christos Filippidis
>>>> NCSR DEMOKRITOS
>>>> Institute of Nuclear Physics
>>>> office block 6(ktirion 6)
>>>> Gr-15310 Agia Paraskevi
>>>> GREECE
>>>> Tel:2106503425
>>>>
>>>> http://consult.cern.ch/xwho/people/117002
>>>> http://www.inp.demokritos.gr/~filippidisx/
>>>>
>>>>
>>>>
>>>>
>>>>
>>>> ----------------------------------------------
>>>>
>>>> "Institute of Nuclear Physics NCSR Demokritos"
>>>> http://www.inp.demokritos.gr/
>>>>
>>>>
>>>>
>>>> Christos Filippidis
>>>> NCSR DEMOKRITOS
>>>> Institute of Nuclear Physics
>>>> office block 6(ktirion 6)
>>>> Gr-15310 Agia Paraskevi
>>>> GREECE
>>>> Tel:2106503425
>>>>
>>>> http://consult.cern.ch/xwho/people/117002
>>>> http://www.inp.demokritos.gr/~filippidisx/
>>>>
>>>>
>>>>
>>>>
>>>>
>>>> ----------------------------------------------
>>>>
>>>> "Institute of Nuclear Physics NCSR Demokritos"
>>>> http://www.inp.demokritos.gr/
>>
|