Wei Xing wrote:
> Hi, Eric,
>
> Thanks.
>
> Do you mean to comment these lines?
>
> if (/state = free/){
> $state="free";
> }elsif(/state =/){
> undef $state;
> }
>
Not really. Please find attached my version of the script, which works fine on my
site: it reports a correct FreeCPU value and a correct GlueCEStateWaitingJobs value.
Eric
> BTW, if jobs sometimes wait in a queue for a very long time and are not sent
> to the worker nodes even though the worker nodes are really "FREE", how can this be fixed?
>
> Thanks again,
>
> Wei
--
--------------------------------------------------------------
FEDE ERIC Mail : [log in to unmask]
CPPM Mail : [log in to unmask]
163 Av de Luminy case 902 Tel : (+33) (0)4 91 82 76 41
13288 Marseille Cedex 9 France Fax : (+33) (0)4 91 82 72 99
--------------------------------------------------------------
#!/usr/bin/perl -w
use strict;
# File-scoped state shared by the top-level sections of this script.

# Input and output buffers.
my (@config, @output);             # configuration-file lines; LDIF lines sent to stdout
my ($ldif_file, @ldif_file, @dn);  # static LDIF: file name, its content, dn lines found in it

# PBS server to query, the queue currently being reported, and the LRMS version.
my ($pbsHost, $queue, $Version);

# CPU totals gathered from pbsnodes.
my ($TotalCPU, $FreeCPU);

# Per-queue job counters gathered from qstat.
my ($TotalJobs, $QueuedJobs, $RunningJobs);

# Per-queue timing values used for the response-time estimates.
my ($UsedTime, $MaxTime, $WallTime);

# Per-queue limits and flags, and the status string derived from them.
my ($MaxRunningJobs, $Started, $Enabled, $Status);

# Scratch values used while parsing the pbsnodes output.
my ($state, $num_pro, $num_jobs, $jobs);
# Process the command line.
#   $ARGV[0]  configuration file (required); its lines are stored in @config.
#   $ARGV[1]  PBS server host (optional); defaults to the output of `hostname -f`.
# Prints a usage message and exits with status 1 when no configuration file is given.
if ($ARGV[0]) {
    # Three-argument open with a lexical handle: the name is taken literally,
    # so characters such as '<', '>' or '|' in it cannot change the open mode.
    open(my $config_fh, '<', $ARGV[0]) || die "Cannot open '$ARGV[0]': $!,";
    while (my $line = <$config_fh>) {
        push @config, $line;
    }
    close($config_fh);

    if ($ARGV[1]) {
        $pbsHost = $ARGV[1];
    } else {
        # Backticks keep the trailing newline. Strip it: $pbsHost is later
        # interpolated into shell command lines, where an embedded newline
        # would cut the command in two and detach the "2>&1" redirection.
        $pbsHost = `hostname -f`;
        chomp $pbsHost if defined $pbsHost;
    }
} else {
    print "Usage: $0 <config file> [pbs host]\n";
    exit 1;
}
# Load the static LDIF file named by each "ldif_file=<path>" configuration
# line and append its lines to @ldif_file. Dies if the file cannot be opened.
for my $config_line (@config) {
    next unless $config_line =~ /^ldif_file=/;

    # Work on a copy so the configuration entry itself is left untouched.
    $ldif_file = $config_line;
    $ldif_file =~ s/^ldif_file=//;
    # Remove the line terminator and surrounding blanks; the three-argument
    # open below uses the name exactly as given.
    $ldif_file =~ s/^\s+|\s+$//g;

    # Report the LDIF file name (not the configuration file) on failure.
    open(my $ldif_fh, '<', $ldif_file) || die "Cannot open '$ldif_file': $!,";
    while (my $line = <$ldif_fh>) {
        push @ldif_file, $line;
    }
    close($ldif_fh);
}
# Collect every static LDIF line that carries a GlueCEUniqueID dn.
push @dn, grep { /dn:\s+GlueCEUniqueID=/ } @ldif_file;
# Convert a PBS time value into seconds.
#   "HH:MM:SS"  -> HH*3600 + MM*60 + SS (the first such triple found in the string)
#   "MM:SS"     -> MM*60 + SS
#   "-" / undef -> 0
#   other       -> returned unchanged (e.g. a value already given in seconds)
sub convertHhMmSs {
    my ($value) = @_;
    return 0 unless defined $value;
    return $1 * 3600 + $2 * 60 + $3 if $value =~ /(\d+):(\d+):(\d+)/;
    # Two-field form: without this branch the raw "MM:SS" string would be
    # handed back and then used in arithmetic by the callers.
    return $1 * 60 + $2 if $value =~ /^\s*(\d+):(\d+)\s*$/;
    return ($value ne "-") ? $value : 0;
}
# Determine the LRMS version: run "qstat -B -f" against the PBS host and keep
# the value of its pbs_version attribute. The script stops if none is found.
open QSTAT, "qstat -B -f $pbsHost 2>&1 |" or die "Error running qstat.\n";
while (defined(my $server_line = <QSTAT>)) {
    next unless $server_line =~ /pbs_version\s+=\s+(\S+)/;
    $Version = $1;
}
close QSTAT;
die "Can not obtain pbs version from host\n" unless $Version;
# Get Total and Free CPUs from "pbsnodes -a".
#   $TotalCPU  sum of "np" over all nodes.
#   $num_jobs  number of job slots listed in the "jobs" attribute of all nodes.
#   $FreeCPU   sum of (np - job slots) over usable nodes only; a node whose
#              state contains down, offline or unknown contributes nothing,
#              because its CPUs cannot accept work.
# NOTE(review): relies on each node's "state" line preceding its "np" and
# "jobs" lines, and on "jobs" being a comma-separated list with one entry per
# occupied slot - confirm against the pbsnodes version in use.
open QSTAT, "pbsnodes -a -s $pbsHost 2>&1 |" or die "Error running pbsnodes.\n";
$num_jobs = 0;
$TotalCPU = 0;
$FreeCPU  = 0;
my $node_usable = 1;
while (my $node_line = <QSTAT>) {
    # Patterns are anchored to the attribute name so that other attributes
    # ending in the same word (e.g. "power_state =") are not picked up.
    if ($node_line =~ /^\s*state = (.*)/) {
        $state = $1;
        $node_usable = ($state =~ /down|offline|unknown/) ? 0 : 1;
    }
    elsif ($node_line =~ /^\s*np = (\d+)/) {
        $num_pro = $1;
        $TotalCPU += $num_pro;
        $FreeCPU  += $num_pro if $node_usable;
    }
    elsif ($node_line =~ /^\s*jobs = (.*)/) {
        $jobs = $1;
        # One slot per list entry: number of commas plus one.
        my $node_jobs = 1 + ($jobs =~ tr/,//);
        $num_jobs += $node_jobs;
        $FreeCPU  -= $node_jobs if $node_usable;
    }
}
close QSTAT;
# Never publish a negative number of free CPUs.
$FreeCPU = 0 if $FreeCPU < 0;
# Build one LDIF entry per GlueCEUniqueID dn and print the result.
# For each dn the queue name is derived from the dn itself, the queue limits
# are read from "qstat -Q -f", and the job counters from "qstat -f".
for my $dn_line (@dn) {
    push @output, $dn_line;

    # Queue name: keep the first dn component, then what follows its last '-'.
    $queue = $dn_line;
    $queue =~ s/,.*//;
    $queue =~ s/^.*-//;
    chomp $queue;

    push @output, "GlueCEInfoLRMSVersion: $Version\n";
    push @output, "GlueCEInfoTotalCPUs: $TotalCPU\n";
    push @output, "GlueCEStateFreeCPUs: $FreeCPU\n";

    # Per-queue values must start fresh, otherwise a queue that does not
    # report an attribute would inherit the value of the previous queue.
    $MaxRunningJobs = 9999999;
    $Enabled        = 0;
    $Started        = 0;
    undef $WallTime;

    open QSTAT, "qstat -Q -f $queue\@$pbsHost 2>&1 |" or die "Error running qstat.\n";
    while (my $queue_line = <QSTAT>) {
        if ($queue_line =~ /^\s+resources_max.cput\s+=\s+(\S+)/) {
            # Published in minutes.
            push @output, "GlueCEPolicyMaxCPUTime: " . int(convertHhMmSs($1) / 60) . "\n";
        }
        if ($queue_line =~ /^\s+max_running\s+=\s+(\d+)/) {
            $MaxRunningJobs = $1;
            push @output, "GlueCEPolicyMaxRunningJobs: " . int($1) . "\n";
        }
        if ($queue_line =~ /^\s+resources_max.walltime\s+=\s+(\S+)/) {
            # $WallTime stays in seconds; the published value is in minutes.
            $WallTime = convertHhMmSs($1);
            push @output, "GlueCEPolicyMaxWallClockTime: " . int($WallTime / 60) . "\n";
        }
        if ($queue_line =~ /^\s+enabled\s+=\s+(True)/) {
            $Enabled = 1;
        }
        if ($queue_line =~ /^\s+started\s+=\s+(True)/) {
            $Started = 1;
        }
    }
    close QSTAT;

    $Status =
        ($Enabled && $Started) ? "Production" :
        ($Enabled)             ? "Queueing"   :
        ($Started)             ? "Draining"   : "Closed";
    push @output, "GlueCEStateStatus: $Status\n";

    $TotalJobs   = 0;
    $QueuedJobs  = 0;
    $RunningJobs = 0;
    $UsedTime    = 0;
    open QSTAT, "qstat -f $queue\@$pbsHost 2>&1 |" or die "Error running qstat. (file)\n";
    while (my $job_line = <QSTAT>) {
        $TotalJobs++   if $job_line =~ /^Job Id:/;
        $QueuedJobs++  if $job_line =~ /job_state = [QHTW]/;
        $RunningJobs++ if $job_line =~ /job_state = [RE]/;
        if ($job_line =~ /^\s+resources_used.walltime\s+=\s+(\S+)/) {
            # Accumulate in seconds, the same unit as $WallTime, so the
            # subtraction below does not mix minutes with seconds.
            $UsedTime += convertHhMmSs($1);
        }
    }
    close QSTAT;

    push @output, "GlueCEStateTotalJobs: $TotalJobs\n";
    push @output, "GlueCEStateWaitingJobs: $QueuedJobs\n";
    push @output, "GlueCEStateRunningJobs: $RunningJobs\n";

    # No (or a nonsensical) walltime limit: fall back to a very large value.
    if (!$WallTime || $WallTime < 0) {
        $WallTime = 9999999;
    }

    # Slots that can run this queue's jobs: the smaller of the queue's
    # max_running limit and the total CPU count.
    my $TCPU = ($MaxRunningJobs < $TotalCPU) ? $MaxRunningJobs : $TotalCPU;

    # Worst case: every job uses its full walltime, minus the time already
    # consumed, spread over the available slots. With no slots, or a negative
    # result, publish the "unknown" sentinel instead of dividing by zero.
    if ($TCPU > 0) {
        my $worst = (($TotalJobs * $WallTime) - $UsedTime) / $TCPU;
        $MaxTime = ($worst < 0) ? 99999999 : int($worst);
    } else {
        $MaxTime = 99999999;
    }
    push @output, "GlueCEStateWorstResponseTime: $MaxTime\n";

    # The estimate is half of the worst case, truncated to an integer.
    $MaxTime = int($MaxTime / 2);
    push @output, "GlueCEStateEstimatedResponseTime: $MaxTime\n";

    push @output, "\n";
}
print @output;
exit;
|