Changeset 31 for trunk/gridctl.php


Ignore:
Timestamp:
Oct 13, 2017, 9:37:16 PM (7 years ago)
Author:
gegorbet
Message:

global-fit and jetstream, stampede2 mods

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gridctl.php

    r29 r31  
    7070   // Checking we need to do for each entry
    7171echo "us3db=$us3_db  gfid=$gfacID\n";
     72//write_log( " us3db=$us3_db  gfid=$gfacID" );
    7273   switch ( $us3_db )
    7374   {
     
    99100      $status     = aira_status( $gfacID, $status_in );
    100101if($status != $status_in )
    101 write_log( "$loghdr Set to $status from $status_in" );
     102 write_log( "$loghdr Set to $status from $status_in" );
     103//write_log( "$loghdr    aira status=$status" );
    102104   }
    103105   else if ( is_gfac_job( $gfacID ) )
     
    112114   else
    113115   {
     116//write_log( "$loghdr Local gfacID=$gfacID" );
    114117      $status_gw  = $status;
    115118      $status     = get_local_status( $gfacID );
     
    136139
    137140//echo "  st=$status\n";
     141//write_log( "$loghdr switch status=$status" );
    138142   switch ( $status )
    139143   {
     
    174178      case "COMPLETED":
    175179      case "COMPLETE":
    176 write_log( "$loghdr   COMPLETE gfacID=$gfacID" );
     180//write_log( "$loghdr   COMPLETE gfacID=$gfacID" );
    177181         complete();
    178182         break;
     
    833837   global $self;
    834838
    835    $cmd    = "/usr/bin/qstat -a $gfacID 2>&1|tail -n 1";
     839   $is_jetstr = preg_match( "/jetstream/", $cluster );
     840   if ( $is_jetstr )
     841      $cmd    = "squeue -a $gfacID 2>&1|tail -n 1";
     842   else
     843      $cmd    = "/usr/bin/qstat -a $gfacID 2>&1|tail -n 1";
    836844//write_log( "$self cmd: $cmd" );
    837845//write_log( "$self cluster: $cluster" );
    838846//write_log( "$self gfacID: $gfacID" );
     847
    839848   if ( ! preg_match( "/us3iab/", $cluster ) )
    840849   {
    841850      $system = "$cluster.uthscsa.edu";
     851      if ( $is_jetstr )
     852         $system = "$cluster";
    842853      $system = preg_replace( "/\-local/", "", $system );
    843 //write_log( "$self system: $system" );
    844854      $cmd    = "/usr/bin/ssh -x us3@$system " . $cmd;
    845855//write_log( "$self  cmd: $cmd" );
     
    849859//write_log( "$self  result: $result" );
    850860
    851    if ( $result == ""  ||  preg_match( "/^qstat: Unknown/", $result ) )
     861///////////////////////////////////////////////////////////////////
     862   $secwait    = 2;
     863   $num_try    = 0;
     864   // Sleep and retry up to 3 times if ssh has "ssh_exchange_identification" error
     865   while ( preg_match( "/ssh_exchange_id/", $result )  &&  $num_try < 3 )
     866   {
     867      sleep( $secwait );
     868      $num_try++;
     869      $secwait   *= 2;
     870write_log( "$me:   num_try=$num_try  secwait=$secwait" );
     871   }
     872///////////////////////////////////////////////////////////////////
     873   if ( $result == ""  ||
     874        preg_match( "/^qstat: Unknown/", $result )  ||
     875        preg_match( "/ssh_exchange_id/", $result ) )
    852876   {
    853877      write_log( "$self get_local_status: Local job $gfacID unknown" );
     
    857881
    858882   $values = preg_split( "/\s+/", $result );
    859 //write_log( "$self: get_local_status: job status = /{$values[9]}/");
    860    switch ( $values[ 9 ] )
     883   $jstat   = ( $is_jetstr == 0 ) ? $values[ 9 ] : $values[ 4 ];
     884//write_log( "$self: get_local_status: job status = /$jstat/");
     885   switch ( $jstat )
    861886   {
    862887      case "W" :                      // Waiting for execution time to be reached
     
    873898      case "H" :                      // Held
    874899      case "Q" :                      // Queued
     900      case "PD" :                     // Queued
    875901        $status = 'SUBMITTED';
    876902        break;
     
    10791105   if ( preg_match( "/US3-A/i", $gfacID )  &&  $devmatch )
    10801106   {
     1107//write_log( "$loghdr status_in=$status_in status=$status gfacID=$gfacID" );
    10811108      $status_ex = getExperimentStatus( $gfacID );
     1109//write_log( "$loghdr   status_ex=$status_ex" );
    10821110
    10831111      if ( $status_ex == 'COMPLETED' )
Note: See TracChangeset for help on using the changeset viewer.