- Timestamp:
- Oct 13, 2017, 9:37:16 PM (7 years ago)
- Location:
- trunk
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/cleanup.php
r28 r31 225 225 if ( file_exists( $fn_stdout ) ) $stdout = file_get_contents( $fn_stdout ); 226 226 if ( file_exists( $fn_tarfile ) ) $tarfile = file_get_contents( $fn_tarfile ); 227 write_log( "$me(0): length contents stderr,stdout,tarfile -- " 228 . strlen($stderr) . "," . strlen($stdout) . "," . strlen($tarfile) ); 227 229 // If stdout,stderr have no content, retry after delay 228 230 if ( strlen( $stdout ) == 0 || strlen( $stderr ) == 0 ) … … 870 872 global $status; 871 873 $is_us3iab = preg_match( "/us3iab/", $cluster ); 874 $is_jetstr = preg_match( "/jetstream/", $cluster ); 872 875 873 876 // Figure out remote directory … … 881 884 { 882 885 // For "-local", recompute remote work directory 883 $cmd = "ssh us3@$cluster.uthscsa.edu 'ls -d ~us3/lims/work/local' 2/dev/null"; 886 $clushost = "$cluster.uthscsa.edu"; 887 $lworkdir = "~us3/lims/work/local"; 888 if ( $is_jetstr ) 889 { 890 $clushost = "js-157-184.jetstream-cloud.org"; 891 $lworkdir = "/N/us3_cluster/work/local"; 892 } 893 $cmd = "ssh us3@$clushost 'ls -d $lworkdir' 2/dev/null"; 884 894 exec( $cmd, $output, $stat ); 885 895 $work_remote = $output[ 0 ]; … … 891 901 $pwd = chdir( "$work/$gfacID" ); 892 902 893 $cmd = "scp us3@$cluster.uthscsa.edu:$remoteDir/output/analysis-results.tar . 2>&1"; 903 $cmd = "scp us3@$clushost:$remoteDir/output/analysis-results.tar . 2>&1"; 894 904 895 905 exec( $cmd, $output, $stat ); … … 897 907 write_log( "$me: Bad exec:\n$cmd\n" . implode( "\n", $output ) ); 898 908 899 $cmd = "scp us3@$cluster.uthscsa.edu:$remoteDir/stdout . 2>&1"; 909 $cmd = "scp us3@$clushost:$remoteDir/stdout . 2>&1"; 900 910 901 911 exec( $cmd, $output, $stat ); … … 910 920 } 911 921 912 $cmd = "scp us3@$cluster.uthscsa.edu:$remoteDir/stderr . 2>&1"; 922 $cmd = "scp us3@$clushost:$remoteDir/stderr . 2>&1"; 913 923 914 924 exec( $cmd, $output, $stat ); -
trunk/cluster_status.php
r28 r31 183 183 { 184 184 $clusters = array( "alamo", "lonestar5", "stampede", "comet", 185 "gordon", "jureca", "jacinto" ); 185 "stampede2-b", "jetstream", "jureca", "jacinto-b" ); 186 186 } 187 187 … … 251 251 break; 252 252 } 253 case 'stampede2': 254 { 255 $host = "us3@stampede2.tacc.utexas.edu"; 256 $qstat = `ssh $host '/usr/local/bin/showq 2>&1|grep "Total Jobs"'`; 257 $sparts = preg_split( '/\s+/', $qstat ); 258 $tot = $sparts[ 2 ]; 259 $run = $sparts[ 5 ]; 260 $que = $sparts[ 8 ]; 261 $sta = "up"; 262 if ( $tot == '' || $tot == '0' ) 263 $sta = "down"; 264 break; 265 } 253 266 case 'lonestar5': 254 267 { … … 267 280 { 268 281 $run = $sparts[ 5 ]; 269 $que = $sparts[ 8 ]; 282 // $que = $sparts[ 8 ]; 283 $que = $sparts[ 11 ]; 270 284 } 271 285 break; … … 309 323 break; 310 324 } 325 case 'jetstream-local': 326 case 'jetstream': 327 { 328 $host = "us3@js-157-184.jetstream-cloud.org"; 329 $qstat = `ssh $host '/usr/bin/sinfo -s -p batch -o "%a %F" |tail -1'`; 330 $sparts = preg_split( '/\s+/', $qstat ); 331 $sta = $sparts[ 0 ]; 332 $knts = $sparts[ 1 ]; 333 $sparts = preg_split( '/\//', $knts ); 334 $run = $sparts[ 0 ]; 335 $que = $sparts[ 1 ]; 336 if ( $sta == "" ) 337 $sta = "down"; 338 break; 339 } 311 340 } 312 341 … … 328 357 $data[] = $a; 329 358 330 if ( $clname == 'alamo' || $clname == 'jacinto' ) 359 if ( $clname == 'alamo' || 360 $clname == 'jacinto' || 361 $clname == 'jetstream' ) 331 362 { 332 363 $a[ 'cluster' ] = $clname . "-local"; -
trunk/gridctl.php
r29 r31 70 70 // Checking we need to do for each entry 71 71 echo "us3db=$us3_db gfid=$gfacID\n"; 72 //write_log( " us3db=$us3_db gfid=$gfacID" ); 72 73 switch ( $us3_db ) 73 74 { … … 99 100 $status = aira_status( $gfacID, $status_in ); 100 101 if($status != $status_in ) 101 write_log( "$loghdr Set to $status from $status_in" ); 102 write_log( "$loghdr Set to $status from $status_in" ); 103 //write_log( "$loghdr aira status=$status" ); 102 104 } 103 105 else if ( is_gfac_job( $gfacID ) ) … … 112 114 else 113 115 { 116 //write_log( "$loghdr Local gfacID=$gfacID" ); 114 117 $status_gw = $status; 115 118 $status = get_local_status( $gfacID ); … … 136 139 137 140 //echo " st=$status\n"; 141 //write_log( "$loghdr switch status=$status" ); 138 142 switch ( $status ) 139 143 { … … 174 178 case "COMPLETED": 175 179 case "COMPLETE": 176 write_log( "$loghdr COMPLETE gfacID=$gfacID" ); 180 //write_log( "$loghdr COMPLETE gfacID=$gfacID" ); 177 181 complete(); 178 182 break; … … 833 837 global $self; 834 838 835 $cmd = "/usr/bin/qstat -a $gfacID 2>&1|tail -n 1"; 839 $is_jetstr = preg_match( "/jetstream/", $cluster ); 840 if ( $is_jetstr ) 841 $cmd = "squeue -a $gfacID 2>&1|tail -n 1"; 842 else 843 $cmd = "/usr/bin/qstat -a $gfacID 2>&1|tail -n 1"; 836 844 //write_log( "$self cmd: $cmd" ); 837 845 //write_log( "$self cluster: $cluster" ); 838 846 //write_log( "$self gfacID: $gfacID" ); 847 839 848 if ( ! preg_match( "/us3iab/", $cluster ) ) 840 849 { 841 850 $system = "$cluster.uthscsa.edu"; 851 if ( $is_jetstr ) 852 $system = "$cluster"; 842 853 $system = preg_replace( "/\-local/", "", $system ); 843 //write_log( "$self system: $system" ); 844 854 $cmd = "/usr/bin/ssh -x us3@$system " . $cmd; 845 855 //write_log( "$self cmd: $cmd" ); … … 849 859 //write_log( "$self result: $result" ); 850 860 851 if ( $result == "" || preg_match( "/^qstat: Unknown/", $result ) ) 861 /////////////////////////////////////////////////////////////////// 862 $secwait = 2; 863 $num_try = 0; 864 // Sleep and retry up to 3 times if ssh has "ssh_exchange_identification" error 865 while ( preg_match( "/ssh_exchange_id/", $result ) && $num_try < 3 ) 866 { 867 sleep( $secwait ); 868 $num_try++; 869 $secwait *= 2; 870 write_log( "$me: num_try=$num_try secwait=$secwait" ); 871 } 872 /////////////////////////////////////////////////////////////////// 873 if ( $result == "" || 874 preg_match( "/^qstat: Unknown/", $result ) || 875 preg_match( "/ssh_exchange_id/", $result ) ) 852 876 { 853 877 write_log( "$self get_local_status: Local job $gfacID unknown" ); … … 857 881 858 882 $values = preg_split( "/\s+/", $result ); 859 //write_log( "$self: get_local_status: job status = /{$values[9]}/"); 860 switch ( $values[ 9 ] ) 883 $jstat = ( $is_jetstr == 0 ) ? $values[ 9 ] : $values[ 4 ]; 884 //write_log( "$self: get_local_status: job status = /$jstat/"); 885 switch ( $jstat ) 861 886 { 862 887 case "W" : // Waiting for execution time to be reached … … 873 898 case "H" : // Held 874 899 case "Q" : // Queued 900 case "PD" : // Queued 875 901 $status = 'SUBMITTED'; 876 902 break; … … 1079 1105 if ( preg_match( "/US3-A/i", $gfacID ) && $devmatch ) 1080 1106 { 1107 //write_log( "$loghdr status_in=$status_in status=$status gfacID=$gfacID" ); 1081 1108 $status_ex = getExperimentStatus( $gfacID ); 1109 //write_log( "$loghdr status_ex=$status_ex" ); 1082 1110 1083 1111 if ( $status_ex == 'COMPLETED' )
Note:
See TracChangeset
for help on using the changeset viewer.