source: trunk/gridctl.php@ 35

Last change on this file since 35 was 35, checked in by gegorbet, 6 years ago

mods mostly for use of mysqli

File size: 31.8 KB
RevLine 
<?php

// Locate the LIMS bin directory and pull in the shared configuration.
// The variables used below but not assigned here ($dbhost, $guser, $gpasswd,
// $gDB, $self, $class_dir, ...) and the write_log() helper are not defined in
// this file; presumably they come from listen-config.php -- TODO confirm.
$us3bin = exec( "ls -d ~us3/lims/bin" );
include_once "$us3bin/listen-config.php";
//include "$us3bin/cleanup_aira.php";
//include "$us3bin/cleanup_gfac.php";

// Global variables shared with the functions below
$gfac_message = "";
$updateTime   = 0;
$submittime   = 0;
$cluster      = '';

//global $self;
global $status_ex, $status_gw;

// Produce some output temporarily, so cron will send a message
$now = time();
echo "Time started: " . date( 'Y-m-d H:i:s', $now ) . "\n";

// Get data from global GFAC DB
$gLink = mysqli_connect( $dbhost, $guser, $gpasswd, $gDB );

if ( ! $gLink )
{
    // No link exists after a failed connect, so mysqli_error() cannot be
    // used here; mysqli_connect_error() reports the connection failure.
    write_log( "$self: Could not select DB $gDB - " . mysqli_connect_error() );
    mail_to_admin( "fail", "Internal Error: Could not select DB $gDB" );
    sleep( 300 );
    exit();
}
31
// Read every entry of the GFAC analysis table; the UNIX timestamp of the
// "time" column is selected alongside the raw column value.
$query  = "SELECT gfacID, us3_db, cluster, status, queue_msg, " .
          "UNIX_TIMESTAMP(time), time from analysis";
$result = mysqli_query( $gLink, $query );

if ( ! $result )
{
    // Log and mail the failure, then give up for this run
    write_log( "$self: Query failed $query - " . mysqli_error( $gLink ) );
    mail_to_admin( "fail", "Query failed $query\n" . mysqli_error( $gLink ) );
    exit();
}

// An empty analysis table means there is nothing to do
if ( mysqli_num_rows( $result ) == 0 )
{
    exit();
}
[1]49
// Non-zero when $class_dir contains "class_devel"
$me_devel = preg_match( "/class_devel/", $class_dir );

// Walk every analysis row. The row fields are unpacked into global variables
// because the handler functions below read them through "global".
while ( $row = mysqli_fetch_array( $result ) )
{
    list( $gfacID, $us3_db, $cluster, $status, $queue_msg, $time, $updateTime ) = $row;

    if ( preg_match( "/US3-A/i", $gfacID ) )
    {
        // For thrift, job and gridctl must match (both devel or both not)
        $job_devel = preg_match( "/US3-ADEV/i", $gfacID );

        if ( (bool) $me_devel != (bool) $job_devel )
        {
            // Job type and Airavata server mismatch: skip processing
            continue;
        }
    }
    else if ( $me_devel )
    {
        // Local (us3iab/-local) and class_devel: skip processing
        continue;
    }

    // Checking we need to do for each entry
    echo "us3db=$us3_db gfid=$gfacID\n";

    // Only the listed databases get this service URL; for any other
    // database $serviceURL is left as it was.
    $alt_service_dbs = array( 'Xuslims3_cauma3',
                              'Xuslims3_cauma3d',
                              'Xuslims3_HHU',
                              'Xuslims3_Uni_KN' );

    if ( in_array( $us3_db, $alt_service_dbs ) )
    {
        $serviceURL = "http://gridfarm005.ucs.indiana.edu:9090/ogce-rest/job";
    }

    $gfacLabl  = $gfacID;
    $loghdr    = $self . ":" . $gfacLabl . "...:";
    $status_ex = $status;

    // Determine the true current status, depending on the kind of job
    if ( is_aira_job( $gfacID ) )
    {
        // Airavata/Thrift entry
        $status_in = $status;
        $status    = aira_status( $gfacID, $status_in );

        if ( $status != $status_in )
        {
            write_log( "$loghdr Set to $status from $status_in" );
        }
    }
    else if ( is_gfac_job( $gfacID ) )
    {
        // GFAC entry: a gateway status of COMPLETE overrides the looked-up one
        $status_gw = $status;
        $status    = get_gfac_status( $gfacID );

        if ( $status_gw == 'COMPLETE' )
        {
            $status = $status_gw;
        }
    }
    else
    {
        // Local entry: keep the gateway status when it is COMPLETE or when
        // the local lookup returned UNKNOWN
        $status_gw = $status;
        $status    = get_local_status( $gfacID );

        if ( $status_gw == 'COMPLETE' || $status == 'UNKNOWN' )
        {
            $status = $status_gw;
        }
    }

    // Sometimes during testing, the us3_db entry is not set.
    // If $status == 'ERROR' then the condition has been processed before.
    if ( strlen( $us3_db ) == 0 && $status != 'ERROR' )
    {
        write_log( "$loghdr GFAC DB is NULL - $gfacID" );
        mail_to_admin( "fail", "GFAC DB is NULL\n$gfacID" );

        $query2  = "UPDATE analysis SET status='ERROR' WHERE gfacID='$gfacID'";
        $result2 = mysqli_query( $gLink, $query2 );
        $status  = 'ERROR';

        if ( ! $result2 )
        {
            write_log( "$loghdr Query failed $query2 - " . mysqli_error( $gLink ) );
        }
    }

    // Dispatch on the resolved status
    switch ( $status )
    {
        // Already been handled
        // Later update this condition to search for gfacID?
        case "ERROR":
            cleanup();
            break;

        case "SUBMITTED":
            submitted( $time );
            break;

        case "SUBMIT_TIMEOUT":
            submit_timeout( $time );
            break;

        case "RUNNING":
        case "STARTED":
        case "STAGING":
        case "ACTIVE":
            running( $time );
            break;

        case "RUN_TIMEOUT":
            run_timeout( $time );
            break;

        case "DATA":
        case "RESULTS_GEN":
            wait_data( $time );
            break;

        case "DATA_TIMEOUT":
            data_timeout( $time );
            break;

        case "COMPLETED":
        case "COMPLETE":
            complete();
            break;

        case "CANCELLED":
        case "CANCELED":
        case "FAILED":
            failed();
            break;

        case "FINISHED":
        case "DONE":
            // Non-Airavata jobs are cleaned up here; the status is logged
            // for every job reaching this case
            if ( ! is_aira_job( $gfacID ) )
            {
                complete();
            }
            write_log( "$loghdr FINISHED gfacID=$gfacID" );
            break;

        case "PROCESSING":
        default:
            break;
    }
}

mysqli_close( $gLink );

exit();
214
/**
 * Handle an entry whose status is SUBMITTED.
 *
 * Nothing is done during the first 10 minutes. Within the first 24 hours the
 * job status is re-queried and, if it is no longer one of SUBMITTED,
 * INITIALIZED or PENDING, passed to update_job_status(). After 24 hours the
 * entry is marked SUBMIT_TIMEOUT and the admin is notified.
 *
 * @param int|string $updatetime UNIX timestamp of the entry's last update
 */
function submitted( $updatetime )
{
    global $self;
    global $gLink;
    global $gfacID;
    global $loghdr;

    $elapsed = time() - $updatetime;

    // Updated less than 10 minutes ago
    if ( $elapsed < 600 ) return;

    // Within the first 24 hours
    if ( $elapsed < 86400 )
    {
        $job_status = get_gfac_status( $gfacID );
        if ( $job_status === false )
        {
            $job_status = get_local_status( $gfacID );
        }

        if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' ) return;

        $queued_states = array( 'SUBMITTED', 'INITIALIZED', 'PENDING' );
        if ( ! in_array( $job_status, $queued_states ) )
        {
            write_log( "$loghdr submitted:job_status=$job_status" );
            update_job_status( $job_status, $gfacID );
        }

        return;
    }

    // Past 24 hours: flag the entry and tell the admin
    $message = "Job listed submitted longer than 24 hours";
    $notice  = "$message - id: $gfacID";
    write_log( "$self: " . $notice );
    mail_to_admin( "hang", $notice );

    $query = "UPDATE analysis SET status='SUBMIT_TIMEOUT' WHERE gfacID='$gfacID'";
    if ( ! mysqli_query( $gLink, $query ) )
    {
        write_log( "$self: Query failed $query - " . mysqli_error( $gLink ) );
    }

    update_queue_messages( $message );
    update_db( $message );
}
255
/**
 * Handle an entry whose status is SUBMIT_TIMEOUT.
 *
 * The job status is re-queried first; anything other than SUBMITTED,
 * INITIALIZED or PENDING is passed to update_job_status(). Otherwise, once
 * another 24 hours have passed (48 total submitted), the entry is marked
 * FAILED and the admin is notified.
 *
 * @param int|string $updatetime UNIX timestamp of the entry's last update
 */
function submit_timeout( $updatetime )
{
    global $self;
    global $gLink;
    global $gfacID;
    global $loghdr;

    $job_status = get_gfac_status( $gfacID );
    if ( $job_status === false )
    {
        $job_status = get_local_status( $gfacID );
    }

    if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' ) return;

    $queued_states = array( 'SUBMITTED', 'INITIALIZED', 'PENDING' );
    if ( ! in_array( $job_status, $queued_states ) )
    {
        update_job_status( $job_status, $gfacID );
        return;
    }

    // Less than 24 hours since the last update ( 48 total submitted )
    if ( time() - $updatetime < 86400 ) return;

    $message = "Job listed submitted longer than 48 hours";
    $notice  = "$message - id: $gfacID";
    write_log( "$self: " . $notice );
    mail_to_admin( "hang", $notice );

    $query = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";
    if ( ! mysqli_query( $gLink, $query ) )
    {
        write_log( "$self: Query failed $query - " . mysqli_error( $gLink ) );
    }

    update_queue_messages( $message );
    update_db( $message );
}
291
/**
 * Handle an entry whose status is RUNNING, STARTED, STAGING or ACTIVE.
 *
 * get_us3_data() is always called first. Nothing more is done during the
 * first 10 minutes; within 24 hours the job status is re-queried and passed
 * to update_job_status() when it is no longer ACTIVE, RUNNING or STARTED.
 * After 24 hours the entry is marked RUN_TIMEOUT and the admin is notified.
 *
 * @param int|string $updatetime UNIX timestamp of the entry's last update
 */
function running( $updatetime )
{
    global $self;
    global $gLink;
    global $gfacID;
    global $loghdr;

    $now = time();

    get_us3_data();

    $elapsed = $now - $updatetime;

    // Message received less than 10 minutes ago
    if ( $elapsed < 600 ) return;

    // Within the first 24 hours
    if ( $elapsed < 86400 )
    {
        $job_status = get_gfac_status( $gfacID );
        if ( $job_status === false )
        {
            $job_status = get_local_status( $gfacID );
        }

        if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' ) return;

        $running_states = array( 'ACTIVE', 'RUNNING', 'STARTED' );
        if ( ! in_array( $job_status, $running_states ) )
        {
            update_job_status( $job_status, $gfacID );
        }

        return;
    }

    $message = "Job listed running longer than 24 hours";
    $notice  = "$message - id: $gfacID";
    write_log( "$self: " . $notice );
    mail_to_admin( "hang", $notice );

    $query = "UPDATE analysis SET status='RUN_TIMEOUT' WHERE gfacID='$gfacID'";
    if ( ! mysqli_query( $gLink, $query ) )
    {
        write_log( "$self: Query failed $query - " . mysqli_error( $gLink ) );
    }

    update_queue_messages( $message );
    update_db( $message );
}
331
/**
 * Handle an entry whose status is RUN_TIMEOUT.
 *
 * The job status is re-queried first; anything other than ACTIVE, RUNNING or
 * STARTED is passed to update_job_status(). Otherwise get_us3_data() is
 * called and, once 48 hours have passed since the last update, the entry is
 * marked FAILED and the admin is notified.
 *
 * @param int|string $updatetime UNIX timestamp of the entry's last update
 */
function run_timeout( $updatetime )
{
    global $self;
    global $gLink;
    global $gfacID;
    global $loghdr;

    $job_status = get_gfac_status( $gfacID );
    if ( $job_status === false )
    {
        $job_status = get_local_status( $gfacID );
    }

    if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' ) return;

    $running_states = array( 'ACTIVE', 'RUNNING', 'STARTED' );
    if ( ! in_array( $job_status, $running_states ) )
    {
        update_job_status( $job_status, $gfacID );
        return;
    }

    $now = time();

    get_us3_data();

    // Less than 48 hours ago
    if ( $now - $updatetime < 172800 ) return;

    $message = "Job listed running longer than 48 hours";
    $notice  = "$message - id: $gfacID";
    write_log( "$self: " . $notice );
    mail_to_admin( "hang", $notice );

    $query = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";
    if ( ! mysqli_query( $gLink, $query ) )
    {
        write_log( "$self: Query failed $query - " . mysqli_error( $gLink ) );
    }

    update_queue_messages( $message );
    update_db( $message );
}
369
/**
 * Handle an entry whose status is DATA or RESULTS_GEN.
 *
 * Within the first hour the job status is re-queried; a status other than
 * DATA is passed to update_job_status(), otherwise the outputs are
 * re-requested when the current minute is a multiple of 5. After one hour
 * the entry is marked DATA_TIMEOUT and the admin is notified.
 *
 * @param int|string $updatetime UNIX timestamp of the entry's last update
 */
function wait_data( $updatetime )
{
    global $self;
    global $gLink;
    global $gfacID;
    global $loghdr;

    // Within the first hour
    if ( time() - $updatetime < 3600 )
    {
        $job_status = get_gfac_status( $gfacID );
        if ( $job_status === false )
        {
            $job_status = get_local_status( $gfacID );
        }

        if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' ) return;

        if ( $job_status != 'DATA' )
        {
            update_job_status( $job_status, $gfacID );
            return;
        }

        // Request to resend data, but only request every 5 minutes
        if ( (int) date( 'i' ) % 5 != 0 ) return;

        $output_status = get_gfac_outputs( $gfacID );

        if ( $output_status !== false )
        {
            mail_to_admin( "debug", "wait_data/$gfacID/$output_status" );
        }

        return;
    }

    $message = "Waiting for data longer than 1 hour";
    $notice  = "$message - id: $gfacID";
    write_log( "$self: " . $notice );
    mail_to_admin( "hang", $notice );

    $query = "UPDATE analysis SET status='DATA_TIMEOUT' WHERE gfacID='$gfacID'";
    if ( ! mysqli_query( $gLink, $query ) )
    {
        write_log( "$self: Query failed $query - " . mysqli_error( $gLink ) );
    }

    update_queue_messages( $message );
    update_db( $message );
}
417
/**
 * Handle an entry whose status is DATA_TIMEOUT.
 *
 * The job status is re-queried first; a status other than DATA is passed to
 * update_job_status(). Within 24 hours of the last update the outputs are
 * re-requested when the current minute is a multiple of 15. After that the
 * entry is marked FAILED and the admin is notified.
 *
 * @param int|string $updatetime UNIX timestamp of the entry's last update
 */
function data_timeout( $updatetime )
{
    global $self;
    global $gLink;
    global $gfacID;
    global $loghdr;

    $job_status = get_gfac_status( $gfacID );
    if ( $job_status === false )
    {
        $job_status = get_local_status( $gfacID );
    }

    if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' ) return;

    if ( $job_status != 'DATA' )
    {
        update_job_status( $job_status, $gfacID );
        return;
    }

    // Less than 24 hours ago
    if ( time() - $updatetime < 86400 )
    {
        // Request to resend data, but only request every 15 minutes
        if ( (int) date( 'i' ) % 15 != 0 ) return;

        $output_status = get_gfac_outputs( $gfacID );

        if ( $output_status !== false )
        {
            mail_to_admin( "debug", "data_timeout/$gfacID/$output_status" );
        }

        return;
    }

    $message = "Waiting for data longer than 24 hours";
    $notice  = "$message - id: $gfacID";
    write_log( "$self: " . $notice );
    mail_to_admin( "hang", $notice );

    $query = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";
    if ( ! mysqli_query( $gLink, $query ) )
    {
        write_log( "$self: Query failed $query - " . mysqli_error( $gLink ) );
    }

    update_queue_messages( $message );
    update_db( $message );
}
465
/**
 * Handle a completed job; all that is required is cleanup().
 */
function complete()
{
    cleanup();
}
471
/**
 * Handle a failed or canceled job; all that is required is cleanup().
 */
function failed()
{
    cleanup();
}
477
/**
 * Clean up the current job (global $gfacID).
 *
 * Verifies the entry still exists in the analysis table, looks up the
 * request ID through get_us3_data(), then calls aira_cleanup() for
 * Airavata jobs whose prod/devel type matches this server, or
 * gfac_cleanup() for all other jobs.
 */
function cleanup()
{
    global $self;
    global $gLink;
    global $gfacID;
    global $us3_db;
    global $loghdr;
    global $class_dir;

    // Double check that the gfacID exists
    $query  = "SELECT count(*) FROM analysis WHERE gfacID='$gfacID'";
    $result = mysqli_query( $gLink, $query );

    if ( ! $result )
    {
        write_log( "$self: Query failed $query - " . mysqli_error( $gLink ) );
        mail_to_admin( "fail", "Query failed $query\n" . mysqli_error( $gLink ) );
        return;
    }

    list( $count ) = mysqli_fetch_array( $result );
    if ( $count == 0 ) return;

    // Now check the us3 instance
    $requestID = get_us3_data();
    if ( $requestID == 0 ) return;

    if ( ! preg_match( "/US3-A/i", $gfacID ) )
    {
        // Non-airavata job: clean up in a non-aira way
        write_log( "$loghdr calling gfac_cleanup() reqID=$requestID" );
        gfac_cleanup( $us3_db, $requestID, $gLink );
        return;
    }

    // Airavata job: clean up only if prod/devel match
    $me_devel  = preg_match( "/class_devel/", $class_dir );
    $job_devel = preg_match( "/US3-ADEV/i", $gfacID );

    if ( (bool) $me_devel == (bool) $job_devel )
    {
        // Job is of same type (prod/devel) as Server: process it
        aira_cleanup( $us3_db, $requestID, $gLink );
    }
}
529
/**
 * Update the status of a job in the analysis table.
 *
 * Maps the reported job status onto a stored status, runs the matching
 * UPDATE on the global $gLink connection and, unless the message is 'NONE',
 * records the message through update_queue_messages() and update_db().
 *
 * The EXECUTING status has no UPDATE of its own. Previously the function
 * then re-ran whatever statement the global $query still held (possibly an
 * UPDATE belonging to a different job); now no statement is executed in
 * that case and the global $query is left untouched.
 *
 * @param string $job_status Status reported for the job
 * @param string $gfacID     ID of the job whose entry is updated
 */
function update_job_status( $job_status, $gfacID )
{
    global $gLink;
    global $query;
    global $self;
    global $loghdr;

    // Statement to execute; stays null when the status needs no DB update
    $sql = null;

    switch ( $job_status )
    {
        case 'SUBMITTED'   :
        case 'SUBMITED'    :
        case 'INITIALIZED' :
        case 'UPDATING'    :
        case 'PENDING'     :
            $sql     = "UPDATE analysis SET status='SUBMITTED' WHERE gfacID='$gfacID'";
            $message = "Job status request reports job is SUBMITTED";
            break;

        case 'STARTED' :
        case 'RUNNING' :
        case 'ACTIVE'  :
            $sql     = "UPDATE analysis SET status='RUNNING' WHERE gfacID='$gfacID'";
            $message = "Job status request reports job is RUNNING";
            break;

        case 'EXECUTING' :
            // Message only; the stored status is not changed
            $message = "Job status request reports job is EXECUTING";
            break;

        case 'FINISHED' :
            $sql     = "UPDATE analysis SET status='FINISHED' WHERE gfacID='$gfacID'";
            $message = "NONE";
            break;

        case 'DONE' :
            $sql     = "UPDATE analysis SET status='DONE' WHERE gfacID='$gfacID'";
            $message = "NONE";
            break;

        case 'COMPLETED' :
        case 'COMPLETE'  :
            $sql     = "UPDATE analysis SET status='COMPLETE' WHERE gfacID='$gfacID'";
            $message = "Job status request reports job is COMPLETED";
            break;

        case 'DATA' :
            $sql     = "UPDATE analysis SET status='DATA' WHERE gfacID='$gfacID'";
            $message = "Job status request reports job is COMPLETE, waiting for data";
            break;

        case 'CANCELED'  :
        case 'CANCELLED' :
            $sql     = "UPDATE analysis SET status='CANCELED' WHERE gfacID='$gfacID'";
            $message = "Job status request reports job is CANCELED";
            break;

        case 'FAILED' :
            $sql     = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";
            $message = "Job status request reports job is FAILED";
            break;

        case 'UNKNOWN' :
            write_log( "$loghdr job_status='UNKNOWN', reset to 'ERROR' " );
            $sql     = "UPDATE analysis SET status='ERROR' WHERE gfacID='$gfacID'";
            $message = "Job status request reports job is not in the queue";
            break;

        default :
            // We shouldn't ever get here
            $sql     = "UPDATE analysis SET status='ERROR' WHERE gfacID='$gfacID'";
            $message = "Job status was not recognized - $job_status";
            write_log( "$loghdr update_job_status: " .
                       "Job status was not recognized - $job_status\n" .
                       "gfacID = $gfacID\n" );
            break;
    }

    if ( $sql !== null )
    {
        // Keep the global $query in step with the statement actually run,
        // as the original code did
        $query  = $sql;
        $result = mysqli_query( $gLink, $query );

        if ( ! $result )
            write_log( "$loghdr Query failed $query - " . mysqli_error( $gLink ) );
    }

    if ( $message != 'NONE' )
    {
        update_queue_messages( $message );
        update_db( $message );
    }
}
619
/**
 * Look up the current job (global $gfacID) in the HPCAnalysisResult table of
 * the job's own database (global $us3_db).
 *
 * On success the global $updateTime is overwritten with the UNIX timestamp
 * of the row's updateTime column (or null when no row matches).
 *
 * @return mixed The HPCAnalysisRequestID of the matching row (null when no
 *               row matches), or 0 when the connection or the query fails
 */
function get_us3_data()
{
    global $self;
    global $gfacID;
    global $dbhost;
    global $user;
    global $passwd;
    global $us3_db;
    global $updateTime;
    global $loghdr;

    $us3_link = mysqli_connect( $dbhost, $user, $passwd, $us3_db );

    if ( ! $us3_link )
    {
        // The password is deliberately left out: credentials do not belong
        // in the log file
        write_log( "$loghdr could not connect: $dbhost, $user, $us3_db" );
        mail_to_admin( "fail", "Could not connect to $dbhost : $us3_db" );
        return 0;
    }

    $query  = "SELECT HPCAnalysisRequestID, UNIX_TIMESTAMP(updateTime) " .
              "FROM HPCAnalysisResult WHERE gfacID='$gfacID'";
    $result = mysqli_query( $us3_link, $query );

    if ( ! $result )
    {
        $error = mysqli_error( $us3_link );
        write_log( "$self: Query failed $query - " . $error );
        mail_to_admin( "fail", "Query failed $query\n" . $error );

        // Do not leak the connection on the failure path
        mysqli_close( $us3_link );
        return 0;
    }

    list( $requestID, $updateTime ) = mysqli_fetch_array( $result );
    mysqli_close( $us3_link );

    return $requestID;
}
656
/**
 * Determine whether an ID denotes a GFAC job.
 *
 * An ID qualifies when it starts with "US3-Experiment" (case-insensitive)
 * or consists of "US3-" followed by five hyphen-separated groups of
 * 8, 4, 4, 4 and 12 hexadecimal digits.
 *
 * @param string $gfacID Job ID to test
 * @return bool True for a GFAC job, false otherwise
 */
function is_gfac_job( $gfacID )
{
    if ( preg_match( "/^US3-Experiment/i", $gfacID ) )
    {
        return true;
    }

    $hex     = "[0-9a-fA-F]";
    $pattern = '/^US3-' . $hex . '{8}-' . $hex . '{4}-' . $hex . '{4}-' .
               $hex . '{4}-' . $hex . '{12}$/';

    if ( preg_match( $pattern, $gfacID ) )
    {
        return true;
    }

    // Then it's not a GFAC job
    return false;
}
670
/**
 * Determine whether an ID denotes an Airavata/Thrift job, i.e. whether it
 * contains "US3-A" (case-insensitive).
 *
 * @param string $gfacID Job ID to test
 * @return bool True for an Airavata/Thrift job, false otherwise
 */
function is_aira_job( $gfacID )
{
    return (bool) preg_match( "/US3-A/i", $gfacID );
}
684
/**
 * Get the current job status from Airavata or GFAC.
 *
 * Airavata jobs: the experiment status is fetched through
 * getExperimentStatus(), stored in the global $status_ex, and returned
 * after mapping through standard_status(). An EXECUTING experiment is first
 * treated as ACTIVE when the global $status_gw is RUNNING, else as QUEUED.
 *
 * GFAC jobs: the status is requested from "$serviceURL/jobstatus/<id>" and
 * parsed out of the XML reply; DONE, DATA and RESULTS_GEN are all reported
 * as DATA.
 *
 * @param string $gfacID Job ID
 * @return string|false Status string; 'GFAC_STATUS_UNAVAILABLE' when the
 *                      HTTP request fails; false when the ID is neither an
 *                      Airavata nor a GFAC job
 */
function get_gfac_status( $gfacID )
{
    global $serviceURL;
    global $self;
    global $loghdr;
    global $cluster;
    global $status_ex, $status_gw;

    if ( is_aira_job( $gfacID ) )
    {
        $status_ex = getExperimentStatus( $gfacID );

        if ( $status_ex == 'EXECUTING' )
        {
            $status_ex = ( $status_gw == 'RUNNING' ) ? 'ACTIVE' : 'QUEUED';
        }

        return standard_status( $status_ex );
    }

    if ( ! is_gfac_job( $gfacID ) )
    {
        return false;
    }

    $url = "$serviceURL/jobstatus/$gfacID";
    try
    {
        $post = new HttpRequest( $url, HttpRequest::METH_GET );
        $http = $post->send();
        $xml  = $post->getResponseBody();
    }
    catch ( HttpException $e )
    {
        write_log( "$loghdr Status not available - marking failed - $gfacID" );
        return 'GFAC_STATUS_UNAVAILABLE';
    }

    // Parse the result
    $gfac_status = parse_response( $xml );

    // This may not seem like the best place to do this, but here we have
    // the xml straight from GFAC: report any status we do not know about
    $status_types = array( 'SUBMITTED', 'SUBMITED', 'INITIALIZED', 'PENDING',
                           'RUNNING', 'ACTIVE', 'STARTED',
                           'COMPLETED', 'FINISHED', 'DONE',
                           'DATA', 'RESULTS_GEN',
                           'CANCELED', 'CANCELLED', 'FAILED',
                           'STAGING', 'UNKNOWN' );

    if ( ! in_array( $gfac_status, $status_types ) )
    {
        mail_to_admin( 'debug', "gfacID: /$gfacID/\n" .
                                "XML:    /$xml/\n" .
                                "Status: /$gfac_status/\n" );
    }

    $data_states = array( 'DONE', 'DATA', 'RESULTS_GEN' );
    if ( in_array( $gfac_status, $data_states ) )
    {
        $gfac_status = 'DATA';
    }

    return $gfac_status;
}
760
/**
 * Request data outputs from GFAC for a job.
 *
 * @param string $gfacID Job ID
 * @return string|false For a non-GFAC job, the status from
 *                      get_local_status(); false when the job status is not
 *                      DONE, FAILED, COMPLETE or FINISHED, or when the HTTP
 *                      request fails; otherwise the status parsed from the
 *                      "registeroutput" reply
 */
function get_gfac_outputs( $gfacID )
{
    global $serviceURL;
    global $self;

    // Make sure it's a GFAC job and status is appropriate for this call
    $job_status = get_gfac_status( $gfacID );

    if ( $job_status === false )
    {
        // Then it's not a GFAC job
        return get_local_status( $gfacID );
    }

    $ready_states = array( 'DONE', 'FAILED', 'COMPLETE', 'FINISHED' );
    if ( ! in_array( $job_status, $ready_states ) )
    {
        // Then it's not appropriate to request data
        return false;
    }

    $url = "$serviceURL/registeroutput/$gfacID";
    try
    {
        $post = new HttpRequest( $url, HttpRequest::METH_GET );
        $http = $post->send();
        $xml  = $post->getResponseBody();
    }
    catch ( HttpException $e )
    {
        write_log( "$self: Data not available - request failed - $gfacID" );
        return false;
    }

    // Temporary, to see what the xml looks like, if we ever get one
    mail_to_admin( "debug", "get_gfac_outputs/\n$xml/" );

    // Parse the result
    return parse_response( $xml );
}
802
/**
 * Extract the status and message from an XML response.
 *
 * The text of a <status> element becomes the return value; the text of any
 * other element is stored in the global $gfac_message (the last one wins).
 * Both are reset to "" first, so empty, non-string or unloadable input
 * yields "" for both instead of an error.
 *
 * @param string $xml XML document
 * @return string Text of the <status> element, or "" if there is none
 */
function parse_response( $xml )
{
    global $gfac_message;

    $status       = "";
    $gfac_message = "";

    // XMLReader::xml() rejects empty input, so there is nothing to parse
    if ( ! is_string( $xml ) || trim( $xml ) === '' )
        return $status;

    $parser = new XMLReader();
    if ( ! $parser->xml( $xml ) )
        return $status;

    // Name of the most recently opened element
    $name = "";

    while ( $parser->read() )
    {
        $type = $parser->nodeType;

        if ( $type == XMLReader::ELEMENT )
        {
            $name = $parser->name;
        }
        else if ( $type == XMLReader::TEXT )
        {
            if ( $name == "status" )
                $status = $parser->value;
            else
                $gfac_message = $parser->value;
        }
    }

    $parser->close();
    return $status;
}
832
/**
 * Get the status of a job from the local cluster's queue.
 *
 * Runs "squeue -j" when the global $cluster name contains "jetstream" and
 * "/usr/bin/qstat -a" otherwise, through ssh unless the cluster name
 * contains "us3iab", and maps the job-state code found in the last output
 * line onto a status string.
 *
 * When ssh reports an "ssh_exchange_identification" error the command is
 * re-run up to 3 times with a doubling delay (2, 4, 8 seconds). Previously
 * the loop only slept and never re-ran the command.
 *
 * @param string $gfacID Job ID as known to the queue
 * @return string One of ACTIVE, COMPLETED, SUBMITTED, CANCELED, FAILED or
 *                UNKNOWN
 */
function get_local_status( $gfacID )
{
    global $cluster;
    global $self;

    $is_jetstr = preg_match( "/jetstream/", $cluster );
    if ( $is_jetstr )
        $cmd = "squeue -j $gfacID 2>&1|tail -n 1";
    else
        $cmd = "/usr/bin/qstat -a $gfacID 2>&1|tail -n 1";

    if ( ! preg_match( "/us3iab/", $cluster ) )
    {
        // Run the queue command on the cluster itself
        $system = $is_jetstr ? "$cluster" : "$cluster.uthscsa.edu";
        $system = preg_replace( "/\-local/", "", $system );
        $cmd    = "/usr/bin/ssh -x us3@$system " . $cmd;
    }

    $result = exec( $cmd );

    // Sleep and retry up to 3 times if ssh has "ssh_exchange_identification" error
    $secwait = 2;
    $num_try = 0;
    while ( preg_match( "/ssh_exchange_id/", $result ) && $num_try < 3 )
    {
        sleep( $secwait );
        $num_try++;
        $secwait *= 2;
        write_log( "$self: num_try=$num_try secwait=$secwait" );

        // Actually retry the command
        $result = exec( $cmd );
    }

    if ( preg_match( "/^qstat: Unknown/", $result ) ||
         preg_match( "/ssh_exchange_id/", $result ) )
    {
        write_log( "$self get_local_status: Local job $gfacID unknown result=$result" );
        return 'UNKNOWN';
    }

    // The state code is field 5 of squeue output and field 9 of qstat
    // output; a missing field is treated like an empty state
    $values = preg_split( "/\s+/", $result );
    $column = $is_jetstr ? 5 : 9;
    $jstat  = isset( $values[ $column ] ) ? $values[ $column ] : '';

    switch ( $jstat )
    {
        case "W"  :                      // Waiting for execution time to be reached
        case "E"  :                      // Job is exiting after having run
        case "R"  :                      // Still running
        case "CG" :                      // Job is completing
            $status = 'ACTIVE';
            break;

        case "C"  :                      // Job has completed
        case "ST" :                      // Job has disappeared
        case "CD" :                      // Job has completed
            $status = 'COMPLETED';
            break;

        case "T"  :                      // Job is being moved
        case "H"  :                      // Held
        case "Q"  :                      // Queued
        case "PD" :                      // Queued
        case "CF" :                      // Queued
            $status = 'SUBMITTED';
            break;

        case "CA" :                      // Job has been canceled
            $status = 'CANCELED';
            break;

        case "F"  :                      // Job has failed
        case "BF" :                      // Job has failed
        case "NF" :                      // Job has failed
        case "TO" :                      // Job has timed out
        case ""   :                      // Job has disappeared
            $status = 'FAILED';
            break;

        default   :
            $status = 'UNKNOWN';         // This should not occur
            break;
    }

    return $status;
}
924
/**
 * Record a message for the current job (global $gfacID) in the
 * queue_messages table of the GFAC database.
 *
 * Query failures are logged and otherwise ignored.
 *
 * @param string $message Message text to store
 */
function update_queue_messages( $message )
{
    global $self;
    global $gLink;
    global $gfacID;

    // Get analysis table ID
    $query  = "SELECT id FROM analysis " .
              "WHERE gfacID = '$gfacID' ";
    $result = mysqli_query( $gLink, $query );

    if ( ! $result )
    {
        write_log( "$self: Query failed $query - " . mysqli_error( $gLink ) );
        return;
    }

    list( $analysisID ) = mysqli_fetch_array( $result );

    // Insert message into queue_message table
    $escaped = mysqli_real_escape_string( $gLink, $message );
    $query   = "INSERT INTO queue_messages SET " .
               "message = '" . $escaped . "', " .
               "analysisID = '$analysisID' ";

    if ( ! mysqli_query( $gLink, $query ) )
    {
        write_log( "$self: Query failed $query - " . mysqli_error( $gLink ) );
    }
}
953
/**
 * Store a message as lastMessage of the current job (global $gfacID) in the
 * HPCAnalysisResult table of the job's own database (global $us3_db).
 *
 * @param string $message Message text to store
 * @return int|null 0 when the database connection fails, otherwise nothing
 */
function update_db( $message )
{
    global $self;
    global $gfacID;
    global $dbhost;
    global $user;
    global $passwd;
    global $us3_db;

    $us3_link = mysqli_connect( $dbhost, $user, $passwd, $us3_db );

    if ( ! $us3_link )
    {
        // The password is deliberately left out: credentials do not belong
        // in the log file
        write_log( "$self: could not connect: $dbhost, $user, $us3_db" );
        mail_to_admin( "fail", "Could not connect to $dbhost : $us3_db" );
        return 0;
    }

    $query = "UPDATE HPCAnalysisResult SET " .
             "lastMessage='" . mysqli_real_escape_string( $us3_link, $message ) . "' " .
             "WHERE gfacID = '$gfacID' ";

    // Log a failed update the same way the other queries in this file do
    if ( ! mysqli_query( $us3_link, $query ) )
        write_log( "$self: Query failed $query - " . mysqli_error( $us3_link ) );

    mysqli_close( $us3_link );
}
979
/**
 * Mail an error notification to the administrator address.
 *
 * The body reports the global $updateTime, $status and $cluster values
 * followed by the given message.
 *
 * @param string $type Notification category; not used by this function
 * @param string $msg  Error text appended to the message body
 */
function mail_to_admin( $type, $msg )
{
    global $updateTime;
    global $status;
    global $cluster;
    global $org_name;
    global $admin_email;
    global $dbhost;
    global $requestID;

    // Used in the Message-ID to try to avoid spam filters
    $now = time();

    $header_lines = array(
        "From: $org_name Admin<$admin_email>",
        "Cc: $org_name Admin<$admin_email>",
        "Bcc: Gary Gorbet<gegorbet@gmail.com>",        // make sure
        // Set the reply address
        "Reply-To: $org_name<$admin_email>",
        "Return-Path: $org_name<$admin_email>",
        "Message-ID: <" . $now . "gridctl@$dbhost>$requestID",
        "X-Mailer: PHP v" . phpversion(),
        "MIME-Version: 1.0",
        "Content-Transfer-Encoding: 8bit"
    );
    $headers = implode( "\n", $header_lines ) . "\n";

    $subject = "US3 Error Notification";
    $message = "
 UltraScan job error notification from gridctl.php:

 Update Time : $updateTime
 GFAC Status : $status
 Cluster : $cluster
 ";

    $message .= "Error Message : $msg\n";

    mail( $admin_email, $subject, $message, $headers );
}
[6]1018
/**
 * Convert a status string to one of the standard DB status strings.
 *
 * Known variations are mapped to the standard gateway value; any other
 * value (including the already-standard ERROR, RUNNING, UPDATING, CANCELED,
 * DATA, FAILED, DONE and FINISHED) is returned unchanged.
 *
 * The unreachable "$status = 'DATA'" statements that had lost their case
 * labels, and the duplicate SUBMITTED label, are removed; the mapping of
 * every input is unchanged.
 *
 * @param string $status_in Status string to normalise
 * @return string Standard status string
 */
function standard_status( $status_in )
{
    switch ( $status_in )
    {   // Map variations to standard gateway status values
        case 'QUEUED'    :
        case 'LAUNCHED'  :
        case 'CREATED'   :
        case 'VALIDATED' :
        case 'SCHEDULED' :
        case 'submitted' :
        case 'SUBMITTED' :
        case ''          :
            $status = 'SUBMITTED';
            break;

        case 'EXECUTING' :
        case 'ACTIVE'    :
        case 'running'   :
        case 'executing' :
            $status = 'RUNNING';
            break;

        case 'PENDING'   :
        case 'CANCELING' :
            $status = 'UPDATING';
            break;

        case 'CANCELLED' :
        case 'canceled'  :
            $status = 'CANCELED';
            break;

        case 'COMPLETED' :
        case 'completed' :
            $status = 'COMPLETE';
            break;

        case 'FAILED_DATA'    :
        case 'SUBMIT_TIMEOUT' :
        case 'RUN_TIMEOUT'    :
        case 'DATA_TIMEOUT'   :
            $status = 'FAILED';
            break;

        case 'COMPLETE' :
            $status = 'DONE';
            break;

        case 'UNKNOWN' :
            $status = 'ERROR';
            break;

        // Where already standard value (or unrecognised), retain value
        default :
            $status = $status_in;
            break;
    }

    return $status;
}
1092
/**
 * Determine the current status of an Airavata/Thrift job.
 *
 * The experiment status is only consulted when the ID contains "US3-A" and
 * the job's prod/devel type matches this server; otherwise the incoming
 * status is returned unchanged. When the resulting status differs from the
 * incoming one, it is stored through update_job_status().
 *
 * @param string $gfacID    Job ID
 * @param string $status_in Status currently stored for the job
 * @return string Resulting status
 */
function aira_status( $gfacID, $status_in )
{
    global $self;
    global $loghdr;
    global $class_dir;

    $status_gw = $status_in;
    $status    = $status_gw;
    $me_devel  = preg_match( "/class_devel/", $class_dir );
    $job_devel = preg_match( "/US3-ADEV/i", $gfacID );
    $devmatch  = ( ( !$me_devel && !$job_devel ) ||
                   ( $me_devel && $job_devel ) );

    // Not an Airavata job, or one that belongs to the other server type
    if ( ! preg_match( "/US3-A/i", $gfacID ) || ! $devmatch )
    {
        return $status;
    }

    $status_ex   = getExperimentStatus( $gfacID );
    $gw_finished = ( $status_gw == 'FINISHED' || $status_gw == 'DONE' );

    if ( $status_ex == 'COMPLETED' )
    {
        // COMPLETED + FINISHED/DONE     : gateway status is now COMPLETE
        // COMPLETED + NOT-FINISHED/DONE : gateway status is now DONE
        $status = $gw_finished ? 'COMPLETE' : 'DONE';
    }
    else if ( $gw_finished )
    {
        // Gfac status == FINISHED/DONE: leave as is (unless FAILED)
        $status = $status_gw;

        if ( $status_ex == 'FAILED' )
        {
            // Re-check the experiment status up to twice, 10 seconds apart
            sleep( 10 );
            $status_ex = getExperimentStatus( $gfacID );

            if ( $status_ex == 'FAILED' )
            {
                write_log( "$loghdr status still 'FAILED' after 10-second delay" );
                sleep( 10 );
                $status_ex = getExperimentStatus( $gfacID );

                if ( $status_ex == 'FAILED' )
                    write_log( "$loghdr status still 'FAILED' after 20-second delay" );
                else
                    write_log( "$loghdr status is $status_ex after 20-second delayed retry" );
            }

            write_log( "$loghdr status reset to 'COMPLETE'" );
            $status = 'COMPLETE';
        }
    }
    else if ( $status_ex == 'EXECUTING' )
    {
        $status = standard_status( $status_gw );
        write_log( "$loghdr status/_in/_gw/_ex=$status/$status_in/$status_gw/$status_ex" );
    }
    else
    {
        // Experiment not COMPLETED/FINISHED/DONE: use experiment status
        $status = standard_status( $status_ex );
    }

    if ( $status != $status_gw )
    {
        update_job_status( $status, $gfacID );
    }

    return $status;
}
1170
[1]1171?>
Note: See TracBrowser for help on using the repository browser.