source: trunk/gridctl.php@ 30

Last change on this file since 30 was 29, checked in by gegorbet, 8 years ago

gridctl refinement to handle *-local job cleanup

File size: 30.3 KB
RevLine 
<?php
/*
 * gridctl.php
 *
 * Walks every row of the global GFAC DB "analysis" table, determines the
 * current status of each job (Airavata/Thrift, GFAC REST, or local cluster)
 * and dispatches to the handler function for that status.
 *
 * Connection settings ($dbhost, $guser, $gpasswd, $gDB, $user, $passwd),
 * $self, $class_dir and the helpers write_log(), aira_cleanup(),
 * gfac_cleanup() and getExperimentStatus() are not defined in this file;
 * they are expected to come from listen-config.php (or files it includes).
 */

$us3bin = exec( "ls -d ~us3/lims/bin" );
include_once "$us3bin/listen-config.php";
//include "$us3bin/cleanup_aira.php";
//include "$us3bin/cleanup_gfac.php";

// Global variables
$gfac_message = "";
$updateTime = 0;
$submittime = 0;
$cluster = '';

global $status_ex, $status_gw;

// Produce some output temporarily, so cron will send me message
$now = time();
echo "Time started: " . date( 'Y-m-d H:i:s', $now ) . "\n";

// Get data from global GFAC DB
$gLink = mysql_connect( $dbhost, $guser, $gpasswd );

if ( ! $gLink )
{  // Fail early with a clear message instead of a confusing select-DB error
   write_log( "$self: Could not connect to DB host $dbhost - " . mysql_error() );
   mail_to_admin( "fail", "Internal Error: Could not connect to DB host $dbhost" );
   exit();
}

if ( ! mysql_select_db( $gDB, $gLink ) )
{
   write_log( "$self: Could not select DB $gDB - " . mysql_error( $gLink ) );
   mail_to_admin( "fail", "Internal Error: Could not select DB $gDB" );
   exit();
}

// Column order matters: it must match the list() in the loop below
$query  = "SELECT gfacID, us3_db, cluster, status, queue_msg, " .
          "UNIX_TIMESTAMP(time), time from analysis";
$result = mysql_query( $query, $gLink );

if ( ! $result )
{
   write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );
   mail_to_admin( "fail", "Query failed $query\n" . mysql_error( $gLink ) );
   exit();
}

if ( mysql_num_rows( $result ) == 0 )
{
   exit();  // Nothing to do
}

$me_devel = preg_match( "/class_devel/", $class_dir );

// $time receives UNIX_TIMESTAMP(time); $updateTime receives the raw time column
while ( list( $gfacID, $us3_db, $cluster, $status, $queue_msg, $time, $updateTime )
            = mysql_fetch_array( $result ) )
{
   // If this entry does not match class/class_devel, skip processing

   if ( preg_match( "/US3-A/i", $gfacID ) )
   {  // For thrift, job and gridctl must match
      $job_devel = preg_match( "/US3-ADEV/i", $gfacID );
      if ( ( $me_devel && !$job_devel ) ||
           ( !$me_devel && $job_devel ) )
      {  // Job type and Airavata server mismatch: skip processing
         continue;
      }
   }

   else if ( $me_devel )
   {  // Local (us3iab/-local) and class_devel: skip processing
      continue;
   }

   // Checking we need to do for each entry
   echo "us3db=$us3_db gfid=$gfacID\n";
   switch ( $us3_db )
   {
      case 'Xuslims3_cauma3' :
      case 'Xuslims3_cauma3d' :
      case 'Xuslims3_HHU' :
      case 'Xuslims3_Uni_KN' :
         $serviceURL = "http://gridfarm005.ucs.indiana.edu:9090/ogce-rest/job";
         break;

      default :
         // NOTE: $serviceURL is deliberately left untouched here (the former
         // assignment is disabled), so it is unset or carries over from an
         // earlier row.
//       $serviceURL = "http://gridfarm005.ucs.indiana.edu:8080/ogce-rest/job";
         break;
   }

   $gfacLabl  = $gfacID;
   $loghdr    = $self . ":" . $gfacLabl . "...:";
   $status_ex = $status;

   // If entry is for Airvata/Thrift, get the true current status

   if ( is_aira_job( $gfacID ) )
   {
      $status_in = $status;
      $status    = aira_status( $gfacID, $status_in );
      if ( $status != $status_in )
         write_log( "$loghdr Set to $status from $status_in" );
   }
   else if ( is_gfac_job( $gfacID ) )
   {
      $status_gw = $status;
      $status    = get_gfac_status( $gfacID );
      // A gateway status of COMPLETE takes precedence over the queried one
      if ( $status_gw == 'COMPLETE' )
         $status = $status_gw;
   }
   else
   {
      $status_gw = $status;
      $status    = get_local_status( $gfacID );
      // Keep the gateway status when it is COMPLETE or the queue has no answer
      if ( $status_gw == 'COMPLETE' || $status == 'UNKNOWN' )
         $status = $status_gw;
   }

   // Sometimes during testing, the us3_db entry is not set
   // If $status == 'ERROR' then the condition has been processed before
   if ( strlen( $us3_db ) == 0 && $status != 'ERROR' )
   {
      write_log( "$loghdr GFAC DB is NULL - $gfacID" );
      mail_to_admin( "fail", "GFAC DB is NULL\n$gfacID" );

      $query2  = "UPDATE analysis SET status='ERROR' WHERE gfacID='$gfacID'";
      $result2 = mysql_query( $query2, $gLink );
      $status  = 'ERROR';

      if ( ! $result2 )
         write_log( "$loghdr Query failed $query2 - " . mysql_error( $gLink ) );
   }

   switch ( $status )
   {
      // Already been handled
      // Later update this condition to search for gfacID?
      case "ERROR":
         cleanup();
         break;

      case "SUBMITTED":
         submitted( $time );
         break;

      case "SUBMIT_TIMEOUT":
         submit_timeout( $time );
         break;

      case "RUNNING":
      case "STARTED":
      case "STAGING":
      case "ACTIVE":
         running( $time );
         break;

      case "RUN_TIMEOUT":
         run_timeout( $time );
         break;

      case "DATA":
      case "RESULTS_GEN":
         wait_data( $time );
         break;

      case "DATA_TIMEOUT":
         data_timeout( $time );
         break;

      case "COMPLETED":
      case "COMPLETE":
         write_log( "$loghdr COMPLETE gfacID=$gfacID" );
         complete();
         break;

      case "CANCELLED":
      case "CANCELED":
      case "FAILED":
         failed();
         break;

      case "FINISHED":
      case "DONE":
         // Airavata jobs wait here until aira_status() promotes them to COMPLETE
         if ( ! is_aira_job( $gfacID ) )
         {
            complete();
         }
         write_log( "$loghdr FINISHED gfacID=$gfacID" );
         break;

      case "PROCESSING":
      default:
         break;
   }
}

exit();

/**
 * Handle a job whose status is SUBMITTED.
 *
 * Does nothing for the first 10 minutes. During the first 24 hours the job
 * status is re-queried and, if it is no longer a "submitted" state, the
 * analysis table is updated. After 24 hours the job is marked SUBMIT_TIMEOUT
 * and the admin is notified.
 *
 * Works on the current job via the globals $gfacID, $gLink, $self, $loghdr.
 *
 * @param int $updatetime Unix timestamp of the analysis row's time column
 */
function submitted( $updatetime )
{
   global $self;
   global $gLink;
   global $gfacID;
   global $loghdr;

   $now = time();

   if ( $updatetime + 600 > $now ) return;   // < 10 minutes ago

   if ( $updatetime + 86400 > $now )         // Within the first 24 hours
   {
      // get_gfac_status() returns false for non-GFAC jobs: ask the local queue
      if ( ( $job_status = get_gfac_status( $gfacID ) ) === false )
         $job_status = get_local_status( $gfacID );

      if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
         return;

      if ( ! in_array( $job_status, array( 'SUBMITTED', 'INITIALIZED', 'PENDING' ), true ) )
      {
         write_log( "$loghdr submitted:job_status=$job_status" );
         update_job_status( $job_status, $gfacID );
      }

      return;
   }

   $message = "Job listed submitted longer than 24 hours";
   write_log( "$self: $message - id: $gfacID" );
   mail_to_admin( "hang", "$message - id: $gfacID" );
   $query  = "UPDATE analysis SET status='SUBMIT_TIMEOUT' WHERE gfacID='$gfacID'";
   $result = mysql_query( $query, $gLink );

   if ( ! $result )
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

   update_queue_messages( $message );
   update_db( $message );
}

/**
 * Handle a job whose status is SUBMIT_TIMEOUT.
 *
 * Re-queries the job status; if the job has left the "submitted" states the
 * analysis table is updated accordingly. Otherwise, once a further 24 hours
 * have passed since $updatetime, the job is marked FAILED and the admin is
 * notified.
 *
 * Works on the current job via the globals $gfacID, $gLink, $self.
 *
 * @param int $updatetime Unix timestamp of the analysis row's time column
 */
function submit_timeout( $updatetime )
{
   global $self;
   global $gLink;
   global $gfacID;
   global $loghdr;

   // get_gfac_status() returns false for non-GFAC jobs: ask the local queue
   if ( ( $job_status = get_gfac_status( $gfacID ) ) === false )
      $job_status = get_local_status( $gfacID );

   if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
      return;

   if ( ! in_array( $job_status, array( 'SUBMITTED', 'INITIALIZED', 'PENDING' ), true ) )
   {
      update_job_status( $job_status, $gfacID );
      return;
   }

   $now = time();

   if ( $updatetime + 86400 > $now ) return; // < 24 hours ago ( 48 total submitted )

   $message = "Job listed submitted longer than 48 hours";
   write_log( "$self: $message - id: $gfacID" );
   mail_to_admin( "hang", "$message - id: $gfacID" );
   $query  = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";
   $result = mysql_query( $query, $gLink );

   if ( ! $result )
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

   update_queue_messages( $message );
   update_db( $message );
}

/**
 * Handle a job whose status is one of the running states.
 *
 * Refreshes the global $updateTime through get_us3_data(), then: does nothing
 * if the timestamp is under 10 minutes old; within 24 hours re-queries the job
 * status and records any change; after 24 hours marks the job RUN_TIMEOUT and
 * notifies the admin.
 *
 * Works on the current job via the globals $gfacID, $gLink, $self.
 *
 * @param int $updatetime Unix timestamp of the analysis row's time column
 */
function running( $updatetime )
{
   global $self;
   global $gLink;
   global $gfacID;
   global $loghdr;

   $now = time();

   get_us3_data();   // called for its side effect on the global $updateTime

   if ( $updatetime + 600 > $now ) return;   // message received < 10 minutes ago

   if ( $updatetime + 86400 > $now )         // Within the first 24 hours
   {
      // get_gfac_status() returns false for non-GFAC jobs: ask the local queue
      if ( ( $job_status = get_gfac_status( $gfacID ) ) === false )
         $job_status = get_local_status( $gfacID );

      if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
         return;

      if ( ! in_array( $job_status, array( 'ACTIVE', 'RUNNING', 'STARTED' ), true ) )
         update_job_status( $job_status, $gfacID );

      return;
   }

   $message = "Job listed running longer than 24 hours";
   write_log( "$self: $message - id: $gfacID" );
   mail_to_admin( "hang", "$message - id: $gfacID" );
   $query  = "UPDATE analysis SET status='RUN_TIMEOUT' WHERE gfacID='$gfacID'";
   $result = mysql_query( $query, $gLink );

   if ( ! $result )
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

   update_queue_messages( $message );
   update_db( $message );
}

/**
 * Handle a job whose status is RUN_TIMEOUT.
 *
 * Re-queries the job status; if the job has left the running states the
 * analysis table is updated accordingly. Otherwise, once 48 hours have passed
 * since $updatetime, the job is marked FAILED and the admin is notified.
 *
 * Works on the current job via the globals $gfacID, $gLink, $self.
 *
 * @param int $updatetime Unix timestamp of the analysis row's time column
 */
function run_timeout( $updatetime )
{
   global $self;
   global $gLink;
   global $gfacID;
   global $loghdr;

   // get_gfac_status() returns false for non-GFAC jobs: ask the local queue
   if ( ( $job_status = get_gfac_status( $gfacID ) ) === false )
      $job_status = get_local_status( $gfacID );

   if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
      return;

   if ( ! in_array( $job_status, array( 'ACTIVE', 'RUNNING', 'STARTED' ), true ) )
   {
      update_job_status( $job_status, $gfacID );
      return;
   }

   $now = time();

   get_us3_data();   // called for its side effect on the global $updateTime

   if ( $updatetime + 172800 > $now ) return; // < 48 hours ago

   $message = "Job listed running longer than 48 hours";
   write_log( "$self: $message - id: $gfacID" );
   mail_to_admin( "hang", "$message - id: $gfacID" );
   $query  = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";
   $result = mysql_query( $query, $gLink );

   if ( ! $result )
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

   update_queue_messages( $message );
   update_db( $message );
}

/**
 * Handle a job that is waiting for its result data (DATA / RESULTS_GEN).
 *
 * Within the first hour: re-queries the job status, records any change away
 * from DATA, and otherwise asks for the outputs to be resent, but only on
 * runs whose wall-clock minute is a multiple of 5. After one hour the job is
 * marked DATA_TIMEOUT and the admin is notified.
 *
 * Works on the current job via the globals $gfacID, $gLink, $self.
 *
 * @param int $updatetime Unix timestamp of the analysis row's time column
 */
function wait_data( $updatetime )
{
   global $self;
   global $gLink;
   global $gfacID;
   global $loghdr;

   $now = time();

   if ( $updatetime + 3600 > $now )   // Within the first hour
   {
      // get_gfac_status() returns false for non-GFAC jobs: ask the local queue
      if ( ( $job_status = get_gfac_status( $gfacID ) ) === false )
         $job_status = get_local_status( $gfacID );

      if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
         return;

      if ( $job_status != 'DATA' )
      {
         update_job_status( $job_status, $gfacID );
         return;
      }

      // Request to resend data, but only request every 5 minutes
      $minute = date( 'i' ) * 1;   // Makes it an int
      if ( $minute % 5 ) return;

      $output_status = get_gfac_outputs( $gfacID );

      if ( $output_status !== false )
         mail_to_admin( "debug", "wait_data/$gfacID/$output_status" );

      return;
   }

   $message = "Waiting for data longer than 1 hour";
   write_log( "$self: $message - id: $gfacID" );
   mail_to_admin( "hang", "$message - id: $gfacID" );
   $query  = "UPDATE analysis SET status='DATA_TIMEOUT' WHERE gfacID='$gfacID'";
   $result = mysql_query( $query, $gLink );

   if ( ! $result )
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

   update_queue_messages( $message );
   update_db( $message );
}

/**
 * Handle a job whose status is DATA_TIMEOUT.
 *
 * Re-queries the job status and records any change away from DATA. While
 * less than 24 hours have passed since $updatetime, asks for the outputs to
 * be resent on runs whose wall-clock minute is a multiple of 15. After 24
 * hours the job is marked FAILED and the admin is notified.
 *
 * Works on the current job via the globals $gfacID, $gLink, $self.
 *
 * @param int $updatetime Unix timestamp of the analysis row's time column
 */
function data_timeout( $updatetime )
{
   global $self;
   global $gLink;
   global $gfacID;
   global $loghdr;

   // get_gfac_status() returns false for non-GFAC jobs: ask the local queue
   if ( ( $job_status = get_gfac_status( $gfacID ) ) === false )
      $job_status = get_local_status( $gfacID );

   if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
      return;

   if ( $job_status != 'DATA' )
   {
      update_job_status( $job_status, $gfacID );
      return;
   }

   $now = time();

   if ( $updatetime + 86400 > $now )   // < 24 hours ago
   {
      // Request to resend data, but only request every 15 minutes
      $minute = date( 'i' ) * 1;   // Makes it an int
      if ( $minute % 15 ) return;

      $output_status = get_gfac_outputs( $gfacID );

      if ( $output_status !== false )
         mail_to_admin( "debug", "data_timeout/$gfacID/$output_status" );

      return;
   }

   $message = "Waiting for data longer than 24 hours";
   write_log( "$self: $message - id: $gfacID" );
   mail_to_admin( "hang", "$message - id: $gfacID" );
   $query  = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";
   $result = mysql_query( $query, $gLink );

   if ( ! $result )
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

   update_queue_messages( $message );
   update_db( $message );
}

/**
 * Handle a completed job: nothing to do beyond cleanup().
 */
function complete()
{
   // Just cleanup
   cleanup();
}

/**
 * Handle a failed or canceled job: nothing to do beyond cleanup().
 */
function failed()
{
   // Just cleanup
   cleanup();
}

/**
 * Run the post-job cleanup for the current job (global $gfacID).
 *
 * Verifies that the job still exists in the analysis table and that a
 * matching HPC request exists in the us3 DB, then hands off to
 * aira_cleanup() (Airavata jobs whose prod/devel type matches this server)
 * or gfac_cleanup() (all other jobs). Both helpers are defined outside this
 * file.
 */
function cleanup()
{
   global $self;
   global $gLink;
   global $gfacID;
   global $us3_db;
   global $loghdr;
   global $class_dir;

   // Double check that the gfacID exists
   $query  = "SELECT count(*) FROM analysis WHERE gfacID='$gfacID'";
   $result = mysql_query( $query, $gLink );

   if ( ! $result )
   {
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );
      mail_to_admin( "fail", "Query failed $query\n" . mysql_error( $gLink ) );
      return;
   }

   list( $count ) = mysql_fetch_array( $result );

   if ( $count == 0 )
   {
      write_log( "$loghdr count = $count gfacID = $gfacID" );
      return;
   }

   // Now check the us3 instance
   $requestID = get_us3_data();
   if ( $requestID == 0 ) return;

   $me_devel = preg_match( "/class_devel/", $class_dir );

   if ( preg_match( "/US3-A/i", $gfacID ) )
   {  // Airavata job: clean up if prod/devel match
      $job_devel = preg_match( "/US3-ADEV/i", $gfacID );
      if ( ( !$me_devel && !$job_devel ) ||
           ( $me_devel && $job_devel ) )
      {  // Job is of same type (prod/devel) as Server: process it
         aira_cleanup( $us3_db, $requestID, $gLink );
      }
   }
   else
   {  // Non-airavata job: clean up in a non-aira way
      write_log( "$loghdr calling gfac_cleanup() reqID=$requestID" );
      gfac_cleanup( $us3_db, $requestID, $gLink );
   }
}

/**
 * Function to update status of job
 *
 * Maps a reported job status onto the status stored in the analysis table,
 * runs the corresponding UPDATE, and (unless the message is 'NONE') records
 * a human-readable message via update_queue_messages() and update_db().
 *
 * $query is local here. It used to be declared global, so the 'EXECUTING'
 * case, which sets no query of its own, re-executed whatever statement was
 * last left in the global, possibly an UPDATE belonging to a different job.
 *
 * @param string $job_status Status reported for the job
 * @param string $gfacID     Job identifier used in the WHERE clause
 */
function update_job_status( $job_status, $gfacID )
{
   global $gLink;
   global $self;
   global $loghdr;

   $query = null;   // stays null when the status needs no DB update

   switch ( $job_status )
   {
      case 'SUBMITTED'   :
      case 'SUBMITED'    :
      case 'INITIALIZED' :
      case 'UPDATING'    :
      case 'PENDING'     :
         $query   = "UPDATE analysis SET status='SUBMITTED' WHERE gfacID='$gfacID'";
         $message = "Job status request reports job is SUBMITTED";
         break;

      case 'STARTED' :
      case 'RUNNING' :
      case 'ACTIVE'  :
         $query   = "UPDATE analysis SET status='RUNNING' WHERE gfacID='$gfacID'";
         $message = "Job status request reports job is RUNNING";
         break;

      case 'EXECUTING' :
         // Message only; the stored status is left unchanged
         $message = "Job status request reports job is EXECUTING";
         break;

      case 'FINISHED' :
         $query   = "UPDATE analysis SET status='FINISHED' WHERE gfacID='$gfacID'";
         $message = "NONE";
         break;

      case 'DONE' :
         $query   = "UPDATE analysis SET status='DONE' WHERE gfacID='$gfacID'";
         $message = "NONE";
         break;

      case 'COMPLETED' :
      case 'COMPLETE'  :
         $query   = "UPDATE analysis SET status='COMPLETE' WHERE gfacID='$gfacID'";
         $message = "Job status request reports job is COMPLETED";
         break;

      case 'DATA' :
         $query   = "UPDATE analysis SET status='DATA' WHERE gfacID='$gfacID'";
         $message = "Job status request reports job is COMPLETE, waiting for data";
         break;

      case 'CANCELED'  :
      case 'CANCELLED' :
         $query   = "UPDATE analysis SET status='CANCELED' WHERE gfacID='$gfacID'";
         $message = "Job status request reports job is CANCELED";
         break;

      case 'FAILED' :
         $query   = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";
         $message = "Job status request reports job is FAILED";
         break;

      case 'UNKNOWN' :
         write_log( "$loghdr job_status='UNKNOWN', reset to 'ERROR' " );
         $query   = "UPDATE analysis SET status='ERROR' WHERE gfacID='$gfacID'";
         $message = "Job status request reports job is not in the queue";
         break;

      default :
         // We shouldn't ever get here
         $query   = "UPDATE analysis SET status='ERROR' WHERE gfacID='$gfacID'";
         $message = "Job status was not recognized - $job_status";
         write_log( "$loghdr update_job_status: " .
                    "Job status was not recognized - $job_status\n" .
                    "gfacID = $gfacID\n" );
         break;
   }

   if ( $query !== null )
   {
      $result = mysql_query( $query, $gLink );
      if ( ! $result )
         write_log( "$loghdr Query failed $query - " . mysql_error( $gLink ) );
   }

   if ( $message != 'NONE' )
   {
      update_queue_messages( $message );
      update_db( $message );
   }
}

/**
 * Look up the HPC request that belongs to the current job (global $gfacID)
 * in the us3 database named by the global $us3_db.
 *
 * On success the global $updateTime is set to the Unix timestamp of the
 * HPCAnalysisResult row's updateTime column.
 *
 * The DB link is closed on every path once it has been opened, and the
 * password is kept out of the log and the admin mail.
 *
 * @return mixed HPCAnalysisRequestID of the matching row, or 0 when the
 *               connection, DB selection or query fails or no row matches
 */
function get_us3_data()
{
   global $self;
   global $gfacID;
   global $dbhost;
   global $user;
   global $passwd;
   global $us3_db;
   global $updateTime;
   global $loghdr;

   $us3_link = mysql_connect( $dbhost, $user, $passwd );

   if ( ! $us3_link )
   {
      write_log( "$loghdr could not connect: $dbhost, $user" );
      mail_to_admin( "fail", "Could not connect to $dbhost" );
      return 0;
   }

   if ( ! mysql_select_db( $us3_db, $us3_link ) )
   {
      write_log( "$loghdr could not select DB $us3_db" );
      mail_to_admin( "fail", "Could not select DB $us3_db, $dbhost, $user" );
      mysql_close( $us3_link );
      return 0;
   }

   $query  = "SELECT HPCAnalysisRequestID, UNIX_TIMESTAMP(updateTime) " .
             "FROM HPCAnalysisResult WHERE gfacID='$gfacID'";
   $result = mysql_query( $query, $us3_link );

   if ( ! $result )
   {
      write_log( "$self: Query failed $query - " . mysql_error( $us3_link ) );
      mail_to_admin( "fail", "Query failed $query\n" . mysql_error( $us3_link ) );
      mysql_close( $us3_link );
      return 0;
   }

   $row = mysql_fetch_array( $result );
   mysql_close( $us3_link );

   if ( ! $row )
      return 0;   // no matching request; leave $updateTime untouched

   list( $requestID, $updateTime ) = $row;

   return $requestID;
}

/**
 * Function to determine if this is a gfac job or not
 *
 * A GFAC job ID either starts with "US3-Experiment" (case-insensitive) or
 * has the form "US3-" followed by an 8-4-4-4-12 hex UUID.
 *
 * @param string $gfacID Job identifier
 * @return bool True when the ID has one of the GFAC forms
 */
function is_gfac_job( $gfacID )
{
   $hex          = "[0-9a-fA-F]";
   $uuid_pattern = "/^US3-{$hex}{8}-{$hex}{4}-{$hex}{4}-{$hex}{4}-{$hex}{12}$/";

   return preg_match( "/^US3-Experiment/i", $gfacID ) === 1
       || preg_match( $uuid_pattern, $gfacID ) === 1;
}

/**
 * Function to determine if this is an airavata/thrift job or not
 *
 * @param string $gfacID Job identifier
 * @return bool True when the ID contains "US3-A" (case-insensitive)
 */
function is_aira_job( $gfacID )
{
   return preg_match( "/US3-A/i", $gfacID ) === 1;
}

/**
 * Function to get the current job status from GFAC
 *
 * - Airavata jobs: asks getExperimentStatus() (defined outside this file),
 *   stores the answer in the global $status_ex and returns it mapped through
 *   standard_status(). An 'EXECUTING' answer is first turned into 'ACTIVE'
 *   when the global gateway status $status_gw is 'RUNNING', else 'QUEUED'.
 * - Jobs that are neither Airavata nor GFAC: returns false.
 * - GFAC jobs: queries "$serviceURL/jobstatus/$gfacID" and parses the XML
 *   reply. DONE, DATA and RESULTS_GEN are all reported as 'DATA'.
 *
 * @param string $gfacID Job identifier
 * @return mixed Status string, 'GFAC_STATUS_UNAVAILABLE' when the HTTP
 *               request fails, or false for a non-GFAC job
 */
function get_gfac_status( $gfacID )
{
   global $serviceURL;
   global $loghdr;
   global $status_ex, $status_gw;

   if ( is_aira_job( $gfacID ) )
   {
      $status_ex = getExperimentStatus( $gfacID );

      if ( $status_ex == 'EXECUTING' )
      {
         if ( $status_gw == 'RUNNING' )
            $status_ex = 'ACTIVE';
         else
            $status_ex = 'QUEUED';
      }

      $gfac_status = standard_status( $status_ex );
      return $gfac_status;
   }

   else if ( ! is_gfac_job( $gfacID ) )
   {
      return false;
   }

   $url = "$serviceURL/jobstatus/$gfacID";
   try
   {
      $post = new HttpRequest( $url, HttpRequest::METH_GET );
      $http = $post->send();
      $xml  = $post->getResponseBody();
   }
   catch ( HttpException $e )
   {
      write_log( "$loghdr Status not available - marking failed - $gfacID" );
      return 'GFAC_STATUS_UNAVAILABLE';
   }

   // Parse the result
   $gfac_status = parse_response( $xml );

   // This may not seem like the best place to do this, but here we have
   // the xml straight from GFAC
   $status_types = array( 'SUBMITTED',
                          'SUBMITED',
                          'INITIALIZED',
                          'PENDING',
                          'RUNNING',
                          'ACTIVE',
                          'STARTED',
                          'COMPLETED',
                          'FINISHED',
                          'DONE',
                          'DATA',
                          'RESULTS_GEN',
                          'CANCELED',
                          'CANCELLED',
                          'FAILED',
                          'STAGING',
                          'UNKNOWN' );
   if ( ! in_array( $gfac_status, $status_types ) )
      mail_to_admin( 'debug', "gfacID: /$gfacID/\n" .
                              "XML:    /$xml/\n"    .
                              "Status: /$gfac_status/\n" );

   if ( in_array( $gfac_status, array( 'DONE', 'DATA', 'RESULTS_GEN' ) ) )
      $gfac_status = 'DATA';

   return $gfac_status;
}

/**
 * Function to request data outputs from GFAC
 *
 * For a non-GFAC job the local queue status is returned instead. For other
 * jobs the request to "$serviceURL/registeroutput/$gfacID" is only made when
 * the current status is DONE, FAILED, COMPLETE or FINISHED.
 *
 * NOTE(review): get_gfac_status() reports DONE/DATA/RESULTS_GEN from the GFAC
 * service as 'DATA', which is not in the accepted list below, so for GFAC
 * REST jobs this gate looks like it can only pass on FAILED or FINISHED.
 * Confirm whether 'DATA' was meant to be accepted.
 *
 * @param string $gfacID Job identifier
 * @return mixed Status parsed from the reply, the local status string for a
 *               non-GFAC job, or false when no request was made or it failed
 */
function get_gfac_outputs( $gfacID )
{
   global $serviceURL;
   global $self;

   // Make sure it's a GFAC job and status is appropriate for this call
   if ( ( $job_status = get_gfac_status( $gfacID ) ) === false )
   {
      // Then it's not a GFAC job
      $job_status = get_local_status( $gfacID );
      return $job_status;
   }

   if ( ! in_array( $job_status, array( 'DONE', 'FAILED', 'COMPLETE', 'FINISHED' ) ) )
   {
      // Then it's not appropriate to request data
      return false;
   }

   $url = "$serviceURL/registeroutput/$gfacID";
   try
   {
      $post = new HttpRequest( $url, HttpRequest::METH_GET );
      $http = $post->send();
      $xml  = $post->getResponseBody();
   }
   catch ( HttpException $e )
   {
      write_log( "$self: Data not available - request failed - $gfacID" );
      return false;
   }

   // Temporary, to see what the xml looks like, if we ever get one
   mail_to_admin( "debug", "get_gfac_outputs/\n$xml/" );

   // Parse the result
   $gfac_status = parse_response( $xml );

   return $gfac_status;
}

/**
 * Extract the job status from a GFAC XML reply.
 *
 * The text of the last <status> element becomes the return value. The text
 * of any other element is stored in the global $gfac_message, so the last
 * such text wins. Both start out as empty strings.
 *
 * Empty input returns "" without touching the XML parser, and the current
 * element name is initialised before it is first read.
 *
 * @param string $xml XML document returned by the GFAC service
 * @return string Status text, or "" when none was found
 */
function parse_response( $xml )
{
   global $gfac_message;

   $status       = "";
   $gfac_message = "";

   // XMLReader rejects an empty document (warning or exception by version)
   if ( $xml === null || $xml === false || $xml === "" )
      return $status;

   $parser = new XMLReader();
   if ( ! $parser->xml( $xml ) )
      return $status;

   $name = "";   // name of the most recently opened element

   while ( $parser->read() )
   {
      $type = $parser->nodeType;

      if ( $type == XMLReader::ELEMENT )
         $name = $parser->name;

      else if ( $type == XMLReader::TEXT )
      {
         if ( $name == "status" )
            $status       = $parser->value;
         else
            $gfac_message = $parser->value;
      }
   }

   $parser->close();
   return $status;
}

/**
 * Function to get status from local cluster
 *
 * Runs "qstat -a <gfacID>" and reads the tenth whitespace-separated field of
 * the last output line as the job state. Unless the global $cluster name
 * contains "us3iab", qstat is run over ssh on "<cluster>.uthscsa.edu" with
 * any "-local" suffix removed from the host name.
 *
 * The job ID and ssh target are passed through escapeshellarg(), and a short
 * output line is treated as UNKNOWN instead of reading a missing field.
 *
 * @param string $gfacID Local job identifier
 * @return string 'ACTIVE', 'COMPLETED', 'SUBMITTED' or 'UNKNOWN'
 */
function get_local_status( $gfacID )
{
   global $cluster;
   global $self;

   // "2>&1|tail -n 1" is interpreted by the local shell in both variants
   $cmd = "/usr/bin/qstat -a " . escapeshellarg( $gfacID ) . " 2>&1|tail -n 1";

   if ( ! preg_match( "/us3iab/", $cluster ) )
   {
      $system = "$cluster.uthscsa.edu";
      $system = preg_replace( "/\-local/", "", $system );
      $cmd    = "/usr/bin/ssh -x " . escapeshellarg( "us3@$system" ) . " " . $cmd;
   }

   $result = exec( $cmd );

   if ( $result == ""  ||  preg_match( "/^qstat: Unknown/", $result ) )
   {
      write_log( "$self get_local_status: Local job $gfacID unknown" );
      return 'UNKNOWN';
   }

   $values = preg_split( "/\s+/", $result );
   $state  = isset( $values[ 9 ] ) ? $values[ 9 ] : '';

   switch ( $state )
   {
      case "W" :                      // Waiting for execution time to be reached
      case "E" :                      // Job is exiting after having run
      case "R" :                      // Still running
         $status = 'ACTIVE';
         break;

      case "C" :                      // Job has completed
         $status = 'COMPLETED';
         break;

      case "T" :                      // Job is being moved
      case "H" :                      // Held
      case "Q" :                      // Queued
         $status = 'SUBMITTED';
         break;

      default :
         $status = 'UNKNOWN';         // This should not occur
         break;
   }

   return $status;
}

/**
 * Append a message to the queue_messages table for the current job.
 *
 * Looks up the analysis table id for the global $gfacID and inserts the
 * message against it. If the job has no analysis row, the miss is logged and
 * nothing is inserted, so no message is stored with an empty analysisID.
 *
 * @param string $message Text to record
 */
function update_queue_messages( $message )
{
   global $self;
   global $gLink;
   global $gfacID;

   // Get analysis table ID
   $query  = "SELECT id FROM analysis " .
             "WHERE gfacID = '$gfacID' ";
   $result = mysql_query( $query, $gLink );
   if ( ! $result )
   {
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );
      return;
   }

   $row = mysql_fetch_array( $result );
   if ( ! $row )
   {
      write_log( "$self: No analysis entry for gfacID $gfacID - queue message not stored" );
      return;
   }
   list( $analysisID ) = $row;

   // Insert message into queue_message table
   $query  = "INSERT INTO queue_messages SET " .
             "message = '" . mysql_real_escape_string( $message, $gLink ) . "', " .
             "analysisID = '$analysisID' ";
   $result = mysql_query( $query, $gLink );
   if ( ! $result )
   {
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );
      return;
   }
}

/**
 * Store a message as lastMessage on the current job's HPCAnalysisResult row
 * in the us3 database named by the global $us3_db.
 *
 * The generated SQL has a space before WHERE, the link is closed on every
 * path once opened, a failed UPDATE is logged, and the password is kept out
 * of the log and the admin mail.
 *
 * @param string $message Text to record
 * @return mixed 0 when the connection or DB selection fails, otherwise nothing
 */
function update_db( $message )
{
   global $self;
   global $gfacID;
   global $dbhost;
   global $user;
   global $passwd;
   global $us3_db;

   $us3_link = mysql_connect( $dbhost, $user, $passwd );

   if ( ! $us3_link )
   {
      write_log( "$self: could not connect: $dbhost, $user" );
      mail_to_admin( "fail", "Could not connect to $dbhost" );
      return 0;
   }

   if ( ! mysql_select_db( $us3_db, $us3_link ) )
   {
      write_log( "$self: could not select DB $us3_db" );
      mail_to_admin( "fail", "Could not select DB $us3_db, $dbhost, $user" );
      mysql_close( $us3_link );
      return 0;
   }

   $query = "UPDATE HPCAnalysisResult SET " .
            "lastMessage='" . mysql_real_escape_string( $message, $us3_link ) . "' " .
            "WHERE gfacID = '$gfacID' ";

   if ( ! mysql_query( $query, $us3_link ) )
      write_log( "$self: Query failed $query - " . mysql_error( $us3_link ) );

   mysql_close( $us3_link );
}

/**
 * Send an error notification e-mail to the administrator.
 *
 * The body reports the globals $updateTime, $status and $cluster followed by
 * $msg. $type is accepted for the callers' benefit but is not used in the
 * mail.
 *
 * The request ID is placed inside the angle brackets of the Message-ID;
 * it used to be appended after the closing ">".
 *
 * @param string $type Notification category (e.g. "fail", "hang", "debug")
 * @param string $msg  Error text appended to the mail body
 */
function mail_to_admin( $type, $msg )
{
   global $updateTime;
   global $status;
   global $cluster;
   global $org_name;
   global $admin_email;
   global $dbhost;
   global $requestID;

   $headers  = "From: $org_name Admin<$admin_email>"     . "\n";
   $headers .= "Cc: $org_name Admin<$admin_email>"       . "\n";
   $headers .= "Bcc: Gary Gorbet<gegorbet@gmail.com>"    . "\n";   // make sure

   // Set the reply address
   $headers .= "Reply-To: $org_name<$admin_email>"       . "\n";
   $headers .= "Return-Path: $org_name<$admin_email>"    . "\n";

   // Try to avoid spam filters
   $now      = time();
   $headers .= "Message-ID: <" . $now . "gridctl" . $requestID . "@$dbhost>\n";
   $headers .= "X-Mailer: PHP v" . phpversion()          . "\n";
   $headers .= "MIME-Version: 1.0"                       . "\n";
   $headers .= "Content-Transfer-Encoding: 8bit"         . "\n";

   $subject  = "US3 Error Notification";
   $message  = "
   UltraScan job error notification from gridctl.php:

   Update Time    :  $updateTime
   GFAC Status    :  $status
   Cluster        :  $cluster
   ";

   $message .= "Error Message  :  $msg\n";

   mail( $admin_email, $subject, $message, $headers );
}

/**
 * Convert a status string to one of the standard DB status strings
 *
 * Known variations are mapped to the standard gateway value; any other
 * input, including values that are already standard, is returned unchanged.
 *
 * The orphaned "$status = 'DATA'; break;" that followed a break with no case
 * label (and so could never run) is gone, as is the second, shadowed
 * 'SUBMITTED' label. Results are unchanged for every input.
 *
 * @param string $status_in Status as reported by Airavata/GFAC/gateway
 * @return string Standard status value
 */
function standard_status( $status_in )
{
   switch ( $status_in )
   {  // Map variations to standard gateway status values
      case 'QUEUED' :
      case 'LAUNCHED' :
      case 'CREATED' :
      case 'VALIDATED' :
      case 'SCHEDULED' :
      case 'submitted' :
      case 'SUBMITTED' :
      case '' :
         $status = 'SUBMITTED';
         break;

      case 'EXECUTING' :
      case 'ACTIVE' :
      case 'running' :
      case 'executing' :
         $status = 'RUNNING';
         break;

      case 'PENDING' :
      case 'CANCELING' :
         $status = 'UPDATING';
         break;

      case 'CANCELLED' :
      case 'canceled' :
         $status = 'CANCELED';
         break;

      case 'COMPLETED' :
      case 'completed' :
         $status = 'COMPLETE';
         break;

      case 'FAILED_DATA' :
      case 'SUBMIT_TIMEOUT' :
      case 'RUN_TIMEOUT' :
      case 'DATA_TIMEOUT' :
         $status = 'FAILED';
         break;

      case 'COMPLETE' :
         $status = 'DONE';
         break;

      case 'UNKNOWN' :
         $status = 'ERROR';
         break;

      // Where already standard value, retain value
      case 'ERROR' :
      case 'RUNNING' :
      case 'UPDATING' :
      case 'CANCELED' :
      case 'DATA' :
      case 'FAILED' :
      case 'DONE' :
      case 'FINISHED' :
      default :
         $status = $status_in;
         break;
   }

   return $status;
}

/**
 * Determine the current status of an Airavata/Thrift job.
 *
 * Only acts when the job ID contains "US3-A" and the job's prod/devel type
 * matches this server (derived from the global $class_dir); otherwise the
 * incoming status is returned unchanged. The experiment status comes from
 * getExperimentStatus(), which is defined outside this file.
 *
 *  - experiment COMPLETED: COMPLETE if the gateway status is already
 *    FINISHED/DONE, otherwise DONE
 *  - gateway FINISHED/DONE: kept as is, unless the experiment reports
 *    FAILED; then it is re-polled up to twice at 10-second intervals and
 *    the status is set to COMPLETE regardless of the retry outcome
 *  - experiment EXECUTING: standard form of the gateway status
 *  - anything else: standard form of the experiment status
 *
 * When the result differs from the incoming status it is written to the
 * analysis table through update_job_status().
 *
 * @param string $gfacID    Job identifier
 * @param string $status_in Status currently stored for the job
 * @return string Resulting status
 */
function aira_status( $gfacID, $status_in )
{
   global $loghdr;
   global $class_dir;

   $status_gw = $status_in;
   $status    = $status_gw;
   $me_devel  = preg_match( "/class_devel/", $class_dir );
   $job_devel = preg_match( "/US3-ADEV/i", $gfacID );
   $devmatch  = ( ( !$me_devel && !$job_devel ) ||
                  ( $me_devel && $job_devel ) );

   if ( preg_match( "/US3-A/i", $gfacID ) && $devmatch )
   {
      $status_ex = getExperimentStatus( $gfacID );

      if ( $status_ex == 'COMPLETED' )
      {  // Experiment is COMPLETED: check for 'FINISHED' or 'DONE'
         if ( $status_gw == 'FINISHED' || $status_gw == 'DONE' )
         {  // COMPLETED + FINISHED/DONE : gateway status is now COMPLETE
            $status = 'COMPLETE';
         }

         else
         {  // COMPLETED + NOT-FINISHED/DONE: gw status now DONE
            $status = 'DONE';
         }
      }

      else if ( $status_gw == 'FINISHED' || $status_gw == 'DONE' )
      {  // Gfac status == FINISHED/DONE: leave as is (unless FAILED)
         $status = $status_gw;
         if ( $status_ex == 'FAILED' )
         {
            sleep( 10 );
            $status_ex = getExperimentStatus( $gfacID );
            if ( $status_ex == 'FAILED' )
            {
               write_log( "$loghdr status still 'FAILED' after 10-second delay" );
               sleep( 10 );
               $status_ex = getExperimentStatus( $gfacID );
               if ( $status_ex == 'FAILED' )
                  write_log( "$loghdr status still 'FAILED' after 20-second delay" );
               else
                  write_log( "$loghdr status is $status_ex after 20-second delayed retry" );
            }
            write_log( "$loghdr status reset to 'COMPLETE'" );
            $status = 'COMPLETE';
         }
      }

      else if ( $status_ex == 'EXECUTING' )
      {
         $status = standard_status( $status_gw );
         write_log( "$loghdr status/_in/_gw/_ex=$status/$status_in/$status_gw/$status_ex" );
      }

      else
      {  // Experiment not COMPLETED/FINISHED/DONE: use experiment status
         $status = standard_status( $status_ex );
      }

      if ( $status != $status_gw )
      {
         update_job_status( $status, $gfacID );
      }
   }

   return $status;
}

?>
Note: See TracBrowser for help on using the repository browser.