source: trunk/gridctl.php@ 14

Last change on this file since 14 was 14, checked in by gegorbet, 9 years ago

fix to skip all processing where task/job devel/prod do not match

File size: 29.6 KB
RevLine 
<?php

// gridctl main script: reads every row of the global GFAC "analysis" table
// and dispatches each job to the handler that matches its current status.
//
// NOTE(review): $self, $class_dir, $dbhost, $guser, $gpasswd and $gDB are not
// defined in this file; presumably an including script or configuration file
// provides them - confirm.

// Global variables (read by the handler functions through "global")
$gfac_message = "";
$updateTime   = 0;
$submittime   = 0;
$cluster      = '';

// Produce some output temporarily, so cron will send me message
$now = time();
echo "Time started: " . date( 'Y-m-d H:i:s', $now ) . "\n";

// Get data from global GFAC DB
$gLink = mysql_connect( $dbhost, $guser, $gpasswd );

if ( ! $gLink )
{  // Fail early: without a link every later call reports a misleading error
   write_log( "$self: Could not connect to $dbhost - " . mysql_error() );
   mail_to_admin( "fail", "Internal Error: Could not connect to $dbhost" );
   exit();
}

if ( ! mysql_select_db( $gDB, $gLink ) )
{
   write_log( "$self: Could not select DB $gDB - " . mysql_error() );
   mail_to_admin( "fail", "Internal Error: Could not select DB $gDB" );
   exit();
}

$query  = "SELECT gfacID, us3_db, cluster, status, queue_msg, " .
          "UNIX_TIMESTAMP(time), time from analysis";
$result = mysql_query( $query, $gLink );

if ( ! $result )
{
   write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );
   mail_to_admin( "fail", "Query failed $query\n" . mysql_error( $gLink ) );
   exit();
}

if ( mysql_num_rows( $result ) == 0 )
{
   exit();  // Nothing to do
}

// Non-zero when this copy of gridctl runs from a "class_devel" directory
$me_devel = preg_match( "/class_devel/", $class_dir );

// $time receives UNIX_TIMESTAMP(time); $updateTime receives the raw time column
while ( list( $gfacID, $us3_db, $cluster, $status, $queue_msg, $time, $updateTime )
            = mysql_fetch_array( $result ) )
{
   // If this entry does not match class/class_devel, skip processing

   if ( preg_match( "/US3-A/i", $gfacID ) )
   {  // For thrift, job and gridctl must match
      $job_devel = preg_match( "/US3-ADEV/i", $gfacID );
      if ( ( $me_devel && !$job_devel )  ||
           ( !$me_devel && $job_devel ) )
      {  // If job not from appropriate Airavata server, skip processing
         continue;
      }
   }
   else if ( $me_devel )
   {  // For non-thrift and development, skip
      continue;
   }

   // Checking we need to do for each entry
   echo "us3db=$us3_db gfid=$gfacID\n";

   // NOTE(review): the default case leaves $serviceURL untouched, so it is
   // either undefined or still holds the value chosen for an earlier row.
   switch ( $us3_db )
   {
      case 'Xuslims3_cauma3' :
      case 'Xuslims3_cauma3d' :
      case 'Xuslims3_HHU' :
      case 'Xuslims3_Uni_KN' :
         $serviceURL = "http://gridfarm005.ucs.indiana.edu:9090/ogce-rest/job";
         break;

      default :
         break;
   }

   // Log header uses the first three dash-separated parts of the ID; padding
   // keeps the label identical while avoiding undefined-offset notices when
   // the ID has fewer than three parts.
   $awork     = array_pad( explode( "-", $gfacID ), 3, "" );
   $gfacLabl  = $awork[0] . "-" . $awork[1] . "-" . $awork[2];
   $loghdr    = $self . ":" . $gfacLabl . "...:";
   $status_in = $status;
   $status_gw = $status;

   // If entry is for Airvata/Thrift, get the true current status

   if ( is_aira_job( $gfacID ) )
   {
      $status = aira_status( $gfacID, $status_in );

      if ( $status != $status_in )
         write_log( "$loghdr Set to $status from $status_in" );
   }
   else
   {
      $status = get_gfac_status( $gfacID );

      // A gateway status of COMPLETE takes precedence over the queried one
      if ( $status_gw == 'COMPLETE' )
         $status = $status_gw;

      write_log( "$loghdr non-AThrift status=$status status_gw=$status_gw" );
   }

   // Sometimes during testing, the us3_db entry is not set
   // If $status == 'ERROR' then the condition has been processed before
   if ( strlen( $us3_db ) == 0 && $status != 'ERROR' )
   {
      write_log( "$loghdr GFAC DB is NULL - $gfacID" );
      mail_to_admin( "fail", "GFAC DB is NULL\n$gfacID" );

      $query2  = "UPDATE analysis SET status='ERROR' WHERE gfacID='$gfacID'";
      $result2 = mysql_query( $query2, $gLink );
      $status  = 'ERROR';

      if ( ! $result2 )
         write_log( "$loghdr Query failed $query2 - " . mysql_error( $gLink ) );
   }

   switch ( $status )
   {
      // Already been handled
      // Later update this condition to search for gfacID?
      case "ERROR":
         cleanup();
         break;

      case "SUBMITTED":
         submitted( $time );
         break;

      case "SUBMIT_TIMEOUT":
         submit_timeout( $time );
         break;

      case "RUNNING":
      case "STARTED":
      case "STAGING":
      case "ACTIVE":
         running( $time );
         break;

      case "RUN_TIMEOUT":
         run_timeout( $time );
         break;

      case "DATA":
      case "RESULTS_GEN":
         wait_data( $time );
         break;

      case "DATA_TIMEOUT":
         data_timeout( $time );
         break;

      case "COMPLETED":
      case "COMPLETE":
         complete();
         break;

      case "CANCELLED":
      case "CANCELED":
      case "FAILED":
         failed();
         break;

      // No action is taken for these (or for any unlisted status)
      case "FINISHED":
      case "DONE":
      case "PROCESSING":
      default:
         break;
   }
}

exit();
182
/**
 * Handler for entries whose status is SUBMITTED.
 *
 * Does nothing during the first 10 minutes after $updatetime. Up to 24 hours
 * it polls the job status and passes any status other than SUBMITTED,
 * INITIALIZED or PENDING to update_job_status(). After 24 hours the entry is
 * set to SUBMIT_TIMEOUT and the administrator is mailed.
 *
 * @param int $updatetime Unix timestamp the main loop read for this entry
 */
function submitted( $updatetime )
{
   global $self;
   global $gLink;
   global $gfacID;

   $now = time();

   if ( $updatetime + 600 > $now )
      return;                                 // < 10 minutes ago

   if ( $updatetime + 86400 <= $now )
   {  // Listed as submitted for 24 hours or more: flag the timeout
      $message = "Job listed submitted longer than 24 hours";
      write_log( "$self: $message - id: $gfacID" );
      mail_to_admin( "hang", "$message - id: $gfacID" );

      $query = "UPDATE analysis SET status='SUBMIT_TIMEOUT' WHERE gfacID='$gfacID'";

      if ( ! mysql_query( $query, $gLink ) )
         write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

      update_queue_messages( $message );
      update_db( $message );
      return;
   }

   // Within the first 24 hours: ask GFAC first, then the local cluster
   $job_status = get_gfac_status( $gfacID );

   if ( $job_status === false )
      $job_status = get_local_status( $gfacID );

   if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
      return;

   $still_waiting = array( 'SUBMITTED', 'INITIALIZED', 'PENDING' );

   if ( ! in_array( $job_status, $still_waiting ) )
      update_job_status( $job_status, $gfacID );
}
223
/**
 * Handler for entries whose status is SUBMIT_TIMEOUT.
 *
 * Polls the job status first; any status other than SUBMITTED, INITIALIZED or
 * PENDING is passed to update_job_status(). Otherwise, once a further 24
 * hours have passed since $updatetime, the entry is set to FAILED and the
 * administrator is mailed.
 *
 * @param int $updatetime Unix timestamp the main loop read for this entry
 */
function submit_timeout( $updatetime )
{
   global $self;
   global $gLink;
   global $gfacID;

   $job_status = get_gfac_status( $gfacID );

   if ( $job_status === false )
      $job_status = get_local_status( $gfacID );

   if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
      return;

   $still_waiting = array( 'SUBMITTED', 'INITIALIZED', 'PENDING' );

   if ( ! in_array( $job_status, $still_waiting ) )
   {
      update_job_status( $job_status, $gfacID );
      return;
   }

   $now = time();

   if ( $updatetime + 86400 > $now )
      return;                     // < 24 hours ago ( 48 total submitted )

   $message = "Job listed submitted longer than 48 hours";
   write_log( "$self: $message - id: $gfacID" );
   mail_to_admin( "hang", "$message - id: $gfacID" );

   $query = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";

   if ( ! mysql_query( $query, $gLink ) )
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

   update_queue_messages( $message );
   update_db( $message );
}
260
/**
 * Handler for entries whose status is RUNNING, STARTED, STAGING or ACTIVE.
 *
 * Always calls get_us3_data() first. Does nothing more during the first 10
 * minutes after $updatetime. Up to 24 hours it polls the job status and
 * passes any status other than ACTIVE, RUNNING or STARTED to
 * update_job_status(). After 24 hours the entry is set to RUN_TIMEOUT and the
 * administrator is mailed.
 *
 * @param int $updatetime Unix timestamp the main loop read for this entry
 */
function running( $updatetime )
{
   global $self;
   global $gLink;
   global $gfacID;

   $now = time();

   get_us3_data();

   if ( $updatetime + 600 > $now )
      return;                       // message received < 10 minutes ago

   if ( $updatetime + 86400 <= $now )
   {  // Listed as running for 24 hours or more: flag the timeout
      $message = "Job listed running longer than 24 hours";
      write_log( "$self: $message - id: $gfacID" );
      mail_to_admin( "hang", "$message - id: $gfacID" );

      $query = "UPDATE analysis SET status='RUN_TIMEOUT' WHERE gfacID='$gfacID'";

      if ( ! mysql_query( $query, $gLink ) )
         write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

      update_queue_messages( $message );
      update_db( $message );
      return;
   }

   // Within the first 24 hours: ask GFAC first, then the local cluster
   $job_status = get_gfac_status( $gfacID );

   if ( $job_status === false )
      $job_status = get_local_status( $gfacID );

   if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
      return;

   $still_running = array( 'ACTIVE', 'RUNNING', 'STARTED' );

   if ( ! in_array( $job_status, $still_running ) )
      update_job_status( $job_status, $gfacID );
}
303
/**
 * Handler for entries whose status is RUN_TIMEOUT.
 *
 * Polls the job status first; any status other than ACTIVE, RUNNING or
 * STARTED is passed to update_job_status(). Otherwise get_us3_data() is
 * called and, once 172800 seconds (48 hours) have passed since $updatetime,
 * the entry is set to FAILED and the administrator is mailed.
 *
 * @param int $updatetime Unix timestamp the main loop read for this entry
 */
function run_timeout( $updatetime )
{
   global $self;
   global $gLink;
   global $gfacID;

   $job_status = get_gfac_status( $gfacID );

   if ( $job_status === false )
      $job_status = get_local_status( $gfacID );

   if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
      return;

   $still_running = array( 'ACTIVE', 'RUNNING', 'STARTED' );

   if ( ! in_array( $job_status, $still_running ) )
   {
      update_job_status( $job_status, $gfacID );
      return;
   }

   $now = time();

   get_us3_data();

   if ( $updatetime + 172800 > $now )
      return;                                  // < 48 hours ago

   $message = "Job listed running longer than 48 hours";
   write_log( "$self: $message - id: $gfacID" );
   mail_to_admin( "hang", "$message - id: $gfacID" );

   $query = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";

   if ( ! mysql_query( $query, $gLink ) )
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

   update_queue_messages( $message );
   update_db( $message );
}
342
/**
 * Handler for entries whose status is DATA or RESULTS_GEN.
 *
 * Within the first hour after $updatetime it polls the job status; anything
 * other than DATA is passed to update_job_status(). While the status stays
 * DATA, get_gfac_outputs() is called only when the current minute is a
 * multiple of 5. After one hour the entry is set to DATA_TIMEOUT and the
 * administrator is mailed.
 *
 * @param int $updatetime Unix timestamp the main loop read for this entry
 */
function wait_data( $updatetime )
{
   global $self;
   global $gLink;
   global $gfacID;

   $now = time();

   if ( $updatetime + 3600 <= $now )
   {  // Waited an hour or more: flag the timeout
      $message = "Waiting for data longer than 1 hour";
      write_log( "$self: $message - id: $gfacID" );
      mail_to_admin( "hang", "$message - id: $gfacID" );

      $query = "UPDATE analysis SET status='DATA_TIMEOUT' WHERE gfacID='$gfacID'";

      if ( ! mysql_query( $query, $gLink ) )
         write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

      update_queue_messages( $message );
      update_db( $message );
      return;
   }

   // Within the first hour: ask GFAC first, then the local cluster
   $job_status = get_gfac_status( $gfacID );

   if ( $job_status === false )
      $job_status = get_local_status( $gfacID );

   if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
      return;

   if ( $job_status != 'DATA' )
   {
      update_job_status( $job_status, $gfacID );
      return;
   }

   // Request to resend data, but only request every 5 minutes
   if ( (int) date( 'i' ) % 5 )
      return;

   $output_status = get_gfac_outputs( $gfacID );

   if ( $output_status !== false )
      mail_to_admin( "debug", "wait_data/$gfacID/$output_status" );
}
391
/**
 * Handler for entries whose status is DATA_TIMEOUT.
 *
 * Polls the job status first; anything other than DATA is passed to
 * update_job_status(). While the status stays DATA and less than 24 hours
 * have passed since $updatetime, get_gfac_outputs() is called only when the
 * current minute is a multiple of 15. After 24 hours the entry is set to
 * FAILED and the administrator is mailed.
 *
 * @param int $updatetime Unix timestamp the main loop read for this entry
 */
function data_timeout( $updatetime )
{
   global $self;
   global $gLink;
   global $gfacID;

   $job_status = get_gfac_status( $gfacID );

   if ( $job_status === false )
      $job_status = get_local_status( $gfacID );

   if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
      return;

   if ( $job_status != 'DATA' )
   {
      update_job_status( $job_status, $gfacID );
      return;
   }

   $now = time();

   if ( $updatetime + 86400 <= $now )
   {  // Waited 24 hours or more: give up on the job
      $message = "Waiting for data longer than 24 hours";
      write_log( "$self: $message - id: $gfacID" );
      mail_to_admin( "hang", "$message - id: $gfacID" );

      $query = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";

      if ( ! mysql_query( $query, $gLink ) )
         write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

      update_queue_messages( $message );
      update_db( $message );
      return;
   }

   // Request to resend data, but only request every 15 minutes
   if ( (int) date( 'i' ) % 15 )
      return;

   $output_status = get_gfac_outputs( $gfacID );

   if ( $output_status !== false )
      mail_to_admin( "debug", "data_timeout/$gfacID/$output_status" );
}
440
/**
 * Handler for entries whose status is COMPLETED or COMPLETE.
 *
 * Nothing status-specific is done here; the work is delegated to cleanup().
 */
function complete()
{
   cleanup();
}
446
/**
 * Handler for entries whose status is CANCELLED, CANCELED or FAILED.
 *
 * Nothing status-specific is done here; the work is delegated to cleanup().
 */
function failed()
{
   cleanup();
}
452
/**
 * Run the cleanup routine that matches the current job (global $gfacID).
 *
 * Returns early when the analysis row no longer exists or when
 * get_us3_data() yields no request ID. Airavata jobs ("US3-A" IDs) go to
 * aira_cleanup(), but only when the job's development/production flavor
 * matches this gridctl's; all other jobs go to gfac_cleanup(), and only when
 * this gridctl is not the development copy.
 */
function cleanup()
{
   global $self;
   global $gLink;
   global $gfacID;
   global $us3_db;
   global $loghdr;
   global $class_dir;

   // Double check that the gfacID exists
   $query  = "SELECT count(*) FROM analysis WHERE gfacID='$gfacID'";
   $result = mysql_query( $query, $gLink );

   if ( ! $result )
   {
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );
      mail_to_admin( "fail", "Query failed $query\n" . mysql_error( $gLink ) );
      return;
   }

   list( $count ) = mysql_fetch_array( $result );

   if ( $count == 0 )
   {
      write_log( "$loghdr count = $count gfacID = $gfacID" );
      return;
   }

   // Now check the us3 instance
   $requestID = get_us3_data();

   if ( $requestID == 0 )
      return;

   $me_devel = preg_match( "/class_devel/", $class_dir );

   if ( ! preg_match( "/US3-A/i", $gfacID ) )
   {  // If this is gridctl_pro and gfac (jureca), do GFAC cleanup
      if ( ! $me_devel )
         gfac_cleanup( $us3_db, $requestID, $gLink );

      return;
   }

   $job_devel = preg_match( "/US3-ADEV/i", $gfacID );

   // If job from appropriate Airavata server, process it
   if ( (bool) $me_devel == (bool) $job_devel )
      aira_cleanup( $us3_db, $requestID, $gLink );
}
504
/**
 * Record a newly observed job status in the analysis table and post the
 * matching message via update_queue_messages() and update_db().
 *
 * The UPDATE statement is kept in a local variable. For UPDATING and PENDING
 * no statement is built, so no database update is attempted (only the
 * message is posted). Previously $query was a global that stayed unset in
 * that case, so whatever statement was last left in it was executed again.
 *
 * @param string $job_status Status as reported by the status query
 * @param string $gfacID     ID of the job whose analysis row is updated
 */
function update_job_status( $job_status, $gfacID )
{
   global $gLink;
   global $loghdr;

   $query = null;   // Remains null for statuses that need no DB update

   switch ( $job_status )
   {
      case 'SUBMITTED'   :
      case 'SUBMITED'    :
      case 'INITIALIZED' :
         $query   = "UPDATE analysis SET status='SUBMITTED' WHERE gfacID='$gfacID'";
         $message = "Job status request reports job is " . $job_status;
         break;

      case 'STARTED' :
      case 'RUNNING' :
      case 'ACTIVE'  :
         $query   = "UPDATE analysis SET status='RUNNING' WHERE gfacID='$gfacID'";
         $message = "Job status request reports job is RUNNING";
         break;

      case 'FINISHED' :
         $query   = "UPDATE analysis SET status='FINISHED' WHERE gfacID='$gfacID'";
         $message = "NONE";
         break;

      case 'DONE' :
         $query   = "UPDATE analysis SET status='DONE' WHERE gfacID='$gfacID'";
         $message = "NONE";
         break;

      case 'COMPLETED' :
      case 'COMPLETE'  :
         $query   = "UPDATE analysis SET status='COMPLETE' WHERE gfacID='$gfacID'";
         $message = "Job status request reports job is COMPLETED";
         break;

      case 'DATA' :
         $query   = "UPDATE analysis SET status='DATA' WHERE gfacID='$gfacID'";
         $message = "Job status request reports job is COMPLETE, waiting for data";
         break;

      case 'CANCELED'  :
      case 'CANCELLED' :
         $query   = "UPDATE analysis SET status='CANCELED' WHERE gfacID='$gfacID'";
         $message = "Job status request reports job is CANCELED";
         break;

      case 'FAILED' :
         $query   = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";
         $message = "Job status request reports job is FAILED";
         break;

      case 'UNKNOWN' :
         write_log( "$loghdr job_status='UNKNOWN', reset to 'ERROR' " );
         $query   = "UPDATE analysis SET status='ERROR' WHERE gfacID='$gfacID'";
         $message = "Job status request reports job is not in the queue";
         break;

      case 'UPDATING' :
      case 'PENDING'  :
         // Message only; the stored status is left unchanged
         $message = "Job status request reports job is " . $job_status;
         break;

      default :
         // We shouldn't ever get here
         $query   = "UPDATE analysis SET status='ERROR' WHERE gfacID='$gfacID'";
         $message = "Job status was not recognized - $job_status";
         write_log( "$loghdr update_job_status: " .
                    "Job status was not recognized - $job_status\n" .
                    "gfacID = $gfacID\n" );
         break;
   }

   if ( $query !== null )
   {
      $result = mysql_query( $query, $gLink );

      if ( ! $result )
         write_log( "$loghdr Query failed $query - " . mysql_error( $gLink ) );
   }

   if ( $message != 'NONE' )
   {
      update_queue_messages( $message );
      update_db( $message );
   }
}
594
/**
 * Look up the current job (global $gfacID) in the HPCAnalysisResult table of
 * the database named by global $us3_db.
 *
 * Side effect: global $updateTime is overwritten with
 * UNIX_TIMESTAMP(updateTime) from the fetched row (null when no row matches).
 *
 * The database password is deliberately kept out of the log and mail text,
 * and the connection is closed on every path that opened it.
 *
 * @return mixed HPCAnalysisRequestID of the matching row; 0 when the
 *               connection, DB selection or query fails; null when no row
 *               matches (callers compare the result with == 0)
 */
function get_us3_data()
{
   global $self;
   global $gfacID;
   global $dbhost;
   global $user;
   global $passwd;
   global $us3_db;
   global $updateTime;
   global $loghdr;

   $us3_link = mysql_connect( $dbhost, $user, $passwd );

   if ( ! $us3_link )
   {
      write_log( "$loghdr could not connect: $dbhost, $user" );
      mail_to_admin( "fail", "Could not connect to $dbhost" );
      return 0;
   }

   if ( ! mysql_select_db( $us3_db, $us3_link ) )
   {
      write_log( "$loghdr could not select DB $us3_db" );
      mail_to_admin( "fail", "Could not select DB $us3_db, $dbhost, $user" );
      mysql_close( $us3_link );
      return 0;
   }

   $query  = "SELECT HPCAnalysisRequestID, UNIX_TIMESTAMP(updateTime) " .
             "FROM HPCAnalysisResult WHERE gfacID='$gfacID'";
   $result = mysql_query( $query, $us3_link );

   if ( ! $result )
   {
      // Capture the error text before the link is closed
      $error = mysql_error( $us3_link );
      write_log( "$self: Query failed $query - " . $error );
      mail_to_admin( "fail", "Query failed $query\n" . $error );
      mysql_close( $us3_link );
      return 0;
   }

   list( $requestID, $updateTime ) = mysql_fetch_array( $result );
   mysql_close( $us3_link );

   return $requestID;
}
641
/**
 * Determine whether an ID has the form of a GFAC job ID.
 *
 * An ID qualifies when it starts with "US3-Experiment" (case-insensitive) or
 * is exactly "US3-" followed by five dash-separated hexadecimal groups of
 * 8, 4, 4, 4 and 12 digits.
 *
 * @param string $gfacID Job ID to test
 * @return bool true when the ID matches either form
 */
function is_gfac_job( $gfacID )
{
   $hex = "[0-9a-fA-F]";

   if ( preg_match( "/^US3-Experiment/i", $gfacID ) )
      return true;

   $uuid_form = "/^US3-{$hex}{8}-{$hex}{4}-{$hex}{4}-{$hex}{4}-{$hex}{12}$/";

   return preg_match( $uuid_form, $gfacID ) ? true : false;
}
655
/**
 * Determine whether an ID belongs to an Airavata/Thrift job.
 *
 * True when the ID contains "US3-A" (case-insensitive) and the global
 * $cluster name does not contain "jur" (case-insensitive).
 *
 * @param string $gfacID Job ID to test
 * @return bool true for an Airavata/Thrift job
 */
function is_aira_job( $gfacID )
{
   global $cluster;

   $has_aira_id   = preg_match( "/US3-A/i", $gfacID );
   $on_jur_system = $has_aira_id && preg_match( "/jur/i", $cluster );

   return ( $has_aira_id && ! $on_jur_system ) ? true : false;
}
670
/**
 * Get the current status of a job.
 *
 * Airavata/Thrift jobs: the result of getExperimentStatus() mapped through
 * standard_status(). GFAC jobs: the status parsed from the XML returned by
 * "$serviceURL/jobstatus/<id>", with DONE, DATA and RESULTS_GEN all reported
 * as 'DATA'. A status outside the known list triggers a debug mail.
 *
 * @param string $gfacID Job ID
 * @return mixed status string; false when the ID is neither an Airavata nor
 *               a GFAC job; 'GFAC_STATUS_UNAVAILABLE' when the HTTP request
 *               throws HttpException
 */
function get_gfac_status( $gfacID )
{
   global $serviceURL;
   global $loghdr;

   if ( is_aira_job( $gfacID ) )
   {
      $status_ex   = getExperimentStatus( $gfacID );
      $gfac_status = standard_status( $status_ex );
      write_log( "$loghdr get_gfac_status: status_ex=$status_ex gfac_status=$gfac_status" );
      return $gfac_status;
   }

   if ( ! is_gfac_job( $gfacID ) )
      return false;

   try
   {
      $request = new HttpRequest( "$serviceURL/jobstatus/$gfacID",
                                  HttpRequest::METH_GET );
      $request->send();
      $xml = $request->getResponseBody();
   }
   catch ( HttpException $e )
   {
      write_log( "$loghdr Status not available - marking failed - $gfacID" );
      return 'GFAC_STATUS_UNAVAILABLE';
   }

   // Parse the result
   $gfac_status = parse_response( $xml );

   // This may not seem like the best place to do this, but here we have
   // the xml straight from GFAC
   $known_statuses = array(
      'SUBMITTED', 'SUBMITED', 'INITIALIZED', 'PENDING',
      'RUNNING', 'ACTIVE', 'STARTED',
      'COMPLETED', 'FINISHED', 'DONE',
      'DATA', 'RESULTS_GEN',
      'CANCELED', 'CANCELLED', 'FAILED',
      'STAGING', 'UNKNOWN' );

   if ( ! in_array( $gfac_status, $known_statuses ) )
   {
      mail_to_admin( 'debug', "gfacID: /$gfacID/\n" .
                              "XML:    /$xml/\n"    .
                              "Status: /$gfac_status/\n" );
   }

   // All "results pending" variants are reported to callers as DATA
   $data_statuses = array( 'DONE', 'DATA', 'RESULTS_GEN' );

   return in_array( $gfac_status, $data_statuses ) ? 'DATA' : $gfac_status;
}
735
/**
 * Ask GFAC to register (resend) the outputs of a job.
 *
 * The request to "$serviceURL/registeroutput/<id>" is made only when
 * get_gfac_status() reports DONE, FAILED, COMPLETE or FINISHED.
 *
 * NOTE(review): for GFAC REST jobs get_gfac_status() reports DONE as 'DATA',
 * so 'DONE' in the list below can presumably only match for Airavata jobs -
 * confirm that this is intended.
 *
 * @param string $gfacID Job ID
 * @return mixed status parsed from the response; false when the job is not a
 *               GFAC job, its status does not permit the request, or the
 *               HTTP request throws HttpException
 */
function get_gfac_outputs( $gfacID )
{
   global $serviceURL;
   global $self;

   // Make sure it's a GFAC job and status is appropriate for this call
   $job_status = get_gfac_status( $gfacID );

   if ( $job_status === false )
      return false;                 // Then it's not a GFAC job

   $request_allowed = array( 'DONE', 'FAILED', 'COMPLETE', 'FINISHED' );

   if ( ! in_array( $job_status, $request_allowed ) )
      return false;                 // Then it's not appropriate to request data

   try
   {
      $request = new HttpRequest( "$serviceURL/registeroutput/$gfacID",
                                  HttpRequest::METH_GET );
      $request->send();
      $xml = $request->getResponseBody();
   }
   catch ( HttpException $e )
   {
      write_log( "$self: Data not available - request failed -  $gfacID" );
      return false;
   }

   // Temporary, to see what the xml looks like, if we ever get one
   mail_to_admin( "debug", "get_gfac_outputs/\n$xml/" );

   // Parse the result
   return parse_response( $xml );
}
776
/**
 * Extract the status and message text from a GFAC XML response.
 *
 * The text of a <status> element becomes the return value; the text of any
 * other element is stored in global $gfac_message (the last one wins).
 *
 * An empty or whitespace-only response is treated as "no status" instead of
 * being handed to XMLReader, which would emit warnings or throw on it.
 *
 * @param string $xml Raw XML response body
 * @return string Text of the <status> element, or "" when there is none
 */
function parse_response( $xml )
{
   global $gfac_message;

   $status       = "";
   $gfac_message = "";

   if ( ! is_string( $xml ) || trim( $xml ) === "" )
      return $status;

   $parser = new XMLReader();

   if ( ! $parser->xml( $xml ) )
      return $status;

   $name = "";   // Name of the most recently opened element

   while ( $parser->read() )
   {
      $type = $parser->nodeType;

      if ( $type == XMLReader::ELEMENT )
      {
         $name = $parser->name;
      }
      else if ( $type == XMLReader::TEXT )
      {
         if ( $name == "status" )
            $status = $parser->value;
         else
            $gfac_message = $parser->value;
      }
   }

   $parser->close();
   return $status;
}
806
/**
 * Get the status of a job from the local cluster by running "qstat -a" over
 * ssh on "<cluster>.uthscsa.edu" (global $cluster with "-local" removed).
 *
 * The host and job ID are shell-escaped, once for the local shell and (for
 * the ID) once more for the shell that ssh starts remotely, so neither value
 * can inject extra commands. The state is read from the tenth
 * whitespace-separated field of the last output line.
 *
 * @param string $gfacID Job ID passed to qstat
 * @return string 'ACTIVE', 'COMPLETED', 'SUBMITTED' or 'UNKNOWN'
 */
function get_local_status( $gfacID )
{
   global $cluster;
   global $self;

   $system = "$cluster.uthscsa.edu";
   $system = preg_replace( "/\-local/", "", $system );

   // ssh joins its arguments and hands them to the remote shell, so the
   // remote command is built (and quoted) as one string.
   $remote_cmd = "qstat -a " . escapeshellarg( $gfacID );
   $cmd        = "/usr/bin/ssh -x " . escapeshellarg( "us3@$system" ) . " " .
                 escapeshellarg( $remote_cmd ) . " 2>&1";

   $result = exec( $cmd );

   if ( $result == ""  ||  preg_match( "/^qstat: Unknown/", $result ) )
   {
      write_log( "$self get_local_status: Local job $gfacID unknown" );
      return 'UNKNOWN';
   }

   $values = preg_split( "/\s+/", $result );

   // A short or unexpected line has no state field; treat it as unknown
   $state = isset( $values[ 9 ] ) ? $values[ 9 ] : '';

   switch ( $state )
   {
      case "W" :                      // Waiting for execution time to be reached
      case "E" :                      // Job is exiting after having run
      case "R" :                      // Still running
         $status = 'ACTIVE';
         break;

      case "C" :                      // Job has completed
         $status = 'COMPLETED';
         break;

      case "T" :                      // Job is being moved
      case "H" :                      // Held
      case "Q" :                      // Queued
         $status = 'SUBMITTED';
         break;

      default :
         $status = 'UNKNOWN';         // This should not occur
         break;
   }

   return $status;
}
852
/**
 * Add a message to the queue_messages table for the current job
 * (global $gfacID).
 *
 * The job ID is escaped like the message text. When the analysis table has
 * no row for the job, the problem is logged and nothing is inserted, rather
 * than inserting a message with an empty analysisID.
 *
 * @param string $message Message text to store
 */
function update_queue_messages( $message )
{
   global $self;
   global $gLink;
   global $gfacID;

   // Get analysis table ID
   $query  = "SELECT id FROM analysis " .
             "WHERE gfacID = '" . mysql_real_escape_string( $gfacID, $gLink ) . "' ";
   $result = mysql_query( $query, $gLink );

   if ( ! $result )
   {
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );
      return;
   }

   $row = mysql_fetch_array( $result );

   if ( ! $row )
   {
      write_log( "$self: update_queue_messages: no analysis entry for $gfacID" );
      return;
   }

   $analysisID = $row[ 0 ];

   // Insert message into queue_message table
   $query  = "INSERT INTO queue_messages SET " .
             "message = '" . mysql_real_escape_string( $message, $gLink ) . "', " .
             "analysisID = '$analysisID' ";
   $result = mysql_query( $query, $gLink );

   if ( ! $result )
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );
}
881
/**
 * Store a message as lastMessage in the HPCAnalysisResult row of the current
 * job (global $gfacID) in the database named by global $us3_db.
 *
 * The database password is deliberately kept out of the log and mail text,
 * the connection is closed on every path that opened it, and a failed UPDATE
 * is logged instead of being ignored.
 *
 * @param string $message Message text to store
 * @return mixed 0 when the connection or DB selection fails; otherwise
 *               nothing (null)
 */
function update_db( $message )
{
   global $self;
   global $gfacID;
   global $dbhost;
   global $user;
   global $passwd;
   global $us3_db;

   $us3_link = mysql_connect( $dbhost, $user, $passwd );

   if ( ! $us3_link )
   {
      write_log( "$self: could not connect: $dbhost, $user" );
      mail_to_admin( "fail", "Could not connect to $dbhost" );
      return 0;
   }

   if ( ! mysql_select_db( $us3_db, $us3_link ) )
   {
      write_log( "$self: could not select DB $us3_db" );
      mail_to_admin( "fail", "Could not select DB $us3_db, $dbhost, $user" );
      mysql_close( $us3_link );
      return 0;
   }

   // Note the space before WHERE: the pieces are concatenated directly
   $query = "UPDATE HPCAnalysisResult SET " .
            "lastMessage='" . mysql_real_escape_string( $message, $us3_link ) . "' " .
            "WHERE gfacID = '" . mysql_real_escape_string( $gfacID, $us3_link ) . "' ";

   if ( ! mysql_query( $query, $us3_link ) )
      write_log( "$self: Query failed $query - " . mysql_error( $us3_link ) );

   mysql_close( $us3_link );
}
917
/**
 * Mail a notification to the administrator address (global $admin_email).
 *
 * The body reports the globals $updateTime, $status and $cluster followed by
 * the given message. $type is accepted but not used in this function.
 *
 * @param string $type Notification category passed by callers (unused here)
 * @param string $msg  Text placed after "Error Message :"
 */
function mail_to_admin( $type, $msg )
{
   global $updateTime;
   global $status;
   global $cluster;
   global $org_name;
   global $admin_email;
   global $dbhost;
   global $requestID;

   $now = time();

   $header_lines = array(
      "From: $org_name Admin<$admin_email>",
      "Cc: $org_name Admin<$admin_email>",
      "Bcc: Gary Gorbet<gegorbet@gmail.com>",             // make sure
      // Set the reply address
      "Reply-To: $org_name<$admin_email>",
      "Return-Path: $org_name<$admin_email>",
      // Try to avoid spam filters
      "Message-ID: <{$now}gridctl@{$dbhost}>{$requestID}",
      "X-Mailer: PHP v" . phpversion(),
      "MIME-Version: 1.0",
      "Content-Transfer-Encoding: 8bit" );

   $headers = implode( "\n", $header_lines ) . "\n";

   $subject = "US3 Error Notification";

   $message = "\n" .
              " UltraScan job error notification from gridctl.php:\n" .
              "\n" .
              " Update Time : $updateTime\n" .
              " GFAC Status : $status\n" .
              " Cluster : $cluster\n" .
              " " .
              "Error Message : $msg\n";

   mail( $admin_email, $subject, $message, $headers );
}
[6]956
/**
 * Convert a status string to one of the standard DB status strings.
 *
 * Known variations are mapped to the gateway's standard values; any other
 * value - including the already-standard ERROR, RUNNING, SUBMITTED, UPDATING,
 * CANCELED, DATA, FAILED, DONE and FINISHED - is returned unchanged.
 * Note that 'COMPLETED' maps to 'COMPLETE' while 'COMPLETE' maps to 'DONE'.
 *
 * @param string $status_in Status as reported
 * @return string Standard status value
 */
function standard_status( $status_in )
{
   switch ( $status_in )
   {
      case 'QUEUED' :
      case 'LAUNCHED' :
      case 'CREATED' :
      case 'VALIDATED' :
      case 'SCHEDULED' :
      case 'submitted' :
      case '' :
         return 'SUBMITTED';

      case 'EXECUTING' :
      case 'ACTIVE' :
      case 'running' :
      case 'executing' :
         return 'RUNNING';

      case 'PENDING' :
      case 'CANCELING' :
         return 'UPDATING';

      case 'CANCELLED' :
      case 'canceled' :
         return 'CANCELED';

      case 'COMPLETED' :
      case 'completed' :
         return 'COMPLETE';

      case 'FAILED_DATA' :
      case 'SUBMIT_TIMEOUT' :
      case 'RUN_TIMEOUT' :
      case 'DATA_TIMEOUT' :
         return 'FAILED';

      case 'COMPLETE' :
         return 'DONE';

      case 'UNKNOWN' :
         return 'ERROR';
   }

   // Where already standard value (or unrecognized), retain value
   return $status_in;
}
1026
/**
 * Determine the effective status of an Airavata job by combining the status
 * stored in the gateway DB with the experiment status from
 * getExperimentStatus().
 *
 * Nothing is queried - and $status_in is returned unchanged - unless the ID
 * contains "US3-A" and the job's development/production flavor matches this
 * gridctl's. When the resulting status differs from $status_in it is also
 * recorded through update_job_status().
 *
 * @param string $gfacID    Job ID
 * @param string $status_in Status currently stored for the job
 * @return string Effective status
 */
function aira_status( $gfacID, $status_in )
{
   global $loghdr;
   global $class_dir;

   $status_gw = $status_in;

   $me_devel  = preg_match( "/class_devel/", $class_dir );
   $job_devel = preg_match( "/US3-ADEV/i", $gfacID );
   $devmatch  = ( (bool) $me_devel == (bool) $job_devel );

   if ( ! preg_match( "/US3-A/i", $gfacID )  ||  ! $devmatch )
      return $status_gw;   // Not an Airavata job of the right type

   // Airavata job and development/production type is right
   $status_ex   = getExperimentStatus( $gfacID );
   $gw_finished = ( $status_gw == 'FINISHED'  ||  $status_gw == 'DONE' );

   if ( $status_ex == 'COMPLETED' )
   {  // COMPLETED + FINISHED/DONE     : gateway status is now COMPLETE
      // COMPLETED + NOT-FINISHED/DONE : gateway status is now DONE
      $status = $gw_finished ? 'COMPLETE' : 'DONE';
   }
   else if ( $gw_finished )
   {  // Gateway status FINISHED/DONE: leave as is, except when the
      // experiment reports FAILED
      $status = $status_gw;

      if ( $status_ex == 'FAILED' )
      {  // Re-query up to twice, 10 seconds apart; the status is set to
         // COMPLETE afterwards whatever the retries report
         sleep( 10 );
         $status_ex = getExperimentStatus( $gfacID );

         if ( $status_ex == 'FAILED' )
         {
            write_log( "$loghdr status still 'FAILED' after 10-second delay" );
            sleep( 10 );
            $status_ex = getExperimentStatus( $gfacID );

            if ( $status_ex == 'FAILED' )
               write_log( "$loghdr status still 'FAILED' after 20-second delay" );
            else
               write_log( "$loghdr status is $status_ex after 20-second delayed retry" );
         }

         write_log( "$loghdr status reset to 'COMPLETE'" );
         $status = 'COMPLETE';
      }
   }
   else
   {  // Experiment not COMPLETED/FINISHED/DONE: use experiment status
      $status = standard_status( $status_ex );
   }

   write_log( "$loghdr status/_in/_gw/_ex=$status/$status_in/$status_gw/$status_ex" );

   if ( $status != $status_gw )
      update_job_status( $status, $gfacID );

   return $status;
}
1102
[1]1103?>
Note: See TracBrowser for help on using the repository browser.