source: trunk/gridctl.php@ 17

Last change on this file since 17 was 17, checked in by gegorbet, 9 years ago

misc. fixes plus mods for new lonestar5 cluster

File size: 29.9 KB
Line 
1<?php
2
// Script-wide state.  The functions further down reach these through `global`.
$gfac_message = "";   // Last non-status text found by parse_response()
$updateTime   = 0;    // Set per row by the main loop and by get_us3_data(); shown in admin mail
$submittime   = 0;
$cluster      = '';   // Set per row by the main loop; read by get_local_status() and mail_to_admin()

//global $self;

// NOTE(review): $dbhost, $guser, $gpasswd, $gDB, $self and $class_dir are not
// defined in this file - presumably an including/config script provides them.

// Produce some output temporarily, so cron will send me message
$now = time();
echo "Time started: " . date( 'Y-m-d H:i:s', $now ) . "\n";

// Get data from global GFAC DB
$gLink = mysql_connect( $dbhost, $guser, $gpasswd );

// Report a failed connection as such, instead of letting it surface later
// as a misleading "could not select DB" error.
if ( ! $gLink )
{
  write_log( "$self: Could not connect to $dbhost - " . mysql_error() );
  mail_to_admin( "fail", "Internal Error: Could not connect to $dbhost" );
  exit();
}

if ( ! mysql_select_db( $gDB, $gLink ) )
{
  write_log( "$self: Could not select DB $gDB - " . mysql_error( $gLink ) );
  mail_to_admin( "fail", "Internal Error: Could not select DB $gDB" );
  exit();
}

// One row per tracked job; `time` is fetched both as a Unix timestamp and raw
$query  = "SELECT gfacID, us3_db, cluster, status, queue_msg, " .
          "UNIX_TIMESTAMP(time), time from analysis";
$result = mysql_query( $query, $gLink );

if ( ! $result )
{
  write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );
  mail_to_admin( "fail", "Query failed $query\n" . mysql_error( $gLink ) );
  exit();
}

if ( mysql_num_rows( $result ) == 0 )
{
//write_log( "$self: analysis read got numrows==0" );
  exit();  // Nothing to do
}
// Non-zero when this copy of gridctl runs out of a "class_devel" directory
$me_devel = preg_match( "/class_devel/", $class_dir );

// Walk every row of the analysis table.  The loop variables are file-scope
// globals on purpose: the handler functions below read $gfacID, $us3_db,
// $cluster, $status, $updateTime and $loghdr through `global`.
// $time is UNIX_TIMESTAMP(time); $updateTime is the raw `time` column.
while ( list( $gfacID, $us3_db, $cluster, $status, $queue_msg, $time, $updateTime )
            = mysql_fetch_array( $result ) )
{
  // If this entry does not match class/class_devel, skip processing

  if ( preg_match( "/US3-A/i", $gfacID ) )
  {  // For thrift, job and gridctl must match
    $job_devel = preg_match( "/US3-ADEV/i", $gfacID );
    if ( ( $me_devel && !$job_devel ) ||
         ( !$me_devel && $job_devel ) )
    {  // If job not from appropriate Airavata server, skip processing
      continue;
    }
  }
  else if ( $me_devel )
  {  // For non-thrift and development, skip
    continue;
  }

  // Checking we need to do for each entry
  echo "us3db=$us3_db gfid=$gfacID\n";
  switch ( $us3_db )
  {
    // NOTE(review): the leading "X" presumably disables these entries - confirm
    case 'Xuslims3_cauma3' :
    case 'Xuslims3_cauma3d' :
    case 'Xuslims3_HHU' :
    case 'Xuslims3_Uni_KN' :
      $serviceURL = "http://gridfarm005.ucs.indiana.edu:9090/ogce-rest/job";
      break;

    default :
//    $serviceURL = "http://gridfarm005.ucs.indiana.edu:8080/ogce-rest/job";
      break;
  }

  // Log label: at most the first three dash-separated pieces of the ID.
  // IDs with fewer than three pieces are used as they are, which avoids
  // undefined-offset notices and trailing dashes in the label.
  $gfacLabl  = implode( "-", array_slice( explode( "-", $gfacID ), 0, 3 ) );
  $loghdr    = $self . ":" . $gfacLabl . "...:";
  $status_in = $status;
  $status_gw = $status;
  $status_ex = $status;

  // If entry is for Airavata/Thrift, get the true current status

  if ( is_aira_job( $gfacID ) )
  {
    $status_in = $status;
    $status    = aira_status( $gfacID, $status_in );
    if ( $status != $status_in )
      write_log( "$loghdr Set to $status from $status_in" );
  }
  else
  {
    $status_gw = $status;
    $status    = get_gfac_status( $gfacID );
    //if ( $status == 'FINISHED' )
    if ( $status_gw == 'COMPLETE' )   // A gateway COMPLETE overrides the queried status
      $status = $status_gw;
    write_log( "$loghdr non-AThrift status=$status status_gw=$status_gw" );
  }

  // Sometimes during testing, the us3_db entry is not set
  // If $status == 'ERROR' then the condition has been processed before
  if ( strlen( $us3_db ) == 0 && $status != 'ERROR' )
  {
    write_log( "$loghdr GFAC DB is NULL - $gfacID" );
    mail_to_admin( "fail", "GFAC DB is NULL\n$gfacID" );

    $query2  = "UPDATE analysis SET status='ERROR' WHERE gfacID='$gfacID'";
    $result2 = mysql_query( $query2, $gLink );
    $status  = 'ERROR';

    if ( ! $result2 )
      write_log( "$loghdr Query failed $query2 - " . mysql_error( $gLink ) );
  }

//echo "  st=$status\n";
  // Dispatch on the (possibly refreshed) status
  switch ( $status )
  {
    // Already been handled
    // Later update this condition to search for gfacID?
    case "ERROR":
      cleanup();
      break;

    case "SUBMITTED":
      submitted( $time );
      break;

    case "SUBMIT_TIMEOUT":
      submit_timeout( $time );
      break;

    case "RUNNING":
    case "STARTED":
    case "STAGING":
    case "ACTIVE":
      running( $time );
      break;

    case "RUN_TIMEOUT":
      run_timeout( $time );
      break;

    case "DATA":
    case "RESULTS_GEN":
      wait_data( $time );
      break;

    case "DATA_TIMEOUT":
      data_timeout( $time );
      break;

    case "COMPLETED":
    case "COMPLETE":
      complete();
      break;

    case "CANCELLED":
    case "CANCELED":
    case "FAILED":
      failed();
      break;

    case "FINISHED":
    case "DONE":
      // Only Airavata jobs are finalized here, and only once the
      // experiment itself reports COMPLETED
      if ( is_aira_job( $gfacID ) )
      {
        $status_ex = getExperimentStatus( $gfacID );
        write_log( "$loghdr status=$status status_ex=$status_ex" );
        if ( $status_ex === 'COMPLETED' )
          complete();
      }
      break;   // explicit; the original fell through to a bare break

    case "PROCESSING":
    default:
      break;
  }
}

exit();
186
// Handler for entries whose status is SUBMITTED.
//   $updatetime - Unix timestamp the entry's age is measured from
// Younger than 10 minutes: nothing is done.  Younger than 24 hours: the
// current job status is looked up and stored if the job has left the queued
// states.  Older: the entry is flagged SUBMIT_TIMEOUT and the admin is mailed.
function submitted( $updatetime )
{
  global $self;
  global $gLink;
  global $gfacID;
  global $loghdr;

  $current = time();

  if ( $current < $updatetime + 600 )      // < 10 minutes ago
    return;

  if ( $current >= $updatetime + 86400 )   // Past the first 24 hours
  {
    $message = "Job listed submitted longer than 24 hours";
    $detail  = $message . " - id: " . $gfacID;

    write_log( $self . ": " . $detail );
    mail_to_admin( "hang", $detail );

    $query = "UPDATE analysis SET status='SUBMIT_TIMEOUT' WHERE gfacID='$gfacID'";
    if ( ! mysql_query( $query, $gLink ) )
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

    update_queue_messages( $message );
    update_db( $message );
    return;
  }

  // Within the first 24 hours: ask GFAC, falling back to the local cluster
  $reported = get_gfac_status( $gfacID );
  if ( $reported === false )
    $reported = get_local_status( $gfacID );

  if ( $reported == 'GFAC_STATUS_UNAVAILABLE' )
    return;

  $queued_states = array( 'SUBMITTED', 'INITIALIZED', 'PENDING' );
  if ( in_array( $reported, $queued_states ) )
    return;

//write_log( "$loghdr submitted:job_status=$reported" );
  update_job_status( $reported, $gfacID );
}
227
// Handler for entries whose status is SUBMIT_TIMEOUT.
//   $updatetime - Unix timestamp the entry's age is measured from
// If the job has left the queued states its new status is stored.  Otherwise,
// once a further 24 hours have passed, the entry is marked FAILED and the
// admin is mailed.
function submit_timeout( $updatetime )
{
  global $self;
  global $gLink;
  global $gfacID;
  global $loghdr;

  // Ask GFAC first, falling back to the local cluster
  $reported = get_gfac_status( $gfacID );
  if ( $reported === false )
    $reported = get_local_status( $gfacID );

  if ( $reported == 'GFAC_STATUS_UNAVAILABLE' )
    return;

  $queued_states = array( 'SUBMITTED', 'INITIALIZED', 'PENDING' );
  if ( ! in_array( $reported, $queued_states ) )
  {
//write_log( "$loghdr submit_timeout:job_status=$reported" );
    update_job_status( $reported, $gfacID );
    return;
  }

  $current = time();
  if ( $current < $updatetime + 86400 )   // < 24 hours ago ( 48 total submitted )
    return;

  $message = "Job listed submitted longer than 48 hours";
  $detail  = $message . " - id: " . $gfacID;

  write_log( $self . ": " . $detail );
  mail_to_admin( "hang", $detail );

  $query = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";
  if ( ! mysql_query( $query, $gLink ) )
    write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

  update_queue_messages( $message );
  update_db( $message );
}
264
// Handler for entries in one of the running states.
//   $updatetime - Unix timestamp the entry's age is measured from
// Always calls get_us3_data() first (which refreshes the global $updateTime).
// Younger than 10 minutes: nothing more is done.  Younger than 24 hours: the
// current job status is looked up and stored if the job is no longer running.
// Older: the entry is flagged RUN_TIMEOUT and the admin is mailed.
function running( $updatetime )
{
  global $self;
  global $gLink;
  global $gfacID;
  global $loghdr;

  $current = time();   // taken before the DB round trip, as in the original

  get_us3_data();

  if ( $current < $updatetime + 600 )      // message received < 10 minutes ago
    return;

  if ( $current >= $updatetime + 86400 )   // Past the first 24 hours
  {
    $message = "Job listed running longer than 24 hours";
    $detail  = $message . " - id: " . $gfacID;

    write_log( $self . ": " . $detail );
    mail_to_admin( "hang", $detail );

    $query = "UPDATE analysis SET status='RUN_TIMEOUT' WHERE gfacID='$gfacID'";
    if ( ! mysql_query( $query, $gLink ) )
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

    update_queue_messages( $message );
    update_db( $message );
    return;
  }

  // Within the first 24 hours: ask GFAC, falling back to the local cluster
  $reported = get_gfac_status( $gfacID );
  if ( $reported === false )
    $reported = get_local_status( $gfacID );

  if ( $reported == 'GFAC_STATUS_UNAVAILABLE' )
    return;

  $running_states = array( 'ACTIVE', 'RUNNING', 'STARTED' );
  if ( in_array( $reported, $running_states ) )
    return;

//write_log( "$loghdr running:job_status=$reported" );
  update_job_status( $reported, $gfacID );
}
307
// Handler for entries whose status is RUN_TIMEOUT.
//   $updatetime - Unix timestamp the entry's age is measured from
// If the job is no longer in a running state its new status is stored.
// Otherwise get_us3_data() is called and, once 48 hours have passed, the
// entry is marked FAILED and the admin is mailed.
function run_timeout( $updatetime )
{
  global $self;
  global $gLink;
  global $gfacID;
  global $loghdr;

  // Ask GFAC first, falling back to the local cluster
  $reported = get_gfac_status( $gfacID );
  if ( $reported === false )
    $reported = get_local_status( $gfacID );

  if ( $reported == 'GFAC_STATUS_UNAVAILABLE' )
    return;

  $running_states = array( 'ACTIVE', 'RUNNING', 'STARTED' );
  if ( ! in_array( $reported, $running_states ) )
  {
//write_log( "$loghdr run_timeout:job_status=$reported" );
    update_job_status( $reported, $gfacID );
    return;
  }

  $current = time();   // taken before the DB round trip, as in the original

  get_us3_data();

  if ( $current < $updatetime + 172800 )   // < 48 hours ago
    return;

  $message = "Job listed running longer than 48 hours";
  $detail  = $message . " - id: " . $gfacID;

  write_log( $self . ": " . $detail );
  mail_to_admin( "hang", $detail );

  $query = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";
  if ( ! mysql_query( $query, $gLink ) )
    write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

  update_queue_messages( $message );
  update_db( $message );
}
346
// Handler for entries waiting for result data (DATA / RESULTS_GEN).
//   $updatetime - Unix timestamp the entry's age is measured from
// Within the first hour: stores a changed job status, or - on minutes
// divisible by 5 - asks GFAC to resend the outputs.  After an hour the entry
// is flagged DATA_TIMEOUT and the admin is mailed.
function wait_data( $updatetime )
{
  global $self;
  global $gLink;
  global $gfacID;
  global $loghdr;

  $current = time();

  if ( $current >= $updatetime + 3600 )   // Past the first hour
  {
    $message = "Waiting for data longer than 1 hour";
    $detail  = $message . " - id: " . $gfacID;

    write_log( $self . ": " . $detail );
    mail_to_admin( "hang", $detail );

    $query = "UPDATE analysis SET status='DATA_TIMEOUT' WHERE gfacID='$gfacID'";
    if ( ! mysql_query( $query, $gLink ) )
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

    update_queue_messages( $message );
    update_db( $message );
    return;
  }

  // Within the first hour: ask GFAC, falling back to the local cluster
  $reported = get_gfac_status( $gfacID );
  if ( $reported === false )
    $reported = get_local_status( $gfacID );

  if ( $reported == 'GFAC_STATUS_UNAVAILABLE' )
    return;

  if ( $reported != 'DATA' )
  {
//write_log( "$loghdr wait_data:job_status=$reported" );
    update_job_status( $reported, $gfacID );
    return;
  }

  // Request to resend data, but only request every 5 minutes
  if ( ( (int) date( 'i' ) ) % 5 != 0 )
    return;

  $outcome = get_gfac_outputs( $gfacID );

  if ( $outcome !== false )
    mail_to_admin( "debug", "wait_data/$gfacID/$outcome" );
}
395
// Handler for entries whose status is DATA_TIMEOUT.
//   $updatetime - Unix timestamp the entry's age is measured from
// If the job status is no longer DATA the new status is stored.  Otherwise,
// for the first 24 hours, GFAC is asked to resend the outputs on minutes
// divisible by 15; after that the entry is marked FAILED and the admin mailed.
function data_timeout( $updatetime )
{
  global $self;
  global $gLink;
  global $gfacID;
  global $loghdr;

  // Ask GFAC first, falling back to the local cluster
  $reported = get_gfac_status( $gfacID );
  if ( $reported === false )
    $reported = get_local_status( $gfacID );

  if ( $reported == 'GFAC_STATUS_UNAVAILABLE' )
    return;

  if ( $reported != 'DATA' )
  {
//write_log( "$loghdr data_timeout:job_status=$reported" );
    update_job_status( $reported, $gfacID );
    return;
  }

  $current = time();

  if ( $current >= $updatetime + 86400 )   // 24 hours or more ago
  {
    $message = "Waiting for data longer than 24 hours";
    $detail  = $message . " - id: " . $gfacID;

    write_log( $self . ": " . $detail );
    mail_to_admin( "hang", $detail );

    $query = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";
    if ( ! mysql_query( $query, $gLink ) )
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

    update_queue_messages( $message );
    update_db( $message );
    return;
  }

  // Request to resend data, but only request every 15 minutes
  if ( ( (int) date( 'i' ) ) % 15 != 0 )
    return;

  $outcome = get_gfac_outputs( $gfacID );

  if ( $outcome !== false )
    mail_to_admin( "debug", "data_timeout/$gfacID/$outcome" );
}
444
// Handler for entries that have completed: everything that remains to be
// done for such a job is performed by cleanup().
function complete()
{
  cleanup();
}
450
// Handler for entries that failed or were canceled: everything that remains
// to be done for such a job is performed by cleanup().
function failed()
{
  cleanup();
}
456
// Final processing for the current entry (global $gfacID).
// Verifies the entry still exists in the analysis table, looks up its
// HPC request ID via get_us3_data(), then hands off to aira_cleanup() or
// gfac_cleanup() depending on the job type and on whether this gridctl
// instance is the development or the production one.
function cleanup()
{
  global $self;
  global $gLink;
  global $gfacID;
  global $us3_db;
  global $loghdr;
  global $class_dir;

  // Double check that the gfacID exists
  $query  = "SELECT count(*) FROM analysis WHERE gfacID='$gfacID'";
  $result = mysql_query( $query, $gLink );

  if ( ! $result )
  {
    $db_error = mysql_error( $gLink );
    write_log( "$self: Query failed $query - " . $db_error );
    mail_to_admin( "fail", "Query failed $query\n" . $db_error );
    return;
  }

  list( $count ) = mysql_fetch_array( $result );

  if ( $count == 0 )
  {
    write_log( "$loghdr count = $count gfacID = $gfacID" );
    return;
  }

  // Now check the us3 instance
  $requestID = get_us3_data();
//write_log( "$loghdr requestID = $requestID gfacID = $gfacID" );
  if ( $requestID == 0 )
    return;

  $me_devel = (bool) preg_match( "/class_devel/", $class_dir );

  if ( ! preg_match( "/US3-A/i", $gfacID ) )
  {
    // If this is gridctl_pro and gfac (jureca), do GFAC cleanup
    if ( ! $me_devel )
    {
//write_log( "$loghdr CALLING gfac_cleanup()" );
      gfac_cleanup( $us3_db, $requestID, $gLink );
    }
    return;
  }

  // Airavata job: process it only when the job and this gridctl are both
  // development or both production
  $job_devel = (bool) preg_match( "/US3-ADEV/i", $gfacID );
  if ( $me_devel === $job_devel )
  {
//write_log( "$loghdr CALLING aira_cleanup()" );
    aira_cleanup( $us3_db, $requestID, $gLink );
  }
//write_log( "$loghdr RTN FR aira_cleanup()" );
}
508
// Function to update status of job
//   $job_status - status string reported for the job
//   $gfacID     - ID of the analysis entry to update
// Maps the reported status onto the value stored in analysis.status, writes
// it, and (unless the message is 'NONE') records an explanatory message via
// update_queue_messages() and update_db().
//
// UPDATING and PENDING only produce a message; no UPDATE is issued for them.
// The query is built in a local variable: the earlier version shared a global
// $query and, for UPDATING/PENDING, re-executed whatever statement had been
// left in it (possibly an UPDATE belonging to a different job).
function update_job_status( $job_status, $gfacID )
{
  global $gLink;
  global $loghdr;

  $new_status = null;   // null means: leave analysis.status untouched

  switch ( $job_status )
  {
    case 'SUBMITTED' :
    case 'SUBMITED' :
    case 'INITIALIZED' :
      $new_status = 'SUBMITTED';
      $message    = "Job status request reports job is " . $job_status;
//write_log( "$loghdr update_job_status(SUBM) job_status=$job_status" );
      break;

    case 'STARTED' :
    case 'RUNNING' :
    case 'ACTIVE' :
      $new_status = 'RUNNING';
      $message    = "Job status request reports job is RUNNING";
      break;

    case 'FINISHED' :
      $new_status = 'FINISHED';
      $message    = "NONE";
      break;

    case 'DONE' :
      $new_status = 'DONE';
      $message    = "NONE";
      break;

    case 'COMPLETED' :
    case 'COMPLETE' :
      $new_status = 'COMPLETE';
      $message    = "Job status request reports job is COMPLETED";
      break;

    case 'DATA' :
      $new_status = 'DATA';
      $message    = "Job status request reports job is COMPLETE, waiting for data";
      break;

    case 'CANCELED' :
    case 'CANCELLED' :
      $new_status = 'CANCELED';
      $message    = "Job status request reports job is CANCELED";
      break;

    case 'FAILED' :
      $new_status = 'FAILED';
      $message    = "Job status request reports job is FAILED";
      break;

    case 'UNKNOWN' :
      write_log( "$loghdr job_status='UNKNOWN', reset to 'ERROR' " );
      $new_status = 'ERROR';
      $message    = "Job status request reports job is not in the queue";
      break;

    case 'UPDATING' :
    case 'PENDING' :
      $message    = "Job status request reports job is " . $job_status;
      break;

    default :
      // We shouldn't ever get here
      $new_status = 'ERROR';
      $message    = "Job status was not recognized - $job_status";
      write_log( "$loghdr update_job_status: " .
                 "Job status was not recognized - $job_status\n" .
                 "gfacID = $gfacID\n" );
      break;
  }

  if ( $new_status !== null )
  {
    $query  = "UPDATE analysis SET status='$new_status' WHERE gfacID='$gfacID'";
    $result = mysql_query( $query, $gLink );
    if ( ! $result )
      write_log( "$loghdr Query failed $query - " . mysql_error( $gLink ) );
  }

  if ( $message != 'NONE' )
  {
    update_queue_messages( $message );
    update_db( $message );
  }
}
598
// Look up the current job (global $gfacID) in the HPCAnalysisResult table of
// the LIMS database named by the global $us3_db.
// Side effect: the global $updateTime is overwritten with the row's
// UNIX_TIMESTAMP(updateTime), or with 0 when no row matches.
// Returns the row's HPCAnalysisRequestID, or 0 on any failure / missing row
// (callers test the result with == 0).
//
// The database password is deliberately kept out of log and mail text, and
// the connection is closed on every path once it has been opened.
function get_us3_data()
{
  global $self;
  global $gfacID;
  global $dbhost;
  global $user;
  global $passwd;
  global $us3_db;
  global $updateTime;
  global $loghdr;

  $us3_link = mysql_connect( $dbhost, $user, $passwd );

  if ( ! $us3_link )
  {
    write_log( "$loghdr could not connect: $dbhost, $user" );
    mail_to_admin( "fail", "Could not connect to $dbhost" );
    return 0;
  }

  if ( ! mysql_select_db( $us3_db, $us3_link ) )
  {
    write_log( "$loghdr could not select DB $us3_db" );
    mail_to_admin( "fail", "Could not select DB $us3_db, $dbhost, $user" );
    mysql_close( $us3_link );
    return 0;
  }

  $query  = "SELECT HPCAnalysisRequestID, UNIX_TIMESTAMP(updateTime) " .
            "FROM HPCAnalysisResult WHERE gfacID='$gfacID'";
  $result = mysql_query( $query, $us3_link );

  if ( ! $result )
  {
    $db_error = mysql_error( $us3_link );   // read before the link is closed
    mysql_close( $us3_link );
    write_log( "$self: Query failed $query - " . $db_error );
    mail_to_admin( "fail", "Query failed $query\n" . $db_error );
    return 0;
  }

  $row = mysql_fetch_array( $result );
  mysql_close( $us3_link );

  if ( ! $row )
  {  // No HPCAnalysisResult row for this gfacID
    $updateTime = 0;
    return 0;
  }

  list( $requestID, $updateTime ) = $row;

  return $requestID;
}
645
// Function to determine if this is a gfac job or not
// Returns true when $gfacID starts with "US3-Experiment" (any letter case)
// or is exactly "US3-" followed by five dash-separated groups of
// 8-4-4-4-12 hexadecimal digits; false otherwise.
function is_gfac_job( $gfacID )
{
  if ( preg_match( "/^US3-Experiment/i", $gfacID ) )
    return true;

  $h    = "[0-9a-fA-F]";
  $uuid = "/^US3-" . $h . "{8}-" . $h . "{4}-" . $h . "{4}-" .
          $h . "{4}-" . $h . "{12}$/";

  return (bool) preg_match( $uuid, $gfacID );
}
659
// Function to determine if this is an airavata/thrift job or not
// Returns true when $gfacID contains "US3-A" (any letter case), else false.
// NOTE(review): the case-insensitive pattern also matches a UUID-style GFAC
// ID whose first hex digit is "a" (e.g. "US3-a1b2..."), which is_gfac_job()
// accepts as a GFAC job - confirm whether that overlap can occur in practice.
function is_aira_job( $gfacID )
{
  return (bool) preg_match( "/US3-A/i", $gfacID );
}
673
// Function to get the current job status from GFAC
//   $gfacID - job ID
// Returns:
//   - for Airavata jobs: getExperimentStatus() mapped through standard_status()
//   - false when the ID is neither an Airavata nor a GFAC job
//   - 'GFAC_STATUS_UNAVAILABLE' when the HTTP status request throws
//   - otherwise the status parsed from the GFAC XML reply, with
//     DONE / DATA / RESULTS_GEN all reported as 'DATA'
//
// The earlier version tried to refine an 'EXECUTING' experiment status using
// $gfac_status before that variable had been assigned; the test could never
// succeed and only raised undefined-variable notices, so it has been removed.
// NOTE(review): if the refinement was meant to use the gateway's stored
// status, that value would have to be passed in - confirm the intent.
function get_gfac_status( $gfacID )
{
  global $serviceURL;
  global $loghdr;

  if ( is_aira_job( $gfacID ) )
  {
    $status_ex   = getExperimentStatus( $gfacID );
    $gfac_status = standard_status( $status_ex );
    write_log( "$loghdr get_gfac_status: status_ex=$status_ex gfac_status=$gfac_status" );
    return $gfac_status;
  }

  if ( ! is_gfac_job( $gfacID ) )
    return false;

  $url = "$serviceURL/jobstatus/$gfacID";
  try
  {
    $post = new HttpRequest( $url, HttpRequest::METH_GET );
    $post->send();
    $xml  = $post->getResponseBody();
  }
  catch ( HttpException $e )
  {
    write_log( "$loghdr Status not available - marking failed - $gfacID" );
    return 'GFAC_STATUS_UNAVAILABLE';
  }

  // Parse the result
  $gfac_status = parse_response( $xml );

  // This may not seem like the best place to do this, but here we have
  // the xml straight from GFAC: report any status we do not know about
  $status_types = array( 'SUBMITTED',
                         'SUBMITED',
                         'INITIALIZED',
                         'PENDING',
                         'RUNNING',
                         'ACTIVE',
                         'STARTED',
                         'COMPLETED',
                         'FINISHED',
                         'DONE',
                         'DATA',
                         'RESULTS_GEN',
                         'CANCELED',
                         'CANCELLED',
                         'FAILED',
                         'STAGING',
                         'UNKNOWN' );
  if ( ! in_array( $gfac_status, $status_types ) )
    mail_to_admin( 'debug', "gfacID: /$gfacID/\n" .
                            "XML:    /$xml/\n" .
                            "Status: /$gfac_status/\n" );

  if ( in_array( $gfac_status, array( 'DONE', 'DATA', 'RESULTS_GEN' ) ) )
    $gfac_status = 'DATA';

  return $gfac_status;
}
747
// Function to request data outputs from GFAC
//   $gfacID - job ID
// Returns false when the job is not a GFAC job, when its current status is
// not one of DONE / FAILED / COMPLETE / FINISHED, or when the HTTP request
// throws; otherwise the status parsed from the XML reply.
// NOTE(review): wait_data() and data_timeout() call this only after seeing
// status 'DATA', which is not in the accepted list below - verify that the
// request can actually be reached from those callers.
function get_gfac_outputs( $gfacID )
{
  global $serviceURL;
  global $self;

  // Make sure it's a GFAC job and status is appropriate for this call
  $current = get_gfac_status( $gfacID );

  if ( $current === false )
    return false;                 // Then it's not a GFAC job

  $requestable = array( 'DONE', 'FAILED', 'COMPLETE', 'FINISHED' );
  if ( ! in_array( $current, $requestable ) )
    return false;                 // Then it's not appropriate to request data

  $endpoint = $serviceURL . "/registeroutput/" . $gfacID;

  try
  {
    $request = new HttpRequest( $endpoint, HttpRequest::METH_GET );
    $request->send();
    $reply   = $request->getResponseBody();
  }
  catch ( HttpException $e )
  {
    write_log( "$self: Data not available - request failed - $gfacID" );
    return false;
  }

  // Temporary, to see what the xml looks like, if we ever get one
  mail_to_admin( "debug", "get_gfac_outputs/\n$reply/" );

  // Parse the result
  return parse_response( $reply );
}
788
// Extract the job status from a GFAC XML reply.
//   $xml - XML document text
// Returns the text content of the last <status> element, or "" when there is
// none.  Side effect: the global $gfac_message is reset to "" and then set to
// the last text node found inside any element other than <status>.
// An empty or non-string payload yields "" without invoking the XML parser
// (XMLReader::xml() warns - or throws on newer PHP - when given no data).
function parse_response( $xml )
{
  global $gfac_message;

  $status       = "";
  $gfac_message = "";

  if ( ! is_string( $xml ) || trim( $xml ) === "" )
    return $status;

  $name   = "";                       // name of the most recent element seen
  $parser = new XMLReader();
  $parser->xml( $xml );

  while ( $parser->read() )
  {
    $type = $parser->nodeType;

    if ( $type == XMLReader::ELEMENT )
      $name = $parser->name;

    else if ( $type == XMLReader::TEXT )
    {
      if ( $name == "status" )
        $status       = $parser->value;
      else
        $gfac_message = $parser->value;
    }
  }

  $parser->close();
  return $status;
}
818
// Function to get status from local cluster
//   $gfacID - job ID handed to `qstat -a` on the cluster
// Runs qstat over ssh on "<cluster>.uthscsa.edu" (global $cluster, with any
// "-local" removed) and maps the 10th whitespace-separated field of the last
// output line onto ACTIVE / COMPLETED / SUBMITTED.  Returns 'UNKNOWN' when
// there is no output, qstat does not know the job, or the field is missing
// or unrecognized.
function get_local_status( $gfacID )
{
  global $cluster;
  global $self;

  $system = "$cluster.uthscsa.edu";
  $system = preg_replace( "/\-local/", "", $system );

  // The remote command is re-parsed by the remote shell, so the job ID is
  // quoted once for that shell and the whole command once more for the
  // local one.
  $remote = "qstat -a " . escapeshellarg( $gfacID );
  $cmd    = "/usr/bin/ssh -x " . escapeshellarg( "us3@$system" ) . " " .
            escapeshellarg( $remote ) . " 2>&1";

  $result = exec( $cmd );   // last line of the command's output

  if ( $result == "" || preg_match( "/^qstat: Unknown/", $result ) )
  {
    write_log( "$self get_local_status: Local job $gfacID unknown" );
    return 'UNKNOWN';
  }

  $values = preg_split( "/\s+/", $result );
  $state  = isset( $values[ 9 ] ) ? $values[ 9 ] : '';   // short line => UNKNOWN
//  write_log( "$self: get_local_status: job status = /$state/");
  switch ( $state )
  {
    case "W" :                      // Waiting for execution time to be reached
    case "E" :                      // Job is exiting after having run
    case "R" :                      // Still running
      $status = 'ACTIVE';
      break;

    case "C" :                      // Job has completed
      $status = 'COMPLETED';
      break;

    case "T" :                      // Job is being moved
    case "H" :                      // Held
    case "Q" :                      // Queued
      $status = 'SUBMITTED';
      break;

    default :
      $status = 'UNKNOWN';          // This should not occur
      break;
  }

  return $status;
}
864
// Append $message to the queue_messages table for the current job
// (global $gfacID), keyed by the job's analysis.id.
// Failures are logged and otherwise ignored.  When no analysis row exists
// for the job, nothing is inserted (the earlier version inserted a row with
// an empty analysisID in that case).
function update_queue_messages( $message )
{
  global $self;
  global $gLink;
  global $gfacID;

  // Get analysis table ID
  $query  = "SELECT id FROM analysis " .
            "WHERE gfacID = '$gfacID' ";
  $result = mysql_query( $query, $gLink );
  if ( ! $result )
  {
    write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );
    return;
  }

  $row = mysql_fetch_array( $result );
  if ( ! $row )
  {
    write_log( "$self: update_queue_messages: no analysis entry for $gfacID" );
    return;
  }
  $analysisID = $row[ 0 ];

  // Insert message into queue_message table
  $query  = "INSERT INTO queue_messages SET " .
            "message = '" . mysql_real_escape_string( $message, $gLink ) . "', " .
            "analysisID = '$analysisID' ";
  $result = mysql_query( $query, $gLink );
  if ( ! $result )
    write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );
}
893
// Store $message as lastMessage on the current job's HPCAnalysisResult row
// (global $gfacID) in the LIMS database named by the global $us3_db.
// Returns 0 when the connection or database selection fails; nothing
// otherwise.  A failed UPDATE is logged.
//
// The database password is deliberately kept out of log and mail text, and
// the connection is closed on every path once it has been opened.
function update_db( $message )
{
  global $self;
  global $gfacID;
  global $dbhost;
  global $user;
  global $passwd;
  global $us3_db;

  $us3_link = mysql_connect( $dbhost, $user, $passwd );

  if ( ! $us3_link )
  {
    write_log( "$self: could not connect: $dbhost, $user" );
    mail_to_admin( "fail", "Could not connect to $dbhost" );
    return 0;
  }

  if ( ! mysql_select_db( $us3_db, $us3_link ) )
  {
    write_log( "$self: could not select DB $us3_db" );
    mail_to_admin( "fail", "Could not select DB $us3_db, $dbhost, $user" );
    mysql_close( $us3_link );
    return 0;
  }

  // Note the space before WHERE: the clauses are separate string pieces
  $query = "UPDATE HPCAnalysisResult SET " .
           "lastMessage='" . mysql_real_escape_string( $message, $us3_link ) . "' " .
           "WHERE gfacID = '$gfacID' ";

  if ( ! mysql_query( $query, $us3_link ) )
    write_log( "$self: Query failed $query - " . mysql_error( $us3_link ) );

  mysql_close( $us3_link );
}
929
// Send a notification mail to the administrator address.
//   $type - notification category supplied by the caller ("fail", "hang",
//           "debug"); it is not used in the mail itself
//   $msg  - text placed after "Error Message" in the mail body
// The body also reports the globals $updateTime, $status and $cluster.
function mail_to_admin( $type, $msg )
{
  global $updateTime;
  global $status;
  global $cluster;
  global $org_name;
  global $admin_email;
  global $dbhost;
  global $requestID;

  $headers  = "From: $org_name Admin<$admin_email>"     . "\n";
  $headers .= "Cc: $org_name Admin<$admin_email>"       . "\n";
  $headers .= "Bcc: Gary Gorbet<gegorbet@gmail.com>"    . "\n";   // make sure

  // Set the reply address
  $headers .= "Reply-To: $org_name<$admin_email>"       . "\n";
  $headers .= "Return-Path: $org_name<$admin_email>"    . "\n";

  // Try to avoid spam filters
  $now     = time();
  $id_left = $now;
  // A request ID, when one is set globally, goes inside the angle brackets;
  // text after the closing ">" makes the Message-ID header malformed.
  if ( isset( $requestID ) && $requestID !== '' )
    $id_left .= "." . $requestID . ".";
  $headers .= "Message-ID: <" . $id_left . "gridctl@$dbhost>\n";
  $headers .= "X-Mailer: PHP v" . phpversion()          . "\n";
  $headers .= "MIME-Version: 1.0"                       . "\n";
  $headers .= "Content-Transfer-Encoding: 8bit"         . "\n";

  $subject = "US3 Error Notification";
  $message = "
 UltraScan job error notification from gridctl.php:

 Update Time : $updateTime
 GFAC Status : $status
 Cluster : $cluster
 ";

  $message .= "Error Message : $msg\n";

  mail( $admin_email, $subject, $message, $headers );
}
968
// Convert a status string to one of the standard DB status strings
//   $status_in - status as reported by a gateway / experiment / queue
// Returns the standard value for every listed variation; any other value
// (including the already-standard ones) is returned unchanged.
function standard_status( $status_in )
{
  // Standard value => variations that map onto it.  The groups are tried in
  // this order and matched with loose (==) comparison, exactly like the
  // switch statement this table replaces.
  $variations = array(
    'SUBMITTED' => array( 'QUEUED', 'LAUNCHED', 'CREATED', 'VALIDATED',
                          'SCHEDULED', 'submitted', '' ),
    'RUNNING'   => array( 'EXECUTING', 'ACTIVE', 'running', 'executing' ),
    'UPDATING'  => array( 'PENDING', 'CANCELING' ),
    'CANCELED'  => array( 'CANCELLED', 'canceled' ),
    'COMPLETE'  => array( 'COMPLETED', 'completed' ),
    'FAILED'    => array( 'FAILED_DATA', 'SUBMIT_TIMEOUT', 'RUN_TIMEOUT',
                          'DATA_TIMEOUT' ),
    'DONE'      => array( 'COMPLETE' ),
    'ERROR'     => array( 'UNKNOWN' ),
  );

  foreach ( $variations as $standard => $aliases )
  {
    if ( in_array( $status_in, $aliases ) )
      return $standard;
  }

  // Where already standard value (ERROR, RUNNING, SUBMITTED, UPDATING,
  // CANCELED, DATA, FAILED, DONE, FINISHED) or unrecognized, retain value
  return $status_in;
}
1038
// Work out the current status of an Airavata/Thrift job.
//   $gfacID    - job ID
//   $status_in - status currently stored for the job by the gateway
// Returns $status_in unchanged unless the ID contains "US3-A" and the job's
// development/production flavor matches this gridctl instance.  Otherwise
// the experiment status is combined with the gateway status as commented
// below, the result is logged, stored through update_job_status() when it
// differs from the gateway status, and returned.
function aira_status( $gfacID, $status_in )
{
  global $self;
  global $loghdr;
  global $class_dir;

  $gateway = $status_in;

  $dev_gridctl = (bool) preg_match( "/class_devel/", $class_dir );
  $dev_job     = (bool) preg_match( "/US3-ADEV/i", $gfacID );

  // Not an Airavata job, or development/production type is wrong
  if ( ! preg_match( "/US3-A/i", $gfacID ) || $dev_gridctl !== $dev_job )
    return $gateway;

  $experiment  = getExperimentStatus( $gfacID );
  $gw_finished = ( $gateway == 'FINISHED' || $gateway == 'DONE' );

  if ( $experiment == 'COMPLETED' )
  {  // COMPLETED + FINISHED/DONE : gateway status is now COMPLETE
     // COMPLETED + NOT-FINISHED/DONE: gw status now DONE
    $resolved = $gw_finished ? 'COMPLETE' : 'DONE';
  }

  else if ( ! $gw_finished )
  {  // Experiment not COMPLETED/FINISHED/DONE: use experiment status
    $resolved = standard_status( $experiment );
  }

  else if ( $experiment != 'FAILED' )
  {  // Gfac status == FINISHED/DONE: leave as is
    $resolved = $gateway;
  }

  else
  {  // FINISHED/DONE but experiment FAILED: re-query up to twice, 10 seconds
     // apart, for the log; the status is then set to COMPLETE either way
    sleep( 10 );
    $experiment = getExperimentStatus( $gfacID );

    if ( $experiment == 'FAILED' )
    {
      write_log( "$loghdr status still 'FAILED' after 10-second delay" );
      sleep( 10 );
      $experiment = getExperimentStatus( $gfacID );

      $outcome = ( $experiment == 'FAILED' )
               ? "status still 'FAILED' after 20-second delay"
               : "status is $experiment after 20-second delayed retry";
      write_log( "$loghdr $outcome" );
    }

    write_log( "$loghdr status reset to 'COMPLETE'" );
    $resolved = 'COMPLETE';
  }

  write_log( "$loghdr status/_in/_gw/_ex=$resolved/$status_in/$gateway/$experiment" );

  if ( $resolved != $gateway )
    update_job_status( $resolved, $gfacID );

  return $resolved;
}
1114
1115?>
Note: See TracBrowser for help on using the repository browser.