source: trunk/gridctl.php@ 16

Last change on this file since 16 was 15, checked in by gegorbet, 9 years ago

fixes, mainly for Jureca

File size: 29.5 KB
Line 
<?php

// gridctl main script: reads every row of the GFAC `analysis` table and, for
// each job that belongs to this installation, refreshes its status and calls
// the handler function for that status.
//
// The following are used here but not defined in this file (presumably set by
// an including script/config - confirm): $dbhost, $guser, $gpasswd, $gDB,
// $self, $class_dir, write_log(), getExperimentStatus().

// Global variables (read by the handler functions below through `global`)
$gfac_message = "";
$updateTime = 0;
$submittime = 0;
$cluster = '';

//global $self;

// Produce some output temporarily, so cron will send me message
$now = time();
echo "Time started: " . date( 'Y-m-d H:i:s', $now ) . "\n";

// Get data from global GFAC DB
$gLink = mysql_connect( $dbhost, $guser, $gpasswd );

// Report a failed connection as such instead of letting it surface later as
// a misleading "could not select DB" error
if ( ! $gLink )
{
  write_log( "$self: Could not connect to DB server $dbhost - " . mysql_error() );
  mail_to_admin( "fail", "Internal Error: Could not connect to DB server $dbhost" );
  exit();
}

if ( ! mysql_select_db( $gDB, $gLink ) )
{
  write_log( "$self: Could not select DB $gDB - " . mysql_error() );
  mail_to_admin( "fail", "Internal Error: Could not select DB $gDB" );
  exit();
}

$query = "SELECT gfacID, us3_db, cluster, status, queue_msg, " .
         "UNIX_TIMESTAMP(time), time from analysis";
$result = mysql_query( $query, $gLink );

if ( ! $result )
{
  write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );
  mail_to_admin( "fail", "Query failed $query\n" . mysql_error( $gLink ) );
  exit();
}

if ( mysql_num_rows( $result ) == 0 )
{
//write_log( "$self: analysis read got numrows==0" );
  exit(); // Nothing to do
}

// Non-zero when this copy of gridctl runs out of a class_devel directory
$me_devel = preg_match( "/class_devel/", $class_dir );

// Column order matches the SELECT above: $time is the UNIX timestamp of the
// `time` column, $updateTime is the same column in its native format
while ( list( $gfacID, $us3_db, $cluster, $status, $queue_msg, $time, $updateTime )
            = mysql_fetch_array( $result ) )
{
  // If this entry does not match class/class_devel, skip processing

  if ( preg_match( "/US3-A/i", $gfacID ) )
  { // For thrift, job and gridctl must match
    $job_devel = preg_match( "/US3-ADEV/i", $gfacID );
    if ( (  $me_devel && !$job_devel ) ||
         ( !$me_devel &&  $job_devel ) )
    { // If job not from appropriate Airavata server, skip processing
      continue;
    }
  }
  else if ( $me_devel )
  { // For non-thrift and development, skip
    continue;
  }

  // Checking we need to do for each entry
echo "us3db=$us3_db gfid=$gfacID\n";
  // NOTE(review): in the default case $serviceURL keeps whatever value it had
  // before (possibly from a previous row or from configuration) - confirm
  // this is intended.
  switch ( $us3_db )
  {
    case 'Xuslims3_cauma3' :
    case 'Xuslims3_cauma3d' :
    case 'Xuslims3_HHU' :
    case 'Xuslims3_Uni_KN' :
      $serviceURL = "http://gridfarm005.ucs.indiana.edu:9090/ogce-rest/job";
      break;

    default :
//    $serviceURL = "http://gridfarm005.ucs.indiana.edu:8080/ogce-rest/job";
      break;
  }

  // Short log label built from the first three dash-separated parts of the
  // ID; pad so that shorter IDs do not trigger undefined-offset notices
  $awork = array_pad( explode( "-", $gfacID ), 3, "" );
  $gfacLabl = $awork[0] . "-" . $awork[1] . "-" . $awork[2];
  $loghdr = $self . ":" . $gfacLabl . "...:";
  $status_in = $status;
  $status_gw = $status;

  // If entry is for Airvata/Thrift, get the true current status

  if ( is_aira_job( $gfacID ) )
  {
    $status_in = $status;
    $status = aira_status( $gfacID, $status_in );
if($status != $status_in )
write_log( "$loghdr Set to $status from $status_in" );
  }
  else
  {
    $status_gw = $status;
    $status = get_gfac_status( $gfacID );
    //if ( $status == 'FINISHED' )
    // A gateway status of COMPLETE overrides whatever GFAC reports
    if ( $status_gw == 'COMPLETE' )
      $status = $status_gw;
write_log( "$loghdr non-AThrift status=$status status_gw=$status_gw" );
  }

  // Sometimes during testing, the us3_db entry is not set
  // If $status == 'ERROR' then the condition has been processed before
  if ( strlen( $us3_db ) == 0 && $status != 'ERROR' )
  {
    write_log( "$loghdr GFAC DB is NULL - $gfacID" );
    mail_to_admin( "fail", "GFAC DB is NULL\n$gfacID" );

    $query2 = "UPDATE analysis SET status='ERROR' WHERE gfacID='$gfacID'";
    $result2 = mysql_query( $query2, $gLink );
    $status = 'ERROR';

    if ( ! $result2 )
      write_log( "$loghdr Query failed $query2 - " . mysql_error( $gLink ) );

  }

//echo " st=$status\n";
  // Dispatch on the refreshed status; the handlers read the current job
  // through the globals assigned by the list() above
  switch ( $status )
  {
    // Already been handled
    // Later update this condition to search for gfacID?
    case "ERROR":
      cleanup();
      break;

    case "SUBMITTED":
      submitted( $time );
      break;

    case "SUBMIT_TIMEOUT":
      submit_timeout( $time );
      break;

    case "RUNNING":
    case "STARTED":
    case "STAGING":
    case "ACTIVE":
      running( $time );
      break;

    case "RUN_TIMEOUT":
      run_timeout($time );
      break;

    case "DATA":
    case "RESULTS_GEN":
      wait_data( $time );
      break;

    case "DATA_TIMEOUT":
      data_timeout( $time );
      break;

    case "COMPLETED":
    case "COMPLETE":
      complete();
      break;

    case "CANCELLED":
    case "CANCELED":
    case "FAILED":
      failed();
      break;

    case "FINISHED":
    case "DONE":
//    if ( is_aira_job( $gfacID ) )
//    {
//      complete();
//    }
    case "PROCESSING":
    default:
      break;
  }
}

exit();
182
// Handler for a job whose gateway status is SUBMITTED.
//
// $updatetime is the UNIX timestamp of the job's analysis row. Nothing is
// done during the first 10 minutes. From then until 24 hours the job status
// is polled and, if it is no longer a queued-type status, passed on to
// update_job_status(). After 24 hours the row is marked SUBMIT_TIMEOUT and
// the admin is notified.
function submitted( $updatetime )
{
  global $self;
  global $gLink;
  global $gfacID;
  global $loghdr;

  $elapsed = time() - $updatetime;

  // Updated less than 10 minutes ago
  if ( $elapsed < 600 )
    return;

  if ( $elapsed >= 86400 )
  { // Still listed as submitted after 24 hours: flag the timeout
    $message = "Job listed submitted longer than 24 hours";
    write_log( "$self: $message - id: $gfacID" );
    mail_to_admin( "hang", "$message - id: $gfacID" );

    $query = "UPDATE analysis SET status='SUBMIT_TIMEOUT' WHERE gfacID='$gfacID'";
    if ( ! mysql_query( $query, $gLink ) )
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

    update_queue_messages( $message );
    update_db( $message );
    return;
  }

  // Within the first 24 hours: ask GFAC, fall back to the local cluster
  $job_status = get_gfac_status( $gfacID );
  if ( $job_status === false )
    $job_status = get_local_status( $gfacID );

  if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
    return;

  $queued_states = array( 'SUBMITTED', 'INITIALIZED', 'PENDING' );
  if ( in_array( $job_status, $queued_states ) )
    return;

//write_log( "$loghdr submitted:job_status=$job_status" );
  update_job_status( $job_status, $gfacID );
}
223
// Handler for a job whose gateway status is SUBMIT_TIMEOUT.
//
// The job status is polled first; any status other than a queued-type one is
// handed to update_job_status(). If the job is still queued and the analysis
// row timestamp ($updatetime, UNIX time) is at least 24 hours old, the row is
// marked FAILED and the admin is notified.
function submit_timeout( $updatetime )
{
  global $self;
  global $gLink;
  global $gfacID;
  global $loghdr;

  // Ask GFAC, fall back to the local cluster
  $job_status = get_gfac_status( $gfacID );
  if ( $job_status === false )
    $job_status = get_local_status( $gfacID );

  if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
    return;

  $queued_states = array( 'SUBMITTED', 'INITIALIZED', 'PENDING' );
  if ( ! in_array( $job_status, $queued_states ) )
  { // The job has moved on: record the new status
//write_log( "$loghdr submit_timeout:job_status=$job_status" );
    update_job_status( $job_status, $gfacID );
    return;
  }

  // < 24 hours ago ( 48 total submitted )
  $elapsed = time() - $updatetime;
  if ( $elapsed < 86400 )
    return;

  $message = "Job listed submitted longer than 48 hours";
  write_log( "$self: $message - id: $gfacID" );
  mail_to_admin( "hang", "$message - id: $gfacID" );

  $query = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";
  if ( ! mysql_query( $query, $gLink ) )
    write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

  update_queue_messages( $message );
  update_db( $message );
}
260
// Handler for a job whose status is one of the running-type values.
//
// Always calls get_us3_data() first. Nothing more is done during the first
// 10 minutes after $updatetime (UNIX time). From then until 24 hours the job
// status is polled and passed to update_job_status() when it is no longer a
// running-type status. After 24 hours the row is marked RUN_TIMEOUT and the
// admin is notified.
function running( $updatetime )
{
  global $self;
  global $gLink;
  global $gfacID;
  global $loghdr;

  // The clock is read before the US3 lookup, as the age test uses this value
  $elapsed = time() - $updatetime;

  get_us3_data();

  // Message received less than 10 minutes ago
  if ( $elapsed < 600 )
    return;

  if ( $elapsed >= 86400 )
  { // Listed as running for more than 24 hours: flag the timeout
    $message = "Job listed running longer than 24 hours";
    write_log( "$self: $message - id: $gfacID" );
    mail_to_admin( "hang", "$message - id: $gfacID" );

    $query = "UPDATE analysis SET status='RUN_TIMEOUT' WHERE gfacID='$gfacID'";
    if ( ! mysql_query( $query, $gLink ) )
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

    update_queue_messages( $message );
    update_db( $message );
    return;
  }

  // Within the first 24 hours: ask GFAC, fall back to the local cluster
  $job_status = get_gfac_status( $gfacID );
  if ( $job_status === false )
    $job_status = get_local_status( $gfacID );

  if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
    return;

  $running_states = array( 'ACTIVE', 'RUNNING', 'STARTED' );
  if ( in_array( $job_status, $running_states ) )
    return;

//write_log( "$loghdr running:job_status=$job_status" );
  update_job_status( $job_status, $gfacID );
}
303
// Handler for a job whose gateway status is RUN_TIMEOUT.
//
// The job status is polled first; any status other than a running-type one
// is handed to update_job_status(). If the job is still running,
// get_us3_data() is called, and once the analysis row timestamp ($updatetime,
// UNIX time) is at least 48 hours old the row is marked FAILED and the admin
// is notified.
function run_timeout( $updatetime )
{
  global $self;
  global $gLink;
  global $gfacID;
  global $loghdr;

  // Ask GFAC, fall back to the local cluster
  $job_status = get_gfac_status( $gfacID );
  if ( $job_status === false )
    $job_status = get_local_status( $gfacID );

  if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
    return;

  $running_states = array( 'ACTIVE', 'RUNNING', 'STARTED' );
  if ( ! in_array( $job_status, $running_states ) )
  { // The job has moved on: record the new status
//write_log( "$loghdr run_timeout:job_status=$job_status" );
    update_job_status( $job_status, $gfacID );
    return;
  }

  // The clock is read before the US3 lookup, as the age test uses this value
  $elapsed = time() - $updatetime;

  get_us3_data();

  // < 48 hours ago
  if ( $elapsed < 172800 )
    return;

  $message = "Job listed running longer than 48 hours";
  write_log( "$self: $message - id: $gfacID" );
  mail_to_admin( "hang", "$message - id: $gfacID" );

  $query = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";
  if ( ! mysql_query( $query, $gLink ) )
    write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

  update_queue_messages( $message );
  update_db( $message );
}
342
// Handler for a job that is waiting for its result data.
//
// During the first hour after $updatetime (UNIX time) the job status is
// polled; a status other than 'DATA' is handed to update_job_status().
// While it stays 'DATA', get_gfac_outputs() is called on runs whose
// wall-clock minute is a multiple of 5. After one hour the row is marked
// DATA_TIMEOUT and the admin is notified.
function wait_data( $updatetime )
{
  global $self;
  global $gLink;
  global $gfacID;
  global $loghdr;

  $elapsed = time() - $updatetime;

  if ( $elapsed >= 3600 )
  { // Waited more than an hour: flag the timeout
    $message = "Waiting for data longer than 1 hour";
    write_log( "$self: $message - id: $gfacID" );
    mail_to_admin( "hang", "$message - id: $gfacID" );

    $query = "UPDATE analysis SET status='DATA_TIMEOUT' WHERE gfacID='$gfacID'";
    if ( ! mysql_query( $query, $gLink ) )
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

    update_queue_messages( $message );
    update_db( $message );
    return;
  }

  // Within the first hour: ask GFAC, fall back to the local cluster
  $job_status = get_gfac_status( $gfacID );
  if ( $job_status === false )
    $job_status = get_local_status( $gfacID );

  if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
    return;

  if ( $job_status != 'DATA' )
  {
//write_log( "$loghdr wait_data:job_status=$job_status" );
    update_job_status( $job_status, $gfacID );
    return;
  }

  // Request to resend data, but only request every 5 minutes
  $minute = (int) date( 'i' );
  if ( $minute % 5 != 0 )
    return;

  $output_status = get_gfac_outputs( $gfacID );

  if ( $output_status !== false )
    mail_to_admin( "debug", "wait_data/$gfacID/$output_status" );
}
391
// Handler for a job whose gateway status is DATA_TIMEOUT.
//
// The job status is polled first; a status other than 'DATA' is handed to
// update_job_status(). While it stays 'DATA' and the analysis row timestamp
// ($updatetime, UNIX time) is less than 24 hours old, get_gfac_outputs() is
// called on runs whose wall-clock minute is a multiple of 15. After 24 hours
// the row is marked FAILED and the admin is notified.
function data_timeout( $updatetime )
{
  global $self;
  global $gLink;
  global $gfacID;
  global $loghdr;

  // Ask GFAC, fall back to the local cluster
  $job_status = get_gfac_status( $gfacID );
  if ( $job_status === false )
    $job_status = get_local_status( $gfacID );

  if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
    return;

  if ( $job_status != 'DATA' )
  {
//write_log( "$loghdr data_timeout:job_status=$job_status" );
    update_job_status( $job_status, $gfacID );
    return;
  }

  $elapsed = time() - $updatetime;

  if ( $elapsed >= 86400 )
  { // Waited more than 24 hours: give up on the job
    $message = "Waiting for data longer than 24 hours";
    write_log( "$self: $message - id: $gfacID" );
    mail_to_admin( "hang", "$message - id: $gfacID" );

    $query = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";
    if ( ! mysql_query( $query, $gLink ) )
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

    update_queue_messages( $message );
    update_db( $message );
    return;
  }

  // Request to resend data, but only request every 15 minutes
  $minute = (int) date( 'i' );
  if ( $minute % 15 != 0 )
    return;

  $output_status = get_gfac_outputs( $gfacID );

  if ( $output_status !== false )
    mail_to_admin( "debug", "data_timeout/$gfacID/$output_status" );
}
440
// Handler for a COMPLETE/COMPLETED job: all remaining work is delegated to
// cleanup().
function complete()
{
  cleanup();
}
446
// Handler for a FAILED or cancelled job: all remaining work is delegated to
// cleanup().
function failed()
{
  cleanup();
}
452
// Run the final cleanup step for the current job (global $gfacID).
//
// Returns without doing anything when the analysis row no longer exists or
// when get_us3_data() yields no request ID. Otherwise calls aira_cleanup()
// for Airavata jobs whose development/production flavour matches this
// installation, or gfac_cleanup() for non-Airavata jobs on a non-development
// installation.
function cleanup()
{
  global $self;
  global $gLink;
  global $gfacID;
  global $us3_db;
  global $loghdr;
  global $class_dir;

  // Double check that the gfacID exists
  $query = "SELECT count(*) FROM analysis WHERE gfacID='$gfacID'";
  $result = mysql_query( $query, $gLink );

  if ( ! $result )
  {
    $db_error = mysql_error( $gLink );
    write_log( "$self: Query failed $query - " . $db_error );
    mail_to_admin( "fail", "Query failed $query\n" . mysql_error( $gLink ) );
    return;
  }

  list( $count ) = mysql_fetch_array( $result );

  if ( $count == 0 )
  { // The row is gone: log it and stop
    write_log( "$loghdr count = $count gfacID = $gfacID" );
    return;
  }

  // Now check the us3 instance
  $requestID = get_us3_data();
//write_log( "$loghdr requestID = $requestID gfacID = $gfacID" );
  if ( $requestID == 0 )
    return;

  $me_devel = preg_match( "/class_devel/", $class_dir );
  $is_airavata = preg_match( "/US3-A/i", $gfacID );

  if ( $is_airavata )
  {
    $job_devel = preg_match( "/US3-ADEV/i", $gfacID );

    // The job and this gridctl must both be development or both production
    $flavour_matches = $me_devel ? $job_devel : ! $job_devel;

    if ( $flavour_matches )
    { // If job from appropriate Airavata server, process it
//write_log( "$loghdr CALLING aira_cleanup()" );
      aira_cleanup( $us3_db, $requestID, $gLink );
    }
//write_log( "$loghdr RTN FR aira_cleanup()" );
    return;
  }

  if ( ! $me_devel )
  { // If this is gridctl_pro and gfac (jureca), do GFAC cleanup
//write_log( "$loghdr CALLING gfac_cleanup()" );
    gfac_cleanup( $us3_db, $requestID, $gLink );
  }
}
504
// Function to update status of job
//
// Maps a reported job status onto the status stored in the analysis table,
// runs the matching UPDATE (if any) and records a human-readable message via
// update_queue_messages() and update_db().
//
//   $job_status - status string reported for the job
//   $gfacID     - ID of the job whose analysis row is updated
//
// For 'UPDATING' and 'PENDING' only the message is recorded; the analysis
// row is left untouched. For 'FINISHED' and 'DONE' the row is updated but no
// message is recorded.
function update_job_status( $job_status, $gfacID )
{
  global $gLink;
  global $self;
  global $loghdr;

  // Local on purpose. This used to be `global $query`, so the statuses that
  // set no query re-executed whatever SQL was left in the global (the main
  // SELECT or a previous job's UPDATE).
  $query = null;

  switch ( $job_status )
  {
    case 'SUBMITTED'   :
    case 'SUBMITED'    :
    case 'INITIALIZED' :
      $query   = "UPDATE analysis SET status='SUBMITTED' WHERE gfacID='$gfacID'";
      $message = "Job status request reports job is " . $job_status;
//write_log( "$loghdr update_job_status(SUBM) job_status=$job_status" );
      break;

    case 'STARTED' :
    case 'RUNNING' :
    case 'ACTIVE'  :
      $query   = "UPDATE analysis SET status='RUNNING' WHERE gfacID='$gfacID'";
      $message = "Job status request reports job is RUNNING";
      break;

    case 'FINISHED' :
      $query   = "UPDATE analysis SET status='FINISHED' WHERE gfacID='$gfacID'";
      $message = "NONE";
      break;

    case 'DONE' :
      $query   = "UPDATE analysis SET status='DONE' WHERE gfacID='$gfacID'";
      $message = "NONE";
      break;

    case 'COMPLETED' :
    case 'COMPLETE'  :
      $query   = "UPDATE analysis SET status='COMPLETE' WHERE gfacID='$gfacID'";
      $message = "Job status request reports job is COMPLETED";
      break;

    case 'DATA' :
      $query   = "UPDATE analysis SET status='DATA' WHERE gfacID='$gfacID'";
      $message = "Job status request reports job is COMPLETE, waiting for data";
      break;

    case 'CANCELED'  :
    case 'CANCELLED' :
      $query   = "UPDATE analysis SET status='CANCELED' WHERE gfacID='$gfacID'";
      $message = "Job status request reports job is CANCELED";
      break;

    case 'FAILED' :
      $query   = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";
      $message = "Job status request reports job is FAILED";
      break;

    case 'UNKNOWN' :
write_log( "$loghdr job_status='UNKNOWN', reset to 'ERROR' " );
      $query   = "UPDATE analysis SET status='ERROR' WHERE gfacID='$gfacID'";
      $message = "Job status request reports job is not in the queue";
      break;

    case 'UPDATING' :
    case 'PENDING'  :
      // Transitional states: report them, but leave the stored status alone
      $message = "Job status request reports job is " . $job_status;
      break;

    default :
      // We shouldn't ever get here
      $query   = "UPDATE analysis SET status='ERROR' WHERE gfacID='$gfacID'";
      $message = "Job status was not recognized - $job_status";
      write_log( "$loghdr update_job_status: " .
                 "Job status was not recognized - $job_status\n" .
                 "gfacID = $gfacID\n" );
      break;

  }

  // Only touch the database when this status actually maps to an UPDATE
  if ( $query !== null )
  {
    $result = mysql_query( $query, $gLink );
    if ( ! $result )
      write_log( "$loghdr Query failed $query - " . mysql_error( $gLink ) );
  }

  if ( $message != 'NONE' )
  {
    update_queue_messages( $message );
    update_db( $message );
  }
}
594
// Look up the current job (global $gfacID) in the HPCAnalysisResult table of
// the job's US3 database (global $us3_db).
//
// Returns the HPCAnalysisRequestID of the matching row, or 0 when the
// connection, the database selection or the query fails. When no row matches,
// the value returned is null. As a side effect the global $updateTime is
// overwritten with the UNIX timestamp of the row's updateTime column.
//
// The database password is deliberately kept out of log and mail messages,
// and the connection is closed on every path that opened it.
function get_us3_data()
{
  global $self;
  global $gfacID;
  global $dbhost;
  global $user;
  global $passwd;
  global $us3_db;
  global $updateTime;
  global $loghdr;

  $us3_link = mysql_connect( $dbhost, $user, $passwd );

  if ( ! $us3_link )
  {
    write_log( "$loghdr could not connect: $dbhost, $user" );
    mail_to_admin( "fail", "Could not connect to $dbhost" );
    return 0;
  }

  $result = mysql_select_db( $us3_db, $us3_link );

  if ( ! $result )
  {
    write_log( "$loghdr could not select DB $us3_db" );
    mail_to_admin( "fail", "Could not select DB $us3_db, $dbhost, $user" );
    mysql_close( $us3_link );
    return 0;
  }

  $query = "SELECT HPCAnalysisRequestID, UNIX_TIMESTAMP(updateTime) " .
           "FROM HPCAnalysisResult WHERE gfacID='$gfacID'";
  $result = mysql_query( $query, $us3_link );

  if ( ! $result )
  {
    // Fetch the error text once, before the link is closed
    $db_error = mysql_error( $us3_link );
    write_log( "$self: Query failed $query - " . $db_error );
    mail_to_admin( "fail", "Query failed $query\n" . $db_error );
    mysql_close( $us3_link );
    return 0;
  }

  list( $requestID, $updateTime ) = mysql_fetch_array( $result );
  mysql_close( $us3_link );

  return $requestID;
}
641
// Function to determine if this is a gfac job or not
//
// An ID counts as a GFAC job when it starts with "US3-Experiment" (any
// letter case) or is exactly "US3-" followed by a hex string in the
// 8-4-4-4-12 grouping. Returns true or false.
function is_gfac_job( $gfacID )
{
  $hex = "[0-9a-fA-F]";

  // Experiment-style ID
  if ( preg_match( "/^US3-Experiment/i", $gfacID ) )
    return true;

  // Hex-group style ID
  if ( preg_match( "/^US3-$hex{8}-$hex{4}-$hex{4}-$hex{4}-$hex{12}$/", $gfacID ) )
    return true;

  // Then it's not a GFAC job
  return false;
}
655
// Function to determine if this is an airavata/thrift job or not
//
// Returns true when the ID contains "US3-A" anywhere (any letter case),
// false otherwise.
function is_aira_job( $gfacID )
{
  global $cluster;   // declared as in the original; not read in this function

  return preg_match( "/US3-A/i", $gfacID ) ? true : false;
}
669
// Function to get the current job status from GFAC
//
// Airavata/Thrift jobs: returns the experiment status run through
// standard_status(). IDs that are neither Airavata nor GFAC jobs: returns
// false. GFAC jobs: requests "$serviceURL/jobstatus/<id>" and returns the
// status parsed from the XML answer, with DONE, DATA and RESULTS_GEN all
// reported as 'DATA'. Returns 'GFAC_STATUS_UNAVAILABLE' when the HTTP request
// throws.
function get_gfac_status( $gfacID )
{
  global $serviceURL;
  global $self;
  global $loghdr;
  global $cluster;

  if ( is_aira_job( $gfacID ) )
  { // Airavata path: translate the experiment status and stop here
    $status_ex   = getExperimentStatus( $gfacID );
    $gfac_status = standard_status( $status_ex );
write_log( "$loghdr get_gfac_status: status_ex=$status_ex gfac_status=$gfac_status" );
    return $gfac_status;
  }

  if ( ! is_gfac_job( $gfacID ) )
    return false;

  try
  {
    $post = new HttpRequest( "$serviceURL/jobstatus/$gfacID", HttpRequest::METH_GET );
    $post->send();
    $xml = $post->getResponseBody();
  }
  catch ( HttpException $e )
  {
    write_log( "$loghdr Status not available - marking failed - $gfacID" );
    return 'GFAC_STATUS_UNAVAILABLE';
  }

  // Parse the result
  $gfac_status = parse_response( $xml );

  // This may not seem like the best place to do this, but here we have
  // the xml straight from GFAC: report any status we do not know about
  $known_statuses = array(
    'SUBMITTED', 'SUBMITED', 'INITIALIZED', 'PENDING',
    'RUNNING', 'ACTIVE', 'STARTED',
    'COMPLETED', 'FINISHED', 'DONE', 'DATA', 'RESULTS_GEN',
    'CANCELED', 'CANCELLED', 'FAILED', 'STAGING', 'UNKNOWN' );

  if ( ! in_array( $gfac_status, $known_statuses ) )
  {
    $report = "gfacID: /$gfacID/\n" .
              "XML:    /$xml/\n" .
              "Status: /$gfac_status/\n";
    mail_to_admin( 'debug', $report );
  }

  // These three all mean "finished, results pending"
  $data_states = array( 'DONE', 'DATA', 'RESULTS_GEN' );

  return in_array( $gfac_status, $data_states ) ? 'DATA' : $gfac_status;
}
734
// Function to request data outputs from GFAC
//
// Returns false when get_gfac_status() reports that the ID is not a GFAC
// job, when the job status is not one of DONE/FAILED/COMPLETE/FINISHED, or
// when the HTTP request throws. Otherwise requests
// "$serviceURL/registeroutput/<id>", mails the raw XML to the admin and
// returns the status parsed from it.
//
// NOTE(review): for non-Airavata jobs get_gfac_status() reports DONE as
// 'DATA', which is not in the accepted list below - confirm this is intended.
function get_gfac_outputs( $gfacID )
{
  global $serviceURL;
  global $self;

  // Make sure it's a GFAC job and status is appropriate for this call
  $job_status = get_gfac_status( $gfacID );

  if ( $job_status === false )
    return false;                      // Then it's not a GFAC job

  $ready_states = array( 'DONE', 'FAILED', 'COMPLETE', 'FINISHED' );

  if ( ! in_array( $job_status, $ready_states ) )
    return false;                      // Not appropriate to request data

  try
  {
    $post = new HttpRequest( "$serviceURL/registeroutput/$gfacID", HttpRequest::METH_GET );
    $post->send();
    $xml = $post->getResponseBody();
  }
  catch ( HttpException $e )
  {
    write_log( "$self: Data not available - request failed - $gfacID" );
    return false;
  }

  // Temporary, to see what the xml looks like, if we ever get one
  mail_to_admin( "debug", "get_gfac_outputs/\n$xml/" );

  // Parse the result
  return parse_response( $xml );
}
775
// Extract the job status from a GFAC XML response.
//
//   $xml - XML document text returned by the service
//
// Returns the text content found under a <status> element, or "" when there
// is none or when $xml is empty or cannot be loaded. As a side effect the
// global $gfac_message is reset and then set to the last text node found
// under any other element.
function parse_response( $xml )
{
  global $gfac_message;

  $status       = "";
  $gfac_message = "";

  // Nothing to parse: XMLReader rejects empty input (an exception on PHP 8),
  // so report "no status" directly
  if ( ! is_string( $xml ) || trim( $xml ) === '' )
    return $status;

  $parser = new XMLReader();

  if ( ! $parser->xml( $xml ) )
    return $status;

  // Name of the most recently opened element; text nodes are attributed to it
  $name = "";

  while( $parser->read() )
  {
    $type = $parser->nodeType;

    if ( $type == XMLReader::ELEMENT )
      $name = $parser->name;

    else if ( $type == XMLReader::TEXT )
    {
      if ( $name == "status" )
        $status       = $parser->value;
      else
        $gfac_message = $parser->value;
    }
  }

  $parser->close();
  return $status;
}
805
// Function to get status from local cluster
//
// Runs `qstat -a <gfacID>` over ssh on "<cluster>.uthscsa.edu" (with any
// "-local" removed from the host name) and maps the job-state letter in the
// tenth whitespace-separated field of the last output line:
//   W, E, R -> 'ACTIVE'    C -> 'COMPLETED'    T, H, Q -> 'SUBMITTED'
// Returns 'UNKNOWN' when there is no output, qstat does not know the job, or
// the state letter is missing or not one of the above.
function get_local_status( $gfacID )
{
  global $cluster;
  global $self;

  $system = "$cluster.uthscsa.edu";
  $system = preg_replace( "/\-local/", "", $system );

  // ssh hands its command to a remote shell, so the job ID is quoted once for
  // that shell and the whole remote command once more for the local one
  $remote_cmd = "qstat -a " . escapeshellarg( $gfacID );
  $cmd        = "/usr/bin/ssh -x " . escapeshellarg( "us3@$system" ) . " " .
                escapeshellarg( $remote_cmd ) . " 2>&1";

  // exec() returns only the last line of the command output
  $result = exec( $cmd );

  if ( $result == ""  ||  preg_match( "/^qstat: Unknown/", $result ) )
  {
    write_log( "$self get_local_status: Local job $gfacID unknown" );
    return 'UNKNOWN';
  }

  $values = preg_split( "/\s+/", $result );
//  write_log( "$self: get_local_status: job status = /{$values[9]}/");

  // A short or unexpected line has no tenth field; treat it as unknown
  $job_state = isset( $values[ 9 ] ) ? $values[ 9 ] : '';

  switch ( $job_state )
  {
    case "W" :                      // Waiting for execution time to be reached
    case "E" :                      // Job is exiting after having run
    case "R" :                      // Still running
      $status = 'ACTIVE';
      break;

    case "C" :                      // Job has completed
      $status = 'COMPLETED';
      break;

    case "T" :                      // Job is being moved
    case "H" :                      // Held
    case "Q" :                      // Queued
      $status = 'SUBMITTED';
      break;

    default :
      $status = 'UNKNOWN';          // This should not occur
      break;
  }

  return $status;
}
851
// Store a message for the current job (global $gfacID) in the queue_messages
// table of the GFAC database.
//
//   $message - text to store; escaped before it is put into the SQL
//
// Looks up the id of the job's analysis row first. Failures are logged and
// nothing is returned. When no analysis row matches, the message is not
// stored, since it would have no analysis ID to belong to.
function update_queue_messages( $message )
{
  global $self;
  global $gLink;
  global $gfacID;

  // Get analysis table ID
  $safe_id = mysql_real_escape_string( $gfacID, $gLink );
  $query  = "SELECT id FROM analysis " .
            "WHERE gfacID = '$safe_id' ";
  $result = mysql_query( $query, $gLink );
  if ( ! $result )
  {
    write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );
    return;
  }

  $row = mysql_fetch_array( $result );
  if ( ! $row )
  {
    write_log( "$self: No analysis row for gfacID $gfacID - queue message not stored" );
    return;
  }
  $analysisID = $row[ 0 ];

  // Insert message into queue_message table
  $query  = "INSERT INTO queue_messages SET " .
            "message = '" . mysql_real_escape_string( $message, $gLink ) . "', " .
            "analysisID = '$analysisID' ";
  $result = mysql_query( $query, $gLink );
  if ( ! $result )
  {
    write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );
    return;
  }
}
880
// Write $message into the lastMessage column of the current job's row
// (global $gfacID) in the HPCAnalysisResult table of the job's US3 database
// (global $us3_db).
//
// Returns 0 when the connection or the database selection fails; otherwise
// returns nothing. A failed UPDATE is logged. The database password is
// deliberately kept out of log and mail messages, and the connection is
// closed on every path that opened it.
function update_db( $message )
{
  global $self;
  global $gfacID;
  global $dbhost;
  global $user;
  global $passwd;
  global $us3_db;

  $us3_link = mysql_connect( $dbhost, $user, $passwd );

  if ( ! $us3_link )
  {
    write_log( "$self: could not connect: $dbhost, $user" );
    mail_to_admin( "fail", "Could not connect to $dbhost" );
    return 0;
  }

  $result = mysql_select_db( $us3_db, $us3_link );

  if ( ! $result )
  {
    write_log( "$self: could not select DB $us3_db" );
    mail_to_admin( "fail", "Could not select DB $us3_db, $dbhost, $user" );
    mysql_close( $us3_link );
    return 0;
  }

  // Note the space before WHERE: the pieces used to be joined without one
  $query = "UPDATE HPCAnalysisResult SET " .
           "lastMessage='" . mysql_real_escape_string( $message, $us3_link ) . "' " .
           "WHERE gfacID = '$gfacID' ";

  if ( ! mysql_query( $query, $us3_link ) )
    write_log( "$self: Query failed $query - " . mysql_error( $us3_link ) );

  mysql_close( $us3_link );
}
916
// Send a notification e-mail to the administrator address.
//
//   $type - category label supplied by the caller ("fail", "hang", "debug");
//           not used when building the message
//   $msg  - error text appended to the message body
//
// The body also reports the globals $updateTime, $status and $cluster as
// they stand at the time of the call.
function mail_to_admin( $type, $msg )
{
  global $updateTime;
  global $status;
  global $cluster;
  global $org_name;
  global $admin_email;
  global $dbhost;
  global $requestID;

  $headers  = "From: $org_name Admin<$admin_email>"     . "\n";
  $headers .= "Cc: $org_name Admin<$admin_email>"       . "\n";
  $headers .= "Bcc: Gary Gorbet<gegorbet@gmail.com>"    . "\n";     // make sure

  // Set the reply address
  $headers .= "Reply-To: $org_name<$admin_email>"      . "\n";
  $headers .= "Return-Path: $org_name<$admin_email>"   . "\n";

  // Try to avoid spam filters
  // The whole identifier goes inside the angle brackets, and uniqid() keeps
  // it distinct when several mails are sent within the same second
  $now      = time();
  $id_left  = $now . "." . uniqid();
  if ( strlen( (string) $requestID ) > 0 )
    $id_left .= "." . $requestID;
  $headers .= "Message-ID: <" . $id_left . ".gridctl@$dbhost>\n";
  $headers .= "X-Mailer: PHP v" . phpversion()         . "\n";
  $headers .= "MIME-Version: 1.0"                      . "\n";
  $headers .= "Content-Transfer-Encoding: 8bit"        . "\n";

  $subject       = "US3 Error Notification";
  $message       = "
 UltraScan job error notification from gridctl.php:

 Update Time : $updateTime
 GFAC Status : $status
 Cluster : $cluster
 ";

  $message .= "Error Message : $msg\n";

  mail( $admin_email, $subject, $message, $headers );
}
955
// Convert a status string to one of the standard DB status strings
//
// Known variations are mapped to the standard gateway value listed as the
// key of the table below; any other value (including the values that are
// already standard) is returned unchanged. Groups are tested in table order
// with loose comparison.
function standard_status( $status_in )
{
  // standard value => variations that map to it
  $variations = array(
    'SUBMITTED' => array( 'QUEUED', 'LAUNCHED', 'CREATED', 'VALIDATED',
                          'SCHEDULED', 'submitted', '' ),
    'RUNNING'   => array( 'EXECUTING', 'ACTIVE', 'running', 'executing' ),
    'UPDATING'  => array( 'PENDING', 'CANCELING' ),
    'CANCELED'  => array( 'CANCELLED', 'canceled' ),
    'COMPLETE'  => array( 'COMPLETED', 'completed' ),
    'FAILED'    => array( 'FAILED_DATA', 'SUBMIT_TIMEOUT', 'RUN_TIMEOUT',
                          'DATA_TIMEOUT' ),
    'DONE'      => array( 'COMPLETE' ),
    'ERROR'     => array( 'UNKNOWN' ),
  );

  foreach ( $variations as $standard => $aliases )
  {
    if ( in_array( $status_in, $aliases ) )
      return $standard;
  }

  // Where already standard value (ERROR, RUNNING, SUBMITTED, UPDATING,
  // CANCELED, DATA, FAILED, DONE, FINISHED) or unrecognized, retain value
  return $status_in;
}
1025
// Determine the current status of an Airavata/Thrift job.
//
//   $gfacID    - job ID
//   $status_in - status currently stored for the job in the gateway DB
//
// Returns $status_in unchanged when the ID is not an Airavata ID or when its
// development/production flavour does not match this installation.
// Otherwise the experiment status is fetched and combined with the stored
// status:
//   experiment COMPLETED, stored FINISHED/DONE -> 'COMPLETE'
//   experiment COMPLETED, stored anything else -> 'DONE'
//   stored FINISHED/DONE, experiment FAILED    -> up to two re-checks 10
//                                                 seconds apart, then 'COMPLETE'
//   stored FINISHED/DONE, experiment other     -> stored status kept
//   otherwise                                  -> standard_status( experiment )
// When the result differs from the stored status, update_job_status() is
// called with it.
function aira_status( $gfacID, $status_in )
{
  global $self;
  global $loghdr;
  global $class_dir;
//echo "a_st: st_in$status_in : $gfacID\n";
  //$status_gw = standard_status( $status_in );
  $status_gw = $status_in;
//echo "a_st:  st_db=$status_gw\n";

  $me_devel  = preg_match( "/class_devel/", $class_dir );
  $job_devel = preg_match( "/US3-ADEV/i", $gfacID );

  // Job and installation must both be development or both production
  $devmatch  = $me_devel ? $job_devel : ! $job_devel;

//write_log( "$loghdr  gfacID=$gfacID devmatch=$devmatch" );
//write_log( "$loghdr   me_d=$me_devel jo_d=$job_devel cd=$class_dir" );
  if ( ! preg_match( "/US3-A/i", $gfacID )  ||  ! $devmatch )
    return $status_gw;

  // Airavata job and development/production type is right
  $status_ex   = getExperimentStatus( $gfacID );
//write_log( "$loghdr status_ex $status_ex" );

  $gw_finished = ( $status_gw == 'FINISHED'  ||  $status_gw == 'DONE' );

  if ( $status_ex == 'COMPLETED' )
  { // COMPLETED + FINISHED/DONE : gateway status is now COMPLETE
    // COMPLETED + NOT-FINISHED/DONE: gw status now DONE
    $status = $gw_finished ? 'COMPLETE' : 'DONE';
  }

  else if ( $gw_finished )
  { // Gfac status == FINISHED/DONE:  leave as is (unless FAILED)
    $status = $status_gw;

    if ( $status_ex == 'FAILED' )
    { // Re-check after a pause; the job is set to COMPLETE either way
      sleep( 10 );
      $status_ex = getExperimentStatus( $gfacID );

      if ( $status_ex == 'FAILED' )
      {
        write_log( "$loghdr status still 'FAILED' after 10-second delay" );
        sleep( 10 );
        $status_ex = getExperimentStatus( $gfacID );

        $retry_note = ( $status_ex == 'FAILED' )
          ? "$loghdr status still 'FAILED' after 20-second delay"
          : "$loghdr status is $status_ex after 20-second delayed retry";
        write_log( $retry_note );
      }

      write_log( "$loghdr status reset to 'COMPLETE'" );
      $status = 'COMPLETE';
    }
  }

  else
  { // Experiment not COMPLETED/FINISHED/DONE: use experiment status
    $status = standard_status( $status_ex );
  }

  // The ID is known to match /US3-A/i here, so this is always logged
  write_log( "$loghdr status/_in/_gw/_ex=$status/$status_in/$status_gw/$status_ex" );
//write_log( "$loghdr status/_in/_gw/_ex=$status/$status_in/$status_gw/$status_ex" );
//write_log( "  me_d=$me_devel jo_d=$job_devel dm=$devmatch cd=$class_dir" );

  if ( $status != $status_gw )
    update_job_status( $status, $gfacID );

  return $status;
}
1101
1102?>
Note: See TracBrowser for help on using the repository browser.