source: trunk/gridctl.php@ 25

Last change on this file since 25 was 25, checked in by gegorbet, 8 years ago

merge us3iab/airavata gridctl code

File size: 29.5 KB
Line 
1<?php
2
// Locate the LIMS bin directory and load the listener configuration.
// The configuration is expected to supply the DB credentials, $self,
// $class_dir and helpers such as write_log() -- they are not defined here.
$us3bin = exec( "ls -d ~us3/lims/bin" );
include_once "$us3bin/listen-config.php";
//include "$us3bin/cleanup_aira.php";
//include "$us3bin/cleanup_gfac.php";

// Global variables shared with the functions below
$gfac_message = "";
$updateTime   = 0;
$submittime   = 0;
$cluster      = '';

//global $self;
global $status_ex, $status_gw;

// Produce some output temporarily, so cron will send me message
$now = time();
echo "Time started: " . date( 'Y-m-d H:i:s', $now ) . "\n";

// Get data from global GFAC DB
$gLink = mysql_connect( $dbhost, $guser, $gpasswd );

// A failed connection must stop the run here; otherwise every later
// mysql_* call would be handed FALSE instead of a link.
if ( ! $gLink )
{
    write_log( "$self: Could not connect to $dbhost - " . mysql_error() );
    mail_to_admin( "fail", "Internal Error: Could not connect to $dbhost" );
    exit();
}

if ( ! mysql_select_db( $gDB, $gLink ) )
{
    write_log( "$self: Could not select DB $gDB - " . mysql_error( $gLink ) );
    mail_to_admin( "fail", "Internal Error: Could not select DB $gDB" );
    exit();
}

// Column order matters: the main loop unpacks the row positionally
$query  = "SELECT gfacID, us3_db, cluster, status, queue_msg, " .
          "UNIX_TIMESTAMP(time), time from analysis";
$result = mysql_query( $query, $gLink );

if ( ! $result )
{
    write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );
    mail_to_admin( "fail", "Query failed $query\n" . mysql_error( $gLink ) );
    exit();
}

if ( mysql_num_rows( $result ) == 0 )
{
//write_log( "$self: analysis read got numrows==0" );
    exit();  // Nothing to do
}
47
// Non-zero when this gridctl instance runs from a development class directory
$me_devel = preg_match( "/class_devel/", $class_dir );

// Walk every row of the analysis table. The positional unpacking follows the
// SELECT column order: $time is the UNIX timestamp, $updateTime the raw value.
while ( list( $gfacID, $us3_db, $cluster, $status, $queue_msg, $time, $updateTime )
            = mysql_fetch_array( $result ) )
{
    // For thrift jobs, the job type (prod/devel) and this gridctl instance
    // must agree; on a mismatch the entry is left for the other instance.
    if ( preg_match( "/US3-A/i", $gfacID ) )
    {
        $job_devel = preg_match( "/US3-ADEV/i", $gfacID );

        if ( (bool) $me_devel != (bool) $job_devel )
            continue;
    }

    // Checking we need to do for each entry
    echo "us3db=$us3_db gfid=$gfacID\n";

    // Only the databases listed here select a service URL; for every other
    // database $serviceURL keeps whatever value it already had.
    $rest_dbs = array( 'Xuslims3_cauma3',
                       'Xuslims3_cauma3d',
                       'Xuslims3_HHU',
                       'Xuslims3_Uni_KN' );

    if ( in_array( $us3_db, $rest_dbs ) )
    {
        $serviceURL = "http://gridfarm005.ucs.indiana.edu:9090/ogce-rest/job";
    }
//  else
//      $serviceURL = "http://gridfarm005.ucs.indiana.edu:8080/ogce-rest/job";

    $gfacLabl  = $gfacID;
    $loghdr    = $self . ":" . $gfacLabl . "...:";
    $status_ex = $status;

    // Refresh the status from its authoritative source
    if ( is_aira_job( $gfacID ) )
    {   // Airavata/Thrift: get the true current status
        $status_in = $status;
        $status    = aira_status( $gfacID, $status_in );

        if ( $status != $status_in )
            write_log( "$loghdr Set to $status from $status_in" );
    }
    else
    {
        $status_gw = $status;

        if ( is_gfac_job( $gfacID ) )
        {   // GFAC job: a gateway status of COMPLETE takes precedence
            $status = get_gfac_status( $gfacID );

            if ( $status_gw == 'COMPLETE' )
                $status = $status_gw;
        }
        else
        {   // Local job: keep the gateway status when it is COMPLETE or
            // when the local query could not determine a status
            $status = get_local_status( $gfacID );

            if ( $status_gw == 'COMPLETE' || $status == 'UNKNOWN' )
                $status = $status_gw;
        }
    }

    // Sometimes during testing, the us3_db entry is not set.
    // If $status == 'ERROR' then the condition has been processed before.
    if ( strlen( $us3_db ) == 0 && $status != 'ERROR' )
    {
        write_log( "$loghdr GFAC DB is NULL - $gfacID" );
        mail_to_admin( "fail", "GFAC DB is NULL\n$gfacID" );

        $query2  = "UPDATE analysis SET status='ERROR' WHERE gfacID='$gfacID'";
        $result2 = mysql_query( $query2, $gLink );
        $status  = 'ERROR';

        if ( ! $result2 )
            write_log( "$loghdr Query failed $query2 - " . mysql_error( $gLink ) );
    }

    // Dispatch on the (possibly refreshed) status
    switch ( $status )
    {
        // Already been handled
        // Later update this condition to search for gfacID?
        case "ERROR":
            cleanup();
            break;

        case "SUBMITTED":
            submitted( $time );
            break;

        case "SUBMIT_TIMEOUT":
            submit_timeout( $time );
            break;

        case "RUNNING":
        case "STARTED":
        case "STAGING":
        case "ACTIVE":
            running( $time );
            break;

        case "RUN_TIMEOUT":
            run_timeout( $time );
            break;

        case "DATA":
        case "RESULTS_GEN":
            wait_data( $time );
            break;

        case "DATA_TIMEOUT":
            data_timeout( $time );
            break;

        case "COMPLETED":
        case "COMPLETE":
            write_log( "$loghdr COMPLETE gfacID=$gfacID" );
            complete();
            break;

        case "CANCELLED":
        case "CANCELED":
        case "FAILED":
            failed();
            break;

        case "FINISHED":
        case "DONE":
            // Only Airavata jobs are cleaned up at this stage
            if ( is_aira_job( $gfacID ) )
                complete();

            write_log( "$loghdr FINISHED gfacID=$gfacID" );
            break;

        case "PROCESSING":
        default:
            break;
    }
}

exit();
195
/**
 * Handle an entry whose recorded status is SUBMITTED.
 *
 * Entries updated less than 10 minutes ago are left alone. Within the first
 * 24 hours the live job status is queried and, if it is no longer a
 * "submitted" state, written back through update_job_status(). After 24 hours
 * the entry is moved to SUBMIT_TIMEOUT and the admin is notified.
 *
 * @param int $updatetime UNIX timestamp of the entry's last update
 */
function submitted( $updatetime )
{
    global $self, $gLink, $gfacID, $loghdr;

    $age = time() - $updatetime;

    if ( $age < 600 )
        return;                               // < 10 minutes ago

    if ( $age < 86400 )
    {   // Within the first 24 hours
        $job_status = get_gfac_status( $gfacID );

        if ( $job_status === false )
            $job_status = get_local_status( $gfacID );

        if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
            return;

        $queued_states = array( 'SUBMITTED', 'INITIALIZED', 'PENDING' );

        if ( ! in_array( $job_status, $queued_states ) )
        {
            write_log( "$loghdr submitted:job_status=$job_status" );
            update_job_status( $job_status, $gfacID );
        }

        return;
    }

    // Submitted for 24 hours or more: flag the timeout
    $message = "Job listed submitted longer than 24 hours";
    write_log( "$self: $message - id: $gfacID" );
    mail_to_admin( "hang", "$message - id: $gfacID" );

    $query = "UPDATE analysis SET status='SUBMIT_TIMEOUT' WHERE gfacID='$gfacID'";

    if ( ! mysql_query( $query, $gLink ) )
        write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

    update_queue_messages( $message );
    update_db( $message );
}
236
/**
 * Handle an entry already flagged SUBMIT_TIMEOUT.
 *
 * The live status is re-read first; if the job has left the queued states the
 * new status is recorded and nothing else happens. Otherwise, once a further
 * 24 hours have passed since $updatetime, the entry is marked FAILED and the
 * admin is notified.
 *
 * @param int $updatetime UNIX timestamp of the entry's last update
 */
function submit_timeout( $updatetime )
{
    global $self, $gLink, $gfacID, $loghdr;

    $job_status = get_gfac_status( $gfacID );

    if ( $job_status === false )
        $job_status = get_local_status( $gfacID );

    if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
        return;

    $queued_states = array( 'SUBMITTED', 'INITIALIZED', 'PENDING' );

    if ( ! in_array( $job_status, $queued_states ) )
    {
        update_job_status( $job_status, $gfacID );
        return;
    }

    if ( time() - $updatetime < 86400 )
        return;                 // < 24 hours ago ( 48 total submitted )

    $message = "Job listed submitted longer than 48 hours";
    write_log( "$self: $message - id: $gfacID" );
    mail_to_admin( "hang", "$message - id: $gfacID" );

    $query = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";

    if ( ! mysql_query( $query, $gLink ) )
        write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

    update_queue_messages( $message );
    update_db( $message );
}
272
/**
 * Handle an entry whose status is one of the running states.
 *
 * get_us3_data() is always called first (it refreshes the global
 * $updateTime). Entries updated less than 10 minutes ago are then left alone;
 * within 24 hours the live status is checked and recorded if it is no longer
 * a running state; after 24 hours the entry is moved to RUN_TIMEOUT.
 *
 * @param int $updatetime UNIX timestamp of the entry's last update
 */
function running( $updatetime )
{
    global $self, $gLink, $gfacID, $loghdr;

    $now = time();

    get_us3_data();

    $age = $now - $updatetime;

    if ( $age < 600 )
        return;                     // message received < 10 minutes ago

    if ( $age < 86400 )
    {   // Within the first 24 hours
        $job_status = get_gfac_status( $gfacID );

        if ( $job_status === false )
            $job_status = get_local_status( $gfacID );

        if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
            return;

        $active_states = array( 'ACTIVE', 'RUNNING', 'STARTED' );

        if ( ! in_array( $job_status, $active_states ) )
            update_job_status( $job_status, $gfacID );

        return;
    }

    // Running for 24 hours or more: flag the timeout
    $message = "Job listed running longer than 24 hours";
    write_log( "$self: $message - id: $gfacID" );
    mail_to_admin( "hang", "$message - id: $gfacID" );

    $query = "UPDATE analysis SET status='RUN_TIMEOUT' WHERE gfacID='$gfacID'";

    if ( ! mysql_query( $query, $gLink ) )
        write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

    update_queue_messages( $message );
    update_db( $message );
}
312
/**
 * Handle an entry already flagged RUN_TIMEOUT.
 *
 * If the live status is no longer a running state it is recorded and the
 * function returns. Otherwise get_us3_data() is called and, once 48 hours
 * have passed since $updatetime, the entry is marked FAILED.
 *
 * @param int $updatetime UNIX timestamp of the entry's last update
 */
function run_timeout( $updatetime )
{
    global $self, $gLink, $gfacID, $loghdr;

    $job_status = get_gfac_status( $gfacID );

    if ( $job_status === false )
        $job_status = get_local_status( $gfacID );

    if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
        return;

    $active_states = array( 'ACTIVE', 'RUNNING', 'STARTED' );

    if ( ! in_array( $job_status, $active_states ) )
    {
        update_job_status( $job_status, $gfacID );
        return;
    }

    $now = time();

    get_us3_data();

    if ( $now - $updatetime < 172800 )
        return;                                   // < 48 hours ago

    $message = "Job listed running longer than 48 hours";
    write_log( "$self: $message - id: $gfacID" );
    mail_to_admin( "hang", "$message - id: $gfacID" );

    $query = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";

    if ( ! mysql_query( $query, $gLink ) )
        write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

    update_queue_messages( $message );
    update_db( $message );
}
350
/**
 * Handle an entry that is waiting for result data.
 *
 * During the first hour the live status is re-checked; while it is still
 * DATA, an output request is issued only on clock minutes divisible by 5.
 * After one hour the entry is moved to DATA_TIMEOUT and the admin is notified.
 *
 * @param int $updatetime UNIX timestamp of the entry's last update
 */
function wait_data( $updatetime )
{
    global $self, $gLink, $gfacID, $loghdr;

    $age = time() - $updatetime;

    if ( $age < 3600 )
    {   // Within the first hour
        $job_status = get_gfac_status( $gfacID );

        if ( $job_status === false )
            $job_status = get_local_status( $gfacID );

        if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
            return;

        if ( $job_status != 'DATA' )
        {
            update_job_status( $job_status, $gfacID );
            return;
        }

        // Request to resend data, but only request every 5 minutes
        if ( (int) date( 'i' ) % 5 != 0 )
            return;

        $output_status = get_gfac_outputs( $gfacID );

        if ( $output_status !== false )
            mail_to_admin( "debug", "wait_data/$gfacID/$output_status" );

        return;
    }

    $message = "Waiting for data longer than 1 hour";
    write_log( "$self: $message - id: $gfacID" );
    mail_to_admin( "hang", "$message - id: $gfacID" );

    $query = "UPDATE analysis SET status='DATA_TIMEOUT' WHERE gfacID='$gfacID'";

    if ( ! mysql_query( $query, $gLink ) )
        write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

    update_queue_messages( $message );
    update_db( $message );
}
398
/**
 * Handle an entry already flagged DATA_TIMEOUT.
 *
 * If the live status is no longer DATA it is recorded and the function
 * returns. Within 24 hours of $updatetime an output request is issued on
 * clock minutes divisible by 15; after that the entry is marked FAILED.
 *
 * @param int $updatetime UNIX timestamp of the entry's last update
 */
function data_timeout( $updatetime )
{
    global $self, $gLink, $gfacID, $loghdr;

    $job_status = get_gfac_status( $gfacID );

    if ( $job_status === false )
        $job_status = get_local_status( $gfacID );

    if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
        return;

    if ( $job_status != 'DATA' )
    {
        update_job_status( $job_status, $gfacID );
        return;
    }

    if ( time() - $updatetime < 86400 )
    {   // < 24 hours ago
        // Request to resend data, but only request every 15 minutes
        if ( (int) date( 'i' ) % 15 != 0 )
            return;

        $output_status = get_gfac_outputs( $gfacID );

        if ( $output_status !== false )
            mail_to_admin( "debug", "data_timeout/$gfacID/$output_status" );

        return;
    }

    $message = "Waiting for data longer than 24 hours";
    write_log( "$self: $message - id: $gfacID" );
    mail_to_admin( "hang", "$message - id: $gfacID" );

    $query = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";

    if ( ! mysql_query( $query, $gLink ) )
        write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

    update_queue_messages( $message );
    update_db( $message );
}
446
/**
 * Handle a completed job: nothing beyond cleanup() is required.
 */
function complete()
{
    cleanup();   // Just cleanup
}
452
/**
 * Handle a failed or cancelled job: nothing beyond cleanup() is required.
 */
function failed()
{
    cleanup();   // Just cleanup
}
458
/**
 * Run the appropriate cleanup routine for the current global $gfacID.
 *
 * The entry must still exist in the analysis table and must have a matching
 * HPCAnalysisResult row (get_us3_data() returns its request ID). Airavata
 * jobs are passed to aira_cleanup() only when the job type (prod/devel)
 * matches this server; all other jobs go to gfac_cleanup() unless this
 * instance runs from a "class_local" directory.
 *
 * Fix: $class_dir is now declared global. Previously it was an undefined
 * local, so $me_devel and $me_local were always 0 regardless of the
 * installation directory.
 */
function cleanup()
{
    global $self;
    global $gLink;
    global $gfacID;
    global $us3_db;
    global $loghdr;
    global $class_dir;

    // Double check that the gfacID exists
    $query  = "SELECT count(*) FROM analysis WHERE gfacID='$gfacID'";
    $result = mysql_query( $query, $gLink );

    if ( ! $result )
    {
        write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );
        mail_to_admin( "fail", "Query failed $query\n" . mysql_error( $gLink ) );
        return;
    }

    list( $count ) = mysql_fetch_array( $result );

    if ( $count == 0 )
    {
        write_log( "$loghdr count = $count gfacID = $gfacID" );
        return;
    }

    // Now check the us3 instance
    $requestID = get_us3_data();
//write_log( "$loghdr requestID = $requestID gfacID = $gfacID" );
    if ( $requestID == 0 ) return;

    $me_devel = preg_match( "/class_devel/", $class_dir );
    $me_local = preg_match( "/class_local/", $class_dir );

    if ( preg_match( "/US3-A/i", $gfacID ) )
    {
        $job_devel = preg_match( "/US3-ADEV/i", $gfacID );

        if ( ( !$me_devel && !$job_devel ) ||
             (  $me_devel &&  $job_devel ) )
        {   // Job is of same type (prod/devel) as Server: process it
            aira_cleanup( $us3_db, $requestID, $gLink );
        }
    }
    else if ( ! $me_local )
    {
        write_log( "$loghdr CALLING gfac_cleanup() reqID=$requestID" );
        gfac_cleanup( $us3_db, $requestID, $gLink );
    }
}
509
/**
 * Record a reported job status in the analysis table.
 *
 * The reported status is mapped to the value stored in analysis.status and
 * to a message. Unless the message is 'NONE', it is also stored through
 * update_queue_messages() and update_db(). An unrecognised status is stored
 * as ERROR and logged.
 *
 * @param string $job_status status reported for the job
 * @param string $gfacID     job identifier used in the WHERE clause
 */
function update_job_status( $job_status, $gfacID )
{
    global $gLink, $query, $self, $loghdr;

    switch ( $job_status )
    {
        case 'SUBMITTED' :
        case 'SUBMITED' :
        case 'INITIALIZED' :
        case 'UPDATING' :
        case 'PENDING' :
            $db_status = 'SUBMITTED';
            $message   = "Job status request reports job is SUBMITTED";
            break;

        case 'STARTED' :
        case 'RUNNING' :
        case 'ACTIVE' :
            $db_status = 'RUNNING';
            $message   = "Job status request reports job is RUNNING";
            break;

        case 'FINISHED' :
            $db_status = 'FINISHED';
            $message   = "NONE";
            break;

        case 'DONE' :
            $db_status = 'DONE';
            $message   = "NONE";
            break;

        case 'COMPLETED' :
        case 'COMPLETE' :
            $db_status = 'COMPLETE';
            $message   = "Job status request reports job is COMPLETED";
            break;

        case 'DATA' :
            $db_status = 'DATA';
            $message   = "Job status request reports job is COMPLETE, waiting for data";
            break;

        case 'CANCELED' :
        case 'CANCELLED' :
            $db_status = 'CANCELED';
            $message   = "Job status request reports job is CANCELED";
            break;

        case 'FAILED' :
            $db_status = 'FAILED';
            $message   = "Job status request reports job is FAILED";
            break;

        case 'UNKNOWN' :
            write_log( "$loghdr job_status='UNKNOWN', reset to 'ERROR' " );
            $db_status = 'ERROR';
            $message   = "Job status request reports job is not in the queue";
            break;

        default :
            // We shouldn't ever get here
            $db_status = 'ERROR';
            $message   = "Job status was not recognized - $job_status";
            write_log( "$loghdr update_job_status: " .
                       "Job status was not recognized - $job_status\n" .
                       "gfacID = $gfacID\n" );
            break;
    }

    // $query is a global, so the statement stays visible after the call
    $query = "UPDATE analysis SET status='$db_status' WHERE gfacID='$gfacID'";

    if ( ! mysql_query( $query, $gLink ) )
        write_log( "$loghdr Query failed $query - " . mysql_error( $gLink ) );

    if ( $message != 'NONE' )
    {
        update_queue_messages( $message );
        update_db( $message );
    }
}
595
/**
 * Look up the HPCAnalysisResult row for the current global $gfacID in the
 * job's own database ($us3_db).
 *
 * On success the global $updateTime is set to the row's updateTime (as a
 * UNIX timestamp) and the HPCAnalysisRequestID is returned.
 *
 * Changes from the previous version:
 *  - the DB password is no longer written to the log or mailed to the admin;
 *  - the connection is closed on every failure path, not only on success;
 *  - when no row matches, 0 is returned and $updateTime is left untouched
 *    (previously both became null).
 *
 * @return int|string the request ID, or 0 on any failure / no matching row
 */
function get_us3_data()
{
    global $self;
    global $gfacID;
    global $dbhost;
    global $user;
    global $passwd;
    global $us3_db;
    global $updateTime;
    global $loghdr;

    $us3_link = mysql_connect( $dbhost, $user, $passwd );

    if ( ! $us3_link )
    {
        write_log( "$loghdr could not connect: $dbhost, $user" );
        mail_to_admin( "fail", "Could not connect to $dbhost" );
        return 0;
    }

    if ( ! mysql_select_db( $us3_db, $us3_link ) )
    {
        write_log( "$loghdr could not select DB $us3_db" );
        mail_to_admin( "fail", "Could not select DB $us3_db, $dbhost, $user" );
        mysql_close( $us3_link );
        return 0;
    }

    $query  = "SELECT HPCAnalysisRequestID, UNIX_TIMESTAMP(updateTime) " .
              "FROM HPCAnalysisResult WHERE gfacID='$gfacID'";
    $result = mysql_query( $query, $us3_link );

    if ( ! $result )
    {
        // Capture the error text before the link is closed
        $error = mysql_error( $us3_link );
        write_log( "$self: Query failed $query - " . $error );
        mail_to_admin( "fail", "Query failed $query\n" . $error );
        mysql_close( $us3_link );
        return 0;
    }

    $row = mysql_fetch_array( $result );
    mysql_close( $us3_link );

    // No HPCAnalysisResult row for this gfacID
    if ( ! $row )
        return 0;

    list( $requestID, $updateTime ) = $row;

    return $requestID;
}
642
/**
 * Determine whether an ID names a GFAC job.
 *
 * An ID qualifies when it starts with "US3-Experiment" (case-insensitive) or
 * is exactly "US3-" followed by a hyphenated 8-4-4-4-12 hexadecimal string.
 *
 * @param string $gfacID job identifier
 * @return bool
 */
function is_gfac_job( $gfacID )
{
    $hex          = "[0-9a-fA-F]";
    $uuid_pattern = "/^US3-{$hex}{8}-{$hex}{4}-{$hex}{4}-{$hex}{4}-{$hex}{12}$/";

    $is_experiment = preg_match( "/^US3-Experiment/i", $gfacID );
    $is_uuid       = preg_match( $uuid_pattern, $gfacID );

    return ( $is_experiment || $is_uuid );
}
656
/**
 * Determine whether an ID names an Airavata/Thrift job.
 *
 * The test is an unanchored, case-insensitive search for "US3-A" anywhere in
 * the ID.
 *
 * @param string $gfacID job identifier
 * @return bool
 */
function is_aira_job( $gfacID )
{
    global $cluster;

    return preg_match( "/US3-A/i", $gfacID ) ? true : false;
}
670
/**
 * Get the current job status from Airavata or from the GFAC REST service.
 *
 * - Airavata jobs: the experiment status is read, EXECUTING is refined to
 *   ACTIVE or QUEUED using the global gateway status, and the result is
 *   passed through standard_status().
 * - IDs that are neither Airavata nor GFAC jobs: FALSE is returned so the
 *   caller can fall back to the local queue.
 * - GFAC jobs: "$serviceURL/jobstatus/<id>" is requested and parsed; DONE,
 *   DATA and RESULTS_GEN are all reported as DATA.
 *
 * @param string $gfacID job identifier
 * @return string|false status string, 'GFAC_STATUS_UNAVAILABLE' when the
 *                      HTTP request throws, or FALSE for non-GFAC jobs
 */
function get_gfac_status( $gfacID )
{
    global $serviceURL, $self, $loghdr, $cluster;
    global $status_ex, $status_gw;

    if ( is_aira_job( $gfacID ) )
    {
        $status_ex = getExperimentStatus( $gfacID );

        if ( $status_ex == 'EXECUTING' )
            $status_ex = ( $status_gw == 'RUNNING' ) ? 'ACTIVE' : 'QUEUED';

        return standard_status( $status_ex );
    }

    if ( ! is_gfac_job( $gfacID ) )
        return false;

    $url = "$serviceURL/jobstatus/$gfacID";

    try
    {
        $post = new HttpRequest( $url, HttpRequest::METH_GET );
        $post->send();
        $xml  = $post->getResponseBody();
    }
    catch ( HttpException $e )
    {
        write_log( "$loghdr Status not available - marking failed - $gfacID" );
        return 'GFAC_STATUS_UNAVAILABLE';
    }

    // Parse the result
    $gfac_status = parse_response( $xml );

    // This may not seem like the best place to do this, but here we have
    // the xml straight from GFAC
    $known_states = array( 'SUBMITTED', 'SUBMITED',  'INITIALIZED', 'PENDING',
                           'RUNNING',   'ACTIVE',    'STARTED',     'COMPLETED',
                           'FINISHED',  'DONE',      'DATA',        'RESULTS_GEN',
                           'CANCELED',  'CANCELLED', 'FAILED',      'STAGING',
                           'UNKNOWN' );

    if ( ! in_array( $gfac_status, $known_states ) )
    {
        mail_to_admin( 'debug', "gfacID: /$gfacID/\n" .
                                "XML:    /$xml/\n"    .
                                "Status: /$gfac_status/\n" );
    }

    // All "results are being produced" states collapse to DATA
    $data_states = array( 'DONE', 'DATA', 'RESULTS_GEN' );

    return in_array( $gfac_status, $data_states ) ? 'DATA' : $gfac_status;
}
746
/**
 * Ask GFAC to register (resend) the outputs of a job.
 *
 * For IDs that are not GFAC jobs the local queue status is returned instead.
 * The request is only made when the current status is DONE, FAILED, COMPLETE
 * or FINISHED; otherwise FALSE is returned.
 *
 * NOTE(review): get_gfac_status() reports DONE as DATA for GFAC jobs, and
 * 'DATA' is not in the accepted list below -- confirm whether that is
 * intended.
 *
 * @param string $gfacID job identifier
 * @return string|false status parsed from the response, the local status for
 *                      non-GFAC jobs, or FALSE when no request was made or
 *                      the request failed
 */
function get_gfac_outputs( $gfacID )
{
    global $serviceURL, $self;

    // Make sure it's a GFAC job and status is appropriate for this call
    $job_status = get_gfac_status( $gfacID );

    if ( $job_status === false )
    {   // Then it's not a GFAC job
        return get_local_status( $gfacID );
    }

    $ready_states = array( 'DONE', 'FAILED', 'COMPLETE', 'FINISHED' );

    if ( ! in_array( $job_status, $ready_states ) )
    {   // Then it's not appropriate to request data
        return false;
    }

    $url = "$serviceURL/registeroutput/$gfacID";

    try
    {
        $post = new HttpRequest( $url, HttpRequest::METH_GET );
        $post->send();
        $xml  = $post->getResponseBody();
    }
    catch ( HttpException $e )
    {
        write_log( "$self: Data not available - request failed - $gfacID" );
        return false;
    }

    // Temporary, to see what the xml looks like, if we ever get one
    mail_to_admin( "debug", "get_gfac_outputs/\n$xml/" );

    // Parse the result
    return parse_response( $xml );
}
788
/**
 * Extract the job status from a GFAC XML response.
 *
 * The text content of the <status> element is returned; the text of any
 * other element is stored in the global $gfac_message (the last one wins).
 *
 * Changes from the previous version:
 *  - $name is initialised, so it is never read while undefined;
 *  - an empty or non-string response returns "" without invoking the parser
 *    (XMLReader::xml() rejects an empty document).
 *
 * @param string $xml raw response body
 * @return string the status text, or "" when none was found
 */
function parse_response( $xml )
{
    global $gfac_message;

    $status       = "";
    $gfac_message = "";

    // Nothing to parse
    if ( ! is_string( $xml ) || trim( $xml ) == "" )
        return $status;

    $parser = new XMLReader();

    if ( ! $parser->xml( $xml ) )
        return $status;

    $name = "";   // Name of the most recently opened element

    while ( $parser->read() )
    {
        $type = $parser->nodeType;

        if ( $type == XMLReader::ELEMENT )
        {
            $name = $parser->name;
        }
        else if ( $type == XMLReader::TEXT )
        {
            if ( $name == "status" )
                $status = $parser->value;
            else
                $gfac_message = $parser->value;
        }
    }

    $parser->close();
    return $status;
}
818
/**
 * Get the status of a job from the local cluster's queue via qstat.
 *
 * qstat is run directly when the global $cluster matches "us3iab", otherwise
 * through ssh on "<cluster>.uthscsa.edu" (with any "-local" removed). The
 * tenth whitespace-separated field of the last output line is taken as the
 * job state letter and mapped to ACTIVE, COMPLETED, SUBMITTED or UNKNOWN.
 *
 * Changes from the previous version:
 *  - the job ID and ssh target are shell-escaped (the remote command is
 *    escaped as one argument, since the remote shell parses it again);
 *  - an empty ID returns UNKNOWN instead of running "qstat -a" with no ID,
 *    which lists every job and would report an unrelated job's state;
 *  - a short output line no longer reads an undefined array index.
 *
 * @param string $gfacID local job identifier
 * @return string 'ACTIVE', 'COMPLETED', 'SUBMITTED' or 'UNKNOWN'
 */
function get_local_status( $gfacID )
{
    global $cluster;
    global $self;

    if ( strlen( (string) $gfacID ) == 0 )
    {
        write_log( "$self get_local_status: Local job $gfacID unknown" );
        return 'UNKNOWN';
    }

    $qstat = "/usr/bin/qstat -a " . escapeshellarg( $gfacID );

    if ( preg_match( "/us3iab/", $cluster ) )
    {
        $cmd = "$qstat 2>&1|tail -n 1";
    }
    else
    {
        $system = "$cluster.uthscsa.edu";
        $system = preg_replace( "/\-local/", "", $system );

        // Redirection and tail stay on the local side, as before
        $cmd = "/usr/bin/ssh -x " . escapeshellarg( "us3@$system" ) . " " .
               escapeshellarg( $qstat ) . " 2>&1|tail -n 1";
    }

    $result = exec( $cmd );

    if ( $result == "" || preg_match( "/^qstat: Unknown/", $result ) )
    {
        write_log( "$self get_local_status: Local job $gfacID unknown" );
        write_log( "$self get_local_status: result=$result" );
        return 'UNKNOWN';
    }

    $values = preg_split( "/\s+/", $result );
    $state  = isset( $values[ 9 ] ) ? $values[ 9 ] : '';

    switch ( $state )
    {
        case "W" :                      // Waiting for execution time to be reached
        case "E" :                      // Job is exiting after having run
        case "R" :                      // Still running
            $status = 'ACTIVE';
            break;

        case "C" :                      // Job has completed
            $status = 'COMPLETED';
            break;

        case "T" :                      // Job is being moved
        case "H" :                      // Held
        case "Q" :                      // Queued
            $status = 'SUBMITTED';
            break;

        default :
            $status = 'UNKNOWN';        // This should not occur
            break;
    }

    return $status;
}
869
/**
 * Append a message to the queue_messages table for the current job.
 *
 * The analysis row ID for the global $gfacID is looked up first and used as
 * the analysisID of the new message row. Query failures are logged and end
 * the function.
 *
 * @param string $message text to store
 */
function update_queue_messages( $message )
{
    global $self, $gLink, $gfacID;

    // Get analysis table ID
    $query  = "SELECT id FROM analysis " .
              "WHERE gfacID = '$gfacID' ";
    $lookup = mysql_query( $query, $gLink );

    if ( ! $lookup )
    {
        write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );
        return;
    }

    list( $analysisID ) = mysql_fetch_array( $lookup );

    // Insert message into queue_message table
    $escaped = mysql_real_escape_string( $message, $gLink );
    $query   = "INSERT INTO queue_messages SET " .
               "message = '" . $escaped . "', " .
               "analysisID = '$analysisID' ";

    if ( ! mysql_query( $query, $gLink ) )
    {
        write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );
        return;
    }
}
898
/**
 * Store a message as lastMessage on the HPCAnalysisResult row of the current
 * global $gfacID, in the job's own database ($us3_db).
 *
 * Changes from the previous version:
 *  - the DB password is no longer written to the log or mailed to the admin;
 *  - the UPDATE result is checked and a failure is logged;
 *  - the connection is closed on the select-DB failure path as well;
 *  - a space separates the quoted value from the WHERE keyword.
 *
 * @param string $message text to store
 * @return int|null 0 on failure; nothing on success (as before)
 */
function update_db( $message )
{
    global $self;
    global $gfacID;
    global $dbhost;
    global $user;
    global $passwd;
    global $us3_db;

    $us3_link = mysql_connect( $dbhost, $user, $passwd );

    if ( ! $us3_link )
    {
        write_log( "$self: could not connect: $dbhost, $user" );
        mail_to_admin( "fail", "Could not connect to $dbhost" );
        return 0;
    }

    if ( ! mysql_select_db( $us3_db, $us3_link ) )
    {
        write_log( "$self: could not select DB $us3_db" );
        mail_to_admin( "fail", "Could not select DB $us3_db, $dbhost, $user" );
        mysql_close( $us3_link );
        return 0;
    }

    $query = "UPDATE HPCAnalysisResult SET " .
             "lastMessage='" . mysql_real_escape_string( $message, $us3_link ) . "' " .
             "WHERE gfacID = '$gfacID' ";

    if ( ! mysql_query( $query, $us3_link ) )
    {
        write_log( "$self: Query failed $query - " . mysql_error( $us3_link ) );
        mysql_close( $us3_link );
        return 0;
    }

    mysql_close( $us3_link );
}
934
/**
 * E-mail an error notification to the administrator.
 *
 * The body reports the globals $updateTime, $status and $cluster followed by
 * the supplied message. $type is accepted but not used in the mail.
 *
 * NOTE(review): the whitespace inside the message body is reproduced as it
 * appears in the available copy of this file -- confirm against the
 * repository original if exact layout matters.
 *
 * @param string $type notification category (currently unused)
 * @param string $msg  error text appended to the body
 */
function mail_to_admin( $type, $msg )
{
    global $updateTime, $status, $cluster;
    global $org_name, $admin_email, $dbhost, $requestID;

    // Used in the Message-ID to try to avoid spam filters
    $now = time();

    $header_lines = array(
        "From: $org_name Admin<$admin_email>",
        "Cc: $org_name Admin<$admin_email>",
        "Bcc: Gary Gorbet<gegorbet@gmail.com>",                 // make sure
        "Reply-To: $org_name<$admin_email>",                    // reply address
        "Return-Path: $org_name<$admin_email>",
        "Message-ID: <" . $now . "gridctl@$dbhost>$requestID",
        "X-Mailer: PHP v" . phpversion(),
        "MIME-Version: 1.0",
        "Content-Transfer-Encoding: 8bit"
    );
    $headers = implode( "\n", $header_lines ) . "\n";

    $subject = "US3 Error Notification";

    $message = "\n UltraScan job error notification from gridctl.php:\n" .
               "\n" .
               " Update Time : $updateTime\n" .
               " GFAC Status : $status\n" .
               " Cluster : $cluster\n" .
               " ";
    $message .= "Error Message : $msg\n";

    mail( $admin_email, $subject, $message, $headers );
}
973
/**
 * Convert a status string to one of the standard DB status strings.
 *
 * Known variations are mapped to the standard gateway values; values that
 * are already standard, and any unrecognised value, are returned unchanged.
 *
 * Change from the previous version: the unreachable "$status = 'DATA'; break;"
 * that followed the CANCELED case (it had no case label and sat after a
 * break) has been removed. No input ever produced 'DATA' through it.
 *
 * @param string $status_in status as reported
 * @return string standardised status
 */
function standard_status( $status_in )
{
    switch ( $status_in )
    {   // Map variations to standard gateway status values
        case 'QUEUED' :
        case 'LAUNCHED' :
        case 'CREATED' :
        case 'VALIDATED' :
        case 'SCHEDULED' :
        case 'submitted' :
        case '' :
            $status = 'SUBMITTED';
            break;

        case 'EXECUTING' :
        case 'ACTIVE' :
        case 'running' :
        case 'executing' :
            $status = 'RUNNING';
            break;

        case 'PENDING' :
        case 'CANCELING' :
            $status = 'UPDATING';
            break;

        case 'CANCELLED' :
        case 'canceled' :
            $status = 'CANCELED';
            break;

        case 'COMPLETED' :
        case 'completed' :
            $status = 'COMPLETE';
            break;

        case 'FAILED_DATA' :
        case 'SUBMIT_TIMEOUT' :
        case 'RUN_TIMEOUT' :
        case 'DATA_TIMEOUT' :
            $status = 'FAILED';
            break;

        case 'COMPLETE' :
            $status = 'DONE';
            break;

        case 'UNKNOWN' :
            $status = 'ERROR';
            break;

        // Where already standard value, retain value
        case 'ERROR' :
        case 'RUNNING' :
        case 'SUBMITTED' :
        case 'UPDATING' :
        case 'CANCELED' :
        case 'DATA' :
        case 'FAILED' :
        case 'DONE' :
        case 'FINISHED' :
        default :
            $status = $status_in;
            break;
    }

    return $status;
}
1046
/**
 * Determine the current status of an Airavata/Thrift job.
 *
 * When the ID is an Airavata job of the same type (prod/devel) as this
 * server, the experiment status is read and combined with the gateway
 * status:
 *  - experiment COMPLETED: COMPLETE if the gateway already shows
 *    FINISHED/DONE, otherwise DONE;
 *  - gateway FINISHED/DONE: kept as is, except that an experiment status of
 *    FAILED is re-read twice at 10-second intervals (for logging) and the
 *    status is then set to COMPLETE;
 *  - anything else: standard_status() of the experiment status.
 * A status that differs from the gateway status is written back through
 * update_job_status(). For all other IDs $status_in is returned unchanged.
 *
 * Fix: $class_dir is now declared global. Previously it was an undefined
 * local, so $me_devel was always 0 and a development server never matched
 * its own US3-ADEV jobs.
 *
 * @param string $gfacID    job identifier
 * @param string $status_in status currently recorded in the gateway DB
 * @return string the resulting status
 */
function aira_status( $gfacID, $status_in )
{
    global $self;
    global $loghdr;
    global $class_dir;

    $status_gw = $status_in;
    $status    = $status_gw;

    $me_devel  = preg_match( "/class_devel/", $class_dir );
    $job_devel = preg_match( "/US3-ADEV/i", $gfacID );
    $devmatch  = ( ( !$me_devel && !$job_devel ) ||
                   (  $me_devel &&  $job_devel ) );

    if ( preg_match( "/US3-A/i", $gfacID ) && $devmatch )
    {
        $status_ex   = getExperimentStatus( $gfacID );
        $gw_finished = ( $status_gw == 'FINISHED' || $status_gw == 'DONE' );

        if ( $status_ex == 'COMPLETED' )
        {   // COMPLETED + FINISHED/DONE     : gateway status is now COMPLETE
            // COMPLETED + NOT-FINISHED/DONE : gateway status is now DONE
            $status = $gw_finished ? 'COMPLETE' : 'DONE';
        }

        else if ( $gw_finished )
        {   // Gfac status == FINISHED/DONE: leave as is (unless FAILED)
            $status = $status_gw;

            if ( $status_ex == 'FAILED' )
            {
                // Re-read the experiment status after short delays; the
                // outcome is only logged, the status is reset either way.
                sleep( 10 );
                $status_ex = getExperimentStatus( $gfacID );

                if ( $status_ex == 'FAILED' )
                {
                    write_log( "$loghdr status still 'FAILED' after 10-second delay" );
                    sleep( 10 );
                    $status_ex = getExperimentStatus( $gfacID );

                    if ( $status_ex == 'FAILED' )
                        write_log( "$loghdr status still 'FAILED' after 20-second delay" );
                    else
                        write_log( "$loghdr status is $status_ex after 20-second delayed retry" );
                }

                write_log( "$loghdr status reset to 'COMPLETE'" );
                $status = 'COMPLETE';
            }
        }

        else
        {   // Experiment not COMPLETED/FINISHED/DONE: use experiment status
            $status = standard_status( $status_ex );
        }

//write_log( "$loghdr status/_in/_gw/_ex=$status/$status_in/$status_gw/$status_ex" );
        if ( $status != $status_gw )
        {
            update_job_status( $status, $gfacID );
        }
    }

    return $status;
}
1114
1115?>
Note: See TracBrowser for help on using the repository browser.