source: trunk/gridctl.php@ 26

Last change on this file since 26 was 26, checked in by gegorbet, 7 years ago

various minor fixes/mods to gridctl

File size: 29.5 KB
Line 
1<?php
2
3$us3bin = exec( "ls -d ~us3/lims/bin" );
4include_once "$us3bin/listen-config.php";
5//include "$us3bin/cleanup_aira.php";
6//include "$us3bin/cleanup_gfac.php";
7
// Global variables shared with the handler functions below (they read
// them through "global" declarations, so the names must not change)
$gfac_message = "";
$updateTime   = 0;
$submittime   = 0;
$cluster      = '';

//global $self;
global $status_ex, $status_gw;

// Produce some output temporarily, so cron will send me message
$now = time();
echo "Time started: " . date( 'Y-m-d H:i:s', $now ) . "\n";

// Get data from global GFAC DB
$gLink = mysql_connect( $dbhost, $guser, $gpasswd );

if ( ! $gLink )
{  // Report the real cause instead of a misleading "could not select DB"
   write_log( "$self: Could not connect to $dbhost - " . mysql_error() );
   mail_to_admin( "fail", "Internal Error: Could not connect to $dbhost" );
   exit();
}

if ( ! mysql_select_db( $gDB, $gLink ) )
{
   write_log( "$self: Could not select DB $gDB - " . mysql_error( $gLink ) );
   mail_to_admin( "fail", "Internal Error: Could not select DB $gDB" );
   exit();
}

// Column 6 is the UNIX timestamp of "time"; column 7 is the raw "time" value
$query  = "SELECT gfacID, us3_db, cluster, status, queue_msg, " .
          "UNIX_TIMESTAMP(time), time from analysis";
$result = mysql_query( $query, $gLink );

if ( ! $result )
{
   write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );
   mail_to_admin( "fail", "Query failed $query\n" . mysql_error( $gLink ) );
   exit();
}

if ( mysql_num_rows( $result ) == 0 )
{
   exit();  // Nothing to do
}

// True when this copy of gridctl runs from a class_devel installation
$me_devel = preg_match( "/class_devel/", $class_dir );

while ( list( $gfacID, $us3_db, $cluster, $status, $queue_msg, $time, $updateTime )
            = mysql_fetch_array( $result ) )
{
   // If this entry does not match class/class_devel, skip processing

   if ( preg_match( "/US3-A/i", $gfacID ) )
   {  // For thrift, job and gridctl must match
      $job_devel = preg_match( "/US3-ADEV/i", $gfacID );
      if ( ( $me_devel && !$job_devel ) ||
           ( !$me_devel && $job_devel ) )
      {  // Job type and Airavata server mismatch: skip processing
         continue;
      }
   }

   // Checking we need to do for each entry
   echo "us3db=$us3_db gfid=$gfacID\n";

   switch ( $us3_db )
   {
      case 'Xuslims3_cauma3' :
      case 'Xuslims3_cauma3d' :
      case 'Xuslims3_HHU' :
      case 'Xuslims3_Uni_KN' :
         $serviceURL = "http://gridfarm005.ucs.indiana.edu:9090/ogce-rest/job";
         break;

      default :
         // No service URL is assigned here; $serviceURL keeps whatever value
         // it already had (if any)
         break;
   }

   $gfacLabl  = $gfacID;
   $loghdr    = $self . ":" . $gfacLabl . "...:";
   $status_ex = $status;

   // If entry is for Airavata/Thrift, get the true current status

   if ( is_aira_job( $gfacID ) )
   {
      $status_in = $status;
      $status    = aira_status( $gfacID, $status_in );
      if ( $status != $status_in )
         write_log( "$loghdr Set to $status from $status_in" );
   }
   else if ( is_gfac_job( $gfacID ) )
   {
      $status_gw = $status;
      $status    = get_gfac_status( $gfacID );
      // A gateway status of COMPLETE takes precedence over the GFAC answer
      if ( $status_gw == 'COMPLETE' )
         $status = $status_gw;
   }
   else
   {
      $status_gw = $status;
      $status    = get_local_status( $gfacID );
      // Keep the gateway status when it is COMPLETE or the queue has no answer
      if ( $status_gw == 'COMPLETE' || $status == 'UNKNOWN' )
         $status = $status_gw;
   }

   // Sometimes during testing, the us3_db entry is not set
   // If $status == 'ERROR' then the condition has been processed before
   if ( strlen( $us3_db ) == 0 && $status != 'ERROR' )
   {
      write_log( "$loghdr GFAC DB is NULL - $gfacID" );
      mail_to_admin( "fail", "GFAC DB is NULL\n$gfacID" );

      $query2  = "UPDATE analysis SET status='ERROR' WHERE gfacID='$gfacID'";
      $result2 = mysql_query( $query2, $gLink );
      $status  = 'ERROR';

      if ( ! $result2 )
         write_log( "$loghdr Query failed $query2 - " . mysql_error( $gLink ) );
   }

   // Dispatch on the (possibly refreshed) status
   switch ( $status )
   {
      // Already been handled
      // Later update this condition to search for gfacID?
      case "ERROR":
         cleanup();
         break;

      case "SUBMITTED":
         submitted( $time );
         break;

      case "SUBMIT_TIMEOUT":
         submit_timeout( $time );
         break;

      case "RUNNING":
      case "STARTED":
      case "STAGING":
      case "ACTIVE":
         running( $time );
         break;

      case "RUN_TIMEOUT":
         run_timeout( $time );
         break;

      case "DATA":
      case "RESULTS_GEN":
         wait_data( $time );
         break;

      case "DATA_TIMEOUT":
         data_timeout( $time );
         break;

      case "COMPLETED":
      case "COMPLETE":
         write_log( "$loghdr COMPLETE gfacID=$gfacID" );
         complete();
         break;

      case "CANCELLED":
      case "CANCELED":
      case "FAILED":
         failed();
         break;

      case "FINISHED":
      case "DONE":
         // Airavata jobs are not cleaned up in this state
         if ( ! is_aira_job( $gfacID ) )
         {
            complete();
         }
         write_log( "$loghdr FINISHED gfacID=$gfacID" );
         break;  // explicit: previously fell through into the no-op cases

      case "PROCESSING":
      default:
         break;
   }
}

exit();
195
// Handle an entry whose status is SUBMITTED.
//   $updatetime - UNIX timestamp passed by the caller for this entry
// Younger than 10 minutes: nothing is done.  Between 10 minutes and 24
// hours: the job status is re-queried and stored if it is no longer a
// queued state.  Older than 24 hours: the entry is marked SUBMIT_TIMEOUT.
function submitted( $updatetime )
{
   global $self, $gLink, $gfacID, $loghdr;

   $elapsed = time() - $updatetime;

   if ( $elapsed < 600 ) return;             // < 10 minutes ago

   if ( $elapsed >= 86400 )
   {  // Past the first 24 hours: flag the submission as timed out
      $message = "Job listed submitted longer than 24 hours";
      $notice  = "$message - id: $gfacID";
      write_log( "$self: $notice" );
      mail_to_admin( "hang", $notice );

      $query = "UPDATE analysis SET status='SUBMIT_TIMEOUT' WHERE gfacID='$gfacID'";
      if ( ! mysql_query( $query, $gLink ) )
         write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

      update_queue_messages( $message );
      update_db( $message );
      return;
   }

   // Within the first 24 hours: ask GFAC, falling back to the local queue
   $job_status = get_gfac_status( $gfacID );
   if ( $job_status === false )
      $job_status = get_local_status( $gfacID );

   if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
      return;

   $queued_states = array( 'SUBMITTED', 'INITIALIZED', 'PENDING' );
   if ( in_array( $job_status, $queued_states ) )
      return;

   write_log( "$loghdr submitted:job_status=$job_status" );
   update_job_status( $job_status, $gfacID );
}
236
// Handle an entry whose status is SUBMIT_TIMEOUT.
//   $updatetime - UNIX timestamp passed by the caller for this entry
// The job status is re-queried first; any non-queued status is stored and
// processing stops.  Otherwise, once a further 24 hours have passed, the
// entry is marked FAILED.
function submit_timeout( $updatetime )
{
   global $self, $gLink, $gfacID, $loghdr;

   $job_status = get_gfac_status( $gfacID );
   if ( $job_status === false )
      $job_status = get_local_status( $gfacID );

   if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
      return;

   $queued_states = array( 'SUBMITTED', 'INITIALIZED', 'PENDING' );
   if ( ! in_array( $job_status, $queued_states ) )
   {  // The job has moved on: record where it is now
      update_job_status( $job_status, $gfacID );
      return;
   }

   // < 24 hours ago ( 48 total submitted )
   if ( time() - $updatetime < 86400 ) return;

   $message = "Job listed submitted longer than 48 hours";
   $notice  = "$message - id: $gfacID";
   write_log( "$self: $notice" );
   mail_to_admin( "hang", $notice );

   $query = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";
   if ( ! mysql_query( $query, $gLink ) )
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

   update_queue_messages( $message );
   update_db( $message );
}
272
// Handle an entry whose status is one of the running states.
//   $updatetime - UNIX timestamp passed by the caller for this entry
// get_us3_data() is always called first (it refreshes the global
// $updateTime).  Younger than 10 minutes: nothing more is done.  Within 24
// hours: the job status is re-queried and stored if it is no longer a
// running state.  Older than 24 hours: the entry is marked RUN_TIMEOUT.
function running( $updatetime )
{
   global $self, $gLink, $gfacID, $loghdr;

   $now = time();

   get_us3_data();

   $elapsed = $now - $updatetime;

   if ( $elapsed < 600 ) return;    // message received < 10 minutes ago

   if ( $elapsed >= 86400 )
   {  // Past the first 24 hours: flag the run as timed out
      $message = "Job listed running longer than 24 hours";
      $notice  = "$message - id: $gfacID";
      write_log( "$self: $notice" );
      mail_to_admin( "hang", $notice );

      $query = "UPDATE analysis SET status='RUN_TIMEOUT' WHERE gfacID='$gfacID'";
      if ( ! mysql_query( $query, $gLink ) )
         write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

      update_queue_messages( $message );
      update_db( $message );
      return;
   }

   // Within the first 24 hours: ask GFAC, falling back to the local queue
   $job_status = get_gfac_status( $gfacID );
   if ( $job_status === false )
      $job_status = get_local_status( $gfacID );

   if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
      return;

   $running_states = array( 'ACTIVE', 'RUNNING', 'STARTED' );
   if ( ! in_array( $job_status, $running_states ) )
      update_job_status( $job_status, $gfacID );
}
312
// Handle an entry whose status is RUN_TIMEOUT.
//   $updatetime - UNIX timestamp passed by the caller for this entry
// The job status is re-queried first; any non-running status is stored and
// processing stops.  Otherwise get_us3_data() is called and, once 48 hours
// have passed since $updatetime, the entry is marked FAILED.
function run_timeout( $updatetime )
{
   global $self, $gLink, $gfacID, $loghdr;

   $job_status = get_gfac_status( $gfacID );
   if ( $job_status === false )
      $job_status = get_local_status( $gfacID );

   if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
      return;

   $running_states = array( 'ACTIVE', 'RUNNING', 'STARTED' );
   if ( ! in_array( $job_status, $running_states ) )
   {  // The job has moved on: record where it is now
      update_job_status( $job_status, $gfacID );
      return;
   }

   $now = time();

   get_us3_data();

   if ( $now - $updatetime < 172800 ) return;   // < 48 hours ago

   $message = "Job listed running longer than 48 hours";
   $notice  = "$message - id: $gfacID";
   write_log( "$self: $notice" );
   mail_to_admin( "hang", $notice );

   $query = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";
   if ( ! mysql_query( $query, $gLink ) )
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

   update_queue_messages( $message );
   update_db( $message );
}
350
// Handle an entry that is waiting for result data.
//   $updatetime - UNIX timestamp passed by the caller for this entry
// Within the first hour: the job status is re-queried; a status other than
// DATA is stored, otherwise a data resend is requested (only on minutes
// divisible by 5).  After one hour: the entry is marked DATA_TIMEOUT.
function wait_data( $updatetime )
{
   global $self, $gLink, $gfacID, $loghdr;

   if ( time() - $updatetime >= 3600 )
   {  // Past the first hour: flag the data wait as timed out
      $message = "Waiting for data longer than 1 hour";
      $notice  = "$message - id: $gfacID";
      write_log( "$self: $notice" );
      mail_to_admin( "hang", $notice );

      $query = "UPDATE analysis SET status='DATA_TIMEOUT' WHERE gfacID='$gfacID'";
      if ( ! mysql_query( $query, $gLink ) )
         write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

      update_queue_messages( $message );
      update_db( $message );
      return;
   }

   // Within the first hour: ask GFAC, falling back to the local queue
   $job_status = get_gfac_status( $gfacID );
   if ( $job_status === false )
      $job_status = get_local_status( $gfacID );

   if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
      return;

   if ( $job_status != 'DATA' )
   {
      update_job_status( $job_status, $gfacID );
      return;
   }

   // Request to resend data, but only request every 5 minutes
   $minute = (int) date( 'i' );
   if ( $minute % 5 != 0 ) return;

   $output_status = get_gfac_outputs( $gfacID );

   if ( $output_status !== false )
      mail_to_admin( "debug", "wait_data/$gfacID/$output_status" );
}
398
// Handle an entry whose status is DATA_TIMEOUT.
//   $updatetime - UNIX timestamp passed by the caller for this entry
// The job status is re-queried first; a status other than DATA is stored
// and processing stops.  Within 24 hours a data resend is requested (only
// on minutes divisible by 15); after 24 hours the entry is marked FAILED.
function data_timeout( $updatetime )
{
   global $self, $gLink, $gfacID, $loghdr;

   $job_status = get_gfac_status( $gfacID );
   if ( $job_status === false )
      $job_status = get_local_status( $gfacID );

   if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
      return;

   if ( $job_status != 'DATA' )
   {
      update_job_status( $job_status, $gfacID );
      return;
   }

   if ( time() - $updatetime >= 86400 )
   {  // Past 24 hours: give up on the data
      $message = "Waiting for data longer than 24 hours";
      $notice  = "$message - id: $gfacID";
      write_log( "$self: $notice" );
      mail_to_admin( "hang", $notice );

      $query = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";
      if ( ! mysql_query( $query, $gLink ) )
         write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

      update_queue_messages( $message );
      update_db( $message );
      return;
   }

   // Request to resend data, but only request every 15 minutes
   $minute = (int) date( 'i' );
   if ( $minute % 15 != 0 ) return;

   $output_status = get_gfac_outputs( $gfacID );

   if ( $output_status !== false )
      mail_to_admin( "debug", "data_timeout/$gfacID/$output_status" );
}
446
// Handler for a completed job: the only work required is the shared
// cleanup() step.
function complete()
{
   cleanup();
   return;
}
452
// Handler for a failed or cancelled job: the only work required is the
// shared cleanup() step.
function failed()
{
   cleanup();
   return;
}
458
// Run the cleanup step for the current entry (global $gfacID).
// Verifies the entry still exists in the analysis table, fetches its
// request ID via get_us3_data(), then hands off to aira_cleanup() for
// Airavata/Thrift jobs of the same prod/devel type as this server, or to
// gfac_cleanup() for other jobs when not running from a class_local tree.
function cleanup()
{
   global $self, $gLink, $gfacID, $us3_db, $loghdr, $class_dir;

   // Double check that the gfacID exists
   $query  = "SELECT count(*) FROM analysis WHERE gfacID='$gfacID'";
   $result = mysql_query( $query, $gLink );

   if ( ! $result )
   {
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );
      mail_to_admin( "fail", "Query failed $query\n" . mysql_error( $gLink ) );
      return;
   }

   list( $count ) = mysql_fetch_array( $result );

   if ( $count == 0 )
   {
      write_log( "$loghdr count = $count gfacID = $gfacID" );
      return;
   }

   // Now check the us3 instance
   $requestID = get_us3_data();
   if ( $requestID == 0 ) return;

   $me_devel = preg_match( "/class_devel/", $class_dir );
   $me_local = preg_match( "/class_local/", $class_dir );

   if ( preg_match( "/US3-A/i", $gfacID ) )
   {
      $job_devel = preg_match( "/US3-ADEV/i", $gfacID );

      // Process only when job and server are both prod or both devel
      if ( (bool) $me_devel == (bool) $job_devel )
         aira_cleanup( $us3_db, $requestID, $gLink );

      return;
   }

   if ( $me_local ) return;

   write_log( "$loghdr CALLING gfac_cleanup() reqID=$requestID" );
   gfac_cleanup( $us3_db, $requestID, $gLink );
}
510
// Store a freshly reported job status in the analysis table.
//   $job_status - status string as reported by the status source
//   $gfacID     - ID of the analysis entry to update
// The reported status is mapped to the value written to the DB; unknown or
// unrecognized values are stored as ERROR.  Unless the message is 'NONE',
// it is also passed to update_queue_messages() and update_db().
// Note: the query text is written to the global $query.
function update_job_status( $job_status, $gfacID )
{
   global $gLink, $query, $self, $loghdr;

   switch ( $job_status )
   {
      case 'SUBMITTED'   :
      case 'SUBMITED'    :
      case 'INITIALIZED' :
      case 'UPDATING'    :
      case 'PENDING'     :
         $db_status = 'SUBMITTED';
         $message   = "Job status request reports job is SUBMITTED";
         break;

      case 'STARTED' :
      case 'RUNNING' :
      case 'ACTIVE'  :
         $db_status = 'RUNNING';
         $message   = "Job status request reports job is RUNNING";
         break;

      case 'FINISHED' :
         $db_status = 'FINISHED';
         $message   = "NONE";
         break;

      case 'DONE' :
         $db_status = 'DONE';
         $message   = "NONE";
         break;

      case 'COMPLETED' :
      case 'COMPLETE'  :
         $db_status = 'COMPLETE';
         $message   = "Job status request reports job is COMPLETED";
         break;

      case 'DATA' :
         $db_status = 'DATA';
         $message   = "Job status request reports job is COMPLETE, waiting for data";
         break;

      case 'CANCELED'  :
      case 'CANCELLED' :
         $db_status = 'CANCELED';
         $message   = "Job status request reports job is CANCELED";
         break;

      case 'FAILED' :
         $db_status = 'FAILED';
         $message   = "Job status request reports job is FAILED";
         break;

      case 'UNKNOWN' :
         write_log( "$loghdr job_status='UNKNOWN', reset to 'ERROR' " );
         $db_status = 'ERROR';
         $message   = "Job status request reports job is not in the queue";
         break;

      default :
         // We shouldn't ever get here
         $db_status = 'ERROR';
         $message   = "Job status was not recognized - $job_status";
         write_log( "$loghdr update_job_status: " .
                    "Job status was not recognized - $job_status\n" .
                    "gfacID = $gfacID\n" );
         break;
   }

   $query = "UPDATE analysis SET status='$db_status' WHERE gfacID='$gfacID'";

   if ( ! mysql_query( $query, $gLink ) )
      write_log( "$loghdr Query failed $query - " . mysql_error( $gLink ) );

   if ( $message == 'NONE' )
      return;

   update_queue_messages( $message );
   update_db( $message );
}
596
// Look up the current entry (global $gfacID) in the HPCAnalysisResult
// table of the global $us3_db database.
// Side effect: sets the global $updateTime to the row's
// UNIX_TIMESTAMP(updateTime) (null when no row matches).
// Returns the HPCAnalysisRequestID of the row, null when no row matches,
// or 0 when the connection, DB selection or query fails.
function get_us3_data()
{
   global $self;
   global $gfacID;
   global $dbhost;
   global $user;
   global $passwd;
   global $us3_db;
   global $updateTime;
   global $loghdr;

   $us3_link = mysql_connect( $dbhost, $user, $passwd );

   if ( ! $us3_link )
   {
      // The password is deliberately kept out of logs and e-mail
      write_log( "$loghdr could not connect: $dbhost, $user" );
      mail_to_admin( "fail", "Could not connect to $dbhost" );
      return 0;
   }

   if ( ! mysql_select_db( $us3_db, $us3_link ) )
   {
      write_log( "$loghdr could not select DB $us3_db" );
      mail_to_admin( "fail", "Could not select DB $us3_db, $dbhost, $user" );
      mysql_close( $us3_link );      // do not leak the link on failure
      return 0;
   }

   $query  = "SELECT HPCAnalysisRequestID, UNIX_TIMESTAMP(updateTime) " .
             "FROM HPCAnalysisResult WHERE gfacID='$gfacID'";
   $result = mysql_query( $query, $us3_link );

   if ( ! $result )
   {
      $error = mysql_error( $us3_link );
      write_log( "$self: Query failed $query - " . $error );
      mail_to_admin( "fail", "Query failed $query\n" . $error );
      mysql_close( $us3_link );      // do not leak the link on failure
      return 0;
   }

   // With no matching row mysql_fetch_array() returns false and both
   // variables become null
   list( $requestID, $updateTime ) = mysql_fetch_array( $result );
   mysql_close( $us3_link );

   return $requestID;
}
643
// Decide whether an ID names a GFAC job.
//   $gfacID - job ID string
// Returns true when the ID starts with "US3-Experiment" (any case) or is
// exactly "US3-" followed by an 8-4-4-4-12 hexadecimal group pattern;
// false otherwise.
function is_gfac_job( $gfacID )
{
   if ( preg_match( "/^US3-Experiment/i", $gfacID ) )
      return true;

   $hex     = "[0-9a-fA-F]";
   $pattern = "/^US3-{$hex}{8}-{$hex}{4}-{$hex}{4}-{$hex}{4}-{$hex}{12}$/";

   return preg_match( $pattern, $gfacID ) ? true : false;
}
657
// Decide whether an ID names an Airavata/Thrift job.
//   $gfacID - job ID string
// Returns true when the ID contains "US3-A" (case-insensitive), false
// otherwise.
function is_aira_job( $gfacID )
{
   return preg_match( "/US3-A/i", $gfacID ) ? true : false;
}
671
// Get the current job status for an ID.
//   $gfacID - job ID string
// Airavata/Thrift IDs: the experiment status is fetched into the global
// $status_ex (EXECUTING becomes ACTIVE when the global $status_gw is
// RUNNING, otherwise QUEUED) and returned through standard_status().
// Other non-GFAC IDs: returns false.
// GFAC IDs: the job-status URL is requested and the parsed status is
// returned, with DONE / DATA / RESULTS_GEN all reported as DATA; when the
// HTTP request throws, 'GFAC_STATUS_UNAVAILABLE' is returned.
function get_gfac_status( $gfacID )
{
   global $serviceURL;
   global $self;
   global $loghdr;
   global $cluster;
   global $status_ex, $status_gw;

   if ( is_aira_job( $gfacID ) )
   {
      $status_ex = getExperimentStatus( $gfacID );

      if ( $status_ex == 'EXECUTING' )
         $status_ex = ( $status_gw == 'RUNNING' ) ? 'ACTIVE' : 'QUEUED';

      return standard_status( $status_ex );
   }

   if ( ! is_gfac_job( $gfacID ) )
      return false;

   $url = "$serviceURL/jobstatus/$gfacID";
   try
   {
      $request = new HttpRequest( $url, HttpRequest::METH_GET );
      $request->send();
      $xml     = $request->getResponseBody();
   }
   catch ( HttpException $e )
   {
      write_log( "$loghdr Status not available - marking failed - $gfacID" );
      return 'GFAC_STATUS_UNAVAILABLE';
   }

   // Parse the result
   $gfac_status = parse_response( $xml );

   // This may not seem like the best place to do this, but here we have
   // the xml straight from GFAC
   $known_statuses = array(
      'SUBMITTED', 'SUBMITED', 'INITIALIZED', 'PENDING',
      'RUNNING',   'ACTIVE',   'STARTED',
      'COMPLETED', 'FINISHED', 'DONE',
      'DATA',      'RESULTS_GEN',
      'CANCELED',  'CANCELLED',
      'FAILED',    'STAGING',  'UNKNOWN' );

   if ( ! in_array( $gfac_status, $known_statuses ) )
   {  // Unexpected status: send the raw response to the admin for inspection
      mail_to_admin( 'debug', "gfacID: /$gfacID/\n" .
                              "XML: /$xml/\n" .
                              "Status: /$gfac_status/\n" );
   }

   $data_states = array( 'DONE', 'DATA', 'RESULTS_GEN' );

   return in_array( $gfac_status, $data_states ) ? 'DATA' : $gfac_status;
}
747
// Ask GFAC to register (resend) the outputs of a job.
//   $gfacID - job ID string
// Returns the local queue status when get_gfac_status() reports false for
// the ID, false when the status is not one that allows a data request or
// the HTTP request throws, and otherwise the status parsed from the
// response.
// NOTE(review): for GFAC IDs get_gfac_status() reports DONE as 'DATA', and
// 'DATA' is not in the accepted list below - confirm the list is intended.
function get_gfac_outputs( $gfacID )
{
   global $serviceURL;
   global $self;

   // Make sure it's a GFAC job and status is appropriate for this call
   $job_status = get_gfac_status( $gfacID );

   if ( $job_status === false )
   {  // Then it's not a GFAC job
      return get_local_status( $gfacID );
   }

   $requestable = array( 'DONE', 'FAILED', 'COMPLETE', 'FINISHED' );
   if ( ! in_array( $job_status, $requestable ) )
   {  // Then it's not appropriate to request data
      return false;
   }

   $url = "$serviceURL/registeroutput/$gfacID";
   try
   {
      $request = new HttpRequest( $url, HttpRequest::METH_GET );
      $request->send();
      $xml     = $request->getResponseBody();
   }
   catch ( HttpException $e )
   {
      write_log( "$self: Data not available - request failed - $gfacID" );
      return false;
   }

   // Temporary, to see what the xml looks like, if we ever get one
   mail_to_admin( "debug", "get_gfac_outputs/\n$xml/" );

   // Parse the result
   return parse_response( $xml );
}
789
// Extract the job status from an XML response.
//   $xml - XML document text
// Returns the text of the last <status> element, or "" when there is none
// or when $xml is empty / cannot be loaded.
// Side effect: the global $gfac_message is reset to "" and then set to the
// last text node found inside any element other than <status>.
function parse_response( $xml )
{
   global $gfac_message;

   $status       = "";
   $gfac_message = "";

   // Nothing to parse: avoid handing XMLReader an empty document
   if ( ! is_string( $xml ) || $xml === '' )
      return $status;

   $parser = new XMLReader();
   if ( ! $parser->xml( $xml ) )
      return $status;

   $name = "";      // name of the most recently opened element

   while ( $parser->read() )
   {
      $type = $parser->nodeType;

      if ( $type == XMLReader::ELEMENT )
         $name = $parser->name;

      else if ( $type == XMLReader::TEXT )
      {
         if ( $name == "status" )
            $status       = $parser->value;
         else
            $gfac_message = $parser->value;
      }
   }

   $parser->close();
   return $status;
}
819
// Get the status of a job from the local cluster's queue.
//   $gfacID - job ID passed to qstat
// Runs "qstat -a" (through ssh unless the global $cluster name contains
// "us3iab") and maps the 10th whitespace-separated field of the last output
// line to ACTIVE, COMPLETED or SUBMITTED.  Returns 'UNKNOWN' when there is
// no output, qstat does not know the job, or the field is missing or
// unrecognized.
// NOTE(review): $gfacID and $cluster are placed in the shell command
// unescaped; they come from the analysis table - confirm they are trusted.
function get_local_status( $gfacID )
{
   global $cluster;
   global $self;

   $cmd = "/usr/bin/qstat -a $gfacID 2>&1|tail -n 1";
   if ( ! preg_match( "/us3iab/", $cluster ) )
   {
      $system = "$cluster.uthscsa.edu";
      $system = preg_replace( "/\-local/", "", $system );
      $cmd    = "/usr/bin/ssh -x us3@$system " . $cmd;
   }

   $result = exec( $cmd );

   if ( $result == "" || preg_match( "/^qstat: Unknown/", $result ) )
   {
      write_log( "$self get_local_status: Local job $gfacID unknown" );
      write_log( "$self get_local_status: result=$result" );
      return 'UNKNOWN';
   }

   $values = preg_split( "/\s+/", $result );

   // A short line (e.g. an ssh or qstat error message) has no 10th field;
   // treat it as an unrecognized state instead of reading a missing offset
   $state  = isset( $values[ 9 ] ) ? $values[ 9 ] : '';

   switch ( $state )
   {
      case "W" :                      // Waiting for execution time to be reached
      case "E" :                      // Job is exiting after having run
      case "R" :                      // Still running
         $status = 'ACTIVE';
         break;

      case "C" :                      // Job has completed
         $status = 'COMPLETED';
         break;

      case "T" :                      // Job is being moved
      case "H" :                      // Held
      case "Q" :                      // Queued
         $status = 'SUBMITTED';
         break;

      default :
         $status = 'UNKNOWN';         // This should not occur
         break;
   }

   return $status;
}
870
// Append a message to the queue_messages table for the current entry.
//   $message - message text (escaped before insertion)
// The analysis table ID is looked up from the global $gfacID; a failed
// query is logged and ends the function.
function update_queue_messages( $message )
{
   global $self, $gLink, $gfacID;

   // Get analysis table ID
   $query  = "SELECT id FROM analysis " .
             "WHERE gfacID = '$gfacID' ";
   $result = mysql_query( $query, $gLink );

   if ( ! $result )
   {
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );
      return;
   }

   list( $analysisID ) = mysql_fetch_array( $result );

   // Insert message into queue_message table
   $escaped = mysql_real_escape_string( $message, $gLink );
   $query   = "INSERT INTO queue_messages SET " .
              "message = '" . $escaped . "', " .
              "analysisID = '$analysisID' ";

   if ( ! mysql_query( $query, $gLink ) )
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );
}
899
// Store a message as lastMessage on the current entry's HPCAnalysisResult
// row (matched by the global $gfacID) in the global $us3_db database.
//   $message - message text (escaped before use)
// Returns 0 when the connection or DB selection fails; otherwise nothing.
function update_db( $message )
{
   global $self;
   global $gfacID;
   global $dbhost;
   global $user;
   global $passwd;
   global $us3_db;

   $us3_link = mysql_connect( $dbhost, $user, $passwd );

   if ( ! $us3_link )
   {
      // The password is deliberately kept out of logs and e-mail
      write_log( "$self: could not connect: $dbhost, $user" );
      mail_to_admin( "fail", "Could not connect to $dbhost" );
      return 0;
   }

   if ( ! mysql_select_db( $us3_db, $us3_link ) )
   {
      write_log( "$self: could not select DB $us3_db" );
      mail_to_admin( "fail", "Could not select DB $us3_db, $dbhost, $user" );
      mysql_close( $us3_link );      // do not leak the link on failure
      return 0;
   }

   // Note the space before WHERE: the original ran it into the closing quote
   $query = "UPDATE HPCAnalysisResult SET " .
            "lastMessage='" . mysql_real_escape_string( $message, $us3_link ) . "' " .
            "WHERE gfacID = '$gfacID' ";

   // Log a failed update instead of silently ignoring it
   if ( ! mysql_query( $query, $us3_link ) )
      write_log( "$self: Query failed $query - " . mysql_error( $us3_link ) );

   mysql_close( $us3_link );
}
935
// E-mail a notification to the administrator address.
//   $type - notification category supplied by the caller (not used in the
//           mail itself)
//   $msg  - text appended to the body as the error message
// The body also reports the globals $updateTime, $status and $cluster.
function mail_to_admin( $type, $msg )
{
   global $updateTime, $status, $cluster;
   global $org_name, $admin_email, $dbhost, $requestID;

   $now = time();

   $header_lines = array(
      "From: $org_name Admin<$admin_email>",
      "Cc: $org_name Admin<$admin_email>",
      "Bcc: Gary Gorbet<gegorbet@gmail.com>",        // make sure

      // Set the reply address
      "Reply-To: $org_name<$admin_email>",
      "Return-Path: $org_name<$admin_email>",

      // Try to avoid spam filters
      "Message-ID: <{$now}gridctl@{$dbhost}>{$requestID}",
      "X-Mailer: PHP v" . phpversion(),
      "MIME-Version: 1.0",
      "Content-Transfer-Encoding: 8bit" );

   // Every header line, including the last, ends with a newline
   $headers = implode( "\n", $header_lines ) . "\n";

   $subject = "US3 Error Notification";
   $message = "
 UltraScan job error notification from gridctl.php:

 Update Time : $updateTime
 GFAC Status : $status
 Cluster : $cluster
 ";

   $message .= "Error Message : $msg\n";

   mail( $admin_email, $subject, $message, $headers );
}
974
// Convert a status string to one of the standard DB status strings.
//   $status_in - status string as reported by a status source
// Returns the mapped status; any value without a mapping (including the
// values that are already standard) is returned unchanged.
function standard_status( $status_in )
{
   switch ( $status_in )
   {  // Map variations to standard gateway status values
      case 'QUEUED' :
      case 'LAUNCHED' :
      case 'CREATED' :
      case 'VALIDATED' :
      case 'SCHEDULED' :
      case 'submitted' :
      case '' :
         $status = 'SUBMITTED';
         break;

      case 'EXECUTING' :
      case 'ACTIVE' :
      case 'running' :
      case 'executing' :
         $status = 'RUNNING';
         break;

      case 'PENDING' :
      case 'CANCELING' :
         $status = 'UPDATING';
         break;

      case 'CANCELLED' :
      case 'canceled' :
         $status = 'CANCELED';
         break;

      // (An unreachable "$status = 'DATA'" assignment with no case label
      //  used to sit here; it could never execute and has been removed.)

      case 'COMPLETED' :
      case 'completed' :
         $status = 'COMPLETE';
         break;

      case 'FAILED_DATA' :
      case 'SUBMIT_TIMEOUT' :
      case 'RUN_TIMEOUT' :
      case 'DATA_TIMEOUT' :
         $status = 'FAILED';
         break;

      case 'COMPLETE' :
         $status = 'DONE';
         break;

      case 'UNKNOWN' :
         $status = 'ERROR';
         break;

      // Where already standard value, retain value
      case 'ERROR' :
      case 'RUNNING' :
      case 'SUBMITTED' :
      case 'UPDATING' :
      case 'CANCELED' :
      case 'DATA' :
      case 'FAILED' :
      case 'DONE' :
      case 'FINISHED' :
      default :
         $status = $status_in;
         break;
   }

   return $status;
}
1047
// Determine the current status of an Airavata/Thrift job.
//   $gfacID    - job ID string
//   $status_in - status currently stored for the job (gateway status)
// For IDs that are not Airavata IDs, or whose prod/devel type differs from
// this server's, $status_in is returned unchanged.  Otherwise the
// experiment status is fetched and combined with the gateway status; when
// the result differs from the gateway status it is stored through
// update_job_status().  Returns the resulting status.
function aira_status( $gfacID, $status_in )
{
   global $self;
   global $loghdr;
   global $class_dir;

   $status_gw = $status_in;
   $status    = $status_gw;

   $me_devel  = preg_match( "/class_devel/", $class_dir );
   $job_devel = preg_match( "/US3-ADEV/i", $gfacID );
   $devmatch  = ( (bool) $me_devel == (bool) $job_devel );

   if ( ! preg_match( "/US3-A/i", $gfacID ) || ! $devmatch )
      return $status;

   $gw_finished = ( $status_gw == 'FINISHED' || $status_gw == 'DONE' );
   $status_ex   = getExperimentStatus( $gfacID );

   if ( $status_ex == 'COMPLETED' )
   {  // Experiment is COMPLETED: COMPLETE once the gateway has seen
      // FINISHED/DONE, otherwise DONE
      $status = $gw_finished ? 'COMPLETE' : 'DONE';
   }

   else if ( $gw_finished )
   {  // Gfac status == FINISHED/DONE: leave as is (unless FAILED)
      if ( $status_ex == 'FAILED' )
      {
         // Re-check up to twice, 10 seconds apart, logging what is seen
         sleep( 10 );
         $status_ex = getExperimentStatus( $gfacID );

         if ( $status_ex == 'FAILED' )
         {
            write_log( "$loghdr status still 'FAILED' after 10-second delay" );
            sleep( 10 );
            $status_ex = getExperimentStatus( $gfacID );

            if ( $status_ex == 'FAILED' )
               write_log( "$loghdr status still 'FAILED' after 20-second delay" );
            else
               write_log( "$loghdr status is $status_ex after 20-second delayed retry" );
         }

         // Whatever the re-checks reported, the status becomes COMPLETE
         write_log( "$loghdr status reset to 'COMPLETE'" );
         $status = 'COMPLETE';
      }
   }

   else
   {  // Experiment not COMPLETED/FINISHED/DONE: use experiment status
      $status = standard_status( $status_ex );
   }

   if ( $status != $status_gw )
      update_job_status( $status, $gfacID );

   return $status;
}
1116
1117?>
Note: See TracBrowser for help on using the repository browser.