source: trunk/gridctl.php@ 6

Last change on this file since 6 was 6, checked in by gegorbet, 10 years ago

Changes since 2013, mostly for Airavata/Thrift

File size: 27.6 KB
RevLine 
<?php

// gridctl main script: walk every row of the global GFAC `analysis` table,
// refresh the job's status and dispatch to the handler for that status.

// Locate the us3 bin directory and pull in the configuration and the cleanup
// routines. Names such as $dbhost, $guser, $gpasswd, $gDB and $self are not
// defined in this file - presumably they come from listen-config.php.
$us3bin = exec( "ls -d ~us3/bin" );
include_once "$us3bin/listen-config.php";
include "$us3bin/cleanup_aira.php";
include "$us3bin/cleanup_gfac.php";

// Global variables (the handler functions below read these via `global`)
$gfac_message = "";
$updateTime   = 0;
$submittime   = 0;
$cluster      = '';

// Produce some output temporarily, so cron will send me message
$now = time();
echo "Time started: " . date( 'Y-m-d H:i:s', $now ) . "\n";

// Get data from global GFAC DB
$gLink = mysql_connect( $dbhost, $guser, $gpasswd );

// Fail early with an accurate message instead of letting the select below
// report a misleading "could not select DB" on a dead connection.
if ( ! $gLink )
{
  write_log( "$self: Could not connect to $dbhost - " . mysql_error() );
  mail_to_admin( "fail", "Internal Error: Could not connect to $dbhost" );
  exit();
}

if ( ! mysql_select_db( $gDB, $gLink ) )
{
  write_log( "$self: Could not select DB $gDB - " . mysql_error() );
  mail_to_admin( "fail", "Internal Error: Could not select DB $gDB" );
  exit();
}

$query  = "SELECT gfacID, us3_db, cluster, status, queue_msg, " .
          "UNIX_TIMESTAMP(time), time from analysis";
$result = mysql_query( $query, $gLink );

if ( ! $result )
{
  write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );
  mail_to_admin( "fail", "Query failed $query\n" . mysql_error( $gLink ) );
  exit();
}

if ( mysql_num_rows( $result ) == 0 )
{
  exit();  // Nothing to do
}

// Column order matters: $time receives UNIX_TIMESTAMP(time) and $updateTime
// receives the raw `time` column.
while ( list( $gfacID, $us3_db, $cluster, $status, $queue_msg, $time, $updateTime )
          = mysql_fetch_array( $result ) )
{
  // Checking we need to do for each entry
  echo "us3db=$us3_db gfid=$gfacID\n";

  switch ( $us3_db )
  {
    case 'Xuslims3_cauma3' :
    case 'Xuslims3_cauma3d' :
    case 'Xuslims3_HHU' :
    case 'Xuslims3_Uni_KN' :
      $serviceURL = "http://gridfarm005.ucs.indiana.edu:9090/ogce-rest/job";
      break;

    default :
      // NOTE(review): $serviceURL is left as-is here, so it keeps whatever the
      // configuration or an earlier row set - confirm that is intended.
//    $serviceURL = "http://gridfarm005.ucs.indiana.edu:8080/ogce-rest/job";
      break;
  }

  // Log header uses the first three dash-separated parts of the ID; pad so
  // that shorter IDs do not raise undefined-offset notices.
  $awork    = array_pad( explode( "-", $gfacID ), 3, "" );
  $gfacLabl = $awork[0] . "-" . $awork[1] . "-" . $awork[2];
  $loghdr   = $self . ":" . $gfacLabl . "...:";

  // If entry is for Airavata/Thrift, get the true current status
  if ( is_aira_job( $gfacID ) )
  {
    $status_in = $status;
    $status    = aira_status( $gfacID, $status_in );

    if ( $status != $status_in )
    {
      write_log( "$loghdr Set to $status from $status_in" );
    }
  }
  else
  {
    $status_gw = $status;
    $status    = get_gfac_status( $gfacID );

    // A gateway status of COMPLETE overrides whatever GFAC reports
    if ( $status_gw == 'COMPLETE' )
    {
      $status = $status_gw;
    }

    write_log( "$loghdr non-AThrift status=$status status_gw=$status_gw" );
  }

  // Sometimes during testing, the us3_db entry is not set
  // If $status == 'ERROR' then the condition has been processed before
  if ( strlen( $us3_db ) == 0 && $status != 'ERROR' )
  {
    write_log( "$loghdr GFAC DB is NULL - $gfacID" );
    mail_to_admin( "fail", "GFAC DB is NULL\n$gfacID" );

    $query2  = "UPDATE analysis SET status='ERROR' WHERE gfacID='$gfacID'";
    $result2 = mysql_query( $query2, $gLink );
    $status  = 'ERROR';

    if ( ! $result2 )
    {
      write_log( "$loghdr Query failed $query2 - " . mysql_error( $gLink ) );
    }
  }

  switch ( $status )
  {
    // Already been handled
    // Later update this condition to search for gfacID?
    case "ERROR":
      cleanup();
      break;

    case "SUBMITTED":
      submitted( $time );
      break;

    case "SUBMIT_TIMEOUT":
      submit_timeout( $time );
      break;

    case "RUNNING":
    case "STARTED":
    case "STAGING":
    case "ACTIVE":
      running( $time );
      break;

    case "RUN_TIMEOUT":
      run_timeout( $time );
      break;

    case "DATA":
    case "RESULTS_GEN":
      wait_data( $time );
      break;

    case "DATA_TIMEOUT":
      data_timeout( $time );
      break;

    case "COMPLETED":
    case "COMPLETE":
      complete();
      break;

    case "CANCELLED":
    case "CANCELED":
    case "FAILED":
      failed();
      break;

    case "FINISHED":
    case "DONE":
      // Only Airavata/Thrift jobs are cleaned up in these states
      if ( is_aira_job( $gfacID ) )
      {
        complete();
      }
      break;

    case "PROCESSING":
    default:
      break;
  }
}

exit();
168
// Handle a job whose status is SUBMITTED.
//   $updatetime - UNIX timestamp of the last update of the analysis row
// Younger than 10 minutes: do nothing. Younger than 24 hours: poll the job and,
// if it has left the queued states, record the new status. Otherwise flag the
// row as SUBMIT_TIMEOUT and notify the admin.
function submitted( $updatetime )
{
  global $self;
  global $gLink;
  global $gfacID;
  global $loghdr;

  $elapsed = time() - $updatetime;

  if ( $elapsed < 600 ) return;     // < 10 minutes ago

  if ( $elapsed < 86400 )           // Within the first 24 hours
  {
    $job_status = get_gfac_status( $gfacID );

    // false means "not a GFAC job": ask the local cluster instead
    if ( $job_status === false )
      $job_status = get_local_status( $gfacID );

    if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
      return;

    $still_queued = in_array( $job_status, array( 'SUBMITTED', 'INITIALIZED', 'PENDING' ) );

    if ( ! $still_queued )
    {
      write_log( "$loghdr submitted:job_status=$job_status" );
      update_job_status( $job_status, $gfacID );
    }

    return;
  }

  // Submitted for more than a day: mark the row and tell everyone
  $note = "Job listed submitted longer than 24 hours";
  write_log( "$self: $note - id: $gfacID" );
  mail_to_admin( "hang", "$note - id: $gfacID" );

  $sql = "UPDATE analysis SET status='SUBMIT_TIMEOUT' WHERE gfacID='$gfacID'";

  if ( ! mysql_query( $sql, $gLink ) )
    write_log( "$self: Query failed $sql - " . mysql_error( $gLink ) );

  update_queue_messages( $note );
  update_db( $note );
}
209
// Handle a job whose status is SUBMIT_TIMEOUT.
//   $updatetime - UNIX timestamp of the last update of the analysis row
// The job is polled first; if it has left the queued states the new status is
// recorded. If it is still queued 24 hours after $updatetime the row is
// marked FAILED and the admin is notified.
function submit_timeout( $updatetime )
{
  global $self;
  global $gLink;
  global $gfacID;
  global $loghdr;

  $job_status = get_gfac_status( $gfacID );

  // false means "not a GFAC job": ask the local cluster instead
  if ( $job_status === false )
    $job_status = get_local_status( $gfacID );

  if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
    return;

  $queued_states = array( 'SUBMITTED', 'INITIALIZED', 'PENDING' );

  if ( ! in_array( $job_status, $queued_states ) )
  {
    update_job_status( $job_status, $gfacID );
    return;
  }

  if ( time() - $updatetime < 86400 ) return;   // < 24 hours ago ( 48 total submitted )

  $note = "Job listed submitted longer than 48 hours";
  write_log( "$self: $note - id: $gfacID" );
  mail_to_admin( "hang", "$note - id: $gfacID" );

  $sql = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";

  if ( ! mysql_query( $sql, $gLink ) )
    write_log( "$self: Query failed $sql - " . mysql_error( $gLink ) );

  update_queue_messages( $note );
  update_db( $note );
}
245
// Handle a job that is in one of the running states.
//   $updatetime - UNIX timestamp of the last update of the analysis row
// get_us3_data() is always called first (it refreshes the global $updateTime).
// Younger than 10 minutes: nothing more. Younger than 24 hours: poll the job
// and record the status if it is no longer running. Otherwise flag the row as
// RUN_TIMEOUT and notify the admin.
function running( $updatetime )
{
  global $self;
  global $gLink;
  global $gfacID;
  global $loghdr;

  // Take the age before the DB round trip below
  $elapsed = time() - $updatetime;

  get_us3_data();

  if ( $elapsed < 600 ) return;     // message received < 10 minutes ago

  if ( $elapsed < 86400 )           // Within the first 24 hours
  {
    $job_status = get_gfac_status( $gfacID );

    // false means "not a GFAC job": ask the local cluster instead
    if ( $job_status === false )
      $job_status = get_local_status( $gfacID );

    if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
      return;

    $still_running = in_array( $job_status, array( 'ACTIVE', 'RUNNING', 'STARTED' ) );

    if ( ! $still_running )
      update_job_status( $job_status, $gfacID );

    return;
  }

  $note = "Job listed running longer than 24 hours";
  write_log( "$self: $note - id: $gfacID" );
  mail_to_admin( "hang", "$note - id: $gfacID" );

  $sql = "UPDATE analysis SET status='RUN_TIMEOUT' WHERE gfacID='$gfacID'";

  if ( ! mysql_query( $sql, $gLink ) )
    write_log( "$self: Query failed $sql - " . mysql_error( $gLink ) );

  update_queue_messages( $note );
  update_db( $note );
}
285
// Handle a job whose status is RUN_TIMEOUT.
//   $updatetime - UNIX timestamp of the last update of the analysis row
// The job is polled first; if it is no longer running the new status is
// recorded. If it is still running 48 hours after $updatetime the row is
// marked FAILED and the admin is notified.
function run_timeout( $updatetime )
{
  global $self;
  global $gLink;
  global $gfacID;
  global $loghdr;

  $job_status = get_gfac_status( $gfacID );

  // false means "not a GFAC job": ask the local cluster instead
  if ( $job_status === false )
    $job_status = get_local_status( $gfacID );

  if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
    return;

  $running_states = array( 'ACTIVE', 'RUNNING', 'STARTED' );

  if ( ! in_array( $job_status, $running_states ) )
  {
    update_job_status( $job_status, $gfacID );
    return;
  }

  // Take the age before the DB round trip below
  $elapsed = time() - $updatetime;

  get_us3_data();

  if ( $elapsed < 172800 ) return;   // < 48 hours ago

  $note = "Job listed running longer than 48 hours";
  write_log( "$self: $note - id: $gfacID" );
  mail_to_admin( "hang", "$note - id: $gfacID" );

  $sql = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";

  if ( ! mysql_query( $sql, $gLink ) )
    write_log( "$self: Query failed $sql - " . mysql_error( $gLink ) );

  update_queue_messages( $note );
  update_db( $note );
}
323
// Handle a job that is waiting for its result data.
//   $updatetime - UNIX timestamp of the last update of the analysis row
// During the first hour the job is polled; a status other than DATA is
// recorded, otherwise the outputs are re-requested when the current minute is
// a multiple of 5. After an hour the row is flagged DATA_TIMEOUT and the admin
// is notified.
function wait_data( $updatetime )
{
  global $self;
  global $gLink;
  global $gfacID;
  global $loghdr;

  if ( time() - $updatetime < 3600 )   // Within the first hour
  {
    $job_status = get_gfac_status( $gfacID );

    // false means "not a GFAC job": ask the local cluster instead
    if ( $job_status === false )
      $job_status = get_local_status( $gfacID );

    if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
      return;

    if ( $job_status != 'DATA' )
    {
      update_job_status( $job_status, $gfacID );
      return;
    }

    // Request to resend data, but only request every 5 minutes
    if ( (int) date( 'i' ) % 5 != 0 ) return;

    $output_status = get_gfac_outputs( $gfacID );

    if ( $output_status !== false )
      mail_to_admin( "debug", "wait_data/$gfacID/$output_status" );

    return;
  }

  $note = "Waiting for data longer than 1 hour";
  write_log( "$self: $note - id: $gfacID" );
  mail_to_admin( "hang", "$note - id: $gfacID" );

  $sql = "UPDATE analysis SET status='DATA_TIMEOUT' WHERE gfacID='$gfacID'";

  if ( ! mysql_query( $sql, $gLink ) )
    write_log( "$self: Query failed $sql - " . mysql_error( $gLink ) );

  update_queue_messages( $note );
  update_db( $note );
}
371
// Handle a job whose status is DATA_TIMEOUT.
//   $updatetime - UNIX timestamp of the last update of the analysis row
// The job is polled first; a status other than DATA is recorded. While the row
// is younger than 24 hours the outputs are re-requested when the current
// minute is a multiple of 15. After that the row is marked FAILED and the
// admin is notified.
function data_timeout( $updatetime )
{
  global $self;
  global $gLink;
  global $gfacID;
  global $loghdr;

  $job_status = get_gfac_status( $gfacID );

  // false means "not a GFAC job": ask the local cluster instead
  if ( $job_status === false )
    $job_status = get_local_status( $gfacID );

  if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
    return;

  if ( $job_status != 'DATA' )
  {
    update_job_status( $job_status, $gfacID );
    return;
  }

  if ( time() - $updatetime < 86400 )   // < 24 hours ago
  {
    // Request to resend data, but only request every 15 minutes
    if ( (int) date( 'i' ) % 15 != 0 ) return;

    $output_status = get_gfac_outputs( $gfacID );

    if ( $output_status !== false )
      mail_to_admin( "debug", "data_timeout/$gfacID/$output_status" );

    return;
  }

  $note = "Waiting for data longer than 24 hours";
  write_log( "$self: $note - id: $gfacID" );
  mail_to_admin( "hang", "$note - id: $gfacID" );

  $sql = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";

  if ( ! mysql_query( $sql, $gLink ) )
    write_log( "$self: Query failed $sql - " . mysql_error( $gLink ) );

  update_queue_messages( $note );
  update_db( $note );
}
419
// Handler for completed jobs: nothing to do beyond the common cleanup.
function complete()
{
  cleanup();
}
425
// Handler for failed or cancelled jobs: nothing to do beyond the common cleanup.
function failed()
{
  cleanup();
}
431
// Clean up after the job identified by the global $gfacID.
// Verifies that the analysis row still exists and that the us3 database knows
// a request ID for the job, then hands over to aira_cleanup() for IDs
// containing "US3-AIRA" or to gfac_cleanup() for everything else.
function cleanup()
{
  global $self;
  global $gLink;
  global $gfacID;
  global $us3_db;
  global $loghdr;

  // Double check that the gfacID exists
  $sql  = "SELECT count(*) FROM analysis WHERE gfacID='$gfacID'";
  $rows = mysql_query( $sql, $gLink );

  if ( ! $rows )
  {
    write_log( "$self: Query failed $sql - " . mysql_error( $gLink ) );
    mail_to_admin( "fail", "Query failed $sql\n" . mysql_error( $gLink ) );
    return;
  }

  list( $count ) = mysql_fetch_array( $rows );

  if ( $count == 0 )
  {
    write_log( "$loghdr count = $count gfacID = $gfacID" );
    return;
  }

  // Now check the us3 instance
  $requestID = get_us3_data();

  if ( $requestID == 0 ) return;

  $is_aira = preg_match( "/US3-AIRA/i", $gfacID );

  if ( $is_aira )
    aira_cleanup( $us3_db, $requestID, $gLink );
  else
    gfac_cleanup( $us3_db, $requestID, $gLink );
}
474
// Function to update status of job
//   $job_status - status string reported for the job
//   $gfacID     - ID of the analysis row to update
// Maps the reported status onto the value stored in analysis.status, runs the
// UPDATE, and (unless the message is 'NONE') passes a message to
// update_queue_messages() and update_db(). Unknown or unrecognized statuses
// are stored as ERROR. The statement text is left in the global $query.
function update_job_status( $job_status, $gfacID )
{
  global $gLink;
  global $query;
  global $self;
  global $loghdr;

  switch ( $job_status )
  {
    case 'SUBMITTED' :
    case 'SUBMITED' :
    case 'INITIALIZED' :
    case 'UPDATING' :
    case 'PENDING' :
      $db_status = 'SUBMITTED';
      $message   = "Job status request reports job is SUBMITTED";
      break;

    case 'STARTED' :
    case 'RUNNING' :
    case 'ACTIVE' :
      $db_status = 'RUNNING';
      $message   = "Job status request reports job is RUNNING";
      break;

    case 'FINISHED' :
      $db_status = 'FINISHED';
      $message   = "NONE";
      break;

    case 'DONE' :
      $db_status = 'DONE';
      $message   = "NONE";
      break;

    case 'COMPLETED' :
    case 'COMPLETE' :
      $db_status = 'COMPLETE';
      $message   = "Job status request reports job is COMPLETED";
      break;

    case 'DATA' :
      $db_status = 'DATA';
      $message   = "Job status request reports job is COMPLETE, waiting for data";
      break;

    case 'CANCELED' :
    case 'CANCELLED' :
      $db_status = 'CANCELED';
      $message   = "Job status request reports job is CANCELED";
      break;

    case 'FAILED' :
      $db_status = 'FAILED';
      $message   = "Job status request reports job is FAILED";
      break;

    case 'UNKNOWN' :
      write_log( "$loghdr job_status='UNKNOWN', reset to 'ERROR' " );
      $db_status = 'ERROR';
      $message   = "Job status request reports job is not in the queue";
      break;

    default :
      // We shouldn't ever get here
      $db_status = 'ERROR';
      $message   = "Job status was not recognized - $job_status";
      write_log( "$loghdr update_job_status: " .
                 "Job status was not recognized - $job_status\n" .
                 "gfacID = $gfacID\n" );
      break;
  }

  // Every branch issues the same statement; only the stored status differs
  $query = "UPDATE analysis SET status='$db_status' WHERE gfacID='$gfacID'";

  if ( ! mysql_query( $query, $gLink ) )
    write_log( "$loghdr Query failed $query - " . mysql_error( $gLink ) );

  if ( $message != 'NONE' )
  {
    update_queue_messages( $message );
    update_db( $message );
  }
}
560
// Look up the job identified by the global $gfacID in the us3 database named
// by the global $us3_db.
// Returns the HPCAnalysisRequestID of the matching HPCAnalysisResult row, or 0
// when the connection, the DB selection or the query fails. As a side effect
// the global $updateTime is set to UNIX_TIMESTAMP(updateTime) of that row.
// When no row matches, both the return value and $updateTime end up null
// (callers compare the result with == 0).
function get_us3_data()
{
  global $self;
  global $gfacID;
  global $dbhost;
  global $user;
  global $passwd;
  global $us3_db;
  global $updateTime;
  global $loghdr;

  $us3_link = mysql_connect( $dbhost, $user, $passwd );

  if ( ! $us3_link )
  {
    // Never put the password into the log
    write_log( "$loghdr could not connect: $dbhost, $user" );
    mail_to_admin( "fail", "Could not connect to $dbhost" );
    return 0;
  }

  if ( ! mysql_select_db( $us3_db, $us3_link ) )
  {
    write_log( "$loghdr could not select DB $us3_db" );
    // Never put the password into an e-mail
    mail_to_admin( "fail", "Could not select DB $us3_db, $dbhost, $user" );
    mysql_close( $us3_link );
    return 0;
  }

  $query  = "SELECT HPCAnalysisRequestID, UNIX_TIMESTAMP(updateTime) " .
            "FROM HPCAnalysisResult WHERE gfacID='$gfacID'";
  $result = mysql_query( $query, $us3_link );

  if ( ! $result )
  {
    write_log( "$self: Query failed $query - " . mysql_error( $us3_link ) );
    mail_to_admin( "fail", "Query failed $query\n" . mysql_error( $us3_link ) );
    mysql_close( $us3_link );
    return 0;
  }

  list( $requestID, $updateTime ) = mysql_fetch_array( $result );
  mysql_close( $us3_link );

  return $requestID;
}
607
// Function to determine if this is a gfac job or not
//   $gfacID - job ID to classify
// Returns true when the ID starts with "US3-Experiment" (case-insensitive) or
// is "US3-" followed by an 8-4-4-4-12 group of hex digits; false otherwise.
function is_gfac_job( $gfacID )
{
  if ( preg_match( "/^US3-Experiment/i", $gfacID ) )
    return true;

  $hex       = "[0-9a-fA-F]";
  $uuid_form = "/^US3-{$hex}{8}-{$hex}{4}-{$hex}{4}-{$hex}{4}-{$hex}{12}$/";

  if ( preg_match( $uuid_form, $gfacID ) )
    return true;

  // Then it's not a GFAC job
  return false;
}
621
// Function to determine if this is an airavata/thrift job or not
//   $gfacID - job ID to classify
// Returns true when the ID contains "US3-AIRA" (case-insensitive) and the
// global $cluster does not contain "juropa"; false otherwise.
function is_aira_job( $gfacID )
{
  global $cluster;

  if ( ! preg_match( "/US3-AIRA/i", $gfacID ) )
    return false;

  if ( preg_match( "/juropa/i", $cluster ) )
    return false;

  // Then it's an Airavata/Thrift job
  return true;
}
636
// Function to get the current job status from GFAC
//   $gfacID - job ID to query
// Returns
//   - for Airavata/Thrift jobs, the experiment status passed through
//     standard_status();
//   - false when the ID is neither an Airavata/Thrift nor a GFAC job;
//   - 'GFAC_STATUS_UNAVAILABLE' when the HTTP request throws;
//   - otherwise the status parsed from the GFAC response, with DONE, DATA and
//     RESULTS_GEN all reported as 'DATA'.
function get_gfac_status( $gfacID )
{
  global $serviceURL;
  global $self;
  global $loghdr;
  global $cluster;

  if ( is_aira_job( $gfacID ) )
    return standard_status( getExperimentStatus( $gfacID ) );

  if ( ! is_gfac_job( $gfacID ) )
    return false;

  $url = "$serviceURL/jobstatus/$gfacID";

  try
  {
    $request = new HttpRequest( $url, HttpRequest::METH_GET );
    $request->send();
    $xml = $request->getResponseBody();
  }
  catch ( HttpException $e )
  {
    write_log( "$loghdr Status not available - marking failed - $gfacID" );
    return 'GFAC_STATUS_UNAVAILABLE';
  }

  // Parse the result
  $gfac_status = parse_response( $xml );

  // This may not seem like the best place to do this, but here we have
  // the xml straight from GFAC: report any status we have not seen before
  $known_statuses = array(
    'SUBMITTED', 'SUBMITED', 'INITIALIZED', 'PENDING',
    'RUNNING', 'ACTIVE', 'STARTED',
    'COMPLETED', 'FINISHED', 'DONE', 'DATA', 'RESULTS_GEN',
    'CANCELED', 'CANCELLED', 'FAILED', 'STAGING', 'UNKNOWN'
  );

  if ( ! in_array( $gfac_status, $known_statuses ) )
  {
    mail_to_admin( 'debug', "gfacID: /$gfacID/\n" .
                            "XML: /$xml/\n" .
                            "Status: /$gfac_status/\n" );
  }

  if ( $gfac_status == 'DONE' || $gfac_status == 'DATA' || $gfac_status == 'RESULTS_GEN' )
    return 'DATA';

  return $gfac_status;
}
700
// Function to request data outputs from GFAC
//   $gfacID - job ID whose outputs should be registered
// Returns false when the job is not a GFAC job, when its current status is not
// one of DONE / FAILED / COMPLETE / FINISHED, or when the HTTP request throws;
// otherwise the status parsed from the registeroutput response.
// NOTE(review): get_gfac_status() reports DONE as 'DATA' for non-Airavata
// jobs, so for those only FAILED / COMPLETE / FINISHED can pass the check
// below - confirm this list is what is intended.
function get_gfac_outputs( $gfacID )
{
  global $serviceURL;
  global $self;

  // Make sure it's a GFAC job and status is appropriate for this call
  $job_status = get_gfac_status( $gfacID );

  if ( $job_status === false )
    return false;                 // Then it's not a GFAC job

  $data_ready = in_array( $job_status, array( 'DONE', 'FAILED', 'COMPLETE', 'FINISHED' ) );

  if ( ! $data_ready )
    return false;                 // Then it's not appropriate to request data

  $url = "$serviceURL/registeroutput/$gfacID";

  try
  {
    $request = new HttpRequest( $url, HttpRequest::METH_GET );
    $request->send();
    $xml = $request->getResponseBody();
  }
  catch ( HttpException $e )
  {
    write_log( "$self: Data not available - request failed - $gfacID" );
    return false;
  }

  // Temporary, to see what the xml looks like, if we ever get one
  mail_to_admin( "debug", "get_gfac_outputs/\n$xml/" );

  // Parse the result
  return parse_response( $xml );
}
741
// Extract the job status from a GFAC XML response.
//   $xml - response body as a string
// Returns the text of the last <status> element, or "" when there is none or
// when $xml is empty / not a string. Text found in any other element is stored
// in the global $gfac_message (last one wins); it is reset to "" on every call.
function parse_response( $xml )
{
  global $gfac_message;

  $status       = "";
  $gfac_message = "";

  // XMLReader refuses an empty document (a ValueError on PHP 8), and an empty
  // body simply means "no status"
  if ( ! is_string( $xml ) || trim( $xml ) === '' )
    return $status;

  $parser = new XMLReader();

  if ( ! $parser->xml( $xml ) )
    return $status;

  // Name of the most recently opened element; the original code read this
  // before it was ever assigned
  $name = "";

  while ( $parser->read() )
  {
    $type = $parser->nodeType;

    if ( $type == XMLReader::ELEMENT )
      $name = $parser->name;

    else if ( $type == XMLReader::TEXT )
    {
      if ( $name == "status" )
        $status = $parser->value;
      else
        $gfac_message = $parser->value;
    }
  }

  $parser->close();
  return $status;
}
771
// Function to get status from local cluster
//   $gfacID - job ID to look up with qstat
// Runs `qstat -a <id>` over ssh on "<cluster>.uthscsa.edu" (with "-local"
// removed from the host name) and maps the tenth whitespace-separated field of
// the last output line to ACTIVE, COMPLETED or SUBMITTED. Returns 'UNKNOWN'
// when there is no output, qstat does not know the job, or the field is
// missing or unrecognized.
function get_local_status( $gfacID )
{
  global $cluster;
  global $self;

  $system = "$cluster.uthscsa.edu";
  $system = preg_replace( "/\-local/", "", $system );

  // Both values come from the database; quote them so they cannot break out
  // of the local command line. ssh still passes the remaining words to the
  // remote login shell, so this protects the local shell only.
  $cmd = "/usr/bin/ssh -x " . escapeshellarg( "us3@$system" ) .
         " qstat -a " . escapeshellarg( $gfacID ) . " 2>&1";

  // exec() returns the last line of the command's output
  $result = exec( $cmd );

  if ( $result == "" || preg_match( "/^qstat: Unknown/", $result ) )
  {
    write_log( "$self get_local_status: Local job $gfacID unknown" );
    return 'UNKNOWN';
  }

  $values = preg_split( "/\s+/", $result );

  // Short lines (e.g. an ssh error message) have no tenth field
  $state = isset( $values[ 9 ] ) ? $values[ 9 ] : '';

  switch ( $state )
  {
    case "W" :                      // Waiting for execution time to be reached
    case "E" :                      // Job is exiting after having run
    case "R" :                      // Still running
      $status = 'ACTIVE';
      break;

    case "C" :                      // Job has completed
      $status = 'COMPLETED';
      break;

    case "T" :                      // Job is being moved
    case "H" :                      // Held
    case "Q" :                      // Queued
      $status = 'SUBMITTED';
      break;

    default :
      $status = 'UNKNOWN';          // This should not occur
      break;
  }

  return $status;
}
817
// Append a message to the queue_messages table for the current job.
//   $message - text to store
// Looks up the id of the analysis row for the global $gfacID and inserts
// $message with that id. Query failures are logged; nothing is returned.
function update_queue_messages( $message )
{
  global $self;
  global $gLink;
  global $gfacID;

  // Get analysis table ID
  $lookup = "SELECT id FROM analysis " .
            "WHERE gfacID = '$gfacID' ";
  $found  = mysql_query( $lookup, $gLink );

  if ( ! $found )
  {
    write_log( "$self: Query failed $lookup - " . mysql_error( $gLink ) );
    return;
  }

  list( $analysisID ) = mysql_fetch_array( $found );

  // Insert message into queue_message table
  $safe_message = mysql_real_escape_string( $message, $gLink );
  $insert       = "INSERT INTO queue_messages SET " .
                  "message = '" . $safe_message . "', " .
                  "analysisID = '$analysisID' ";

  if ( ! mysql_query( $insert, $gLink ) )
    write_log( "$self: Query failed $insert - " . mysql_error( $gLink ) );
}
846
// Store a message as lastMessage of the current job in the us3 database.
//   $message - text to store
// Connects to the database named by the global $us3_db and updates the
// HPCAnalysisResult row whose gfacID equals the global $gfacID.
// Returns 0 when the connection or DB selection fails, nothing otherwise.
function update_db( $message )
{
  global $self;
  global $gfacID;
  global $dbhost;
  global $user;
  global $passwd;
  global $us3_db;

  $us3_link = mysql_connect( $dbhost, $user, $passwd );

  if ( ! $us3_link )
  {
    // Never put the password into the log
    write_log( "$self: could not connect: $dbhost, $user" );
    mail_to_admin( "fail", "Could not connect to $dbhost" );
    return 0;
  }

  if ( ! mysql_select_db( $us3_db, $us3_link ) )
  {
    write_log( "$self: could not select DB $us3_db" );
    // Never put the password into an e-mail
    mail_to_admin( "fail", "Could not select DB $us3_db, $dbhost, $user" );
    mysql_close( $us3_link );
    return 0;
  }

  // Note the space before WHERE: the original glued it to the closing quote
  $query = "UPDATE HPCAnalysisResult SET " .
           "lastMessage='" . mysql_real_escape_string( $message, $us3_link ) . "' " .
           "WHERE gfacID = '$gfacID' ";

  // Log a failed update the same way the other queries in this file do
  if ( ! mysql_query( $query, $us3_link ) )
    write_log( "$self: Query failed $query - " . mysql_error( $us3_link ) );

  mysql_close( $us3_link );
}
882
// Send an error notification e-mail to the administrator.
//   $type - category of the notification; not used in the body of this function
//   $msg  - error text appended to the message
// The mail goes to the global $admin_email and reports the globals
// $updateTime, $status and $cluster alongside $msg.
function mail_to_admin( $type, $msg )
{
  global $updateTime;
  global $status;
  global $cluster;
  global $org_name;
  global $admin_email;
  global $dbhost;
  global $requestID;

  $now = time();

  $header_lines = array(
    "From: $org_name Admin<$admin_email>",
    "Cc: $org_name Admin<$admin_email>",
    "Bcc: Gary Gorbet<gegorbet@gmail.com>",                  // make sure

    // Set the reply address
    "Reply-To: $org_name<$admin_email>",
    "Return-Path: $org_name<$admin_email>",

    // Try to avoid spam filters
    "Message-ID: <" . $now . "gridctl@$dbhost>$requestID",
    "X-Mailer: PHP v" . phpversion(),
    "MIME-Version: 1.0",
    "Content-Transfer-Encoding: 8bit"
  );

  // Every header line, including the last, is terminated by "\n"
  $headers = implode( "\n", $header_lines ) . "\n";

  $subject = "US3 Error Notification";
  $message = "
 UltraScan job error notification from gridctl.php:

 Update Time : $updateTime
 GFAC Status : $status
 Cluster : $cluster
 ";

  $message .= "Error Message : $msg\n";

  mail( $admin_email, $subject, $message, $headers );
}
[6]921
// Convert a status string to one of the standard DB status strings
//   $status_in - status string as reported
// Returns the mapped status; values that are already standard, and any value
// not listed here, are returned unchanged.
// An unreachable "$status = 'DATA'; break;" pair with no case label used to
// sit after the CANCELED cases; it has been removed. 'DATA' is still returned
// unchanged through the pass-through cases at the end.
function standard_status( $status_in )
{
  switch ( $status_in )
  {  // Map variations to standard gateway status values
    case 'QUEUED' :
    case 'LAUNCHED' :
    case 'CREATED' :
    case 'VALIDATED' :
    case 'SCHEDULED' :
    case 'submitted' :
    case '' :
      $status = 'SUBMITTED';
      break;

    case 'EXECUTING' :
    case 'ACTIVE' :
    case 'running' :
    case 'executing' :
      $status = 'RUNNING';
      break;

    case 'PENDING' :
    case 'CANCELING' :
      $status = 'UPDATING';
      break;

    case 'CANCELLED' :
    case 'canceled' :
      $status = 'CANCELED';
      break;

    case 'COMPLETED' :
    case 'completed' :
      $status = 'COMPLETE';
      break;

    case 'FAILED_DATA' :
    case 'SUBMIT_TIMEOUT' :
    case 'RUN_TIMEOUT' :
    case 'DATA_TIMEOUT' :
      $status = 'FAILED';
      break;

    case 'COMPLETE' :
      $status = 'DONE';
      break;

    case 'UNKNOWN' :
      $status = 'ERROR';
      break;

    // Where already standard value, retain value
    case 'ERROR' :
    case 'RUNNING' :
    case 'SUBMITTED' :
    case 'UPDATING' :
    case 'CANCELED' :
    case 'DATA' :
    case 'FAILED' :
    case 'DONE' :
    case 'FINISHED' :
    default :
      $status = $status_in;
      break;
  }

  return $status;
}
994
// Work out the current gateway status of an Airavata/Thrift job.
//   $gfacID    - job ID
//   $status_in - status currently stored for the job
// IDs that do not contain "US3-AIRA" get $status_in back untouched. Otherwise
// the experiment status is fetched and combined with $status_in:
//   - experiment COMPLETED: COMPLETE if $status_in is FINISHED/DONE, else DONE;
//   - $status_in FINISHED/DONE: kept, unless the experiment reports FAILED, in
//     which case it is re-read up to twice (10 seconds apart) and the status
//     is then set to COMPLETE;
//   - anything else: the experiment status through standard_status().
// A status that differs from $status_in is written with update_job_status().
function aira_status( $gfacID, $status_in )
{
  global $self;
  global $loghdr;

  if ( ! preg_match( "/US3-AIRA/i", $gfacID ) )
    return $status_in;

  $status_gw   = $status_in;
  $gw_finished = ( $status_gw == 'FINISHED' || $status_gw == 'DONE' );
  $status_ex   = getExperimentStatus( $gfacID );

  if ( $status_ex == 'COMPLETED' )
  {  // COMPLETED + FINISHED/DONE : COMPLETE;  COMPLETED + anything else : DONE
    $status = $gw_finished ? 'COMPLETE' : 'DONE';
  }

  else if ( $gw_finished )
  {  // Gfac status == FINISHED/DONE: leave as is (unless FAILED)
    $status = $status_gw;

    if ( $status_ex == 'FAILED' )
    {
      sleep( 10 );
      $status_ex = getExperimentStatus( $gfacID );

      if ( $status_ex == 'FAILED' )
      {
        write_log( "$loghdr status still 'FAILED' after 10-second delay" );
        sleep( 10 );
        $status_ex = getExperimentStatus( $gfacID );

        if ( $status_ex == 'FAILED' )
          write_log( "$loghdr status still 'FAILED' after 20-second delay" );
        else
          write_log( "$loghdr status is $status_ex after 20-second delayed retry" );
      }

      // Whatever the retries returned, the status is set to COMPLETE
      write_log( "$loghdr status reset to 'COMPLETE'" );
      $status = 'COMPLETE';
    }
  }

  else
  {  // Experiment not COMPLETED/FINISHED/DONE: use experiment status
    $status = standard_status( $status_ex );
  }

  write_log( "$loghdr status/_in/_gw/_ex=$status/$status_in/$status_gw/$status_ex" );

  if ( $status != $status_gw )
    update_job_status( $status, $gfacID );

  return $status;
}
1058
[1]1059?>
Note: See TracBrowser for help on using the repository browser.