source: trunk/gridctl.php@ 10

Last change on this file since 10 was 10, checked in by gegorbet, 9 years ago

fixes to allow simultaneous production/development jobs

File size: 28.2 KB
RevLine 
<?php

// Global variables shared with the helper functions below (they read these
// through `global` declarations).
$gfac_message = "";
$updateTime   = 0;
$submittime   = 0;
$cluster      = '';

// NOTE(review): $self, $dbhost, $guser, $gpasswd, $gDB and write_log() are
// not defined in this file; presumably an including script or config file
// provides them - confirm.
//global $self;

// Produce some output temporarily, so cron will send me message
$now = time();
echo "Time started: " . date('Y-m-d H:i:s', $now) . "\n";

// Get data from global GFAC DB
$gLink = mysql_connect($dbhost, $guser, $gpasswd);

// A failed connect leaves $gLink false, which also makes the select fail,
// so this branch covers both cases.
if (!mysql_select_db($gDB, $gLink)) {
    write_log("$self: Could not select DB $gDB - " . mysql_error());
    mail_to_admin("fail", "Internal Error: Could not select DB $gDB");
    exit();
}

$query  = "SELECT gfacID, us3_db, cluster, status, queue_msg, " .
          "UNIX_TIMESTAMP(time), time from analysis";
$result = mysql_query($query, $gLink);

if (!$result) {
    write_log("$self: Query failed $query - " . mysql_error($gLink));
    mail_to_admin("fail", "Query failed $query\n" . mysql_error($gLink));
    exit();
}

if (mysql_num_rows($result) == 0) {
    exit();  // Nothing to do
}

// Column order matches the SELECT: $time receives UNIX_TIMESTAMP(time) and
// $updateTime receives the raw `time` column.
while (list($gfacID, $us3_db, $cluster, $status, $queue_msg, $time, $updateTime)
           = mysql_fetch_array($result)) {
    // Checking we need to do for each entry
    echo "us3db=$us3_db gfid=$gfacID\n";

    switch ($us3_db) {
        case 'Xuslims3_cauma3':
        case 'Xuslims3_cauma3d':
        case 'Xuslims3_HHU':
        case 'Xuslims3_Uni_KN':
            $serviceURL = "http://gridfarm005.ucs.indiana.edu:9090/ogce-rest/job";
            break;

        default:
            // No service URL is assigned for any other database; the former
            // assignment is disabled:
            // $serviceURL = "http://gridfarm005.ucs.indiana.edu:8080/ogce-rest/job";
            break;
    }

    // Log header built from the first three dash-separated parts of the ID.
    // Padding keeps the label identical for short IDs without raising
    // undefined-offset notices.
    $awork    = array_pad(explode("-", $gfacID), 3, '');
    $gfacLabl = $awork[0] . "-" . $awork[1] . "-" . $awork[2];
    $loghdr   = $self . ":" . $gfacLabl . "...:";

    // If entry is for Airavata/Thrift, get the true current status
    if (is_aira_job($gfacID)) {
        $status_in = $status;
        $status    = aira_status($gfacID, $status_in);

        if ($status != $status_in) {
            write_log("$loghdr Set to $status from $status_in");
        }
    } else {
        $status_gw = $status;
        $status    = get_gfac_status($gfacID);

        // get_gfac_status() returns false when the job is neither an
        // Airavata nor a GFAC job. Dispatching on false matches no case
        // below, so such jobs would never be processed; fall back to the
        // status recorded in the gateway DB instead. A gateway status of
        // COMPLETE always takes precedence over the polled status.
        if ($status === false || $status_gw == 'COMPLETE') {
            $status = $status_gw;
        }

        write_log("$loghdr non-AThrift status=$status status_gw=$status_gw");
    }

    // Sometimes during testing, the us3_db entry is not set
    // If $status == 'ERROR' then the condition has been processed before
    if (strlen($us3_db) == 0 && $status != 'ERROR') {
        write_log("$loghdr GFAC DB is NULL - $gfacID");
        mail_to_admin("fail", "GFAC DB is NULL\n$gfacID");

        $query2  = "UPDATE analysis SET status='ERROR' WHERE gfacID='$gfacID'";
        $result2 = mysql_query($query2, $gLink);
        $status  = 'ERROR';

        if (!$result2) {
            write_log("$loghdr Query failed $query2 - " . mysql_error($gLink));
        }
    }

    // Dispatch to the handler for the entry's current status
    switch ($status) {
        // Already been handled
        // Later update this condition to search for gfacID?
        case "ERROR":
            cleanup();
            break;

        case "SUBMITTED":
            submitted($time);
            break;

        case "SUBMIT_TIMEOUT":
            submit_timeout($time);
            break;

        case "RUNNING":
        case "STARTED":
        case "STAGING":
        case "ACTIVE":
            running($time);
            break;

        case "RUN_TIMEOUT":
            run_timeout($time);
            break;

        case "DATA":
        case "RESULTS_GEN":
            wait_data($time);
            break;

        case "DATA_TIMEOUT":
            data_timeout($time);
            break;

        case "COMPLETED":
        case "COMPLETE":
            complete();
            break;

        case "CANCELLED":
        case "CANCELED":
        case "FAILED":
            failed();
            break;

        // FINISHED / DONE / PROCESSING and anything unrecognised: nothing
        // to do on this pass.
        case "FINISHED":
        case "DONE":
        case "PROCESSING":
        default:
            break;
    }
}

exit();
162
/**
 * Handle the current analysis entry while its status is SUBMITTED.
 *
 * Entries updated less than 10 minutes ago are left alone. During the first
 * 24 hours the job status is polled and, when it is no longer one of
 * SUBMITTED / INITIALIZED / PENDING, recorded through update_job_status().
 * After 24 hours the entry is marked SUBMIT_TIMEOUT, the administrator is
 * mailed and the message is stored in both databases.
 *
 * Operates on the entry identified by the global $gfacID.
 *
 * @param int|string $updatetime Unix timestamp compared against time()
 */
function submitted($updatetime)
{
    global $self;
    global $gLink;
    global $gfacID;
    global $loghdr;

    $elapsed = time() - $updatetime;

    if ($elapsed < 600) {
        return;  // < 10 minutes ago
    }

    if ($elapsed < 86400) {
        // Within the first 24 hours: just track the live status
        $job_status = get_gfac_status($gfacID);
        if ($job_status === false) {
            $job_status = get_local_status($gfacID);
        }

        if ($job_status == 'GFAC_STATUS_UNAVAILABLE') {
            return;
        }

        $waiting_states = array('SUBMITTED', 'INITIALIZED', 'PENDING');
        if (!in_array($job_status, $waiting_states)) {
            write_log("$loghdr submitted:job_status=$job_status");
            update_job_status($job_status, $gfacID);
        }

        return;
    }

    // Submitted for more than 24 hours: flag the timeout
    $message = "Job listed submitted longer than 24 hours";
    write_log("$self: $message - id: $gfacID");
    mail_to_admin("hang", "$message - id: $gfacID");

    $query = "UPDATE analysis SET status='SUBMIT_TIMEOUT' WHERE gfacID='$gfacID'";
    if (!mysql_query($query, $gLink)) {
        write_log("$self: Query failed $query - " . mysql_error($gLink));
    }

    update_queue_messages($message);
    update_db($message);
}
203
/**
 * Handle the current analysis entry while its status is SUBMIT_TIMEOUT.
 *
 * The job status is polled first; if it is no longer SUBMITTED /
 * INITIALIZED / PENDING the new status is recorded and processing stops.
 * Otherwise, once the entry's timestamp is at least 24 hours old, the entry
 * is marked FAILED, the administrator is mailed and the message is stored
 * in both databases.
 *
 * Operates on the entry identified by the global $gfacID.
 *
 * @param int|string $updatetime Unix timestamp compared against time()
 */
function submit_timeout($updatetime)
{
    global $self;
    global $gLink;
    global $gfacID;
    global $loghdr;

    $job_status = get_gfac_status($gfacID);
    if ($job_status === false) {
        $job_status = get_local_status($gfacID);
    }

    if ($job_status == 'GFAC_STATUS_UNAVAILABLE') {
        return;
    }

    $waiting_states = array('SUBMITTED', 'INITIALIZED', 'PENDING');
    if (!in_array($job_status, $waiting_states)) {
        update_job_status($job_status, $gfacID);
        return;
    }

    // < 24 hours ago ( 48 total submitted )
    if (time() - $updatetime < 86400) {
        return;
    }

    $message = "Job listed submitted longer than 48 hours";
    write_log("$self: $message - id: $gfacID");
    mail_to_admin("hang", "$message - id: $gfacID");

    $query = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";
    if (!mysql_query($query, $gLink)) {
        write_log("$self: Query failed $query - " . mysql_error($gLink));
    }

    update_queue_messages($message);
    update_db($message);
}
239
/**
 * Handle the current analysis entry while its job is running.
 *
 * get_us3_data() is always called first (it refreshes the global
 * $updateTime). Entries updated less than 10 minutes ago are then left
 * alone. During the first 24 hours the job status is polled and, when it is
 * no longer ACTIVE / RUNNING / STARTED, recorded through
 * update_job_status(). After 24 hours the entry is marked RUN_TIMEOUT, the
 * administrator is mailed and the message is stored in both databases.
 *
 * Operates on the entry identified by the global $gfacID.
 *
 * @param int|string $updatetime Unix timestamp compared against time()
 */
function running($updatetime)
{
    global $self;
    global $gLink;
    global $gfacID;
    global $loghdr;

    // Take the reference time before the LIMS lookup, as the lookup may be slow
    $elapsed = time() - $updatetime;

    get_us3_data();

    if ($elapsed < 600) {
        return;  // message received < 10 minutes ago
    }

    if ($elapsed < 86400) {
        // Within the first 24 hours: just track the live status
        $job_status = get_gfac_status($gfacID);
        if ($job_status === false) {
            $job_status = get_local_status($gfacID);
        }

        if ($job_status == 'GFAC_STATUS_UNAVAILABLE') {
            return;
        }

        $running_states = array('ACTIVE', 'RUNNING', 'STARTED');
        if (!in_array($job_status, $running_states)) {
            update_job_status($job_status, $gfacID);
        }

        return;
    }

    // Running for more than 24 hours: flag the timeout
    $message = "Job listed running longer than 24 hours";
    write_log("$self: $message - id: $gfacID");
    mail_to_admin("hang", "$message - id: $gfacID");

    $query = "UPDATE analysis SET status='RUN_TIMEOUT' WHERE gfacID='$gfacID'";
    if (!mysql_query($query, $gLink)) {
        write_log("$self: Query failed $query - " . mysql_error($gLink));
    }

    update_queue_messages($message);
    update_db($message);
}
279
/**
 * Handle the current analysis entry while its status is RUN_TIMEOUT.
 *
 * The job status is polled first; if it is no longer ACTIVE / RUNNING /
 * STARTED the new status is recorded and processing stops. Otherwise
 * get_us3_data() is called and, once the entry's timestamp is at least
 * 48 hours old, the entry is marked FAILED, the administrator is mailed and
 * the message is stored in both databases.
 *
 * Operates on the entry identified by the global $gfacID.
 *
 * @param int|string $updatetime Unix timestamp compared against time()
 */
function run_timeout($updatetime)
{
    global $self;
    global $gLink;
    global $gfacID;
    global $loghdr;

    $job_status = get_gfac_status($gfacID);
    if ($job_status === false) {
        $job_status = get_local_status($gfacID);
    }

    if ($job_status == 'GFAC_STATUS_UNAVAILABLE') {
        return;
    }

    $running_states = array('ACTIVE', 'RUNNING', 'STARTED');
    if (!in_array($job_status, $running_states)) {
        update_job_status($job_status, $gfacID);
        return;
    }

    // Take the reference time before the LIMS lookup, as the lookup may be slow
    $elapsed = time() - $updatetime;

    get_us3_data();

    if ($elapsed < 172800) {
        return;  // < 48 hours ago
    }

    $message = "Job listed running longer than 48 hours";
    write_log("$self: $message - id: $gfacID");
    mail_to_admin("hang", "$message - id: $gfacID");

    $query = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";
    if (!mysql_query($query, $gLink)) {
        write_log("$self: Query failed $query - " . mysql_error($gLink));
    }

    update_queue_messages($message);
    update_db($message);
}
317
/**
 * Handle the current analysis entry while it is waiting for result data.
 *
 * Within the first hour the job status is polled; a status other than DATA
 * is recorded through update_job_status(). While the status stays DATA, the
 * outputs are requested again, but only when the current minute of the hour
 * is a multiple of 5. After one hour the entry is marked DATA_TIMEOUT, the
 * administrator is mailed and the message is stored in both databases.
 *
 * Operates on the entry identified by the global $gfacID.
 *
 * @param int|string $updatetime Unix timestamp compared against time()
 */
function wait_data($updatetime)
{
    global $self;
    global $gLink;
    global $gfacID;
    global $loghdr;

    if (time() - $updatetime >= 3600) {
        // Waited for more than an hour: flag the timeout
        $message = "Waiting for data longer than 1 hour";
        write_log("$self: $message - id: $gfacID");
        mail_to_admin("hang", "$message - id: $gfacID");

        $query = "UPDATE analysis SET status='DATA_TIMEOUT' WHERE gfacID='$gfacID'";
        if (!mysql_query($query, $gLink)) {
            write_log("$self: Query failed $query - " . mysql_error($gLink));
        }

        update_queue_messages($message);
        update_db($message);
        return;
    }

    // Within the first hour
    $job_status = get_gfac_status($gfacID);
    if ($job_status === false) {
        $job_status = get_local_status($gfacID);
    }

    if ($job_status == 'GFAC_STATUS_UNAVAILABLE') {
        return;
    }

    if ($job_status != 'DATA') {
        update_job_status($job_status, $gfacID);
        return;
    }

    // Request to resend data, but only request every 5 minutes
    if ((int) date('i') % 5 != 0) {
        return;
    }

    $output_status = get_gfac_outputs($gfacID);
    if ($output_status !== false) {
        mail_to_admin("debug", "wait_data/$gfacID/$output_status");
    }
}
365
/**
 * Handle the current analysis entry while its status is DATA_TIMEOUT.
 *
 * The job status is polled first; a status other than DATA is recorded
 * through update_job_status() and processing stops. While the entry's
 * timestamp is less than 24 hours old, the outputs are requested again, but
 * only when the current minute of the hour is a multiple of 15. After
 * 24 hours the entry is marked FAILED, the administrator is mailed and the
 * message is stored in both databases.
 *
 * Operates on the entry identified by the global $gfacID.
 *
 * @param int|string $updatetime Unix timestamp compared against time()
 */
function data_timeout($updatetime)
{
    global $self;
    global $gLink;
    global $gfacID;
    global $loghdr;

    $job_status = get_gfac_status($gfacID);
    if ($job_status === false) {
        $job_status = get_local_status($gfacID);
    }

    if ($job_status == 'GFAC_STATUS_UNAVAILABLE') {
        return;
    }

    if ($job_status != 'DATA') {
        update_job_status($job_status, $gfacID);
        return;
    }

    if (time() - $updatetime < 86400) {
        // < 24 hours ago
        // Request to resend data, but only request every 15 minutes
        if ((int) date('i') % 15 != 0) {
            return;
        }

        $output_status = get_gfac_outputs($gfacID);
        if ($output_status !== false) {
            mail_to_admin("debug", "data_timeout/$gfacID/$output_status");
        }

        return;
    }

    $message = "Waiting for data longer than 24 hours";
    write_log("$self: $message - id: $gfacID");
    mail_to_admin("hang", "$message - id: $gfacID");

    $query = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";
    if (!mysql_query($query, $gLink)) {
        write_log("$self: Query failed $query - " . mysql_error($gLink));
    }

    update_queue_messages($message);
    update_db($message);
}
413
/**
 * Handler for entries whose job has completed.
 *
 * A completed job needs no extra work here; it simply delegates to
 * cleanup().
 */
function complete()
{
    cleanup();
}
419
/**
 * Handler for entries whose job has failed or was canceled.
 *
 * A failed job needs no extra work here; it simply delegates to cleanup().
 */
function failed()
{
    cleanup();
}
425
/**
 * Run the appropriate cleanup routine for the current analysis entry.
 *
 * Steps:
 *  1. Confirm the global $gfacID still has a row in the analysis table.
 *  2. Look up the request ID through get_us3_data(); stop when it is 0/empty.
 *  3. For IDs matching /US3-A/i, call aira_cleanup() only when this
 *     installation and the job are both development or both production
 *     (installation: $class_dir matches /class_devel/; job: ID matches
 *     /US3-ADEV/i). For all other IDs call gfac_cleanup().
 */
function cleanup()
{
    global $self;
    global $gLink;
    global $gfacID;
    global $us3_db;
    global $loghdr;
    global $class_dir;

    // Double check that the gfacID exists
    $query  = "SELECT count(*) FROM analysis WHERE gfacID='$gfacID'";
    $result = mysql_query($query, $gLink);

    if (!$result) {
        write_log("$self: Query failed $query - " . mysql_error($gLink));
        mail_to_admin("fail", "Query failed $query\n" . mysql_error($gLink));
        return;
    }

    list($count) = mysql_fetch_array($result);

    if ($count == 0) {
        write_log("$loghdr count = $count gfacID = $gfacID");
        return;
    }

    // Now check the us3 instance
    $requestID = get_us3_data();
    if ($requestID == 0) {
        return;
    }

    // Anything that is not an Airavata ID goes through the GFAC cleanup
    if (!preg_match("/US3-A/i", $gfacID)) {
        gfac_cleanup($us3_db, $requestID, $gLink);
        return;
    }

    // Airavata job: process it only if it comes from the matching
    // (development vs. production) server
    $me_devel  = (bool) preg_match("/class_devel/", $class_dir);
    $job_devel = (bool) preg_match("/US3-ADEV/i", $gfacID);

    if ($me_devel === $job_devel) {
        aira_cleanup($us3_db, $requestID, $gLink);
    }
}
475
/**
 * Record a freshly polled job status in the analysis table.
 *
 * The reported status is normalised to the value stored in the DB
 * (e.g. PENDING -> SUBMITTED, ACTIVE -> RUNNING, UNKNOWN and anything
 * unrecognised -> ERROR). Unless the accompanying message is 'NONE'
 * (FINISHED and DONE), the message is also stored through
 * update_queue_messages() and update_db().
 *
 * Side effect: the SQL statement is left in the global $query.
 *
 * @param string $job_status Status reported for the job
 * @param string $gfacID     ID of the analysis entry to update
 */
function update_job_status($job_status, $gfacID)
{
    global $gLink;
    global $query;
    global $self;
    global $loghdr;

    switch ($job_status) {
        case 'SUBMITTED':
        case 'SUBMITED':
        case 'INITIALIZED':
        case 'UPDATING':
        case 'PENDING':
            $db_status = 'SUBMITTED';
            $message   = "Job status request reports job is SUBMITTED";
            break;

        case 'STARTED':
        case 'RUNNING':
        case 'ACTIVE':
            $db_status = 'RUNNING';
            $message   = "Job status request reports job is RUNNING";
            break;

        case 'FINISHED':
            $db_status = 'FINISHED';
            $message   = "NONE";
            break;

        case 'DONE':
            $db_status = 'DONE';
            $message   = "NONE";
            break;

        case 'COMPLETED':
        case 'COMPLETE':
            $db_status = 'COMPLETE';
            $message   = "Job status request reports job is COMPLETED";
            break;

        case 'DATA':
            $db_status = 'DATA';
            $message   = "Job status request reports job is COMPLETE, waiting for data";
            break;

        case 'CANCELED':
        case 'CANCELLED':
            $db_status = 'CANCELED';
            $message   = "Job status request reports job is CANCELED";
            break;

        case 'FAILED':
            $db_status = 'FAILED';
            $message   = "Job status request reports job is FAILED";
            break;

        case 'UNKNOWN':
            write_log("$loghdr job_status='UNKNOWN', reset to 'ERROR' ");
            $db_status = 'ERROR';
            $message   = "Job status request reports job is not in the queue";
            break;

        default:
            // We shouldn't ever get here
            $db_status = 'ERROR';
            $message   = "Job status was not recognized - $job_status";
            write_log(
                "$loghdr update_job_status: " .
                "Job status was not recognized - $job_status\n" .
                "gfacID = $gfacID\n"
            );
            break;
    }

    $query = "UPDATE analysis SET status='$db_status' WHERE gfacID='$gfacID'";

    if (!mysql_query($query, $gLink)) {
        write_log("$loghdr Query failed $query - " . mysql_error($gLink));
    }

    if ($message != 'NONE') {
        update_queue_messages($message);
        update_db($message);
    }
}
561
/**
 * Look up the HPC analysis request for the current job in its LIMS database.
 *
 * Connects to the database named by the global $us3_db, reads
 * HPCAnalysisRequestID and UNIX_TIMESTAMP(updateTime) from HPCAnalysisResult
 * for the global $gfacID, and stores the timestamp in the global
 * $updateTime (null when no row matches).
 *
 * The connection is closed on every path once it has been opened, and the
 * database password is never written to the log or to e-mail.
 *
 * @return int|string The HPCAnalysisRequestID, or 0 when the connection,
 *                    DB selection or query fails or no row matches
 */
function get_us3_data()
{
    global $self;
    global $gfacID;
    global $dbhost;
    global $user;
    global $passwd;
    global $us3_db;
    global $updateTime;
    global $loghdr;

    $us3_link = mysql_connect($dbhost, $user, $passwd);

    if (!$us3_link) {
        write_log("$loghdr could not connect: $dbhost, $user");
        mail_to_admin("fail", "Could not connect to $dbhost");
        return 0;
    }

    if (!mysql_select_db($us3_db, $us3_link)) {
        write_log("$loghdr could not select DB $us3_db");
        mail_to_admin("fail", "Could not select DB $us3_db, $dbhost, $user");
        mysql_close($us3_link);
        return 0;
    }

    $query  = "SELECT HPCAnalysisRequestID, UNIX_TIMESTAMP(updateTime) " .
              "FROM HPCAnalysisResult WHERE gfacID='$gfacID'";
    $result = mysql_query($query, $us3_link);

    if (!$result) {
        // Capture the error text before the link is closed
        $error = mysql_error($us3_link);
        write_log("$self: Query failed $query - " . $error);
        mail_to_admin("fail", "Query failed $query\n" . $error);
        mysql_close($us3_link);
        return 0;
    }

    $row = mysql_fetch_array($result);
    mysql_close($us3_link);

    if (!$row) {
        // No matching result row: report "no request" explicitly
        $updateTime = null;
        return 0;
    }

    list($requestID, $updateTime) = $row;

    return $requestID;
}
608
/**
 * Determine whether an ID denotes a GFAC job.
 *
 * An ID qualifies when it starts with "US3-Experiment" (case-insensitive)
 * or is exactly "US3-" followed by a hexadecimal UUID in 8-4-4-4-12 form.
 *
 * @param string $gfacID Job ID to test
 *
 * @return bool True when the ID has one of the GFAC forms
 */
function is_gfac_job($gfacID)
{
    $hex          = "[0-9a-fA-F]";
    $uuid_pattern = "/^US3-{$hex}{8}-{$hex}{4}-{$hex}{4}-{$hex}{4}-{$hex}{12}$/";

    if (preg_match("/^US3-Experiment/i", $gfacID)) {
        return true;
    }

    return preg_match($uuid_pattern, $gfacID) ? true : false;
}
622
/**
 * Determine whether an ID denotes an Airavata/Thrift job.
 *
 * True when the ID contains "US3-A" (case-insensitive) and the global
 * $cluster does not contain "juropa" (case-insensitive).
 *
 * @param string $gfacID Job ID to test
 *
 * @return bool True for an Airavata/Thrift job
 */
function is_aira_job($gfacID)
{
    global $cluster;

    $has_aira_id = preg_match("/US3-A/i", $gfacID);
    if (!$has_aira_id) {
        return false;
    }

    // Jobs on a "juropa" cluster are never treated as Airavata jobs
    return preg_match("/juropa/i", $cluster) ? false : true;
}
637
/**
 * Get the current status of a job.
 *
 * - Airavata/Thrift jobs: the experiment status from getExperimentStatus(),
 *   mapped through standard_status().
 * - GFAC jobs: the status parsed from the "$serviceURL/jobstatus/<id>"
 *   response; DONE, DATA and RESULTS_GEN are all reported as 'DATA'.
 * - Any other job: false.
 *
 * An unrecognised GFAC status is mailed to the administrator for debugging
 * and then returned unchanged.
 *
 * @param string $gfacID Job ID
 *
 * @return string|false Status string, 'GFAC_STATUS_UNAVAILABLE' when the
 *                      HTTP request fails, or false when the job is neither
 *                      an Airavata nor a GFAC job
 */
function get_gfac_status($gfacID)
{
    global $serviceURL;
    global $self;
    global $loghdr;
    global $cluster;

    if (is_aira_job($gfacID)) {
        return standard_status(getExperimentStatus($gfacID));
    }

    if (!is_gfac_job($gfacID)) {
        return false;
    }

    try {
        $request = new HttpRequest("$serviceURL/jobstatus/$gfacID", HttpRequest::METH_GET);
        $request->send();
        $xml = $request->getResponseBody();
    } catch (HttpException $e) {
        write_log("$loghdr Status not available - marking failed - $gfacID");
        return 'GFAC_STATUS_UNAVAILABLE';
    }

    // Parse the result
    $gfac_status = parse_response($xml);

    // This may not seem like the best place to do this, but here we have
    // the xml straight from GFAC
    $known_statuses = array(
        'SUBMITTED',
        'SUBMITED',
        'INITIALIZED',
        'PENDING',
        'RUNNING',
        'ACTIVE',
        'STARTED',
        'COMPLETED',
        'FINISHED',
        'DONE',
        'DATA',
        'RESULTS_GEN',
        'CANCELED',
        'CANCELLED',
        'FAILED',
        'STAGING',
        'UNKNOWN'
    );

    if (!in_array($gfac_status, $known_statuses)) {
        mail_to_admin(
            'debug',
            "gfacID: /$gfacID/\n" .
            "XML: /$xml/\n" .
            "Status: /$gfac_status/\n"
        );
    }

    // All "job done, data pending" variants are reported as DATA
    $data_statuses = array('DONE', 'DATA', 'RESULTS_GEN');

    return in_array($gfac_status, $data_statuses) ? 'DATA' : $gfac_status;
}
701
/**
 * Ask GFAC to (re)register the outputs of a job.
 *
 * The request goes to "$serviceURL/registeroutput/<id>" and is only sent
 * when the job's current status shows that it is over.
 *
 * 'DATA' is accepted because get_gfac_status() reports DONE, DATA and
 * RESULTS_GEN as 'DATA', and the callers in this file (wait_data(),
 * data_timeout()) only reach this function while the status is 'DATA';
 * without it the request was never sent from those callers.
 *
 * @param string $gfacID Job ID
 *
 * @return string|false Status parsed from the response, or false when the
 *                      job is not a GFAC job, its status does not allow a
 *                      data request, or the HTTP request fails
 */
function get_gfac_outputs($gfacID)
{
    global $serviceURL;
    global $self;

    // Make sure it's a GFAC job and status is appropriate for this call
    $job_status = get_gfac_status($gfacID);

    if ($job_status === false) {
        // Then it's not a GFAC job
        return false;
    }

    $requestable = array('DONE', 'DATA', 'FAILED', 'COMPLETE', 'FINISHED');

    if (!in_array($job_status, $requestable)) {
        // Then it's not appropriate to request data
        return false;
    }

    $url = "$serviceURL/registeroutput/$gfacID";

    try {
        $post = new HttpRequest($url, HttpRequest::METH_GET);
        $post->send();
        $xml = $post->getResponseBody();
    } catch (HttpException $e) {
        write_log("$self: Data not available - request failed - $gfacID");
        return false;
    }

    // Temporary, to see what the xml looks like, if we ever get one
    mail_to_admin("debug", "get_gfac_outputs/\n$xml/");

    // Parse the result
    return parse_response($xml);
}
742
/**
 * Extract the job status from a GFAC XML response.
 *
 * The text of a <status> element becomes the return value; the text of any
 * other element is stored in the global $gfac_message (the last one wins).
 * Both are reset to "" at the start of every call.
 *
 * Empty input, or input the reader refuses to load, yields "" without
 * attempting to read from an unopened reader.
 *
 * @param string $xml Raw XML response body
 *
 * @return string Text of the <status> element, or "" when none was found
 */
function parse_response($xml)
{
    global $gfac_message;

    $status       = "";
    $gfac_message = "";

    // Nothing to parse: XMLReader rejects empty input
    if ((string) $xml === '') {
        return $status;
    }

    $parser = new XMLReader();

    if (!$parser->xml($xml)) {
        return $status;
    }

    // Name of the most recently opened element; text nodes are attributed
    // to it
    $name = "";

    while ($parser->read()) {
        $type = $parser->nodeType;

        if ($type == XMLReader::ELEMENT) {
            $name = $parser->name;
        } elseif ($type == XMLReader::TEXT) {
            if ($name == "status") {
                $status = $parser->value;
            } else {
                $gfac_message = $parser->value;
            }
        }
    }

    $parser->close();

    return $status;
}
772
/**
 * Get the status of a job from the local cluster's queue.
 *
 * Runs `qstat -a <id>` over ssh on "<cluster>.uthscsa.edu" (with "-local"
 * removed from the host name) and maps the field at index 9 of the last
 * output line to a status:
 *   W, E, R -> ACTIVE;  C -> COMPLETED;  T, H, Q -> SUBMITTED.
 *
 * Both shell arguments are escaped for the local shell. NOTE(review): ssh
 * hands the command to the remote shell again, so this protects the local
 * command line only.
 *
 * @param string $gfacID Job ID as known to the queue
 *
 * @return string 'ACTIVE', 'COMPLETED', 'SUBMITTED', or 'UNKNOWN' when the
 *                job is not found or the output cannot be interpreted
 */
function get_local_status($gfacID)
{
    global $cluster;
    global $self;

    $system = "$cluster.uthscsa.edu";
    $system = preg_replace("/\-local/", "", $system);
    $cmd    = "/usr/bin/ssh -x " . escapeshellarg("us3@$system") .
              " qstat -a " . escapeshellarg($gfacID) . " 2>&1";

    // exec() returns only the last line of output
    $result = exec($cmd);

    if ($result == "" || preg_match("/^qstat: Unknown/", $result)) {
        write_log("$self get_local_status: Local job $gfacID unknown");
        return 'UNKNOWN';
    }

    // A line with fewer than ten fields is treated as an unknown state
    $values = preg_split("/\s+/", $result);
    $state  = isset($values[9]) ? $values[9] : '';

    switch ($state) {
        case "W":  // Waiting for execution time to be reached
        case "E":  // Job is exiting after having run
        case "R":  // Still running
            $status = 'ACTIVE';
            break;

        case "C":  // Job has completed
            $status = 'COMPLETED';
            break;

        case "T":  // Job is being moved
        case "H":  // Held
        case "Q":  // Queued
            $status = 'SUBMITTED';
            break;

        default:
            $status = 'UNKNOWN';  // This should not occur
            break;
    }

    return $status;
}
818
/**
 * Append a message to the queue_messages table for the current job.
 *
 * The analysis row ID is looked up from the global $gfacID; the message is
 * then inserted with that ID. When no analysis row matches, nothing is
 * inserted (previously a row with an empty analysisID was written) and the
 * condition is logged.
 *
 * @param string $message Message text to store
 */
function update_queue_messages($message)
{
    global $self;
    global $gLink;
    global $gfacID;

    // Get analysis table ID
    $query  = "SELECT id FROM analysis " .
              "WHERE gfacID = '$gfacID' ";
    $result = mysql_query($query, $gLink);

    if (!$result) {
        write_log("$self: Query failed $query - " . mysql_error($gLink));
        return;
    }

    $row = mysql_fetch_array($result);

    if (!$row) {
        // The entry disappeared: there is nothing to attach the message to
        write_log("$self: No analysis entry for gfacID $gfacID - queue message not stored");
        return;
    }

    $analysisID = $row[0];

    // Insert message into queue_message table
    $query  = "INSERT INTO queue_messages SET " .
              "message = '" . mysql_real_escape_string($message, $gLink) . "', " .
              "analysisID = '$analysisID' ";
    $result = mysql_query($query, $gLink);

    if (!$result) {
        write_log("$self: Query failed $query - " . mysql_error($gLink));
    }
}
847
/**
 * Store a message as lastMessage of the current job in its LIMS database.
 *
 * Connects to the database named by the global $us3_db and updates the
 * HPCAnalysisResult row for the global $gfacID. The connection is closed on
 * every path once it has been opened, a failed UPDATE is logged, and the
 * database password is never written to the log or to e-mail.
 *
 * @param string $message Message text to store
 *
 * @return int|null 0 when the connection or DB selection fails, otherwise
 *                  nothing
 */
function update_db($message)
{
    global $self;
    global $gfacID;
    global $dbhost;
    global $user;
    global $passwd;
    global $us3_db;

    $us3_link = mysql_connect($dbhost, $user, $passwd);

    if (!$us3_link) {
        write_log("$self: could not connect: $dbhost, $user");
        mail_to_admin("fail", "Could not connect to $dbhost");
        return 0;
    }

    if (!mysql_select_db($us3_db, $us3_link)) {
        write_log("$self: could not select DB $us3_db");
        mail_to_admin("fail", "Could not select DB $us3_db, $dbhost, $user");
        mysql_close($us3_link);
        return 0;
    }

    // Note the space before WHERE, so the clause does not run into the
    // closing quote of the message
    $query = "UPDATE HPCAnalysisResult SET " .
             "lastMessage='" . mysql_real_escape_string($message, $us3_link) . "' " .
             "WHERE gfacID = '$gfacID' ";

    if (!mysql_query($query, $us3_link)) {
        write_log("$self: Query failed $query - " . mysql_error($us3_link));
    }

    mysql_close($us3_link);
}
883
/**
 * Send a notification e-mail to the administrator.
 *
 * The body reports the globals $updateTime, $status and $cluster followed
 * by the supplied message. The mail goes to the global $admin_email.
 *
 * @param string $type Notification category passed by callers ("fail",
 *                     "hang", "debug"); not used when building the mail
 * @param string $msg  Message text appended to the mail body
 */
function mail_to_admin($type, $msg)
{
    global $updateTime;
    global $status;
    global $cluster;
    global $org_name;
    global $admin_email;
    global $dbhost;
    global $requestID;

    $header_lines = array(
        "From: $org_name Admin<$admin_email>",
        "Cc: $org_name Admin<$admin_email>",
        "Bcc: Gary Gorbet<gegorbet@gmail.com>",  // make sure
        // Set the reply address
        "Reply-To: $org_name<$admin_email>",
        "Return-Path: $org_name<$admin_email>",
        // Try to avoid spam filters
        "Message-ID: <" . time() . "gridctl@$dbhost>$requestID",
        "X-Mailer: PHP v" . phpversion(),
        "MIME-Version: 1.0",
        "Content-Transfer-Encoding: 8bit",
    );

    // Every header line, including the last, is terminated by a newline
    $headers = implode("\n", $header_lines) . "\n";

    $subject = "US3 Error Notification";

    $message = "\n"
             . " UltraScan job error notification from gridctl.php:\n"
             . "\n"
             . " Update Time : $updateTime\n"
             . " GFAC Status : $status\n"
             . " Cluster : $cluster\n"
             . " "
             . "Error Message : $msg\n";

    mail($admin_email, $subject, $message, $headers);
}
[6]922
/**
 * Convert a status string to one of the standard DB status strings.
 *
 * Known variations are mapped to the gateway's standard values; any other
 * value (including ERROR, RUNNING, SUBMITTED, UPDATING, CANCELED, DATA,
 * FAILED, DONE and FINISHED, which are already standard) is returned
 * unchanged. Note that 'COMPLETED' maps to 'COMPLETE' while 'COMPLETE'
 * maps to 'DONE'.
 *
 * The unreachable `$status = 'DATA'` statements that followed the CANCELED
 * mapping without a case label have been removed; 'DATA' is still returned
 * unchanged through the default branch.
 *
 * @param string $status_in Status string as reported
 *
 * @return string Standard status string
 */
function standard_status($status_in)
{
    switch ($status_in) {
        // Map variations to standard gateway status values
        case 'QUEUED':
        case 'LAUNCHED':
        case 'CREATED':
        case 'VALIDATED':
        case 'SCHEDULED':
        case 'submitted':
        case '':
            return 'SUBMITTED';

        case 'EXECUTING':
        case 'ACTIVE':
        case 'running':
        case 'executing':
            return 'RUNNING';

        case 'PENDING':
        case 'CANCELING':
            return 'UPDATING';

        case 'CANCELLED':
        case 'canceled':
            return 'CANCELED';

        case 'COMPLETED':
        case 'completed':
            return 'COMPLETE';

        case 'FAILED_DATA':
        case 'SUBMIT_TIMEOUT':
        case 'RUN_TIMEOUT':
        case 'DATA_TIMEOUT':
            return 'FAILED';

        case 'COMPLETE':
            return 'DONE';

        case 'UNKNOWN':
            return 'ERROR';

        // Where already standard value (or unrecognised), retain value
        default:
            return $status_in;
    }
}
995
/**
 * Determine the current gateway status of an Airavata job.
 *
 * The job is only examined when its ID matches /US3-A/i and the job and
 * this installation are both development or both production
 * (installation: $class_dir matches /class_devel/; job: ID matches
 * /US3-ADEV/i). Otherwise the incoming status is returned untouched.
 *
 * Combination of experiment status and gateway status:
 *  - experiment COMPLETED, gateway FINISHED/DONE  -> COMPLETE
 *  - experiment COMPLETED, gateway anything else  -> DONE
 *  - gateway FINISHED/DONE, experiment FAILED     -> the experiment status
 *    is re-read after 10 and, if still FAILED, after 20 seconds (logged);
 *    the result is COMPLETE whatever the retries report
 *  - gateway FINISHED/DONE, experiment otherwise  -> gateway status kept
 *  - everything else -> standard_status() of the experiment status
 *
 * A status that differs from the incoming one is recorded through
 * update_job_status().
 *
 * @param string $gfacID    Job ID
 * @param string $status_in Status currently recorded for the job
 *
 * @return string The resulting status
 */
function aira_status($gfacID, $status_in)
{
    global $self;
    global $loghdr;
    global $class_dir;

    $status_gw = $status_in;

    $me_devel  = (bool) preg_match("/class_devel/", $class_dir);
    $job_devel = (bool) preg_match("/US3-ADEV/i", $gfacID);
    $devmatch  = ($me_devel === $job_devel);

    // Not an Airavata job, or development/production type is wrong
    if (!preg_match("/US3-A/i", $gfacID) || !$devmatch) {
        return $status_gw;
    }

    $status_ex   = getExperimentStatus($gfacID);
    $gw_finished = ($status_gw == 'FINISHED' || $status_gw == 'DONE');

    if ($status_ex == 'COMPLETED') {
        // COMPLETED + FINISHED/DONE     : gateway status is now COMPLETE
        // COMPLETED + NOT-FINISHED/DONE : gateway status is now DONE
        $status = $gw_finished ? 'COMPLETE' : 'DONE';
    } elseif ($gw_finished) {
        // Gfac status == FINISHED/DONE: leave as is (unless FAILED)
        $status = $status_gw;

        if ($status_ex == 'FAILED') {
            sleep(10);
            $status_ex = getExperimentStatus($gfacID);

            if ($status_ex == 'FAILED') {
                write_log("$loghdr status still 'FAILED' after 10-second delay");
                sleep(10);
                $status_ex = getExperimentStatus($gfacID);

                if ($status_ex == 'FAILED') {
                    write_log("$loghdr status still 'FAILED' after 20-second delay");
                } else {
                    write_log("$loghdr status is $status_ex after 20-second delayed retry");
                }
            }

            write_log("$loghdr status reset to 'COMPLETE'");
            $status = 'COMPLETE';
        }
    } else {
        // Experiment not COMPLETED/FINISHED/DONE: use experiment status
        $status = standard_status($status_ex);
    }

    if ($status != $status_gw) {
        update_job_status($status, $gfacID);
    }

    return $status;
}
1068
[1]1069?>
Note: See TracBrowser for help on using the repository browser.