source: trunk/gridctl.php@ 24

Last change on this file since 24 was 22, checked in by gegorbet, 9 years ago

fix us3 baseon listen.php and status_ex=EXECUTING handling in gridctl.php

File size: 30.0 KB
Line 
1<?php
2
// Main driver: walks every row of the GFAC `analysis` table, determines the
// current status of the corresponding job and dispatches to the handler for
// that status.
//
// NOTE(review): $dbhost, $guser, $gpasswd, $gDB, $self, $class_dir and the
// helpers write_log(), getExperimentStatus(), aira_cleanup() and
// gfac_cleanup() are not defined in this file; presumably they come from an
// included configuration/library file - confirm.

// Global variables
$gfac_message = "";
$updateTime = 0;
$submittime = 0;
$cluster = '';

//global $self;
global $status_ex, $status_gw;

// Produce some output temporarily, so cron will send me message
$now = time();
echo "Time started: " . date( 'Y-m-d H:i:s', $now ) . "\n";

// Get data from global GFAC DB
$gLink = mysql_connect( $dbhost, $guser, $gpasswd );

// A failed connection must be reported here; otherwise every later call
// would be handed an invalid link.
if ( ! $gLink )
{
   write_log( "$self: Could not connect to $dbhost - " . mysql_error() );
   mail_to_admin( "fail", "Internal Error: Could not connect to $dbhost" );
   exit();
}

if ( ! mysql_select_db( $gDB, $gLink ) )
{
   write_log( "$self: Could not select DB $gDB - " . mysql_error( $gLink ) );
   mail_to_admin( "fail", "Internal Error: Could not select DB $gDB" );
   exit();
}

$query = "SELECT gfacID, us3_db, cluster, status, queue_msg, " .
         "UNIX_TIMESTAMP(time), time from analysis";
$result = mysql_query( $query, $gLink );

if ( ! $result )
{
   write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );
   mail_to_admin( "fail", "Query failed $query\n" . mysql_error( $gLink ) );
   exit();
}

if ( mysql_num_rows( $result ) == 0 )
{
//write_log( "$self: analysis read got numrows==0" );
   exit();  // Nothing to do
}

// Non-zero when this copy of the script lives under a class_devel directory
$me_devel = preg_match( "/class_devel/", $class_dir );

// $time is the UNIX timestamp of the row, $updateTime the same column as text
while ( list( $gfacID, $us3_db, $cluster, $status, $queue_msg, $time, $updateTime )
           = mysql_fetch_array( $result ) )
{
   // If this entry does not match class/class_devel, skip processing

   if ( preg_match( "/US3-A/i", $gfacID ) )
   {  // For thrift, job and gridctl must match
      $job_devel = preg_match( "/US3-ADEV/i", $gfacID );
      if ( (  $me_devel && !$job_devel )  ||
           ( !$me_devel &&  $job_devel ) )
      {  // If job not from appropriate Airavata server, skip processing
         continue;
      }
   }
   else if ( $me_devel )
   {  // For non-thrift and development, skip
      continue;
   }

   // Checking we need to do for each entry
echo "us3db=$us3_db  gfid=$gfacID\n";
   switch ( $us3_db )
   {
      case 'Xuslims3_cauma3' :
      case 'Xuslims3_cauma3d' :
      case 'Xuslims3_HHU' :
      case 'Xuslims3_Uni_KN' :
         $serviceURL = "http://gridfarm005.ucs.indiana.edu:9090/ogce-rest/job";
         break;

      default :
//         $serviceURL = "http://gridfarm005.ucs.indiana.edu:8080/ogce-rest/job";
         break;
   }

   // Build a short label from the first three dash-separated parts of the
   // ID; pad so that IDs with fewer parts do not raise undefined-offset
   // notices (the resulting label text is unchanged).
   $awork     = array_pad( explode( "-", $gfacID ), 3, "" );
   $gfacLabl  = $awork[0] . "-" . $awork[1] . "-" . $awork[2];
   $loghdr    = $self . ":" . $gfacLabl . "...:";
   $status_in = $status;
   $status_gw = $status;
   $status_ex = $status;

   // If entry is for Airvata/Thrift, get the true current status

   if ( is_aira_job( $gfacID ) )
   {
      $status_in = $status;
      $status    = aira_status( $gfacID, $status_in );
if($status != $status_in )
write_log( "$loghdr Set to $status from $status_in" );
   }
   else
   {
      $status_gw = $status;
      $status    = get_gfac_status( $gfacID );
      //if ( $status == 'FINISHED' )
      // A gateway status of COMPLETE takes precedence over the queried one
      if ( $status_gw == 'COMPLETE' )
         $status = $status_gw;
write_log( "$loghdr non-AThrift status=$status status_gw=$status_gw" );
   }

   // Sometimes during testing, the us3_db entry is not set
   // If $status == 'ERROR' then the condition has been processed before
   if ( strlen( $us3_db ) == 0 && $status != 'ERROR' )
   {
      write_log( "$loghdr GFAC DB is NULL - $gfacID" );
      mail_to_admin( "fail", "GFAC DB is NULL\n$gfacID" );

      // Escape the ID before embedding it in the statement
      $query2  = "UPDATE analysis SET status='ERROR' WHERE gfacID='" .
                 mysql_real_escape_string( $gfacID, $gLink ) . "'";
      $result2 = mysql_query( $query2, $gLink );
      $status  = 'ERROR';

      if ( ! $result2 )
         write_log( "$loghdr Query failed $query2 - " . mysql_error( $gLink ) );

   }

//echo "  st=$status\n";
   switch ( $status )
   {
      // Already been handled
      // Later update this condition to search for gfacID?
      case "ERROR":
         cleanup();
         break;

      case "SUBMITTED":
         submitted( $time );
         break;

      case "SUBMIT_TIMEOUT":
         submit_timeout( $time );
         break;

      case "RUNNING":
      case "STARTED":
      case "STAGING":
      case "ACTIVE":
         running( $time );
         break;

      case "RUN_TIMEOUT":
         run_timeout($time );
         break;

      case "DATA":
      case "RESULTS_GEN":
         wait_data( $time );
         break;

      case "DATA_TIMEOUT":
         data_timeout( $time );
         break;

      case "COMPLETED":
      case "COMPLETE":
         complete();
         break;

      case "CANCELLED":
      case "CANCELED":
      case "FAILED":
         failed();
         break;

      case "FINISHED":
      case "DONE":
         // Only Airavata jobs are finalised here, and only once the
         // experiment itself reports COMPLETED
         if ( is_aira_job( $gfacID ) )
         {
            $status_ex = getExperimentStatus( $gfacID );
write_log( "$loghdr status=$status status_ex=$status_ex" );
            if ( $status_ex === 'COMPLETED' )
               complete();
         }
         break;   // explicit: previously fell through into the no-op cases

      case "PROCESSING":
      default:
         break;
   }
}

exit();
187
// Handler for an entry whose status is SUBMITTED.
//
//   $updatetime - UNIX timestamp of the entry's last update
//
// Younger than 10 minutes: nothing is done.  Younger than 24 hours: the
// job status is re-queried and, if it is no longer one of the queued
// states, passed to update_job_status().  Otherwise the entry is marked
// SUBMIT_TIMEOUT and the administrator is notified.
function submitted( $updatetime )
{
   global $self;
   global $gLink;
   global $gfacID;

   $age = time() - $updatetime;      // seconds since the last update

   if ( $age < 600 )
      return;                        // < 10 minutes ago

   if ( $age >= 86400 )
   {  // Listed as submitted for 24 hours or more: flag a timeout
      $message = "Job listed submitted longer than 24 hours";
      write_log( "$self: $message - id: $gfacID" );
      mail_to_admin( "hang", "$message - id: $gfacID" );

      $query = "UPDATE analysis SET status='SUBMIT_TIMEOUT' WHERE gfacID='$gfacID'";

      if ( ! mysql_query( $query, $gLink ) )
         write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

      update_queue_messages( $message );
      update_db( $message );
      return;
   }

   // Within the first 24 hours: ask for the current job status, falling
   // back to the local cluster when get_gfac_status() returns false
   $job_status = get_gfac_status( $gfacID );

   if ( $job_status === false )
      $job_status = get_local_status( $gfacID );

   if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
      return;

   $still_queued = in_array( $job_status, array( 'SUBMITTED', 'INITIALIZED', 'PENDING' ) );

   if ( ! $still_queued )
      update_job_status( $job_status, $gfacID );
}
228
// Handler for an entry whose status is SUBMIT_TIMEOUT.
//
//   $updatetime - UNIX timestamp of the entry's last update
//
// The job status is re-queried first; any status other than a queued one
// is handed to update_job_status().  If the job is still queued and the
// last update is 24 hours or more in the past, the entry is marked FAILED.
function submit_timeout( $updatetime )
{
   global $self;
   global $gLink;
   global $gfacID;

   $job_status = get_gfac_status( $gfacID );

   if ( $job_status === false )
      $job_status = get_local_status( $gfacID );

   if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
      return;

   $still_queued = in_array( $job_status, array( 'SUBMITTED', 'INITIALIZED', 'PENDING' ) );

   if ( ! $still_queued )
   {
      update_job_status( $job_status, $gfacID );
      return;
   }

   // < 24 hours ago ( 48 total submitted )
   if ( time() - $updatetime < 86400 )
      return;

   $message = "Job listed submitted longer than 48 hours";
   write_log( "$self: $message - id: $gfacID" );
   mail_to_admin( "hang", "$message - id: $gfacID" );

   $query = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";

   if ( ! mysql_query( $query, $gLink ) )
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

   update_queue_messages( $message );
   update_db( $message );
}
265
// Handler for an entry whose status is RUNNING/STARTED/STAGING/ACTIVE.
//
//   $updatetime - UNIX timestamp of the entry's last update
//
// get_us3_data() is always called first (it refreshes the global
// $updateTime).  Younger than 10 minutes: nothing more is done.  Younger
// than 24 hours: the job status is re-queried and, if it is no longer a
// running state, passed to update_job_status().  Otherwise the entry is
// marked RUN_TIMEOUT and the administrator is notified.
function running( $updatetime )
{
   global $self;
   global $gLink;
   global $gfacID;

   $started = time();                // sampled before the DB round trip

   get_us3_data();

   $age = $started - $updatetime;

   if ( $age < 600 )
      return;                        // message received < 10 minutes ago

   if ( $age >= 86400 )
   {  // Listed as running for 24 hours or more: flag a timeout
      $message = "Job listed running longer than 24 hours";
      write_log( "$self: $message - id: $gfacID" );
      mail_to_admin( "hang", "$message - id: $gfacID" );

      $query = "UPDATE analysis SET status='RUN_TIMEOUT' WHERE gfacID='$gfacID'";

      if ( ! mysql_query( $query, $gLink ) )
         write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

      update_queue_messages( $message );
      update_db( $message );
      return;
   }

   // Within the first 24 hours
   $job_status = get_gfac_status( $gfacID );

   if ( $job_status === false )
      $job_status = get_local_status( $gfacID );

   if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
      return;

   $still_running = in_array( $job_status, array( 'ACTIVE', 'RUNNING', 'STARTED' ) );

   if ( ! $still_running )
      update_job_status( $job_status, $gfacID );
}
308
// Handler for an entry whose status is RUN_TIMEOUT.
//
//   $updatetime - UNIX timestamp of the entry's last update
//
// The job status is re-queried first; any status other than a running one
// is handed to update_job_status().  If the job still reports running and
// the last update is 48 hours (172800 s) or more in the past, the entry is
// marked FAILED and the administrator is notified.
function run_timeout( $updatetime )
{
   global $self;
   global $gLink;
   global $gfacID;

   $job_status = get_gfac_status( $gfacID );

   if ( $job_status === false )
      $job_status = get_local_status( $gfacID );

   if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
      return;

   $still_running = in_array( $job_status, array( 'ACTIVE', 'RUNNING', 'STARTED' ) );

   if ( ! $still_running )
   {
      update_job_status( $job_status, $gfacID );
      return;
   }

   $checked = time();                // sampled before the DB round trip

   get_us3_data();

   if ( $checked - $updatetime < 172800 )
      return;                        // < 48 hours ago

   $message = "Job listed running longer than 48 hours";
   write_log( "$self: $message - id: $gfacID" );
   mail_to_admin( "hang", "$message - id: $gfacID" );

   $query = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";

   if ( ! mysql_query( $query, $gLink ) )
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

   update_queue_messages( $message );
   update_db( $message );
}
347
// Handler for an entry whose status is DATA/RESULTS_GEN.
//
//   $updatetime - UNIX timestamp of the entry's last update
//
// During the first hour the job status is re-queried; a status other than
// DATA is handed to update_job_status(), otherwise a data request is made
// through get_gfac_outputs(), but only on wall-clock minutes divisible by
// five.  After one hour the entry is marked DATA_TIMEOUT.
function wait_data( $updatetime )
{
   global $self;
   global $gLink;
   global $gfacID;

   if ( time() - $updatetime >= 3600 )
   {  // Waited an hour or more: flag a timeout
      $message = "Waiting for data longer than 1 hour";
      write_log( "$self: $message - id: $gfacID" );
      mail_to_admin( "hang", "$message - id: $gfacID" );

      $query = "UPDATE analysis SET status='DATA_TIMEOUT' WHERE gfacID='$gfacID'";

      if ( ! mysql_query( $query, $gLink ) )
         write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

      update_queue_messages( $message );
      update_db( $message );
      return;
   }

   // Within the first hour
   $job_status = get_gfac_status( $gfacID );

   if ( $job_status === false )
      $job_status = get_local_status( $gfacID );

   if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
      return;

   if ( $job_status != 'DATA' )
   {
      update_job_status( $job_status, $gfacID );
      return;
   }

   // Request to resend data, but only request every 5 minutes
   if ( (int) date( 'i' ) % 5 != 0 )
      return;

   $output_status = get_gfac_outputs( $gfacID );

   if ( $output_status !== false )
      mail_to_admin( "debug", "wait_data/$gfacID/$output_status" );
}
396
// Handler for an entry whose status is DATA_TIMEOUT.
//
//   $updatetime - UNIX timestamp of the entry's last update
//
// The job status is re-queried first; a status other than DATA is handed
// to update_job_status().  While the last update is less than 24 hours
// old a data request is made through get_gfac_outputs(), but only on
// wall-clock minutes divisible by fifteen.  After 24 hours the entry is
// marked FAILED and the administrator is notified.
function data_timeout( $updatetime )
{
   global $self;
   global $gLink;
   global $gfacID;

   $job_status = get_gfac_status( $gfacID );

   if ( $job_status === false )
      $job_status = get_local_status( $gfacID );

   if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
      return;

   if ( $job_status != 'DATA' )
   {
      update_job_status( $job_status, $gfacID );
      return;
   }

   if ( time() - $updatetime >= 86400 )
   {  // Waited 24 hours or more: give up on this job
      $message = "Waiting for data longer than 24 hours";
      write_log( "$self: $message - id: $gfacID" );
      mail_to_admin( "hang", "$message - id: $gfacID" );

      $query = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";

      if ( ! mysql_query( $query, $gLink ) )
         write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );

      update_queue_messages( $message );
      update_db( $message );
      return;
   }

   // < 24 hours ago
   // Request to resend data, but only request every 15 minutes
   if ( (int) date( 'i' ) % 15 != 0 )
      return;

   $output_status = get_gfac_outputs( $gfacID );

   if ( $output_status !== false )
      mail_to_admin( "debug", "data_timeout/$gfacID/$output_status" );
}
445
// Handler for a completed job: there is nothing left to do except
// delegate to cleanup().
function complete()
{
   cleanup();
}
451
// Handler for a failed or cancelled job: there is nothing left to do
// except delegate to cleanup().
function failed()
{
   cleanup();
}
457
// Run the cleanup routine appropriate for the current job (global $gfacID).
//
// Nothing happens unless the job still has a row in the analysis table and
// get_us3_data() yields a non-zero request ID.  Airavata jobs ("US3-A" in
// the ID) are handed to aira_cleanup() when the job's devel/production
// flavour matches this script's; all other jobs are handed to
// gfac_cleanup(), but only by the non-devel script.
function cleanup()
{
   global $self;
   global $gLink;
   global $gfacID;
   global $us3_db;
   global $loghdr;
   global $class_dir;

   // Double check that the gfacID exists
   $query  = "SELECT count(*) FROM analysis WHERE gfacID='$gfacID'";
   $result = mysql_query( $query, $gLink );

   if ( ! $result )
   {
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );
      mail_to_admin( "fail", "Query failed $query\n" . mysql_error( $gLink ) );
      return;
   }

   list( $count ) = mysql_fetch_array( $result );

   if ( $count == 0 )
   {
      write_log( "$loghdr count = $count  gfacID = $gfacID" );
      return;
   }

   // Now check the us3 instance
   $requestID = get_us3_data();

   if ( $requestID == 0 )
      return;

   $me_devel    = preg_match( "/class_devel/", $class_dir );
   $is_airavata = preg_match( "/US3-A/i", $gfacID );

   if ( ! $is_airavata )
   {  // If this is gridctl_pro and gfac (jureca), do GFAC cleanup
      if ( ! $me_devel )
         gfac_cleanup( $us3_db, $requestID, $gLink );

      return;
   }

   $job_devel = preg_match( "/US3-ADEV/i", $gfacID );

   // If job from appropriate Airavata server, process it
   if ( (bool) $me_devel == (bool) $job_devel )
      aira_cleanup( $us3_db, $requestID, $gLink );
}
509
// Function to update status of job
//
//   $job_status - status string reported for the job
//   $gfacID     - ID of the job whose analysis row is to be updated
//
// The reported status is mapped onto the status value stored in the
// analysis table and the row is updated.  Unless the message is 'NONE',
// it is also recorded through update_queue_messages() and update_db().
//
// UPDATING and PENDING only record a message; they do not change the row.
// Previously the statement lived in a shared global $query, so for those
// two states whatever statement happened to be left in it (possibly the
// UPDATE of a different job) was executed again.  The statement is now
// local and is only run when a new status was actually chosen.
function update_job_status( $job_status, $gfacID )
{
   global $gLink;
   global $self;
   global $loghdr;

   $new_status = null;               // null: leave the analysis row untouched

   switch ( $job_status )
   {
      case 'SUBMITTED'   :
      case 'SUBMITED'    :
      case 'INITIALIZED' :
         $new_status = 'SUBMITTED';
         $message    = "Job status request reports job is " . $job_status;
         break;

      case 'STARTED'     :
      case 'RUNNING'     :
      case 'ACTIVE'      :
         $new_status = 'RUNNING';
         $message    = "Job status request reports job is RUNNING";
         break;

      case 'FINISHED'    :
         $new_status = 'FINISHED';
         $message    = "NONE";
         break;

      case 'DONE'        :
         $new_status = 'DONE';
         $message    = "NONE";
         break;

      case 'COMPLETED'   :
      case 'COMPLETE'    :
         $new_status = 'COMPLETE';
         $message    = "Job status request reports job is COMPLETED";
         break;

      case 'DATA'        :
         $new_status = 'DATA';
         $message    = "Job status request reports job is COMPLETE, waiting for data";
         break;

      case 'CANCELED'    :
      case 'CANCELLED'   :
         $new_status = 'CANCELED';
         $message    = "Job status request reports job is CANCELED";
         break;

      case 'FAILED'      :
         $new_status = 'FAILED';
         $message    = "Job status request reports job is FAILED";
         break;

      case 'UNKNOWN'     :
         write_log( "$loghdr job_status='UNKNOWN', reset to 'ERROR' " );
         $new_status = 'ERROR';
         $message    = "Job status request reports job is not in the queue";
         break;

      case 'UPDATING'    :
      case 'PENDING'     :
         // Transitional states: report only, no status change
         $message    = "Job status request reports job is " . $job_status;
         break;

      default            :
         // We shouldn't ever get here
         $new_status = 'ERROR';
         $message    = "Job status was not recognized - $job_status";
         write_log( "$loghdr update_job_status: " .
                    "Job status was not recognized - $job_status\n" .
                    "gfacID = $gfacID\n" );
         break;
   }

   if ( $new_status !== null )
   {
      $query  = "UPDATE analysis SET status='$new_status' WHERE gfacID='$gfacID'";
      $result = mysql_query( $query, $gLink );

      if ( ! $result )
         write_log( "$loghdr Query failed $query - " . mysql_error( $gLink ) );
   }

   if ( $message != 'NONE' )
   {
      update_queue_messages( $message );
      update_db( $message );
   }
}
599
// Look up the current job (global $gfacID) in the HPCAnalysisResult table
// of the US3 database named by the global $us3_db.
//
// Side effect: the global $updateTime is overwritten with the UNIX
// timestamp of the row's updateTime column (NULL when no row matches).
//
// Returns the HPCAnalysisRequestID of the row, or 0 when the connection,
// the database selection or the query fails.  When no row matches, the
// value returned is NULL, which callers compare loosely against 0.
//
// The database password is deliberately left out of log and mail text,
// and the connection is closed on every path once it has been opened.
function get_us3_data()
{
   global $self;
   global $gfacID;
   global $dbhost;
   global $user;
   global $passwd;
   global $us3_db;
   global $updateTime;
   global $loghdr;

   $us3_link = mysql_connect( $dbhost, $user, $passwd );

   if ( ! $us3_link )
   {
      write_log( "$loghdr could not connect: $dbhost, $user" );
      mail_to_admin( "fail", "Could not connect to $dbhost" );
      return 0;
   }

   $result = mysql_select_db( $us3_db, $us3_link );

   if ( ! $result )
   {
      write_log( "$loghdr could not select DB $us3_db" );
      mail_to_admin( "fail", "Could not select DB $us3_db, $dbhost, $user" );
      mysql_close( $us3_link );
      return 0;
   }

   $query  = "SELECT HPCAnalysisRequestID, UNIX_TIMESTAMP(updateTime) " .
             "FROM HPCAnalysisResult WHERE gfacID='$gfacID'";
   $result = mysql_query( $query, $us3_link );

   if ( ! $result )
   {
      // Read the error text before the link goes away
      $db_error = mysql_error( $us3_link );
      write_log( "$self: Query failed $query - " . $db_error );
      mail_to_admin( "fail", "Query failed $query\n" . $db_error );
      mysql_close( $us3_link );
      return 0;
   }

   list( $requestID, $updateTime ) = mysql_fetch_array( $result );
   mysql_close( $us3_link );

   return $requestID;
}
646
// Function to determine if this is a gfac job or not
//
// An ID counts as a GFAC job when it starts with "US3-Experiment"
// (case-insensitive) or has the form "US3-" followed by a UUID
// (8-4-4-4-12 hexadecimal digits).  Returns true or false.
function is_gfac_job( $gfacID )
{
   if ( preg_match( '/^US3-Experiment/i', $gfacID ) )
      return true;

   $h    = '[0-9a-fA-F]';
   $uuid = $h . '{8}-' . $h . '{4}-' . $h . '{4}-' . $h . '{4}-' . $h . '{12}';

   return (bool) preg_match( '/^US3-' . $uuid . '$/', $gfacID );
}
660
// Function to determine if this is an airavata/thrift job or not
//
// Any ID containing "US3-A" (case-insensitive, anywhere in the string) is
// treated as an Airavata/Thrift job.  Returns true or false.
function is_aira_job( $gfacID )
{
   return (bool) preg_match( "/US3-A/i", $gfacID );
}
674
// Function to get the current job status from GFAC
//
// Airavata/Thrift jobs: the experiment status is fetched with
// getExperimentStatus() and stored in the global $status_ex.  An
// 'EXECUTING' experiment is treated as 'ACTIVE' when the global
// $status_gw is 'RUNNING' and as 'QUEUED' otherwise.  The result is then
// passed through standard_status() and returned.
//
// GFAC jobs: the status is requested from "$serviceURL/jobstatus/<id>" and
// parsed with parse_response(); DONE, DATA and RESULTS_GEN are all
// reported as 'DATA'.  An unrecognised status is mailed to the admin for
// debugging but still returned.
//
// Returns false when the ID is neither an Airavata nor a GFAC job, and
// 'GFAC_STATUS_UNAVAILABLE' when the HTTP request throws.
function get_gfac_status( $gfacID )
{
   global $serviceURL;
   global $self;
   global $loghdr;
   global $cluster;
   global $status_ex, $status_gw;

   if ( is_aira_job( $gfacID ) )
   {
      $status_ex = getExperimentStatus( $gfacID );

      if ( $status_ex == 'EXECUTING' )
         $status_ex = ( $status_gw == 'RUNNING' ) ? 'ACTIVE' : 'QUEUED';

      $gfac_status = standard_status( $status_ex );
      write_log( "$loghdr get_gfac_status: status_ex=$status_ex gfac_status=$gfac_status" );
      return $gfac_status;
   }

   if ( ! is_gfac_job( $gfacID ) )
      return false;

   try
   {
      $request = new HttpRequest( "$serviceURL/jobstatus/$gfacID", HttpRequest::METH_GET );
      $request->send();
      $xml = $request->getResponseBody();
   }
   catch ( HttpException $e )
   {
      write_log( "$loghdr Status not available - marking failed -  $gfacID" );
      return 'GFAC_STATUS_UNAVAILABLE';
   }

   // Parse the result
   $gfac_status = parse_response( $xml );

   // This may not seem like the best place to do this, but here we have
   // the xml straight from GFAC
   $status_types = array( 'SUBMITTED', 'SUBMITED',    'INITIALIZED', 'PENDING',
                          'RUNNING',   'ACTIVE',      'STARTED',     'COMPLETED',
                          'FINISHED',  'DONE',        'DATA',        'RESULTS_GEN',
                          'CANCELED',  'CANCELLED',   'FAILED',      'STAGING',
                          'UNKNOWN' );

   $recognized = in_array( $gfac_status, $status_types );

   if ( ! $recognized )
      mail_to_admin( 'debug', "gfacID: /$gfacID/\n" .
                              "XML:    /$xml/\n"    .
                              "Status: /$gfac_status/\n" );

   // All "finished, results pending" variants are reported as DATA
   if ( $gfac_status == 'DONE' || $gfac_status == 'DATA' || $gfac_status == 'RESULTS_GEN' )
      return 'DATA';

   return $gfac_status;
}
749
// Function to request data outputs from GFAC
//
// The job status is fetched with get_gfac_status(); the request to
// "$serviceURL/registeroutput/<id>" is only made when that status is one
// of DONE, FAILED, COMPLETE or FINISHED.  The raw XML reply is mailed to
// the admin for debugging and its status, as extracted by
// parse_response(), is returned.  In every other case (not a GFAC job,
// unsuitable status, HTTP failure) the result is false.
//
// NOTE(review): for GFAC jobs get_gfac_status() reports DONE as 'DATA', so
// 'DONE' cannot match here for them and a job in state 'DATA' is rejected
// - confirm this whitelist is what is intended.
function get_gfac_outputs( $gfacID )
{
   global $serviceURL;
   global $self;

   // Make sure it's a GFAC job and status is appropriate for this call
   $job_status = get_gfac_status( $gfacID );

   if ( $job_status === false )
      return false;                  // Then it's not a GFAC job

   $may_request = in_array( $job_status, array( 'DONE', 'FAILED', 'COMPLETE', 'FINISHED' ) );

   if ( ! $may_request )
      return false;                  // Then it's not appropriate to request data

   try
   {
      $request = new HttpRequest( "$serviceURL/registeroutput/$gfacID", HttpRequest::METH_GET );
      $request->send();
      $xml = $request->getResponseBody();
   }
   catch ( HttpException $e )
   {
      write_log( "$self: Data not available - request failed -  $gfacID" );
      return false;
   }

   // Temporary, to see what the xml looks like, if we ever get one
   mail_to_admin( "debug", "get_gfac_outputs/\n$xml/" );

   // Parse the result
   return parse_response( $xml );
}
790
// Extract the job status from an XML reply.
//
//   $xml - XML document as a string
//
// Returns the text content of the last <status> element seen, or "" when
// there is none.  The text of any other element is stored in the global
// $gfac_message (the last such text wins); the global is reset to "" on
// every call.
//
// An empty or non-string $xml is treated as "no status" instead of being
// handed to XMLReader, which rejects empty input.
function parse_response( $xml )
{
   global $gfac_message;

   $status       = "";
   $gfac_message = "";

   if ( ! is_string( $xml ) || trim( $xml ) === '' )
      return $status;

   $parser = new XMLReader();

   if ( ! $parser->xml( $xml ) )
      return $status;

   $name = "";                       // name of the most recently opened element

   while ( $parser->read() )
   {
      $type = $parser->nodeType;

      if ( $type == XMLReader::ELEMENT )
         $name = $parser->name;

      else if ( $type == XMLReader::TEXT )
      {
         if ( $name == "status" )
            $status       = $parser->value;
         else
            $gfac_message = $parser->value;
      }
   }

   $parser->close();
   return $status;
}
820
// Function to get status from local cluster
//
//   $gfacID - job ID handed to qstat
//
// Runs "qstat -a <id>" over ssh as user us3 on "<cluster>.uthscsa.edu"
// (with any "-local" removed from the name; the cluster comes from the
// global $cluster) and maps the state letter found in the tenth
// whitespace-separated field of the last output line:
//
//   W, E, R -> 'ACTIVE'     C -> 'COMPLETED'     T, H, Q -> 'SUBMITTED'
//
// Returns 'UNKNOWN' when there is no output, qstat reports an unknown job,
// or the state letter is missing or not recognised.
//
// The job ID and host are shell-quoted: once for the remote shell that
// ssh hands the command to, and once for the local shell started by
// exec().
function get_local_status( $gfacID )
{
   global $cluster;
   global $self;

   $system = "$cluster.uthscsa.edu";
   $system = preg_replace( "/\-local/", "", $system );

   $remote_cmd = "qstat -a " . escapeshellarg( $gfacID );
   $cmd        = "/usr/bin/ssh -x " . escapeshellarg( "us3@$system" ) . " " .
                 escapeshellarg( $remote_cmd ) . " 2>&1";

   // exec() returns the last line of the command output
   $result = exec( $cmd );

   if ( $result == ""  ||  preg_match( "/^qstat: Unknown/", $result ) )
   {
      write_log( "$self get_local_status: Local job $gfacID unknown" );
      return 'UNKNOWN';
   }

   $values = preg_split( "/\s+/", $result );
   $state  = isset( $values[ 9 ] ) ? $values[ 9 ] : '';   // short line -> UNKNOWN

   switch ( $state )
   {
      case "W" :                      // Waiting for execution time to be reached
      case "E" :                      // Job is exiting after having run
      case "R" :                      // Still running
        $status = 'ACTIVE';
        break;

      case "C" :                      // Job has completed
        $status = 'COMPLETED';
        break;

      case "T" :                      // Job is being moved
      case "H" :                      // Held
      case "Q" :                      // Queued
        $status = 'SUBMITTED';
        break;

      default :
        $status = 'UNKNOWN';          // This should not occur
        break;
   }

   return $status;
}
866
// Record a message for the current job (global $gfacID) in the
// queue_messages table of the GFAC database.
//
//   $message - text to store
//
// The job's analysis row is looked up first to obtain its id.  When the
// lookup fails or finds no row, the problem is logged and nothing is
// inserted (previously a message with an empty analysisID was written).
function update_queue_messages( $message )
{
   global $self;
   global $gLink;
   global $gfacID;

   // Get analysis table ID
   $query  = "SELECT id FROM analysis " .
             "WHERE gfacID = '" . mysql_real_escape_string( $gfacID, $gLink ) . "' ";
   $result = mysql_query( $query, $gLink );

   if ( ! $result )
   {
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );
      return;
   }

   $row = mysql_fetch_array( $result );

   if ( ! $row )
   {
      write_log( "$self: No analysis entry for gfacID $gfacID - queue message not stored" );
      return;
   }

   list( $analysisID ) = $row;

   // Insert message into queue_message table
   $query  = "INSERT INTO queue_messages SET " .
             "message = '" . mysql_real_escape_string( $message, $gLink ) . "', " .
             "analysisID = '$analysisID' ";
   $result = mysql_query( $query, $gLink );

   if ( ! $result )
      write_log( "$self: Query failed $query - " . mysql_error( $gLink ) );
}
895
// Store a message as lastMessage of the current job (global $gfacID) in
// the HPCAnalysisResult table of the US3 database named by the global
// $us3_db.
//
//   $message - text to store
//
// Returns 0 when the connection or the database selection fails; there is
// no return value otherwise.  A failed UPDATE is logged.
//
// The database password is deliberately left out of log and mail text,
// and the connection is closed on every path once it has been opened.
function update_db( $message )
{
   global $self;
   global $gfacID;
   global $dbhost;
   global $user;
   global $passwd;
   global $us3_db;

   $us3_link = mysql_connect( $dbhost, $user, $passwd );

   if ( ! $us3_link )
   {
      write_log( "$self: could not connect: $dbhost, $user" );
      mail_to_admin( "fail", "Could not connect to $dbhost" );
      return 0;
   }

   $result = mysql_select_db( $us3_db, $us3_link );

   if ( ! $result )
   {
      write_log( "$self: could not select DB $us3_db" );
      mail_to_admin( "fail", "Could not select DB $us3_db, $dbhost, $user" );
      mysql_close( $us3_link );
      return 0;
   }

   $query = "UPDATE HPCAnalysisResult SET " .
            "lastMessage='" . mysql_real_escape_string( $message, $us3_link ) . "' " .
            "WHERE gfacID = '$gfacID' ";

   if ( ! mysql_query( $query, $us3_link ) )
      write_log( "$self: Query failed $query - " . mysql_error( $us3_link ) );

   mysql_close( $us3_link );
}
931
// Send an error notification mail to the administrator.
//
//   $type - category of the notification; not used in the mail itself
//   $msg  - text placed after "Error Message :" in the body
//
// The body also reports the globals $updateTime, $status and $cluster.
// Sender, recipient and reply addresses are built from the globals
// $org_name and $admin_email.
function mail_to_admin( $type, $msg )
{
   global $updateTime;
   global $status;
   global $cluster;
   global $org_name;
   global $admin_email;
   global $dbhost;
   global $requestID;

   $now = time();

   $header_lines = array(
      "From: $org_name Admin<$admin_email>",
      "Cc: $org_name Admin<$admin_email>",
      "Bcc: Gary Gorbet<gegorbet@gmail.com>",            // make sure

      // Set the reply address
      "Reply-To: $org_name<$admin_email>",
      "Return-Path: $org_name<$admin_email>",

      // Try to avoid spam filters
      "Message-ID: <" . $now . "gridctl@$dbhost>$requestID",
      "X-Mailer: PHP v" . phpversion(),
      "MIME-Version: 1.0",
      "Content-Transfer-Encoding: 8bit"
   );

   // Every header line, including the last one, ends with a newline
   $headers = implode( "\n", $header_lines ) . "\n";

   $subject = "US3 Error Notification";

   $message = "\n"
            . " UltraScan job error notification from gridctl.php:\n"
            . "\n"
            . " Update Time : $updateTime\n"
            . " GFAC Status : $status\n"
            . " Cluster : $cluster\n"
            . " "
            . "Error Message : $msg\n";

   mail( $admin_email, $subject, $message, $headers );
}
970
// Convert a status string to one of the standard DB status strings
//
//   $status_in - status string to normalise
//
// Returns the mapped gateway status; any value without an explicit
// mapping below (including the already-standard ones such as ERROR,
// RUNNING, SUBMITTED, UPDATING, CANCELED, DATA, FAILED, DONE and FINISHED)
// is returned unchanged.
function standard_status( $status_in )
{
   switch ( $status_in )
   {  // Map variations to standard gateway status values
      case 'QUEUED'         :
      case 'LAUNCHED'       :
      case 'CREATED'        :
      case 'VALIDATED'      :
      case 'SCHEDULED'      :
      case 'submitted'      :
      case ''               :
         return 'SUBMITTED';

      case 'EXECUTING'      :
      case 'ACTIVE'         :
      case 'running'        :
      case 'executing'      :
         return 'RUNNING';

      case 'PENDING'        :
      case 'CANCELING'      :
         return 'UPDATING';

      case 'CANCELLED'      :
      case 'canceled'       :
         return 'CANCELED';

      case 'COMPLETED'      :
      case 'completed'      :
         return 'COMPLETE';

      case 'FAILED_DATA'    :
      case 'SUBMIT_TIMEOUT' :
      case 'RUN_TIMEOUT'    :
      case 'DATA_TIMEOUT'   :
         return 'FAILED';

      case 'COMPLETE'       :
         return 'DONE';

      case 'UNKNOWN'        :
         return 'ERROR';
   }

   // Where already standard value (or unrecognised), retain value
   return $status_in;
}
1040
// Determine the current status of an Airavata/Thrift job.
//
//   $gfacID    - job ID
//   $status_in - status currently stored for the job (gateway status)
//
// When the ID is not an Airavata ID ("US3-A") or the job's
// devel/production flavour does not match this script's, $status_in is
// returned unchanged.  Otherwise the experiment status is fetched with
// getExperimentStatus() and combined with the gateway status:
//
//   experiment COMPLETED, gateway FINISHED/DONE  -> COMPLETE
//   experiment COMPLETED, gateway anything else  -> DONE
//   gateway FINISHED/DONE, experiment FAILED     -> COMPLETE, after
//                                 re-checking up to two times, 10 s apart
//   gateway FINISHED/DONE, experiment otherwise  -> gateway status kept
//   anything else             -> standard_status() of experiment status
//
// If the result differs from the gateway status it is written with
// update_job_status().  Returns the resulting status.
function aira_status( $gfacID, $status_in )
{
   global $self;
   global $loghdr;
   global $class_dir;

   $status_gw = $status_in;
   $status    = $status_gw;

   $me_devel  = preg_match( "/class_devel/", $class_dir );
   $job_devel = preg_match( "/US3-ADEV/i", $gfacID );
   $devmatch  = ( (bool) $me_devel == (bool) $job_devel );

   // Not an Airavata job, or development/production type is wrong
   if ( ! preg_match( "/US3-A/i", $gfacID ) || ! $devmatch )
      return $status;

   $status_ex   = getExperimentStatus( $gfacID );
   $gw_finished = ( $status_gw == 'FINISHED' || $status_gw == 'DONE' );

   if ( $status_ex == 'COMPLETED' )
   {  // COMPLETED + FINISHED/DONE : gateway status is now COMPLETE
      // COMPLETED + NOT-FINISHED/DONE : gateway status is now DONE
      $status = $gw_finished ? 'COMPLETE' : 'DONE';
   }

   else if ( $gw_finished )
   {  // Gfac status == FINISHED/DONE:  leave as is (unless FAILED)
      $status = $status_gw;

      if ( $status_ex == 'FAILED' )
      {
         sleep( 10 );
         $status_ex = getExperimentStatus( $gfacID );

         if ( $status_ex == 'FAILED' )
         {
            write_log( "$loghdr status still 'FAILED' after 10-second delay" );
            sleep( 10 );
            $status_ex = getExperimentStatus( $gfacID );

            if ( $status_ex == 'FAILED' )
               write_log( "$loghdr status still 'FAILED' after 20-second delay" );
            else
               write_log( "$loghdr status is $status_ex after 20-second delayed retry" );
         }

         // The gateway status is set to COMPLETE whatever the retries showed
         write_log( "$loghdr status reset to 'COMPLETE'" );
         $status = 'COMPLETE';
      }
   }

   else
   {  // Experiment not COMPLETED/FINISHED/DONE: use experiment status
      $status = standard_status( $status_ex );
   }

   write_log( "$loghdr status/_in/_gw/_ex=$status/$status_in/$status_gw/$status_ex" );

   if ( $status != $status_gw )
      update_job_status( $status, $gfacID );

   return $status;
}
1116
1117?>
Note: See TracBrowser for help on using the repository browser.