source: trunk/gridctl.php@ 39

Last change on this file since 39 was 39, checked in by gegorbet, 5 years ago

analType and other minor enhancements

File size: 32.0 KB
Line 
<?php

// Locate the LIMS bin directory and load the shared listener configuration.
// NOTE(review): $self, $class_dir, the DB credentials and write_log() are not
// defined in this file; presumably listen-config.php provides them - confirm.
$us3bin = exec( "ls -d ~us3/lims/bin" );
include_once "$us3bin/listen-config.php";
//include "$us3bin/cleanup_aira.php";
//include "$us3bin/cleanup_gfac.php";

// Global variables
$gfac_message = "";
$updateTime = 0;
$submittime = 0;
$cluster = '';

global $status_ex, $status_gw;

// Produce some output temporarily, so cron will send me message
$now = time();
echo "Time started: " . date( 'Y-m-d H:i:s', $now ) . "\n";

// Get data from global GFAC DB
$gLink = mysqli_connect( $dbhost, $guser, $gpasswd, $gDB );

if ( ! $gLink )
{
  // A failed connect leaves no link to ask, so the reason has to come from
  // mysqli_connect_error(); mysqli_error() requires a valid link.
  write_log( "$self: Could not select DB $gDB - " . mysqli_connect_error() );
  mail_to_admin( "fail",
    "Internal Error: Could not select DB $gDB $dbhost $guser " );
  sleep(3);
  exit();
}

// Columns are consumed positionally by the list() in the main loop below
$query = "SELECT gfacID, us3_db, cluster, status, queue_msg, " .
         "UNIX_TIMESTAMP(time), time from analysis";
$result = mysqli_query( $gLink, $query );

if ( ! $result )
{
  write_log( "$self: Query failed $query - " . mysqli_error( $gLink ) );
  mail_to_admin( "fail", "Query failed $query\n" . mysqli_error( $gLink ) );
  exit();
}

if ( mysqli_num_rows( $result ) == 0 )
{
  exit();  // Nothing to do
}

// Non-zero when this copy of the script lives under a "class_devel" directory
$me_devel = preg_match( "/class_devel/", $class_dir );
55
// Walk every row of the analysis table and dispatch on its current status.
// The list() targets are globals; the handler functions below read them
// through "global" declarations, so their names must not change.
while ( list( $gfacID, $us3_db, $cluster, $status, $queue_msg, $time, $updateTime )
            = mysqli_fetch_array( $result ) )
{
  if ( preg_match( "/US3-A/i", $gfacID ) )
  {  // For thrift, job and gridctl must match (both devel or both not devel)
    $job_devel = preg_match( "/US3-ADEV/i", $gfacID );

    if ( (bool) $me_devel != (bool) $job_devel )
      continue;
  }
  else if ( $me_devel )
  {  // Local (us3iab/-local) and class_devel: skip processing
    continue;
  }

  // Checking we need to do for each entry
  echo "us3db=$us3_db gfid=$gfacID\n";

  // Only the database names listed here select this service URL; for any
  // other name $serviceURL is left untouched
  if ( in_array( $us3_db, array( 'Xuslims3_cauma3',
                                 'Xuslims3_cauma3d',
                                 'Xuslims3_HHU',
                                 'Xuslims3_Uni_KN' ) ) )
  {
    $serviceURL = "http://gridfarm005.ucs.indiana.edu:9090/ogce-rest/job";
  }

  $gfacLabl  = $gfacID;
  $loghdr    = $self . ":" . $gfacLabl . "...:";
  $status_ex = $status;

  // Replace the stored status with the true current status where possible
  if ( is_aira_job( $gfacID ) )
  {  // Airavata/Thrift entry
    $status_in = $status;
    $status    = aira_status( $gfacID, $status_in );

    if ( $status != $status_in )
      write_log( "$loghdr Set to $status from $status_in" );
  }
  else if ( is_gfac_job( $gfacID ) )
  {  // GFAC entry: a stored COMPLETE wins over whatever GFAC reports
    $status_gw = $status;
    $status    = get_gfac_status( $gfacID );

    if ( $status_gw == 'COMPLETE' )
      $status = $status_gw;
  }
  else
  {  // Local entry: keep the stored status when it is COMPLETE or when the
     // cluster lookup comes back UNKNOWN
    $status_gw = $status;
    $status    = get_local_status( $gfacID );

    if ( $status_gw == 'COMPLETE' || $status == 'UNKNOWN' )
      $status = $status_gw;
  }

  // Sometimes during testing, the us3_db entry is not set
  // If $status == 'ERROR' then the condition has been processed before
  if ( strlen( $us3_db ) == 0 && $status != 'ERROR' )
  {
    write_log( "$loghdr GFAC DB is NULL - $gfacID" );
    mail_to_admin( "fail", "GFAC DB is NULL\n$gfacID" );

    $query2  = "UPDATE analysis SET status='ERROR' WHERE gfacID='$gfacID'";
    $result2 = mysqli_query( $gLink, $query2 );
    $status  = 'ERROR';

    if ( ! $result2 )
      write_log( "$loghdr Query failed $query2 - " . mysqli_error( $gLink ) );
  }

  switch ( $status )
  {
    // Already been handled
    // Later update this condition to search for gfacID?
    case "ERROR":
      cleanup();
      break;

    case "SUBMITTED":
      submitted( $time );
      break;

    case "SUBMIT_TIMEOUT":
      submit_timeout( $time );
      break;

    case "RUNNING":
    case "STARTED":
    case "STAGING":
    case "ACTIVE":
      running( $time );
      break;

    case "RUN_TIMEOUT":
      run_timeout( $time );
      break;

    case "DATA":
    case "RESULTS_GEN":
      wait_data( $time );
      break;

    case "DATA_TIMEOUT":
      data_timeout( $time );
      break;

    case "COMPLETED":
    case "COMPLETE":
      complete();
      break;

    case "CANCELLED":
    case "CANCELED":
    case "FAILED":
      failed();
      break;

    case "FINISHED":
    case "DONE":
      // Airavata entries are only logged here; everything else is cleaned up
      if ( ! is_aira_job( $gfacID ) )
        complete();

      write_log( "$loghdr FINISHED gfacID=$gfacID" );
      break;

    case "PROCESSING":
    default:
      break;
  }
}

mysqli_close( $gLink );

exit();
217
// Handle an entry whose status is SUBMITTED.
//   $updatetime - Unix timestamp of the entry (the caller passes the
//                 UNIX_TIMESTAMP(time) column)
// Younger than 10 minutes: nothing is done.  Younger than 24 hours: the live
// status is looked up and stored via update_job_status() unless it is still a
// submitted-type state.  Older: the entry becomes SUBMIT_TIMEOUT, the admin
// is mailed and the message is recorded in both databases.
function submitted( $updatetime )
{
  global $self, $gLink, $gfacID, $loghdr;

  $age = time() - $updatetime;

  if ( $age < 600 ) return;            // < 10 minutes ago

  if ( $age >= 86400 )                 // Past the first 24 hours
  {
    $message = "Job listed submitted longer than 24 hours";
    write_log( "$self: $message - id: $gfacID" );
    mail_to_admin( "hang", "$message - id: $gfacID" );

    $query = "UPDATE analysis SET status='SUBMIT_TIMEOUT' WHERE gfacID='$gfacID'";
    if ( ! mysqli_query( $gLink, $query ) )
      write_log( "$self: Query failed $query - " . mysqli_error( $gLink ) );

    update_queue_messages( $message );
    update_db( $message );
    return;
  }

  // Within the first 24 hours: ask GFAC first, the local cluster otherwise
  $job_status = get_gfac_status( $gfacID );
  if ( $job_status === false )
    $job_status = get_local_status( $gfacID );

  if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
    return;

  $still_queued = in_array( $job_status,
                            array( 'SUBMITTED', 'INITIALIZED', 'PENDING' ) );
  if ( ! $still_queued )
  {
    write_log( "$loghdr submitted:job_status=$job_status" );
    update_job_status( $job_status, $gfacID );
  }
}
258
// Handle an entry whose status is SUBMIT_TIMEOUT.
//   $updatetime - Unix timestamp of the entry
// The live status is looked up first; anything other than a submitted-type
// state is stored via update_job_status().  If the job is still queued and
// the entry is at least 24 hours old, it is marked FAILED, the admin is
// mailed and the message is recorded in both databases.
function submit_timeout( $updatetime )
{
  global $self, $gLink, $gfacID, $loghdr;

  $job_status = get_gfac_status( $gfacID );
  if ( $job_status === false )
    $job_status = get_local_status( $gfacID );

  if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
    return;

  $still_queued = in_array( $job_status,
                            array( 'SUBMITTED', 'INITIALIZED', 'PENDING' ) );
  if ( ! $still_queued )
  {
    update_job_status( $job_status, $gfacID );
    return;
  }

  // < 24 hours ago ( 48 total submitted )
  if ( time() - $updatetime < 86400 ) return;

  $message = "Job listed submitted longer than 48 hours";
  write_log( "$self: $message - id: $gfacID" );
  mail_to_admin( "hang", "$message - id: $gfacID" );

  $query = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";
  if ( ! mysqli_query( $gLink, $query ) )
    write_log( "$self: Query failed $query - " . mysqli_error( $gLink ) );

  update_queue_messages( $message );
  update_db( $message );
}
294
// Handle an entry whose status is one of the running-type states.
//   $updatetime - Unix timestamp of the entry
// get_us3_data() is always called first (it refreshes the global
// $updateTime).  Younger than 10 minutes: nothing more is done.  Younger
// than 24 hours: the live status is looked up and stored unless it is still
// a running-type state.  Older: the entry becomes RUN_TIMEOUT, the admin is
// mailed and the message is recorded in both databases.
function running( $updatetime )
{
  global $self, $gLink, $gfacID, $loghdr;

  $started_check = time();

  get_us3_data();

  $age = $started_check - $updatetime;

  if ( $age < 600 ) return;            // message received < 10 minutes ago

  if ( $age >= 86400 )                 // Past the first 24 hours
  {
    $message = "Job listed running longer than 24 hours";
    write_log( "$self: $message - id: $gfacID" );
    mail_to_admin( "hang", "$message - id: $gfacID" );

    $query = "UPDATE analysis SET status='RUN_TIMEOUT' WHERE gfacID='$gfacID'";
    if ( ! mysqli_query( $gLink, $query ) )
      write_log( "$self: Query failed $query - " . mysqli_error( $gLink ) );

    update_queue_messages( $message );
    update_db( $message );
    return;
  }

  // Within the first 24 hours: ask GFAC first, the local cluster otherwise
  $job_status = get_gfac_status( $gfacID );
  if ( $job_status === false )
    $job_status = get_local_status( $gfacID );

  if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
    return;

  $still_running = in_array( $job_status,
                             array( 'ACTIVE', 'RUNNING', 'STARTED' ) );
  if ( ! $still_running )
    update_job_status( $job_status, $gfacID );
}
334
// Handle an entry whose status is RUN_TIMEOUT.
//   $updatetime - Unix timestamp of the entry
// The live status is looked up first; anything other than a running-type
// state is stored via update_job_status().  If the job is still running and
// the entry is at least 48 hours old, it is marked FAILED, the admin is
// mailed and the message is recorded in both databases.
function run_timeout( $updatetime )
{
  global $self, $gLink, $gfacID, $loghdr;

  $job_status = get_gfac_status( $gfacID );
  if ( $job_status === false )
    $job_status = get_local_status( $gfacID );

  if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
    return;

  $still_running = in_array( $job_status,
                             array( 'ACTIVE', 'RUNNING', 'STARTED' ) );
  if ( ! $still_running )
  {
    update_job_status( $job_status, $gfacID );
    return;
  }

  $checked_at = time();

  get_us3_data();

  if ( $checked_at - $updatetime < 172800 ) return;   // < 48 hours ago

  $message = "Job listed running longer than 48 hours";
  write_log( "$self: $message - id: $gfacID" );
  mail_to_admin( "hang", "$message - id: $gfacID" );

  $query = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";
  if ( ! mysqli_query( $gLink, $query ) )
    write_log( "$self: Query failed $query - " . mysqli_error( $gLink ) );

  update_queue_messages( $message );
  update_db( $message );
}
372
// Handle an entry whose status is DATA or RESULTS_GEN.
//   $updatetime - Unix timestamp of the entry
// During the first hour the live status is looked up; anything other than
// DATA is stored via update_job_status(), otherwise the outputs are
// re-requested, but only when the current minute is a multiple of 5.  After
// one hour the entry becomes DATA_TIMEOUT, the admin is mailed and the
// message is recorded in both databases.
function wait_data( $updatetime )
{
  global $self, $gLink, $gfacID, $loghdr;

  if ( time() - $updatetime >= 3600 )  // Past the first hour
  {
    $message = "Waiting for data longer than 1 hour";
    write_log( "$self: $message - id: $gfacID" );
    mail_to_admin( "hang", "$message - id: $gfacID" );

    $query = "UPDATE analysis SET status='DATA_TIMEOUT' WHERE gfacID='$gfacID'";
    if ( ! mysqli_query( $gLink, $query ) )
      write_log( "$self: Query failed $query - " . mysqli_error( $gLink ) );

    update_queue_messages( $message );
    update_db( $message );
    return;
  }

  // Within the first hour: ask GFAC first, the local cluster otherwise
  $job_status = get_gfac_status( $gfacID );
  if ( $job_status === false )
    $job_status = get_local_status( $gfacID );

  if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
    return;

  if ( $job_status != 'DATA' )
  {
    update_job_status( $job_status, $gfacID );
    return;
  }

  // Request to resend data, but only request every 5 minutes
  $minute = (int) date( 'i' );
  if ( $minute % 5 != 0 ) return;

  $output_status = get_gfac_outputs( $gfacID );

  if ( $output_status !== false )
    mail_to_admin( "debug", "wait_data/$gfacID/$output_status" );
}
420
// Handle an entry whose status is DATA_TIMEOUT.
//   $updatetime - Unix timestamp of the entry
// The live status is looked up first; anything other than DATA is stored via
// update_job_status().  While the entry is younger than 24 hours the outputs
// are re-requested when the current minute is a multiple of 15.  After that
// the entry is marked FAILED, the admin is mailed and the message is
// recorded in both databases.
function data_timeout( $updatetime )
{
  global $self, $gLink, $gfacID, $loghdr;

  $job_status = get_gfac_status( $gfacID );
  if ( $job_status === false )
    $job_status = get_local_status( $gfacID );

  if ( $job_status == 'GFAC_STATUS_UNAVAILABLE' )
    return;

  if ( $job_status != 'DATA' )
  {
    update_job_status( $job_status, $gfacID );
    return;
  }

  if ( time() - $updatetime >= 86400 )  // 24 hours or more ago
  {
    $message = "Waiting for data longer than 24 hours";
    write_log( "$self: $message - id: $gfacID" );
    mail_to_admin( "hang", "$message - id: $gfacID" );

    $query = "UPDATE analysis SET status='FAILED' WHERE gfacID='$gfacID'";
    if ( ! mysqli_query( $gLink, $query ) )
      write_log( "$self: Query failed $query - " . mysqli_error( $gLink ) );

    update_queue_messages( $message );
    update_db( $message );
    return;
  }

  // Request to resend data, but only request every 15 minutes
  $minute = (int) date( 'i' );
  if ( $minute % 15 != 0 ) return;

  $output_status = get_gfac_outputs( $gfacID );

  if ( $output_status !== false )
    mail_to_admin( "debug", "data_timeout/$gfacID/$output_status" );
}
468
// Handler for completed entries; the only work is the shared cleanup()
function complete()
{
  cleanup();
}
474
// Handler for failed or cancelled entries; the only work is the shared
// cleanup()
function failed()
{
  cleanup();
}
480
// Hand the current entry (global $gfacID / $us3_db) to the matching cleanup
// routine.  Nothing happens when the entry is no longer in the analysis
// table or when no request ID is found in the us3 database.  Airavata jobs
// go to aira_cleanup(), but only when job and script are both devel or both
// non-devel; every other job goes to gfac_cleanup().
function cleanup()
{
  global $self, $gLink, $gfacID, $us3_db, $loghdr, $class_dir;

  // Double check that the gfacID exists
  $query  = "SELECT count(*) FROM analysis WHERE gfacID='$gfacID'";
  $result = mysqli_query( $gLink, $query );

  if ( ! $result )
  {
    write_log( "$self: Query failed $query - " . mysqli_error( $gLink ) );
    mail_to_admin( "fail", "Query failed $query\n" . mysqli_error( $gLink ) );
    return;
  }

  list( $count ) = mysqli_fetch_array( $result );

  if ( $count == 0 ) return;

  // Now check the us3 instance
  $requestID = get_us3_data();

  if ( $requestID == 0 ) return;

  $me_devel = preg_match( "/class_devel/", $class_dir );
  $me_local = preg_match( "/class_local/", $class_dir );

  if ( ! preg_match( "/US3-A/i", $gfacID ) )
  {  // Non-airavata job: clean up in a non-aira way
    write_log( "$loghdr calling gfac_cleanup() reqID=$requestID" );
    gfac_cleanup( $us3_db, $requestID, $gLink );
    return;
  }

  // Airavata job: clean up only if job and server are of the same type
  $job_devel = preg_match( "/US3-ADEV/i", $gfacID );

  if ( (bool) $me_devel == (bool) $job_devel )
    aira_cleanup( $us3_db, $requestID, $gLink );
}
532
// Function to update status of job
//   $job_status - status string reported for the job
//   $gfacID     - ID of the analysis entry to update
// Maps the reported status to the value stored in analysis.status, runs the
// UPDATE and, unless the message is 'NONE', records the message through
// update_queue_messages() and update_db().
//
// 'EXECUTING' only records a message and leaves the stored status alone.
// It must not run any query: $query is a global, so executing it here would
// repeat whatever statement was issued last (possibly for another job).
function update_job_status( $job_status, $gfacID )
{
  global $gLink;
  global $query;
  global $self;
  global $loghdr;

  $new_status = null;   // null means: leave analysis.status untouched

  switch ( $job_status )
  {
    case 'SUBMITTED'   :
    case 'SUBMITED'    :
    case 'INITIALIZED' :
    case 'UPDATING'    :
    case 'PENDING'     :
      $new_status = 'SUBMITTED';
      $message    = "Job status request reports job is SUBMITTED";
      break;

    case 'STARTED' :
    case 'RUNNING' :
    case 'ACTIVE'  :
      $new_status = 'RUNNING';
      $message    = "Job status request reports job is RUNNING";
      break;

    case 'EXECUTING' :
      $message    = "Job status request reports job is EXECUTING";
      break;

    case 'FINISHED' :
      $new_status = 'FINISHED';
      $message    = "NONE";
      break;

    case 'DONE' :
      $new_status = 'DONE';
      $message    = "NONE";
      break;

    case 'COMPLETED' :
    case 'COMPLETE'  :
      $new_status = 'COMPLETE';
      $message    = "Job status request reports job is COMPLETED";
      break;

    case 'DATA' :
      $new_status = 'DATA';
      $message    = "Job status request reports job is COMPLETE, waiting for data";
      break;

    case 'CANCELED'  :
    case 'CANCELLED' :
      $new_status = 'CANCELED';
      $message    = "Job status request reports job is CANCELED";
      break;

    case 'FAILED' :
      $new_status = 'FAILED';
      $message    = "Job status request reports job is FAILED";
      break;

    case 'UNKNOWN' :
      write_log( "$loghdr job_status='UNKNOWN', reset to 'ERROR' " );
      $new_status = 'ERROR';
      $message    = "Job status request reports job is not in the queue";
      break;

    default :
      // We shouldn't ever get here
      $new_status = 'ERROR';
      $message    = "Job status was not recognized - $job_status";
      write_log( "$loghdr update_job_status: " .
                 "Job status was not recognized - $job_status\n" .
                 "gfacID = $gfacID\n" );
      break;
  }

  if ( $new_status !== null )
  {
    $query  = "UPDATE analysis SET status='$new_status' WHERE gfacID='$gfacID'";
    $result = mysqli_query( $gLink, $query );

    if ( ! $result )
      write_log( "$loghdr Query failed $query - " . mysqli_error( $gLink ) );
  }

  if ( $message != 'NONE' )
  {
    update_queue_messages( $message );
    update_db( $message );
  }
}
622
// Look up the current entry (global $gfacID) in the HPCAnalysisResult table
// of the us3 database named by the global $us3_db.
// Side effect: the global $updateTime is overwritten with the row's
// UNIX_TIMESTAMP(updateTime).
// Returns the HPCAnalysisRequestID of the row, or 0 when the connection or
// the query fails.  When no row matches, both the return value and
// $updateTime end up null.
function get_us3_data()
{
  global $self;
  global $gfacID;
  global $dbhost;
  global $user;
  global $passwd;
  global $us3_db;
  global $updateTime;
  global $loghdr;

  $us3_link = mysqli_connect( $dbhost, $user, $passwd, $us3_db );

  if ( ! $us3_link )
  {
    // The password is deliberately kept out of the log
    write_log( "$loghdr could not connect: $dbhost, $user, $us3_db" );
    mail_to_admin( "fail", "Could not connect to $dbhost : $us3_db" );
    return 0;
  }

  $query  = "SELECT HPCAnalysisRequestID, UNIX_TIMESTAMP(updateTime) " .
            "FROM HPCAnalysisResult WHERE gfacID='$gfacID'";
  $result = mysqli_query( $us3_link, $query );

  if ( ! $result )
  {
    // Read the error text before closing; the link is unusable afterwards
    $errmsg = mysqli_error( $us3_link );
    mysqli_close( $us3_link );

    write_log( "$self: Query failed $query - " . $errmsg );
    mail_to_admin( "fail", "Query failed $query\n" . $errmsg );
    return 0;
  }

  list( $requestID, $updateTime ) = mysqli_fetch_array( $result );
  mysqli_close( $us3_link );

  return $requestID;
}
659
// Function to determine if this is a gfac job or not
//   $gfacID - job ID string to classify
// Returns true when the ID starts with "US3-Experiment" (any letter case) or
// is exactly "US3-" followed by an 8-4-4-4-12 group of hex digits; false
// otherwise.
function is_gfac_job( $gfacID )
{
  if ( preg_match( "/^US3-Experiment/i", $gfacID ) )
    return true;

  $h = "[0-9a-fA-F]";
  $uuid_form = "/^US3-" . $h . "{8}-" . $h . "{4}-" . $h . "{4}-" .
               $h . "{4}-" . $h . '{12}$/';

  if ( preg_match( $uuid_form, $gfacID ) )
    return true;

  // Then it's not a GFAC job
  return false;
}
673
// Function to determine if this is an airavata/thrift job or not
//   $gfacID - job ID string to classify
// Returns true when "US3-A" occurs anywhere in the ID (any letter case),
// false otherwise.
function is_aira_job( $gfacID )
{
  global $cluster;

  $matched = preg_match( "/US3-A/i", $gfacID );

  return $matched ? true : false;
}
687
// Function to get the current job status from GFAC
//   $gfacID - job ID string
// Returns:
//   - for Airavata jobs: the experiment status passed through
//     standard_status() (EXECUTING is first turned into ACTIVE or QUEUED
//     depending on the global $status_gw); the global $status_ex is updated
//   - false when the ID is neither an Airavata nor a GFAC job
//   - 'GFAC_STATUS_UNAVAILABLE' when the HTTP request throws
//   - otherwise the status parsed from the service response, with DONE,
//     DATA and RESULTS_GEN all reported as 'DATA'
function get_gfac_status( $gfacID )
{
  global $serviceURL, $self, $loghdr, $cluster;
  global $status_ex, $status_gw;

  if ( is_aira_job( $gfacID ) )
  {
    $status_ex = getExperimentStatus( $gfacID );

    if ( $status_ex == 'EXECUTING' )
      $status_ex = ( $status_gw == 'RUNNING' ) ? 'ACTIVE' : 'QUEUED';

    return standard_status( $status_ex );
  }

  if ( ! is_gfac_job( $gfacID ) )
    return false;

  $url = "$serviceURL/jobstatus/$gfacID";
  try
  {
    $request  = new HttpRequest( $url, HttpRequest::METH_GET );
    $response = $request->send();
    $xml      = $request->getResponseBody();
  }
  catch ( HttpException $e )
  {
    write_log( "$loghdr Status not available - marking failed - $gfacID" );
    return 'GFAC_STATUS_UNAVAILABLE';
  }

  // Parse the result
  $gfac_status = parse_response( $xml );

  // This may not seem like the best place to do this, but here we have
  // the xml straight from GFAC: report any status that is not expected
  $expected = array( 'SUBMITTED', 'SUBMITED', 'INITIALIZED', 'PENDING',
                     'RUNNING', 'ACTIVE', 'STARTED', 'COMPLETED',
                     'FINISHED', 'DONE', 'DATA', 'RESULTS_GEN',
                     'CANCELED', 'CANCELLED', 'FAILED', 'STAGING',
                     'UNKNOWN' );

  if ( ! in_array( $gfac_status, $expected ) )
  {
    mail_to_admin( 'debug', "gfacID: /$gfacID/\n" .
                            "XML:    /$xml/\n" .
                            "Status: /$gfac_status/\n" );
  }

  switch ( $gfac_status )
  {
    case 'DONE' :
    case 'DATA' :
    case 'RESULTS_GEN' :
      return 'DATA';
  }

  return $gfac_status;
}
763
// Function to request data outputs from GFAC
//   $gfacID - job ID string
// Returns:
//   - the local cluster status when get_gfac_status() says the ID is not a
//     GFAC job
//   - false when the job status is not DONE/FAILED/COMPLETE/FINISHED or the
//     HTTP request throws
//   - otherwise the status parsed from the "registeroutput" response
// NOTE(review): get_gfac_status() reports DONE as 'DATA' for GFAC jobs, so
// the 'DONE' entry in the list below may never match - confirm.
function get_gfac_outputs( $gfacID )
{
  global $serviceURL, $self;

  // Make sure it's a GFAC job and status is appropriate for this call
  $job_status = get_gfac_status( $gfacID );

  if ( $job_status === false )
  {
    // Then it's not a GFAC job
    return get_local_status( $gfacID );
  }

  $may_request = in_array( $job_status,
                           array( 'DONE', 'FAILED', 'COMPLETE', 'FINISHED' ) );
  if ( ! $may_request )
  {
    // Then it's not appropriate to request data
    return false;
  }

  $url = "$serviceURL/registeroutput/$gfacID";
  try
  {
    $request  = new HttpRequest( $url, HttpRequest::METH_GET );
    $response = $request->send();
    $xml      = $request->getResponseBody();
  }
  catch ( HttpException $e )
  {
    write_log( "$self: Data not available - request failed - $gfacID" );
    return false;
  }

  // Temporary, to see what the xml looks like, if we ever get one
  mail_to_admin( "debug", "get_gfac_outputs/\n$xml/" );

  // Parse the result
  return parse_response( $xml );
}
805
// Extract the job status from a service response.
//   $xml - response body (XML text)
// Returns the text of the last <status> element, or "" when there is none.
// Side effect: the global $gfac_message is reset to "" and then set to the
// last text node found inside any element other than <status>.
// An empty or non-string body yields "" without touching the XML parser,
// because XMLReader::xml() rejects an empty source.
function parse_response( $xml )
{
  global $gfac_message;

  $status       = "";
  $gfac_message = "";

  if ( ! is_string( $xml ) || $xml === "" )
    return $status;

  $parser = new XMLReader();

  if ( ! $parser->xml( $xml ) )
    return $status;

  $name = "";   // Name of the most recently opened element

  while ( $parser->read() )
  {
    $type = $parser->nodeType;

    if ( $type == XMLReader::ELEMENT )
      $name = $parser->name;

    else if ( $type == XMLReader::TEXT )
    {
      if ( $name == "status" )
        $status = $parser->value;
      else
        $gfac_message = $parser->value;
    }
  }

  $parser->close();
  return $status;
}
835
// Function to get status from local cluster
//   $gfacID - job ID as known to the cluster's queue
// Runs "squeue" (cluster name contains "jetstream") or "qstat" for the job,
// through ssh unless the cluster name contains "us3iab", and maps the state
// column of the last output line to ACTIVE, COMPLETED, SUBMITTED, CANCELED,
// FAILED or UNKNOWN.
// When ssh reports an "ssh_exchange_identification" error the command is
// re-run up to 3 times, waiting 2, 4 and 8 seconds before the retries.
// NOTE(review): $gfacID and $cluster go into the shell command unescaped;
// this is only safe if both are trusted values - confirm against callers.
function get_local_status( $gfacID )
{
  global $cluster;
  global $self;

  $is_jetstr = preg_match( "/jetstream/", $cluster );
  if ( $is_jetstr )
    $cmd = "squeue -j $gfacID 2>&1|tail -n 1";
  else
    $cmd = "/usr/bin/qstat -a $gfacID 2>&1|tail -n 1";

  if ( ! preg_match( "/us3iab/", $cluster ) )
  {
    $system = "$cluster.uthscsa.edu";
    if ( $is_jetstr )
      $system = "$cluster";
    $system = preg_replace( "/\-local/", "", $system );
    $cmd    = "/usr/bin/ssh -x us3@$system " . $cmd;
  }

  $result = exec( $cmd );

  $secwait = 2;
  $num_try = 0;
  // Sleep and retry up to 3 times if ssh has "ssh_exchange_identification" error
  while ( preg_match( "/ssh_exchange_id/", $result ) && $num_try < 3 )
  {
    sleep( $secwait );
    $num_try++;
    $secwait *= 2;
    write_log( "$self: num_try=$num_try secwait=$secwait" );

    // Actually repeat the command; without this the loop would only sleep
    $result = exec( $cmd );
  }

  if ( preg_match( "/^qstat: Unknown/", $result )  ||
       preg_match( "/ssh_exchange_id/", $result ) )
  {
    write_log( "$self get_local_status: Local job $gfacID unknown result=$result" );
    return 'UNKNOWN';
  }

  // The state is the 6th column of squeue output and the 10th of qstat -a;
  // a short or empty line is treated as an empty state
  $values = preg_split( "/\s+/", $result );
  $column = $is_jetstr ? 5 : 9;
  $jstat  = isset( $values[ $column ] ) ? $values[ $column ] : "";

  switch ( $jstat )
  {
    case "W" :                      // Waiting for execution time to be reached
    case "E" :                      // Job is exiting after having run
    case "R" :                      // Still running
    case "CG" :                     // Job is completing
      $status = 'ACTIVE';
      break;

    case "C" :                      // Job has completed
    case "ST" :                     // Job has disappeared
    case "CD" :                     // Job has completed
      $status = 'COMPLETED';
      break;

    case "T" :                      // Job is being moved
    case "H" :                      // Held
    case "Q" :                      // Queued
    case "PD" :                     // Queued
    case "CF" :                     // Queued
      $status = 'SUBMITTED';
      break;

    case "CA" :                     // Job has been canceled
      $status = 'CANCELED';
      break;

    case "F" :                      // Job has failed
    case "BF" :                     // Job has failed
    case "NF" :                     // Job has failed
    case "TO" :                     // Job has timed out
    case "" :                       // Job has disappeared
      $status = 'FAILED';
      break;

    default :
      $status = 'UNKNOWN';          // This should not occur
      break;
  }

  return $status;
}
927
// Record a message for the current entry (global $gfacID) in the
// queue_messages table of the GFAC database.
//   $message - text to store; it is escaped before being put in the query
// The analysis row id is looked up first; query failures are logged and
// otherwise ignored.
function update_queue_messages( $message )
{
  global $self, $gLink, $gfacID;

  // Get analysis table ID
  $query  = "SELECT id FROM analysis " .
            "WHERE gfacID = '$gfacID' ";
  $result = mysqli_query( $gLink, $query );

  if ( ! $result )
  {
    write_log( "$self: Query failed $query - " . mysqli_error( $gLink ) );
    return;
  }

  list( $analysisID ) = mysqli_fetch_array( $result );

  // Insert message into queue_message table
  $escaped = mysqli_real_escape_string( $gLink, $message );
  $query   = "INSERT INTO queue_messages SET " .
             "message = '" . $escaped . "', " .
             "analysisID = '$analysisID' ";

  if ( ! mysqli_query( $gLink, $query ) )
    write_log( "$self: Query failed $query - " . mysqli_error( $gLink ) );
}
956
// Store a message as HPCAnalysisResult.lastMessage for the current entry
// (global $gfacID) in the us3 database named by the global $us3_db.
//   $message - text to store; it is escaped before being put in the query
// Returns 0 when the connection fails; otherwise nothing is returned.
// A failed UPDATE is logged.
function update_db( $message )
{
  global $self;
  global $gfacID;
  global $dbhost;
  global $user;
  global $passwd;
  global $us3_db;

  $us3_link = mysqli_connect( $dbhost, $user, $passwd, $us3_db );

  if ( ! $us3_link )
  {
    // The password is deliberately kept out of the log
    write_log( "$self: could not connect: $dbhost, $user, $us3_db" );
    mail_to_admin( "fail", "Could not connect to $dbhost : $us3_db" );
    return 0;
  }

  $query = "UPDATE HPCAnalysisResult SET " .
           "lastMessage='" . mysqli_real_escape_string( $us3_link, $message ) . "' " .
           "WHERE gfacID = '$gfacID' ";

  if ( ! mysqli_query( $us3_link, $query ) )
    write_log( "$self: Query failed $query - " . mysqli_error( $us3_link ) );

  mysqli_close( $us3_link );
}
982
// Mail a notification to the address in the global $admin_email.
//   $type - kind of notification as given by the caller ("fail", "hang",
//           "debug"); it is not used in the mail itself
//   $msg  - text appended to the body as "Error Message"
// The body also reports the globals $updateTime, $status and $cluster.
function mail_to_admin( $type, $msg )
{
  global $updateTime;
  global $status;
  global $cluster;
  global $org_name;
  global $admin_email;
  global $dbhost;
  global $requestID;

  $headers  = "From: $org_name Admin<$admin_email>"     . "\n";
  $headers .= "Cc: $org_name Admin<$admin_email>"       . "\n";
  $headers .= "Bcc: Gary Gorbet<gegorbet@gmail.com>"    . "\n";   // make sure

  // Set the reply address
  $headers .= "Reply-To: $org_name<$admin_email>"      . "\n";
  $headers .= "Return-Path: $org_name<$admin_email>"   . "\n";

  // Try to avoid spam filters
  $now = time();

  // The whole identifier has to sit between "<" and ">"; a request ID, when
  // one is set, therefore goes in front of the "@" rather than after the ">"
  $msgid = $now . "gridctl";
  if ( strlen( (string) $requestID ) > 0 )
    $msgid .= "." . $requestID;

  $headers .= "Message-ID: <" . $msgid . "@$dbhost>\n";
  $headers .= "X-Mailer: PHP v" . phpversion()          . "\n";
  $headers .= "MIME-Version: 1.0"                       . "\n";
  $headers .= "Content-Transfer-Encoding: 8bit"         . "\n";

  $subject = "US3 Error Notification";
  $message = "
 UltraScan job error notification from gridctl.php:

 Update Time : $updateTime [ now=$now ]
 GFAC Status : $status
 Cluster : $cluster
 ";

  $message .= "Error Message : $msg\n";

  mail( $admin_email, $subject, $message, $headers );
}
1021
// Convert a status string to one of the standard DB status strings
//   $status_in - status string in any of the known variations
// Returns the mapped standard value; a value that is not listed below is
// returned unchanged.
function standard_status( $status_in )
{
  switch ( $status_in )
  {  // Map variations to standard gateway status values
    case 'QUEUED' :
    case 'LAUNCHED' :
    case 'CREATED' :
    case 'VALIDATED' :
    case 'SCHEDULED' :
    case 'submitted' :
    case 'SUBMITTED' :
    case '' :
      return 'SUBMITTED';

    case 'EXECUTING' :
    case 'ACTIVE' :
    case 'running' :
    case 'executing' :
      return 'RUNNING';

    case 'PENDING' :
    case 'CANCELING' :
      return 'UPDATING';

    case 'CANCELLED' :
    case 'canceled' :
      return 'CANCELED';

    case 'COMPLETED' :
    case 'completed' :
      return 'COMPLETE';

    case 'FAILED_DATA' :
    case 'SUBMIT_TIMEOUT' :
    case 'RUN_TIMEOUT' :
    case 'DATA_TIMEOUT' :
      return 'FAILED';

    case 'COMPLETE' :
      return 'DONE';

    case 'UNKNOWN' :
      return 'ERROR';
  }

  // Already a standard value (ERROR, RUNNING, UPDATING, CANCELED, DATA,
  // FAILED, DONE, FINISHED) or not recognized at all: retain value
  return $status_in;
}
1095
// Work out the current status of an Airavata/Thrift job.
//   $gfacID    - job ID string
//   $status_in - status currently stored for the job (gateway status)
// Returns $status_in unchanged unless the ID contains "US3-A" and job and
// script are both devel or both non-devel.  Otherwise the experiment status
// is fetched and combined with the gateway status:
//   experiment COMPLETED, gateway FINISHED/DONE -> COMPLETE
//   experiment COMPLETED, anything else         -> DONE
//   gateway FINISHED/DONE                       -> kept, except that an
//       experiment status of FAILED is re-read (up to two 10-second waits,
//       for logging) and the result is then set to COMPLETE
//   experiment EXECUTING                        -> standardized gateway status
//   anything else                               -> standardized experiment status
// When the result differs from $status_in it is stored through
// update_job_status().
function aira_status( $gfacID, $status_in )
{
  global $self;
  global $loghdr;
  global $class_dir;

  $status_gw = $status_in;
  $status    = $status_gw;
  $me_devel  = preg_match( "/class_devel/", $class_dir );
  $job_devel = preg_match( "/US3-ADEV/i", $gfacID );
  $devmatch  = ( (bool) $me_devel == (bool) $job_devel );

  if ( ! preg_match( "/US3-A/i", $gfacID ) || ! $devmatch )
    return $status;

  $status_ex   = getExperimentStatus( $gfacID );
  $gw_finished = ( $status_gw == 'FINISHED' || $status_gw == 'DONE' );

  if ( $status_ex == 'COMPLETED' )
  {  // Experiment is COMPLETED: COMPLETE if gateway is FINISHED/DONE, else DONE
    $status = $gw_finished ? 'COMPLETE' : 'DONE';
  }

  else if ( $gw_finished )
  {  // Gfac status == FINISHED/DONE: leave as is (unless FAILED)
    $status = $status_gw;

    if ( $status_ex == 'FAILED' )
    {
      sleep( 10 );
      $status_ex = getExperimentStatus( $gfacID );

      if ( $status_ex == 'FAILED' )
      {
        write_log( "$loghdr status still 'FAILED' after 10-second delay" );
        sleep( 10 );
        $status_ex = getExperimentStatus( $gfacID );

        if ( $status_ex != 'FAILED' )
          write_log( "$loghdr status is $status_ex after 20-second delayed retry" );
        else
          write_log( "$loghdr status still 'FAILED' after 20-second delay" );
      }

      write_log( "$loghdr status reset to 'COMPLETE'" );
      $status = 'COMPLETE';
    }
  }

  else if ( $status_ex == 'EXECUTING' )
  {
    $status = standard_status( $status_gw );
    write_log( "$loghdr status/_in/_gw/_ex=$status/$status_in/$status_gw/$status_ex" );
  }

  else
  {  // Experiment not COMPLETED/FINISHED/DONE: use experiment status
    $status = standard_status( $status_ex );
  }

  if ( $status != $status_gw )
    update_job_status( $status, $gfacID );

  return $status;
}
1173
1174?>
Note: See TracBrowser for help on using the repository browser.