Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

direct/src/autorestart/autorestart.c

Go to the documentation of this file.
00001 /* Filename: autorestart.c
00002  * Created by:  drose (05Sep02)
00003  *
00004  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
00005  *
00006  * PANDA 3D SOFTWARE
00007  * Copyright (c) 2001, Disney Enterprises, Inc.  All rights reserved
00008  *
00009  * All use of this software is subject to the terms of the Panda 3d
00010  * Software license.  You should have received a copy of this license
00011  * along with this source code; you will also find a current copy of
00012  * the license at http://www.panda3d.org/license.txt .
00013  *
00014  * To contact the maintainers of this program write to
00015  * panda3d@yahoogroups.com .
00016  *
00017  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
00018 
00019 #include "dtool_config.h"
00020 
00021 #ifndef HAVE_GETOPT
00022 #include "gnu_getopt.h"
00023 #else
00024 #include <getopt.h>
00025 #endif
00026 
00027 #include <stdio.h>
00028 #include <errno.h>
00029 #include <string.h>  /* for strerror */
00030 #include <unistd.h>
00031 #include <sys/types.h>
00032 #include <sys/wait.h>
00033 #include <sys/stat.h>
00034 #include <fcntl.h>
00035 #include <time.h>
00036 #include <signal.h>
00037 #include <stdlib.h>
00038 
/* The command line of the program to run, as a NULL-terminated
   argument vector.  main() points this into its own argv. */
char **params = NULL;

/* The filename given with -l, or NULL if no logfile was requested. */
char *logfile_name = NULL;

/* The descriptor main() opened on logfile_name, or -1 if there is no
   logfile. */
int logfile_fd = -1;

/* Set to 1 by the -t switch.  When it is zero, spawn_process() always
   asks for a respawn; when it is nonzero, a zero exit status or a
   TERM/KILL signal stops the respawning. */
int stop_on_terminate = 0;

/* The pid of the child we are currently waiting on, or 0 when there
   is none.  This is written by the main flow and read from within the
   SIGTERM handler, so it is volatile to keep the compiler from caching
   it across the point where the signal may arrive. */
volatile pid_t child_pid = 0;

/* Size of the buffers that receive strftime() output. */
#define TIME_BUFFER_SIZE 128

/* We shouldn't respawn more than (COUNT_RESPAWN - 1) times over
   COUNT_RESPAWN_TIME seconds. */
#define COUNT_RESPAWN 5
#define COUNT_RESPAWN_TIME 30
00052 
00053 void
00054 exec_process() {
00055   /* First, output the command line to the log file. */
00056   char **p;
00057   for (p = params; *p != NULL; ++p) {
00058     fprintf(stderr, "%s ", *p);
00059   }
00060   fprintf(stderr, "\n");
00061   execvp(params[0], params);
00062   fprintf(stderr, "Cannot exec %s: %s\n", params[0], strerror(errno));
00063 
00064   /* Exit with a status of 0, to indicate to the parent process that
00065      we should stop. */
00066   exit(0); 
00067 }
00068 
00069 int
00070 spawn_process() {
00071   /* Spawns the child process.  Returns true if the process terminated
00072      by itself and should be respawned, false if it was explicitly
00073      killed (or some other error condition exists), and it should not
00074      respawn any more. */
00075   pid_t wresult;
00076   int status;
00077 
00078   child_pid = fork();
00079   if (child_pid < 0) {
00080     /* Fork error. */
00081     perror("fork");
00082     return 0;
00083   }
00084 
00085   if (child_pid == 0) {
00086     /* Child.  Exec the process. */
00087     fprintf(stderr, "Child pid is %d.\n", getpid());
00088     exec_process();
00089     /* Shouldn't get here. */
00090     exit(1);
00091   }
00092 
00093   /* Parent.  Wait for the child to terminate, then diagnose the reason. */
00094   wresult = waitpid(child_pid, &status, 0);
00095   if (wresult < 0) {
00096     perror("waitpid");
00097     return 0;
00098   }
00099 
00100   /* Now that we've returned from waitpid, clear the child pid number
00101      so our signal handler doesn't get too confused. */
00102   child_pid = 0;
00103 
00104   if (WIFSIGNALED(status)) {
00105     int signal = WTERMSIG(status);
00106     fprintf(stderr, "\nprocess caught signal %d.\n\n", signal);
00107     /* A signal exit is a reason to respawn unless the signal is TERM
00108        or KILL. */
00109     return !stop_on_terminate || (signal != SIGTERM && signal != SIGKILL);
00110 
00111   } else {
00112     int exit_status = WEXITSTATUS(status);
00113     fprintf(stderr, "\nprocess exited with status %d.\n\n", WEXITSTATUS(status));
00114     /* Normal exit is a reason to respawn if the status indicates failure. */
00115     return !stop_on_terminate || (exit_status != 0);
00116   }
00117 }
00118 
00119 void
00120 sigterm_handler() {
00121   pid_t wresult;
00122   int status;
00123   time_t now;
00124   char time_buffer[TIME_BUFFER_SIZE];
00125 
00126   now = time(NULL);
00127   strftime(time_buffer, TIME_BUFFER_SIZE, "%T on %A, %d %b %Y", localtime(&now));
00128 
00129   fprintf(stderr, "\nsigterm caught at %s; shutting down.\n", time_buffer);
00130   if (child_pid == 0) {
00131     fprintf(stderr, "no child process.\n\n");
00132 
00133   } else {
00134     kill(child_pid, SIGTERM);
00135 
00136     wresult = waitpid(child_pid, &status, 0);
00137     if (wresult < 0) {
00138       perror("waitpid");
00139     } else {
00140       fprintf(stderr, "child process terminated.\n\n");
00141     }
00142   }
00143   exit(1);
00144 }
00145 
00146 void
00147 do_autorestart() {
00148   char time_buffer[TIME_BUFFER_SIZE];
00149   time_t now;
00150   time_t count_respawn[COUNT_RESPAWN];
00151   int cri, num_cri;
00152   struct sigaction sa;
00153 
00154   /* Make our process its own process group. */
00155   setpgid(0, 0);
00156 
00157   /* Set up a signal handler to trap SIGTERM. */
00158   sa.sa_handler = sigterm_handler;
00159   sigemptyset(&sa.sa_mask);
00160   sa.sa_flags = 0;
00161   if (sigaction(SIGTERM, &sa, NULL) < 0) {
00162     perror("sigaction");
00163   }
00164 
00165   if (logfile_fd >= 0) {
00166     /* If we have a logfile, dup it onto stdout and stderr. */
00167     dup2(logfile_fd, STDOUT_FILENO);
00168     dup2(logfile_fd, STDERR_FILENO);
00169     close(logfile_fd);
00170   } else {
00171     /* Otherwise, close them. */
00172     close(STDOUT_FILENO);
00173     close(STDERR_FILENO);
00174   }
00175 
00176   /* Make sure stdin is closed. */
00177   close(STDIN_FILENO);
00178 
00179   now = time(NULL);
00180   strftime(time_buffer, TIME_BUFFER_SIZE, "%T on %A, %d %b %Y", localtime(&now));
00181   fprintf(stderr, "autorestart begun at %s.\n", time_buffer);
00182 
00183   cri = 1;
00184   num_cri = 1;
00185   count_respawn[1] = now;
00186   
00187   while (spawn_process()) {
00188     now = time(NULL);
00189 
00190     /* Make sure we're not respawning too fast. */
00191     cri = (cri + 1) % COUNT_RESPAWN;
00192     count_respawn[cri] = now;
00193     if (num_cri < COUNT_RESPAWN) {
00194       num_cri++;
00195     } else {
00196       time_t last = count_respawn[(cri + 1) % COUNT_RESPAWN];
00197       if (now - last < COUNT_RESPAWN_TIME) {
00198         fprintf(stderr, "respawning too fast, giving up.\n");
00199         break;
00200       }
00201     }
00202       
00203     strftime(time_buffer, TIME_BUFFER_SIZE, "%T on %A, %d %b %Y", localtime(&now));
00204     fprintf(stderr, "respawning at %s.\n", time_buffer);
00205   }
00206 
00207   now = time(NULL);
00208   strftime(time_buffer, TIME_BUFFER_SIZE, "%T on %A, %d %b %Y", localtime(&now));
00209   fprintf(stderr, "autorestart terminated at %s.\n", time_buffer);
00210   exit(0);
00211 }
00212 
/* Launches the monitor as a grandchild of the calling process, so
   that it is disassociated from the command shell.  The intermediate
   child reports the monitor's pid and exits at once; the caller waits
   for that intermediate child and then returns.

   Returns normally only if the intermediate child exited with status
   0.  On any failure (fork or waitpid error, the intermediate child
   being killed by a signal, or its exiting with a nonzero status,
   which is what happens when the second fork fails) a message is
   printed and the calling process exits with status 1. */
void
double_fork() {
  pid_t child, grandchild, wresult;
  int status;

  /* Fork once, then again, to disassociate the child from the command
     shell process group. */
  child = fork();
  if (child < 0) {
    /* Failure to fork. */
    perror("fork");
    exit(1);
  }

  if (child == 0) {
    /* Child.  Fork again. */
    grandchild = fork();
    if (grandchild < 0) {
      perror("fork");
      exit(1);
    }

    if (grandchild == 0) {
      /* Grandchild.  Begin useful work. */
      do_autorestart();
      /* Shouldn't get here. */
      exit(1);
    }

    /* Child.  Report the new pid, then terminate gracefully. */
    fprintf(stderr, "Spawned, monitoring pid is %d.\n", grandchild);
    exit(0);
  }

  /* Parent.  Wait for the child to terminate, then return. */
  wresult = waitpid(child, &status, 0);
  if (wresult < 0) {
    perror("waitpid");
    exit(1);
  }

  if (WIFSIGNALED(status)) {
    fprintf(stderr, "child caught signal %d unexpectedly.\n", WTERMSIG(status));
    exit(1);
  }

  /* A nonzero exit status means the child could not start the
     monitor, so we must not report success either. */
  if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
    fprintf(stderr, "child exited with status %d.\n", WEXITSTATUS(status));
    exit(1);
  }
}
00263 
/* Writes the one-line command synopsis to stderr.  The synopsis lists
   every switch that main() accepts. */
void
usage() {
  fprintf(stderr,
          "\n"
          "autorestart [-l logfilename] [-t] [-h] program [args . . . ]\n\n");
}
00270 
/* Writes the command synopsis followed by a description of the
   program and its switches to stderr.  The text is careful to tie the
   "do not restart" cases to -t, since without that switch the program
   is restarted no matter how it terminated. */
void
help() {
  usage();
  fprintf(stderr,
          "This program is used to run a program as a background task and\n"
          "automatically restart it whenever it terminates.  It gives up if\n"
          "the program terminates too many times in rapid succession.\n\n"

          "Options:\n\n"

          "  -l logfilename\n"
          "      Send the output of autorestart and of the program to the\n"
          "      indicated file, replacing any previous contents.\n\n"

          "  -t\n"
          "      Do not restart the program after a normal exit or an\n"
          "      explicit user kill, as described below.\n\n"

          "  -h\n"
          "      Print this help text.\n\n"

          "With -t, if the program exits with a status of 0, indicating\n"
          "successful completion, it is not restarted.\n\n"

          "With -t, if the program is terminated via a TERM or KILL signal\n"
          "(e.g. via kill [pid] or kill -9 [pid]), it is assumed the user\n"
          "meant for the process to stop, and it is not restarted.\n\n");
}
00286 
00287 int 
00288 main(int argc, char *argv[]) {
00289   extern char *optarg;
00290   extern int optind;
00291   /* The initial '+' instructs GNU getopt not to reorder switches. */
00292   static const char *optflags = "+l:th";
00293   int flag;
00294 
00295   flag = getopt(argc, argv, optflags);
00296   while (flag != EOF) {
00297     switch (flag) {
00298     case 'l':
00299       logfile_name = optarg;
00300       break;
00301 
00302     case 't':
00303       stop_on_terminate = 1;
00304       break;
00305 
00306     case 'h':
00307       help();
00308       return 1;
00309 
00310     case '?':
00311     case '+':
00312       usage();
00313       return 1;
00314 
00315     default:
00316       fprintf(stderr, "Unhandled switch: -%c\n", flag);
00317       return 1;
00318     }
00319     flag = getopt(argc, argv, optflags);
00320   }
00321 
00322   argc -= (optind - 1);
00323   argv += (optind - 1);
00324 
00325   if (argc < 2) {
00326     fprintf(stderr, "No program to execute given.\n");
00327     usage();
00328     return 1;
00329   }
00330 
00331   params = &argv[1];
00332 
00333   if (logfile_name != NULL) {
00334     logfile_fd = open(logfile_name, O_WRONLY | O_CREAT | O_TRUNC, 0666);
00335     if (logfile_fd < 0) {
00336       fprintf(stderr, "Cannot write to logfile %s: %s\n", 
00337               logfile_name, strerror(errno));
00338       return 1;
00339     }
00340     fprintf(stderr, "Generating output to %s.\n", logfile_name);
00341   }
00342 
00343   double_fork();
00344 
00345   return 0;
00346 }
00347 

Generated on Fri May 2 01:36:51 2003 for Direct by doxygen1.3