--- afd-1.3.4/src/init_afd/init_afd.c 2007-06-05 11:12:03.000000000 +0200 +++ afd-1.3.5pre4/src/init_afd/init_afd.c 2007-06-05 11:14:59.000000000 +0200 @@ -59,6 +59,9 @@ ** 16.05.2002 H.Kiehl Included a heartbeat counter in AFD_ACTIVE_FILE. ** 25.08.2002 H.Kiehl Addition of process rename_log. ** 12.08.2004 H.Kiehl Replace rec() with system_log(). + ** 05.06.2007 H.Kiehl Systems like HPUX require that we open AFD_ACTIVE + ** file with O_SYNC flag, otherwise the pids are + ** not getting to disk. ** */ DESCR__E_M1 @@ -86,7 +89,7 @@ #define NO_OF_SAVED_CORE_FILES 10 #define FULL_DIR_CHECK_INTERVAL 300 /* Every 5 minutes. */ -/* Global definitions */ +/* Global definitions. */ int sys_log_fd = STDERR_FILENO, afd_cmd_fd, afd_resp_fd, @@ -113,7 +116,8 @@ off_t fra_size, fsa_size; #endif -char *p_work_dir, +char *pid_list, + *p_work_dir, afd_status_file[MAX_PATH_LENGTH], afd_active_file[MAX_PATH_LENGTH]; struct afd_status *p_afd_status; @@ -122,7 +126,7 @@ struct proc_table proc_table[NO_OF_PROCESS]; const char *sys_log_name = SYSTEM_LOG_FIFO; -/* local functions */ +/* Local function prototypes. */ static pid_t make_process(char *, char *, sigset_t *); static void afd_exit(void), check_dirs(char *), @@ -172,7 +176,7 @@ CHECK_FOR_VERSION(argc, argv); - /* First get working directory for the AFD */ + /* First get working directory for the AFD. */ if (get_afd_path(&argc, argv, work_dir) < 0) { exit(INCORRECT); @@ -189,7 +193,7 @@ exit(INCORRECT); } - /* Initialise variables */ + /* Initialise variables. */ p_work_dir = work_dir; (void)strcpy(afd_status_file, work_dir); (void)strcat(afd_status_file, FIFO_DIR); @@ -200,8 +204,8 @@ (void)strcpy(afd_file_dir, work_dir); (void)strcat(afd_file_dir, AFD_FILE_DIR); - /* Make sure that no other AFD is running in this directory */ - if (check_afd_heartbeat(30L, YES) == 1) + /* Make sure that no other AFD is running in this directory. */ + if (check_afd_heartbeat(25L, YES) == 1) { (void)fprintf(stderr, "ERROR : Another AFD is already active.\n"); exit(INCORRECT); @@ -246,11 +250,12 @@ } offset -= (sizeof(unsigned int) + 1 + 1); heartbeat = (unsigned int *)(ptr + offset); + pid_list = ptr; shared_shutdown = ptr + offset + sizeof(unsigned int); *shared_shutdown = 0; *heartbeat = 0; - /* Open and create all fifos */ + /* Open and create all fifos. */ init_fifos_afd(); if ((argc == 2) && (argv[1][0] == '-') && (argv[1][1] == 'n') && @@ -263,7 +268,7 @@ daemon_init(AFD); } - /* Now check if all directories needed are created */ + /* Now check if all directories needed are created. */ check_dirs(work_dir); if ((stat(afd_status_file, &stat_buf) == -1) || @@ -314,9 +319,9 @@ if ((ptr = mmap(0, sizeof(struct afd_status), (PROT_READ | PROT_WRITE), MAP_SHARED, fd, 0)) == (caddr_t) -1) #else - /* Start mapper process that emulates mmap() */ + /* Start mapper process that emulates mmap(). */ proc_table[MAPPER_NO].pid = make_process(MAPPER, work_dir, NULL); - log_pid(proc_table[MAPPER_NO].pid, MAPPER_NO + 1); + *(pid_t *)(pid_list + ((MAPPER_NO + 1) * sizeof(pid_t))) = proc_table[MAPPER_NO].pid; if ((ptr = mmap_emu(0, sizeof(struct afd_status), (PROT_READ | PROT_WRITE), MAP_SHARED, afd_status_file, 0)) == (caddr_t) -1) @@ -427,7 +432,7 @@ break; case DC_NO : /* dir_check */ - log_pid(0, i + 1); + *(pid_t *)(pid_list + ((i + 1) * sizeof(pid_t))) = 0; break; case AFDD_NO : @@ -512,51 +517,52 @@ full_dir_check_time = ((now / FULL_DIR_CHECK_INTERVAL) * FULL_DIR_CHECK_INTERVAL) + FULL_DIR_CHECK_INTERVAL; - /* Initialise communication flag FD <-> AMG */ + /* Initialise communication flag FD <-> AMG. */ p_afd_status->amg_jobs = 0; - /* Start all log process */ + /* Start all log process. */ proc_table[SLOG_NO].pid = make_process(SLOG, work_dir, NULL); - log_pid(proc_table[SLOG_NO].pid, SLOG_NO + 1); + *(pid_t *)(pid_list + ((SLOG_NO + 1) * sizeof(pid_t))) = proc_table[SLOG_NO].pid; *proc_table[SLOG_NO].status = ON; proc_table[RLOG_NO].pid = make_process(RLOG, work_dir, NULL); - log_pid(proc_table[RLOG_NO].pid, RLOG_NO + 1); + *(pid_t *)(pid_list + ((RLOG_NO + 1) * sizeof(pid_t))) = proc_table[RLOG_NO].pid; + *proc_table[RLOG_NO].status = ON; proc_table[TLOG_NO].pid = make_process(TLOG, work_dir, NULL); - log_pid(proc_table[TLOG_NO].pid, TLOG_NO + 1); + *(pid_t *)(pid_list + ((TLOG_NO + 1) * sizeof(pid_t))) = proc_table[TLOG_NO].pid; *proc_table[TLOG_NO].status = ON; proc_table[TDBLOG_NO].pid = make_process(TDBLOG, work_dir, NULL); - log_pid(proc_table[TDBLOG_NO].pid, TDBLOG_NO + 1); + *(pid_t *)(pid_list + ((TDBLOG_NO + 1) * sizeof(pid_t))) = proc_table[TDBLOG_NO].pid; *proc_table[TDBLOG_NO].status = ON; - /* Start process cleaning archive directory */ + /* Start process cleaning archive directory. */ proc_table[AW_NO].pid = make_process(ARCHIVE_WATCH, work_dir, NULL); - log_pid(proc_table[AW_NO].pid, AW_NO + 1); + *(pid_t *)(pid_list + ((AW_NO + 1) * sizeof(pid_t))) = proc_table[AW_NO].pid; *proc_table[AW_NO].status = ON; - /* Start process doing the I/O logging */ + /* Start process doing the I/O logging. */ #ifdef _INPUT_LOG proc_table[IL_NO].pid = make_process(INPUT_LOG_PROCESS, work_dir, NULL); - log_pid(proc_table[IL_NO].pid, IL_NO + 1); + *(pid_t *)(pid_list + ((IL_NO + 1) * sizeof(pid_t))) = proc_table[IL_NO].pid; *proc_table[IL_NO].status = ON; #endif #ifdef _OUTPUT_LOG proc_table[OL_NO].pid = make_process(OUTPUT_LOG_PROCESS, work_dir, NULL); - log_pid(proc_table[OL_NO].pid, OL_NO + 1); + *(pid_t *)(pid_list + ((OL_NO + 1) * sizeof(pid_t))) = proc_table[OL_NO].pid; *proc_table[OL_NO].status = ON; #endif #ifdef _DELETE_LOG proc_table[DL_NO].pid = make_process(DELETE_LOG_PROCESS, work_dir, NULL); - log_pid(proc_table[DL_NO].pid, DL_NO + 1); + *(pid_t *)(pid_list + ((DL_NO + 1) * sizeof(pid_t))) = proc_table[DL_NO].pid; *proc_table[DL_NO].status = ON; #endif #ifdef _PRODUCTION_LOG proc_table[PL_NO].pid = make_process(PRODUCTION_LOG_PROCESS, work_dir, NULL); - log_pid(proc_table[PL_NO].pid, PL_NO + 1); + *(pid_t *)(pid_list + ((PL_NO + 1) * sizeof(pid_t))) = proc_table[PL_NO].pid; *proc_table[PL_NO].status = ON; #endif - /* Tell user at what time the AFD was started */ - log_pid(getpid(), 0); + /* Tell user at what time the AFD was started. */ + *(pid_t *)(pid_list) = getpid(); system_log(CONFIG_SIGN, NULL, 0, "=================> STARTUP <================="); if (gethostname(hostname, 64) == 0) @@ -573,16 +579,16 @@ "AFD configuration: Default age limit %d (sec)", default_age_limit); - /* Start the process AMG */ + /* Start the process AMG. */ proc_table[AMG_NO].pid = make_process(AMG, work_dir, NULL); - log_pid(proc_table[AMG_NO].pid, AMG_NO + 1); + *(pid_t *)(pid_list + ((AMG_NO + 1) * sizeof(pid_t))) = proc_table[AMG_NO].pid; *proc_table[AMG_NO].status = ON; - /* Start TCP info daemon of AFD */ + /* Start TCP info daemon of AFD. */ if (afdd_port > 0) { proc_table[AFDD_NO].pid = make_process(AFDD, work_dir, NULL); - log_pid(proc_table[AFDD_NO].pid, AFDD_NO + 1); + *(pid_t *)(pid_list + ((AFDD_NO + 1) * sizeof(pid_t))) = proc_table[AFDD_NO].pid; *proc_table[AFDD_NO].status = ON; } else @@ -781,7 +787,7 @@ { /* Restart the AMG */ proc_table[AMG_NO].pid = make_process(AMG, work_dir, NULL); - log_pid(proc_table[AMG_NO].pid, AMG_NO + 1); + *(pid_t *)(pid_list + ((AMG_NO + 1) * sizeof(pid_t))) = proc_table[AMG_NO].pid; *proc_table[AMG_NO].status = ON; system_log(ERROR_SIGN, __FILE__, __LINE__, "Have started AMG, that was stopped due to too many jobs in the system!"); @@ -1099,7 +1105,7 @@ proc_table[AMG_NO].pid = make_process(AMG, work_dir, NULL); - log_pid(proc_table[AMG_NO].pid, AMG_NO + 1); + *(pid_t *)(pid_list + ((AMG_NO + 1) * sizeof(pid_t))) = proc_table[AMG_NO].pid; *proc_table[AMG_NO].status = ON; stop_typ = NONE_ID; } @@ -1117,7 +1123,7 @@ { proc_table[FD_NO].pid = make_process(FD, work_dir, NULL); - log_pid(proc_table[FD_NO].pid, FD_NO + 1); + *(pid_t *)(pid_list + ((FD_NO + 1) * sizeof(pid_t))) = proc_table[FD_NO].pid; *proc_table[FD_NO].status = ON; stop_typ = NONE_ID; } @@ -1151,7 +1157,7 @@ { /* Start the AFD_STAT */ proc_table[STAT_NO].pid = make_process(AFD_STAT, work_dir, NULL); - log_pid(proc_table[STAT_NO].pid, STAT_NO + 1); + *(pid_t *)(pid_list + ((STAT_NO + 1) * sizeof(pid_t))) = proc_table[STAT_NO].pid; *proc_table[STAT_NO].status = ON; /* Attach to the FSA */ @@ -1165,7 +1171,7 @@ proc_table[FD_NO].pid = make_process(FD, work_dir, NULL); - log_pid(proc_table[FD_NO].pid, FD_NO + 1); + *(pid_t *)(pid_list + ((FD_NO + 1) * sizeof(pid_t))) = proc_table[FD_NO].pid; *proc_table[FD_NO].status = ON; stop_typ = NONE_ID; @@ -1493,7 +1499,7 @@ " Normal termination of process %s", proc_table[i].proc_name); proc_table[i].pid = 0; - log_pid(0, i + 1); + *(pid_t *)(pid_list + ((i + 1) * sizeof(pid_t))) = 0; *proc_table[i].status = STOPPED; break; @@ -1503,7 +1509,7 @@ case 2 : /* Process has received SIGHUP */ proc_table[i].pid = make_process(proc_table[i].proc_name, p_work_dir, NULL); - log_pid(proc_table[i].pid, i + 1); + *(pid_t *)(pid_list + ((i + 1) * sizeof(pid_t))) = proc_table[i].pid; *proc_table[i].status = ON; system_log(INFO_SIGN, __FILE__, __LINE__, " Have restarted %s. SIGHUP received!", @@ -1513,7 +1519,7 @@ case 3 : /* Shared memory region gone. Restart. */ proc_table[i].pid = make_process(proc_table[i].proc_name, p_work_dir, NULL); - log_pid(proc_table[i].pid, i + 1); + *(pid_t *)(pid_list + ((i + 1) * sizeof(pid_t))) = proc_table[i].pid; *proc_table[i].status = ON; system_log(INFO_SIGN, __FILE__, __LINE__, " Have restarted %s, due to missing shared memory area.", @@ -1566,7 +1572,7 @@ #else p_work_dir, NULL); #endif - log_pid(proc_table[i].pid, i + 1); + *(pid_t *)(pid_list + ((i + 1) * sizeof(pid_t))) = proc_table[i].pid; *proc_table[i].status = ON; system_log(INFO_SIGN, __FILE__, __LINE__, " Have restarted %s", @@ -1601,8 +1607,8 @@ proc_table[i].pid = 0; *proc_table[i].status = OFF; system_log(ERROR_SIGN, __FILE__, __LINE__, - " Abnormal termination of %s!", - proc_table[i].proc_name); + " Abnormal termination of %s, caused by signal %d!", + proc_table[i].proc_name, WTERMSIG(status)); #ifdef NO_OF_SAVED_CORE_FILES if (no_of_saved_cores < NO_OF_SAVED_CORE_FILES) { @@ -1638,7 +1644,7 @@ /* No process may end abnormally! */ proc_table[i].pid = make_process(proc_table[i].proc_name, p_work_dir, NULL); - log_pid(proc_table[i].pid, i + 1); + *(pid_t *)(pid_list + ((i + 1) * sizeof(pid_t))) = proc_table[i].pid; *proc_table[i].status = ON; system_log(INFO_SIGN, __FILE__, __LINE__, " Have restarted %s",