/*
 *  ahtml_list.c - Part of AFD, an automatic file distribution program.
 *  Copyright (c) 2024 Deutscher Wetterdienst (DWD),
 *                     Holger Kiehl
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include "ahtml_listdefs.h"

DESCR__S_M1
/*
 ** NAME
 **   ahtml_list - show a HTML listing what AFD is able to see.
 **
 ** SYNOPSIS
 **   ahtml_list [options] <URL|file>
 **
 **   options
 **       --version    - Show current version
 **
 ** DESCRIPTION
 **   ahtml_list lists the links it finds in a given URL or file
 **   name, to make it easier to create a [files] filter of the
 **   correct files one wants to download.
 **
 ** RETURN VALUES
 **   SUCCESS when the list could be processed. SYNTAX_ERROR when the
 **   command line or the given URL is not correct. INCORRECT on
 **   system errors (malloc(), signal(), atexit(), read_file()).
 **
 ** AUTHOR
 **   H.Kiehl
 **
 ** HISTORY
 **   23.02.2024 H.Kiehl Created
 **
 */
DESCR__E_M1

#include <stdio.h>
#include <stdlib.h>               /* exit(), atexit()                    */
#include <string.h>               /* strlen()                            */
#include <unistd.h>               /* STDERR_FILENO                       */
#include <signal.h>               /* signal()                            */
#include <errno.h>
#include "httpdefs.h"
#include "version.h"

/* Global variables. */
int         sigpipe_flag,
            sys_log_fd = STDERR_FILENO,
            timeout_flag,
            transfer_log_fd = STDERR_FILENO,
            use_ip_db = NO;
long        transfer_timeout;
char        *html_list_filename = NULL,
            msg_str[MAX_RET_MSG_LENGTH],
            *p_work_dir = NULL;
const char  *sys_log_name = SYSTEM_LOG_FIFO;
struct data db;

/* Local global variables. */
static char name[30];             /* Base name of argv[0], for usage().  */

/* Local function prototypes. */
static int  option_value_missing(int, char **);
static void ahtml_list_exit(void),
            init_ahtml_list(int, char **, struct data *, char **),
            sig_bus(int),
            sig_segv(int),
            sig_pipe(int),
            sig_exit(int),
            usage(void);


/*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ main() $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$*/
/*
 * Evaluates the command line. If the non-option argument starts with
 * "http://" or "https://" it is handed to url_evaluate() and
 * get_html_content(); otherwise it is taken as the name of a local
 * file whose content is passed to eval_html_dir_list().
 * The process always terminates via exit().
 */
int
main(int argc, char *argv[])
{
   int  exit_status = SUCCESS,
        is_http,
        is_https;
   char *parg = NULL;

   CHECK_FOR_VERSION(argc, argv);

   /* Do some cleanups when we exit. */
   db.index_file = NULL;
   if (atexit(ahtml_list_exit) != 0)
   {
      (void)rec(sys_log_fd, FATAL_SIGN,
                _("Could not register exit function : %s (%s %d)\n"),
                strerror(errno), __FILE__, __LINE__);
      exit(INCORRECT);
   }
   if ((signal(SIGINT, sig_exit) == SIG_ERR) ||
       (signal(SIGSEGV, sig_segv) == SIG_ERR) ||
       (signal(SIGBUS, sig_bus) == SIG_ERR) ||
       (signal(SIGHUP, SIG_IGN) == SIG_ERR) ||
       (signal(SIGPIPE, sig_pipe) == SIG_ERR))
   {
      (void)rec(sys_log_fd, FATAL_SIGN, _("signal() error : %s (%s %d)\n"),
                strerror(errno), __FILE__, __LINE__);
      exit(INCORRECT);
   }

   /* Initialise variables. On return parg is a heap copy of the */
   /* first non-option argument (never NULL).                    */
   init_ahtml_list(argc, argv, &db, &parg);

   /* Check if it is a URL. */
   is_https = (strncmp(parg, "https://", 8) == 0);
   is_http = (strncmp(parg, "http://", 7) == 0);
   if (is_http || is_https)
   {
      unsigned int error_mask;
      time_t       now = time(NULL);

      /* Without -c or -C the fetched content file is removed. */
      if (db.remove == NEITHER)
      {
         db.remove = YES;
      }
      if ((error_mask = url_evaluate(parg, NULL, db.user, NULL, NULL,
#ifdef WITH_SSH_FINGERPRINT
                                     NULL, NULL,
#endif
                                     db.password, NO, db.hostname, &db.port,
                                     db.remote_dir, NULL, &now, NULL, NULL,
                                     NULL, NULL, NULL, NULL)) > 3)
      {
         char error_msg[MAX_URL_ERROR_MSG];

         url_get_error(error_mask, error_msg, MAX_URL_ERROR_MSG);
         /* Report the string that was actually evaluated; argv[1] */
         /* may be an option and not the URL.                      */
         (void)fprintf(stderr,
                       _("ERROR : Incorrect url `%s'. Error is: %s.\n"),
                       parg, error_msg);

         /* html_list_filename is still NULL here, so nothing may */
         /* be unlinked and the caller must see a failure.        */
         exit_status = SYNTAX_ERROR;
      }
      else /* Try to retrieve the HTML list. */
      {
         if ((html_list_filename = malloc(DEFAULT_HTML_LIST_FILENAME_LENGTH + 1)) == NULL)
         {
            (void)fprintf(stderr, _("ERROR : malloc() error : %s\n"),
                          strerror(errno));
            exit(INCORRECT);
         }
         (void)strcpy(html_list_filename, DEFAULT_HTML_LIST_FILENAME);

         /* Set HTTP timeout value. */
         transfer_timeout = db.transfer_timeout;

         /* Note, url_evaluate() will set port to -1 if no port is given. */
         if (db.port == -1)
         {
            db.port = is_https ? DEFAULT_HTTPS_PORT : DEFAULT_HTTP_PORT;
         }
         db.tls_auth = is_https ? YES : NO;

         get_html_content(html_list_filename, &db);
      }
   }
   else /* Lets assume this is a file name which contains a HTML list. */
   {
      off_t list_size;
      char  *list_buffer = NULL;

      /* Without -c or -C a local file given by the user is kept. */
      if (db.remove == NEITHER)
      {
         db.remove = NO;
      }
      if ((html_list_filename = malloc(strlen(parg) + 1)) == NULL)
      {
         (void)fprintf(stderr, _("ERROR : malloc() error : %s\n"),
                       strerror(errno));
         exit(INCORRECT);
      }
      (void)strcpy(html_list_filename, parg);

      if ((list_size = read_file(html_list_filename, &list_buffer)) == INCORRECT)
      {
         (void)rec(sys_log_fd, ERROR_SIGN,
                   "Failed to read_file() %s (%s %d)\n",
                   html_list_filename, __FILE__, __LINE__);
         exit(INCORRECT);
      }
      if (list_size > 0)
      {
         if (eval_html_dir_list(list_buffer, list_size, 0,
                                (db.special_flag & HREF_SEARCH_ONLY) ? YES : NO,
                                NULL, &db) != SUCCESS)
         {
            (void)rec(sys_log_fd, WARN_SIGN,
                      "eval_html_dir_list() error. (%s %d)\n",
                      __FILE__, __LINE__);
         }
      }
      free(list_buffer);
   }

   if ((db.remove == YES) && (html_list_filename != NULL))
   {
      if (unlink(html_list_filename) == -1)
      {
         (void)rec(sys_log_fd, ERROR_SIGN,
                   "Failed to unlink() %s : %s (%s %d)\n",
                   html_list_filename, strerror(errno), __FILE__, __LINE__);
      }
   }
   free(html_list_filename);
   html_list_filename = NULL;
   free(parg);

   exit(exit_status);
}


/*++++++++++++++++++++++++ option_value_missing() +++++++++++++++++++++++*/
/*
 * Returns YES when the option at argv[0] has no value behind it, i.e.
 * it is the last remaining argument (argc counts argv[0] itself) or
 * the next argument starts with '-'. Otherwise NO is returned.
 */
static int
option_value_missing(int argc, char **argv)
{
   if ((argc <= 1) || (argv[1] == NULL) || (argv[1][0] == '-'))
   {
      return(YES);
   }

   return(NO);
}


/*+++++++++++++++++++++++++++ init_ahtml_list() +++++++++++++++++++++++++*/
/*
 * Fills *p_db with default values, evaluates all leading arguments
 * that start with '-' and stores a malloc()ed copy of the first
 * non-option argument in *parg (the caller must free() it).
 * When an option is wrong or no non-option argument is given, usage()
 * is printed and the process exits with SYNTAX_ERROR.
 */
static void
init_ahtml_list(int argc, char *argv[], struct data *p_db, char **parg)
{
   int    correct = YES;          /* Was input/syntax correct?           */
   size_t length;
   char   *ptr;

   /* Store the program name without its directory part. */
   if ((argc > 0) && (argv[0] != NULL))
   {
      if ((ptr = strrchr(argv[0], '/')) != NULL)
      {
         ptr++;
      }
      else
      {
         ptr = argv[0];
      }
      (void)my_strncpy(name, ptr, 30);
   }

   /* First initialize all values with default values. */
   msg_str[0]             = '\0';
   p_db->hostname[0]      = '\0';
   p_db->user[0]          = '\0';
   p_db->password[0]      = '\0';
   p_db->remote_dir[0]    = '\0';
   p_db->blocksize        = DEFAULT_TRANSFER_BLOCKSIZE;
   p_db->proxy_name[0]    = '\0';
   p_db->transfer_timeout = DEFAULT_TRANSFER_TIMEOUT;
   p_db->verbose          = NO;
   p_db->remove           = NEITHER;
   p_db->sndbuf_size      = 0;
   p_db->rcvbuf_size      = 0;
   p_db->no_expect        = NO;
#ifdef WITH_SSL
   p_db->strict           = NO;
   p_db->legacy_renegotiation = NO;
#endif
   p_db->special_flag     = 0;

   /*
    * Evaluate all arguments with '-'. Inside the loop argc is the
    * number of arguments left including the current one, so every
    * option that consumes a value must advance argv AND decrement
    * argc. Otherwise the loop would run past the end of argv[].
    */
   while ((--argc > 0) && ((*++argv)[0] == '-'))
   {
      switch (*(argv[0] + 1))
      {
         case 'b' : /* HTTP transfer block size. */
            if (option_value_missing(argc, argv) == YES)
            {
               (void)fprintf(stderr,
                             _("ERROR : No block size specified for option -b.\n"));
               correct = NO;
            }
            else
            {
               p_db->blocksize = atoi(*(argv + 1));
               argc--;
               argv++;
            }
            break;

         case 'c' : /* Remove content file. */
            p_db->remove = YES;
            break;

         case 'C' : /* Do not remove content file. */
            p_db->remove = NO;
            break;

         case 'E' : /* No expect. */
            p_db->no_expect = YES;
            break;

         case 'f' : /* Force href search only. */
            p_db->special_flag |= HREF_SEARCH_ONLY;
            break;

         case 'i' : /* Index file name. */
            if (option_value_missing(argc, argv) == YES)
            {
               (void)fprintf(stderr,
                             _("ERROR : No index file name specified for option -i.\n"));
               correct = NO;
            }
            else
            {
               argc--;
               argv++;
               length = strlen(argv[0]);

               /* Drop the value of an earlier -i (main() sets */
               /* index_file to NULL before calling us).       */
               free(p_db->index_file);
               if ((p_db->index_file = malloc(length + 1)) == NULL)
               {
                  (void)fprintf(stderr, _("ERROR : malloc() error : %s\n"),
                                strerror(errno));
                  exit(INCORRECT);
               }
               (void)memcpy(p_db->index_file, argv[0], length);
               p_db->index_file[length] = '\0';
            }
            break;

         case 'P' : /* Proxy server */
            if (option_value_missing(argc, argv) == YES)
            {
               (void)fprintf(stderr,
                             _("ERROR : No proxy server specified for option -P.\n"));
               correct = NO;
            }
            else
            {
               argc--;
               argv++;
               (void)my_strncpy(p_db->proxy_name, argv[0],
                                MAX_PROXY_NAME_LENGTH);
            }
            break;

         case 'R' : /* Socket receive buffer. */
            if (option_value_missing(argc, argv) == YES)
            {
               (void)fprintf(stderr,
                             _("ERROR : No buffer size specified for option -R.\n"));
               correct = NO;
            }
            else
            {
               p_db->rcvbuf_size = atoi(*(argv + 1));
               argc--;
               argv++;
            }
            break;

         case 'S' : /* Socket send buffer. */
            if (option_value_missing(argc, argv) == YES)
            {
               (void)fprintf(stderr,
                             _("ERROR : No buffer size specified for option -S.\n"));
               correct = NO;
            }
            else
            {
               p_db->sndbuf_size = atoi(*(argv + 1));
               argc--;
               argv++;
            }
            break;

         case 't' : /* HTTP timeout. */
            if (option_value_missing(argc, argv) == YES)
            {
               (void)fprintf(stderr,
                             _("ERROR : No timeout specified for option -t.\n"));
               correct = NO;
            }
            else
            {
               p_db->transfer_timeout = atol(*(argv + 1));
               argc--;
               argv++;
            }
            break;

         case 'u': /* URL to use when evaluating a local file. */
            if (option_value_missing(argc, argv) == YES)
            {
               (void)fprintf(stderr,
                             _("ERROR : No URL given for option -u.\n"));
               correct = NO;
            }
            else
            {
               unsigned int error_mask;
               time_t       now = time(NULL);

               argc--;
               argv++;
               if ((error_mask = url_evaluate(argv[0], NULL, p_db->user,
                                              NULL, NULL,
#ifdef WITH_SSH_FINGERPRINT
                                              NULL, NULL,
#endif
                                              p_db->password, NO,
                                              p_db->hostname, &p_db->port,
                                              p_db->remote_dir, NULL, &now,
                                              NULL, NULL, NULL, NULL, NULL,
                                              NULL)) > 3)
               {
                  char error_msg[MAX_URL_ERROR_MSG];

                  url_get_error(error_mask, error_msg, MAX_URL_ERROR_MSG);
                  (void)fprintf(stderr,
                                _("ERROR : Incorrect url `%s'. Error is: %s.\n"),
                                argv[0], error_msg);
                  correct = NO;
               }
            }
            break;

         case 'v' : /* Verbose mode. */
            p_db->verbose = YES;
            break;

#ifdef WITH_SSL
         case 'x' : /* TLS legacy renegotiation. */
            p_db->legacy_renegotiation = YES;
            break;

         case 'Y' : /* Strict SSL/TLS verification. */
            p_db->strict = YES;
            break;
#endif

         case '?' : /* Help. */
            usage();
            exit(0);

         default : /* Unknown parameter. */
            (void)fprintf(stderr,
                          _("ERROR : Unknown parameter <%c>. (%s %d)\n"),
                          *(argv[0] + 1), __FILE__, __LINE__);
            correct = NO;
            break;
      } /* switch (*(argv[0] + 1)) */
   }

   /*
    * If the loop stopped at a non-option argument, argc is at least 1
    * and argv[0] is that argument. If all arguments were consumed,
    * argc is 0 (or less) and there is nothing to copy. Check this
    * BEFORE touching the argument, argv[argc] is a NULL pointer.
    */
   if ((argc < 1) || (correct == NO))
   {
      usage();
      exit(SYNTAX_ERROR);
   }

   length = strlen(argv[0]);
   if ((*parg = malloc(length + 1)) == NULL)
   {
      (void)fprintf(stderr, _("ERROR : malloc() error : %s\n"),
                    strerror(errno));
      exit(INCORRECT);
   }
   (void)memcpy(*parg, argv[0], length);
   (*parg)[length] = '\0';

   return;
}


/*+++++++++++++++++++++++++++++++ usage() ++++++++++++++++++++++++++++++*/
/*
 * Prints the command line syntax and the list of exit values to
 * stderr.
 *
 * NOTE(review): the text below lists an option -p, but
 * init_ahtml_list() has no case for it and reports it as unknown
 * parameter. Either the option or the help line needs attention.
 */
static void
usage(void)
{
   (void)fprintf(stderr, _("SYNTAX: %s [options] [URL|file]\n\n"), name);
   (void)fprintf(stderr, _(" OPTIONS DESCRIPTION\n"));
   (void)fprintf(stderr, _(" --version - Show current version\n"));
   (void)fprintf(stderr, _(" -b - Transfer block size in bytes. Default %d\n"),
                 DEFAULT_TRANSFER_BLOCKSIZE);
   (void)fprintf(stderr, _(" bytes.\n"));
   (void)fprintf(stderr, _(" -c - Remove content file.\n"));
   (void)fprintf(stderr, _(" -C - Do not remove content file.\n"));
   (void)fprintf(stderr, _(" -E - Do not send expect.\n"));
   (void)fprintf(stderr, _(" -f - Force href search only.\n"));
   (void)fprintf(stderr, _(" -i - Non standard index file name.\n"));
   (void)fprintf(stderr, _(" -P - Proxy server.\n"));
   (void)fprintf(stderr, _(" -p - Remote port number of HTTP-server.\n"));
   (void)fprintf(stderr, _(" Default %d or %d.\n"),
                 DEFAULT_HTTP_PORT, DEFAULT_HTTPS_PORT);
   (void)fprintf(stderr, _(" -R - Socket receive buffer size\n"));
   (void)fprintf(stderr, _(" (in bytes).\n"));
   (void)fprintf(stderr, _(" -S - Socket send buffer size\n"));
   (void)fprintf(stderr, _(" (in bytes).\n"));
   (void)fprintf(stderr, _(" -t - HTTP timeout in seconds. Default %lds.\n"),
                 DEFAULT_TRANSFER_TIMEOUT);
   (void)fprintf(stderr, _(" -u - When just evaluating a local file. This\n"));
   (void)fprintf(stderr, _(" allows adding a URL for testing.\n"));
   (void)fprintf(stderr, _(" -v - Verbose. Shows more information.\n"));
#ifdef WITH_SSL
   (void)fprintf(stderr, _(" -x - Use TLS legacy renegotiation.\n"));
   (void)fprintf(stderr, _(" -Y - Use strict SSL/TLS verification.\n"));
#endif
   (void)fprintf(stderr, _(" -? - Display this help and exit.\n"));
   (void)fprintf(stderr, _(" The following values are returned on exit:\n"));
   (void)fprintf(stderr, _(" %2d - File transmitted successfully.\n"),
                 TRANSFER_SUCCESS);
   (void)fprintf(stderr, _(" %2d - Failed to connect.\n"), CONNECT_ERROR);
   (void)fprintf(stderr, _(" %2d - Failed to open remote file.\n"),
                 OPEN_REMOTE_ERROR);
   (void)fprintf(stderr, _(" %2d - System error stat().\n"), STAT_ERROR);
   (void)fprintf(stderr, " %2d - %s.\n", TIMEOUT_ERROR, TIMEOUT_ERROR_STR);
   (void)fprintf(stderr, " %2d - %s.\n",
                 CONNECTION_RESET_ERROR, CONNECTION_RESET_ERROR_STR);
   (void)fprintf(stderr, " %2d - %s.\n",
                 CONNECTION_REFUSED_ERROR, CONNECTION_REFUSED_ERROR_STR);
   (void)fprintf(stderr, _(" %2d - System error malloc().\n"), ALLOC_ERROR);
   (void)fprintf(stderr, _(" %2d - Syntax wrong.\n"), SYNTAX_ERROR);

   return;
}


/*+++++++++++++++++++++++++++ ahtml_list_exit() +++++++++++++++++++++++++*/
/*
 * Registered with atexit() in main(). Releases the index file name
 * that option -i may have allocated.
 */
static void
ahtml_list_exit(void)
{
   free(db.index_file);
   db.index_file = NULL;

   return;
}


/*++++++++++++++++++++++++++++++ sig_pipe() +++++++++++++++++++++++++++++*/
/*
 * SIGPIPE handler: switches SIGPIPE to SIG_IGN and records the event
 * in the global sigpipe_flag.
 */
static void
sig_pipe(int signo)
{
   /* Ignore any future signals of this kind. */
   if (signal(SIGPIPE, SIG_IGN) == SIG_ERR)
   {
      (void)rec(sys_log_fd, ERROR_SIGN, _("signal() error : %s (%s %d)\n"),
                strerror(errno), __FILE__, __LINE__);
   }
   sigpipe_flag = ON;

   return;
}


/*++++++++++++++++++++++++++++++ sig_segv() +++++++++++++++++++++++++++++*/
/* SIGSEGV handler: logs the event and exits with INCORRECT. */
static void
sig_segv(int signo)
{
   (void)rec(sys_log_fd, DEBUG_SIGN,
             _("Aaarrrggh! Received SIGSEGV. Remove the programmer who wrote this! (%s %d)\n"),
             __FILE__, __LINE__);
   exit(INCORRECT);
}


/*++++++++++++++++++++++++++++++ sig_bus() ++++++++++++++++++++++++++++++*/
/* SIGBUS handler: logs the event and exits with INCORRECT. */
static void
sig_bus(int signo)
{
   (void)rec(sys_log_fd, DEBUG_SIGN,
             _("Uuurrrggh! Received SIGBUS. (%s %d)\n"),
             __FILE__, __LINE__);
   exit(INCORRECT);
}


/*++++++++++++++++++++++++++++++ sig_exit() +++++++++++++++++++++++++++++*/
/* SIGINT handler: exits with INCORRECT. */
static void
sig_exit(int signo)
{
   exit(INCORRECT);
}