/*
 *  get_html_content.c - Part of AFD, an automatic file distribution program.
 *  Copyright (c) 2024 Holger Kiehl
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include "ahtml_listdefs.h"

DESCR__S_M3
/*
 ** NAME
 **   get_html_content - gets the content of the given URL
 **
 ** SYNOPSIS
 **   void get_html_content(char *html_content_filename, struct data *p_db)
 **
 ** DESCRIPTION
 **   Opens an HTTP connection with the host, proxy, port and credentials
 **   found in p_db, then issues a GET for p_db->remote_dir (plus
 **   p_db->index_file when it is not NULL). Every block of the reply
 **   body is written to the file html_content_filename (opened with
 **   mode "w") and is also collected in memory. Once the body is
 **   complete the collected data is handed to eval_html_dir_list().
 **   The GET is repeated for as long as listing_complete is NO after
 **   that call.
 **
 **   Both reply forms signalled by http_get() are handled: SUCCESS
 **   (body length taken from content_length) and CHUNKED (body read
 **   via http_chunk_read() until HTTP_LAST_CHUNK).
 **
 ** RETURN VALUES
 **   None. On any connect, open, read, write or allocation error the
 **   process is terminated via exit() with CONNECT_ERROR, INCORRECT,
 **   ALLOC_ERROR or the value returned by eval_timeout().
 **
 ** AUTHOR
 **   H.Kiehl
 **
 ** HISTORY
 **   01.03.2024 H.Kiehl Created
 **
 */
DESCR__E_M3

#include <stdio.h>     /* fopen(), fseeko(), fclose()                    */
#include <stdlib.h>    /* exit(), malloc(), free()                       */
#include <string.h>    /* strerror()                                     */
#include <errno.h>
#include "httpdefs.h"

/* Global variables. */
int          simulation_mode = NO;
unsigned int special_flag = 0;

/* External global variables. */
extern int   sys_log_fd;
extern char  msg_str[];


/*########################## get_html_content() #########################*/
void
get_html_content(char *html_content_filename, struct data *p_db)
{
   int   features = 0,
         listing_complete,
         status;
#ifdef _WITH_EXTRA_CHECK
   char  etag[MAX_EXTRA_LS_DATA_LENGTH + 1];
#endif
   char  *listbuffer = NULL;
   off_t bytes_buffered,
         content_length;
   FILE  *fp;

   /* Translate the per-job switches into http_connect() feature bits. */
   if (p_db->strict == YES)
   {
      features |= PROT_OPT_TLS_STRICT_VERIFY;
   }
   if (p_db->legacy_renegotiation == YES)
   {
      features |= PROT_OPT_TLS_LEGACY_RENEGOTIATION;
   }
   if (p_db->no_expect == YES)
   {
      features |= PROT_OPT_NO_EXPECT;
   }

   status = http_connect(p_db->hostname, p_db->proxy_name,
                         p_db->port, p_db->user, p_db->password,
                         0, 0, features,
#ifdef WITH_SSL
                         0, SERVICE_NONE, "", p_db->tls_auth,
#endif
                         p_db->sndbuf_size, p_db->rcvbuf_size,
                         p_db->verbose, YES);
   if (status != SUCCESS)
   {
      if (p_db->proxy_name[0] == '\0')
      {
         trans_log(ERROR_SIGN, __FILE__, __LINE__, NULL, msg_str,
                   "HTTP connection to %s at port %d failed (%d).",
                   p_db->hostname, p_db->port, status);
      }
      else
      {
         trans_log(ERROR_SIGN, __FILE__, __LINE__, NULL, msg_str,
                   "HTTP connection to HTTP proxy %s at port %d failed (%d).",
                   p_db->proxy_name, p_db->port, status);
      }
      exit(CONNECT_ERROR);
   }
   if (p_db->verbose)
   {
      trans_log(INFO_SIGN, NULL, 0, NULL, NULL,
                "Opened HTTP connection to %s:%d.\n",
                p_db->hostname, p_db->port);
   }

   /* Everything received below is mirrored into this file. */
   if ((fp = fopen(html_content_filename, "w")) == NULL)
   {
      (void)rec(sys_log_fd, ERROR_SIGN,
                "Could not fopen() `%s' : %s (%s %d)\n",
                html_content_filename, strerror(errno), __FILE__, __LINE__);
      http_quit();
      exit(INCORRECT);
   }

   do
   {
      bytes_buffered = 0;
      content_length = -1;
#ifdef _WITH_EXTRA_CHECK
      etag[0] = '\0';
#endif
      if (((status = http_get(p_db->remote_dir,
                              (p_db->index_file == NULL) ? "" : p_db->index_file,
                              NULL,
#ifdef _WITH_EXTRA_CHECK
                              etag,
#endif
                              &content_length, 0)) != SUCCESS) &&
          (status != CHUNKED))
      {
         trans_log(ERROR_SIGN, __FILE__, __LINE__, NULL,
                   (status == INCORRECT) ? NULL : msg_str,
                   "Failed to open remote directory %s (%d).",
                   p_db->remote_dir, status);
         http_quit();
         exit(eval_timeout(OPEN_REMOTE_ERROR));
      }

      /* Stays YES unless changed through the pointer passed to */
      /* eval_html_dir_list() further down.                     */
      listing_complete = YES;

      if (status == SUCCESS)
      {
         int read_length;

         if (content_length > MAX_HTTP_DIR_BUFFER)
         {
            trans_log(ERROR_SIGN, __FILE__, __LINE__, NULL, NULL,
#if SIZEOF_OFF_T == 4
                      "Directory buffer length is only for %d bytes, remote system wants to send %ld bytes. If needed increase MAX_HTTP_DIR_BUFFER.",
#else
                      "Directory buffer length is only for %d bytes, remote system wants to send %lld bytes. If needed increase MAX_HTTP_DIR_BUFFER.",
#endif
                      MAX_HTTP_DIR_BUFFER, (pri_off_t)content_length);
            http_quit();
            exit(ALLOC_ERROR);
         }
         else if (content_length == 0)
              {
                 /* Length 0: read up to the maximum buffer size instead. */
                 content_length = MAX_HTTP_DIR_BUFFER;
              }

         /* One extra byte for the terminating '\0' set before parsing. */
         if ((listbuffer = malloc(content_length + 1)) == NULL)
         {
            /* __FILE__ and __LINE__ must be supplied, the format */
            /* string consumes four arguments.                    */
            (void)rec(sys_log_fd, ERROR_SIGN,
#if SIZEOF_OFF_T == 4
                      "Failed to malloc() %ld bytes : %s (%s %d)\n",
#else
                      "Failed to malloc() %lld bytes : %s (%s %d)\n",
#endif
                      (pri_off_t)(content_length + 1), strerror(errno),
                      __FILE__, __LINE__);
            http_quit();
            exit(ALLOC_ERROR);
         }

         do
         {
            /* Read a full block, or whatever is left for the last one. */
            if ((content_length - (bytes_buffered + p_db->blocksize)) >= 0)
            {
               read_length = p_db->blocksize;
            }
            else
            {
               read_length = content_length - bytes_buffered;
            }
            if (read_length > 0)
            {
               if ((status = http_read(&listbuffer[bytes_buffered],
                                       read_length)) == INCORRECT)
               {
                  trans_log(ERROR_SIGN, __FILE__, __LINE__, NULL,
                            (status > 0) ? msg_str : NULL,
                            "Failed to read from remote content for %s (%d)",
                            p_db->remote_dir, status);
                  free(listbuffer);
                  http_quit();
                  (void)fclose(fp);
                  exit(eval_timeout(READ_REMOTE_ERROR));
               }
               else if (status > 0)
                    {
                       /* status is positive here, so the cast is safe. */
                       if (fwrite(&listbuffer[bytes_buffered], 1, status,
                                  fp) != (size_t)status)
                       {
                          (void)rec(sys_log_fd, ERROR_SIGN,
                                    "Failed to fwrite() %d bytes : %s (%s %d)\n",
                                    status, strerror(errno),
                                    __FILE__, __LINE__);
                          free(listbuffer);
                          http_quit();
                          (void)fclose(fp);
                          exit(INCORRECT);
                       }
                       bytes_buffered += status;
                       if (bytes_buffered == content_length)
                       {
                          status = 0;
                       }
                       else if (bytes_buffered > content_length)
                            {
                               trans_log(ERROR_SIGN, __FILE__, __LINE__, NULL, NULL,
#if SIZEOF_OFF_T == 4
                                         "Maximum directory buffer length (%ld bytes) reached.",
#else
                                         "Maximum directory buffer length (%lld bytes) reached.",
#endif
                                         (pri_off_t)content_length);
                               status = 0;
                            }
                    }
            }
            else
            {
               status = 0;
            }
         } while (status != 0);
      }
      else /* status == CHUNKED */
      {
         int  chunksize;
         char *chunkbuffer = NULL;

         chunksize = p_db->blocksize + 4;
         if ((chunkbuffer = malloc(chunksize)) == NULL)
         {
            (void)rec(sys_log_fd, ERROR_SIGN,
                      "Failed to malloc() %d bytes : %s (%s %d)\n",
                      chunksize, strerror(errno), __FILE__, __LINE__);
            http_quit();
            free(listbuffer);
            (void)fclose(fp);
            exit(ALLOC_ERROR);
         }

         do
         {
            /* chunkbuffer and chunksize are passed by address, so */
            /* http_chunk_read() is able to modify both.           */
            if ((status = http_chunk_read(&chunkbuffer, &chunksize)) < 0)
            {
               trans_log(ERROR_SIGN, __FILE__, __LINE__, NULL,
                         (status == INCORRECT) ? NULL : msg_str,
                         "Failed to read from remote directory listing for %s",
                         p_db->remote_dir);
               http_quit();
               free(chunkbuffer);
               (void)fclose(fp);
               exit(eval_timeout(READ_REMOTE_ERROR));
            }
            else if (status > 0)
                 {
                    if (listbuffer == NULL)
                    {
                       if ((listbuffer = malloc(status)) == NULL)
                       {
                          /* Report the size actually requested. */
                          (void)rec(sys_log_fd, ERROR_SIGN,
                                    "Failed to malloc() %d bytes : %s (%s %d)\n",
                                    status, strerror(errno),
                                    __FILE__, __LINE__);
                          http_quit();
                          free(chunkbuffer);
                          exit(ALLOC_ERROR);
                       }
                    }
                    else
                    {
                       if (bytes_buffered > MAX_HTTP_DIR_BUFFER)
                       {
                          /* content_length was preset to -1 before     */
                          /* http_get(), so report the number of bytes */
                          /* that would be needed to store this chunk. */
                          trans_log(ERROR_SIGN, __FILE__, __LINE__, NULL, NULL,
#if SIZEOF_OFF_T == 4
                                    "Directory length buffer is only for %d bytes, remote system wants to send %ld bytes. If needed increase MAX_HTTP_DIR_BUFFER.",
#else
                                    "Directory length buffer is only for %d bytes, remote system wants to send %lld bytes. If needed increase MAX_HTTP_DIR_BUFFER.",
#endif
                                    MAX_HTTP_DIR_BUFFER,
                                    (pri_off_t)(bytes_buffered + status));
                          http_quit();
                          free(listbuffer);
                          free(chunkbuffer);
                          exit(ALLOC_ERROR);
                       }
                       if ((listbuffer = realloc(listbuffer,
                                                 bytes_buffered + status)) == NULL)
                       {
                          (void)rec(sys_log_fd, ERROR_SIGN,
#if SIZEOF_OFF_T == 4
                                    "Failed to realloc() %ld bytes : %s (%s %d)\n",
#else
                                    "Failed to realloc() %lld bytes : %s (%s %d)\n",
#endif
                                    (pri_off_t)(bytes_buffered + status),
                                    strerror(errno), __FILE__, __LINE__);
                          free(chunkbuffer);
                          http_quit();
                          exit(ALLOC_ERROR);
                       }
                    }
                    (void)memcpy(&listbuffer[bytes_buffered], chunkbuffer,
                                 status);
                    /* status is positive here, so the cast is safe. */
                    if (fwrite(chunkbuffer, 1, status, fp) != (size_t)status)
                    {
                       (void)rec(sys_log_fd, ERROR_SIGN,
                                 "Failed to fwrite() %d bytes : %s (%s %d)\n",
                                 status, strerror(errno), __FILE__, __LINE__);
                       free(chunkbuffer);
                       http_quit();
                       (void)fclose(fp);
                       exit(INCORRECT);
                    }
                    bytes_buffered += status;
                 }
         } while (status != HTTP_LAST_CHUNK);

         free(chunkbuffer);
         chunkbuffer = NULL;

         /* Make room for the terminating '\0' set before parsing. */
         if ((listbuffer = realloc(listbuffer, bytes_buffered + 1)) == NULL)
         {
            /* chunkbuffer has already been released above, it */
            /* must not be freed a second time here.            */
            (void)rec(sys_log_fd, ERROR_SIGN,
#if SIZEOF_OFF_T == 4
                      "Failed to realloc() %ld bytes : %s (%s %d)\n",
#else
                      "Failed to realloc() %lld bytes : %s (%s %d)\n",
#endif
                      (pri_off_t)(bytes_buffered + 1), strerror(errno),
                      __FILE__, __LINE__);
            http_quit();
            exit(ALLOC_ERROR);
         }
      }

      if (bytes_buffered > 0)
      {
         listbuffer[bytes_buffered] = '\0';
         if (eval_html_dir_list(listbuffer, bytes_buffered, 0,
                                (p_db->special_flag & HREF_SEARCH_ONLY) ? YES : NO,
                                &listing_complete, p_db) != SUCCESS)
         {
            (void)rec(sys_log_fd, WARN_SIGN,
                      "eval_html_dir_list() error. (%s %d)\n",
                      __FILE__, __LINE__);
         }
      }
      free(listbuffer);
      listbuffer = NULL;
   } while (listing_complete == NO);

   http_quit();

   /* fclose() flushes the buffered data, so a failure here means the */
   /* content file may be incomplete.                                 */
   if (fclose(fp) == EOF)
   {
      (void)rec(sys_log_fd, WARN_SIGN,
                "Failed to fclose() `%s' : %s (%s %d)\n",
                html_content_filename, strerror(errno), __FILE__, __LINE__);
   }

   return;
}