From 8f888fb1a3469b87e557efad93b293dd36288ba9 Mon Sep 17 00:00:00 2001 From: Sunil Nimmagadda Date: Sat, 14 Sep 2024 12:32:01 +0530 Subject: A HTTP(S), FTP client --- url.c | 424 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 424 insertions(+) create mode 100644 url.c (limited to 'url.c') diff --git a/url.c b/url.c new file mode 100644 index 0000000..546d448 --- /dev/null +++ b/url.c @@ -0,0 +1,424 @@ +/* + * Copyright (c) 2017 Sunil Nimmagadda + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/*- + * Copyright (c) 1997 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Jason Thorpe and Luke Mewburn. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "ftp.h" +#include "xmalloc.h" + +#define BASICAUTH_LEN 1024 + +static void authority_parse(const char *, char **, char **, char **); +static int ipv6_parse(const char *, char **, char **); +static int unsafe_char(const char *); + +#ifndef NOSSL +const char *scheme_str[] = { "http:", "ftp:", "file:", "https:" }; +const char *port_str[] = { "80", "21", NULL, "443" }; +#else +const char *scheme_str[] = { "http:", "ftp:", "file:" }; +const char *port_str[] = { "80", "21", NULL }; +#endif /* NOSSL */ + +int +url_scheme_lookup(const char *str) +{ + size_t i; + +#ifdef NOSSL + if (strncasecmp(str, "https:", 6) == 0) + errx(1, "No HTTPS support."); +#endif /* NOSSL */ + + for (i = 0; i < nitems(scheme_str); i++) + if (strncasecmp(str, scheme_str[i], strlen(scheme_str[i])) == 0) + return i; + + return -1; +} + +static int +ipv6_parse(const char *str, char **host, char **port) +{ + char *p; + + if ((p = strchr(str, ']')) == NULL) { + warnx("%s: invalid IPv6 address: %s", __func__, str); + return 1; + } + + *p++ = '\0'; + if (strlen(str + 1) > 0) + *host = xstrdup(str + 1); + + if (*p == '\0') + return 0; + + if (*p++ != ':') { + warnx("%s: invalid port: %s", __func__, p); + free(*host); + return 1; + } + + if (strlen(p) > 0) + *port = xstrdup(p); + + return 0; +} + +static void +authority_parse(const char *str, char **host, char **port, char **basic_auth) +{ + char *p; + + if ((p = strchr(str, '@')) != NULL) { + *basic_auth = xcalloc(1, BASICAUTH_LEN); + if (b64_ntop((unsigned char *)str, p - str, + *basic_auth, BASICAUTH_LEN) == -1) + errx(1, "base64 encode failed"); + + str = ++p; + } + + if ((p = strchr(str, ':')) != NULL) { + *p++ = '\0'; + if (strlen(p) > 0) + *port = xstrdup(p); + } + + if (strlen(str) > 0) + *host = xstrdup(str); +} + +struct url * +xurl_parse(const char *str) +{ + struct url *url; + + if ((url = url_parse(str)) == NULL) + exit(1); + + return url; +} + +struct url * +url_parse(const char *str) +{ + struct url *url; + const char *p, *q; + char *basic_auth, *host, *port, *path, *s; + size_t len; + int ip_literal, scheme; + + p = str; + ip_literal = 0; + host = port = path = basic_auth = NULL; + while (isblank((unsigned char)*p)) + p++; + + if ((q = strchr(p, ':')) == NULL) { + warnx("%s: scheme missing: %s", __func__, str); + return NULL; + } + + if ((scheme = url_scheme_lookup(p)) == -1) { + warnx("%s: invalid scheme: %s", __func__, p); + return NULL; + } + + p = ++q; + if (strncmp(p, "//", 2) != 0) { + if (scheme == S_FILE) + goto done; + else { + warnx("%s: invalid url: %s", __func__, str); + return NULL; + } + } + + p += 2; + + /* + * quirk to parse file:// which isn't valid but required for + * backwards compatibility. + */ + if (scheme == S_FILE) { + q = (*p == '/') ? p : p - 1; + goto done; + } + + len = strlen(p); + /* Authority terminated by a '/' if present */ + if ((q = strchr(p, '/')) != NULL) + len = q - p; + + s = xstrndup(p, len); + if (*p == '[') { + if (ipv6_parse(s, &host, &port) != 0) { + free(s); + return NULL; + } + ip_literal = 1; + } else + authority_parse(s, &host, &port, &basic_auth); + + free(s); + if (port == NULL && scheme != S_FILE) + port = xstrdup(port_str[scheme]); + + done: + if (q != NULL) + path = xstrdup(q); + + if (io_debug) { + fprintf(stderr, + "scheme: %s\nhost: %s\nport: %s\npath: %s\n", + scheme_str[scheme], host, port, path); + } + + url = xcalloc(1, sizeof *url); + url->scheme = scheme; + url->host = host; + url->port = port; + url->path = path; + url->basic_auth = basic_auth; + url->ip_literal = ip_literal; + return url; +} + +void +url_free(struct url *url) +{ + if (url == NULL) + return; + + free(url->host); + free(url->port); + free(url->path); + freezero(url->basic_auth, BASICAUTH_LEN); + free(url); +} + +void +url_connect(struct url *url, int timeout) +{ + switch (url->scheme) { + case S_HTTP: + case S_HTTPS: + http_connect(url, timeout); + break; + case S_FTP: + if (ftp_proxy) + http_connect(url, timeout); + else + ftp_connect(url, timeout); + break; + } +} + +struct url * +url_request(struct url *url, off_t *offset, off_t *sz) +{ + switch (url->scheme) { + case S_HTTP: + case S_HTTPS: + return http_get(url, offset, sz); + case S_FTP: + if (ftp_proxy) + return http_get(url, offset, sz); + + return ftp_get(url, offset, sz); + case S_FILE: + return file_get(url, offset, sz); + } + + return NULL; +} + +void +url_save(struct url *url, FILE *dst_fp, off_t *offset) +{ + switch (url->scheme) { + case S_HTTP: + case S_HTTPS: + http_save(url, dst_fp, offset); + break; + case S_FTP: + if (ftp_proxy) + http_save(url, dst_fp, offset); + else + ftp_save(url, dst_fp, offset); + break; + case S_FILE: + file_save(url, dst_fp, offset); + break; + } +} + +void +url_close(struct url *url) +{ + switch (url->scheme) { + case S_HTTP: + case S_HTTPS: + http_close(url); + break; + case S_FTP: + if (ftp_proxy) + http_close(url); + else + ftp_close(url); + break; + } +} + +char * +url_str(struct url *url) +{ + char *host, *str; + int custom_port; + + custom_port = strcmp(url->port, port_str[url->scheme]) ? 1 : 0; + if (url->ip_literal) + xasprintf(&host, "[%s]", url->host); + else + host = xstrdup(url->host); + + xasprintf(&str, "%s//%s%s%s%s", + scheme_str[url->scheme], + host, + custom_port ? ":" : "", + custom_port ? url->port : "", + url->path ? url->path : "/"); + + free(host); + return str; +} + +const char * +url_scheme_str(int scheme) +{ + return scheme_str[scheme]; +} + +const char * +url_port_str(int scheme) +{ + return port_str[scheme]; +} + +/* + * Encode given URL, per RFC1738. + * Allocate and return string to the caller. + */ +char * +url_encode(const char *path) +{ + size_t i, length, new_length; + char *epath, *epathp; + + length = new_length = strlen(path); + + /* + * First pass: + * Count unsafe characters, and determine length of the + * final URL. + */ + for (i = 0; i < length; i++) + if (unsafe_char(path + i)) + new_length += 2; + + epath = epathp = xmalloc(new_length + 1); /* One more for '\0'. */ + + /* + * Second pass: + * Encode, and copy final URL. + */ + for (i = 0; i < length; i++) + if (unsafe_char(path + i)) { + snprintf(epathp, 4, "%%" "%02x", + (unsigned char)path[i]); + epathp += 3; + } else + *(epathp++) = path[i]; + + *epathp = '\0'; + return epath; +} + +/* + * Determine whether the character needs encoding, per RFC1738: + * - No corresponding graphic US-ASCII. + * - Unsafe characters. + */ +static int +unsafe_char(const char *c0) +{ + const char *unsafe_chars = " <>\"#{}|\\^~[]`"; + const unsigned char *c = (const unsigned char *)c0; + + /* + * No corresponding graphic US-ASCII. + * Control characters and octets not used in US-ASCII. + */ + return (iscntrl(*c) || !isascii(*c) || + + /* + * Unsafe characters. + * '%' is also unsafe, if is not followed by two + * hexadecimal digits. + */ + strchr(unsafe_chars, *c) != NULL || + (*c == '%' && (!isxdigit(*++c) || !isxdigit(*++c)))); +} -- cgit v1.2.3