From 60245e42b0f8eae5d3eac87f1c204ae510b8c547 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dag-Erling=20Sm=C3=B8rgrav?= Date: Thu, 11 May 2000 13:31:02 +0000 Subject: [PATCH] Reorganize some of the http code and split it into more functions. Implement fetchStatHTTP(). Unbungle struct url, and add fetchFreeURL(). Document it. --- lib/libfetch/fetch.3 | 48 +++++----- lib/libfetch/fetch.c | 38 ++++---- lib/libfetch/fetch.h | 11 ++- lib/libfetch/http.c | 209 +++++++++++++++++++++++++++++++------------ 4 files changed, 203 insertions(+), 103 deletions(-) diff --git a/lib/libfetch/fetch.3 b/lib/libfetch/fetch.3 index 28bdb3336d6d..e99fdb98a792 100644 --- a/lib/libfetch/fetch.3 +++ b/lib/libfetch/fetch.3 @@ -28,11 +28,12 @@ .Dt FETCH 3 .Os .Sh NAME +.Nm fetchParseURL , +.Nm fetchFreeURL , .Nm fetchGetURL , .Nm fetchPutURL , .Nm fetchStatURL , .Nm fetchListURL , -.Nm fetchParseURL , .Nm fetchGet , .Nm fetchPut , .Nm fetchStat , @@ -56,6 +57,10 @@ .Fd #include .Fd #include .Fd #include +.Ft struct url * +.Fn fetchParseURL "char *URL" +.Ft void +.Fn fetchFreeURL "struct url *URL" .Ft FILE * .Fn fetchGetURL "char *URL" "char *flags" .Ft FILE * @@ -64,8 +69,6 @@ .Fn fetchStatURL "char *URL" "struct url_stat *us" "char *flags" .Ft struct url_ent * .Fn fetchListURL "char *URL" "char *flags" -.Ft struct url * -.Fn fetchParseURL "char *URL" .Ft FILE * .Fn fetchGet "struct url *URL" "char *flags" .Ft FILE * @@ -103,6 +106,25 @@ These functions implement a high-level library for retrieving and uploading files using Uniform Resource Locators (URLs). .Pp +.Fn fetchParseURL +takes a URL in the form of a null-terminated string and splits it into +its components function according to the Common Internet Scheme Syntax +detailed in RFC1738. A regular expression which produces this syntax +is: +.Bd -literal + <scheme>:(//(<user>(:<pwd>)?@)?<host>(:<port>)?)?/(<document>)? +.Ed +.Pp +Note that some components of the URL are not necessarily relevant to +all URL schemes. +For instance, the file scheme only needs the <scheme> +and <document> components. 
+.Pp +The pointer returned by +.Fn fetchParseURL +should be freed using +.Fn fetchFreeURL . +.Pp .Fn fetchGetURL and .Fn fetchPutURL @@ -158,25 +180,6 @@ The pointer returned by should be freed using .Fn free . .Pp -.Fn fetchParseURL -takes a URL in the form of a null-terminated string and splits it into -its components function according to the Common Internet Scheme Syntax -detailed in RFC1738. A regular expression which produces this syntax -is: -.Bd -literal - <scheme>:(//(<user>(:<pwd>)?@)?<host>(:<port>)?)?/(<document>)? -.Ed -.Pp -Note that some components of the URL are not necessarily relevant to -all URL schemes. -For instance, the file scheme only needs the <scheme> -and <document> components. -.Pp -The pointer returned by -.Fn fetchParseURL -should be freed using -.Fn free . -.Pp .Fn fetchGet , .Fn fetchPut and @@ -414,7 +417,6 @@ Some parts of the library are not yet implemented. The most notable examples of this are .Fn fetchPutHTTP , -.Fn fetchStatHTTP , .Fn fetchListHTTP , .Fn fetchListFTP and FTP proxy support. diff --git a/lib/libfetch/fetch.c b/lib/libfetch/fetch.c index bc2f565d9c95..c7e46a9c3d0d 100644 --- a/lib/libfetch/fetch.c +++ b/lib/libfetch/fetch.c @@ -152,7 +152,7 @@ fetchGetURL(char *URL, char *flags) f = fetchGet(u, flags); - free(u); + fetchFreeURL(u); return f; } @@ -171,7 +171,7 @@ fetchPutURL(char *URL, char *flags) f = fetchPut(u, flags); - free(u); + fetchFreeURL(u); return f; } @@ -189,7 +189,7 @@ fetchStatURL(char *URL, struct url_stat *us, char *flags) s = fetchStat(u, us, flags); - free(u); + fetchFreeURL(u); return s; } @@ -207,7 +207,7 @@ fetchListURL(char *URL, char *flags) ue = fetchList(u, flags); - free(u); + fetchFreeURL(u); return ue; } @@ -282,19 +282,13 @@ fetchParseURL(char *URL) nohost: /* document */ - if (*p) { - struct url *t; - t = realloc(u, sizeof *u + strlen(p) - 1); - if (t == NULL) { - errno = ENOMEM; - _fetch_syserr(); - goto ouch; - } - u = t; - strcpy(u->doc, p); - } else { - u->doc[0] = '/'; - u->doc[1] = 0; + if (!*p) + p = "/"; + + if ((u->doc = strdup(p)) 
== NULL) { + errno = ENOMEM; + _fetch_syserr(); + goto ouch; } DEBUG(fprintf(stderr, @@ -313,3 +307,13 @@ ouch: free(u); return NULL; } + +/* + * Free a URL + */ +void +fetchFreeURL(struct url *u) +{ + free(u->doc); + free(u); +} diff --git a/lib/libfetch/fetch.h b/lib/libfetch/fetch.h index e2375156e84c..02df28a5df30 100644 --- a/lib/libfetch/fetch.h +++ b/lib/libfetch/fetch.h @@ -40,14 +40,14 @@ #define URL_PWDLEN 256 struct url { - off_t offset; - size_t length; char scheme[URL_SCHEMELEN+1]; char user[URL_USERLEN+1]; char pwd[URL_PWDLEN+1]; char host[MAXHOSTNAMELEN+1]; int port; - char doc[2]; + char *doc; + off_t offset; + size_t length; }; struct url_stat { @@ -81,7 +81,6 @@ int fetchStatFTP(struct url *, struct url_stat *, char *); struct url_ent *fetchListFTP(struct url *, char *); /* Generic functions */ -struct url *fetchParseURL(char *); FILE *fetchGetURL(char *, char *); FILE *fetchPutURL(char *, char *); int fetchStatURL(char *, struct url_stat *, char *); @@ -91,6 +90,10 @@ FILE *fetchPut(struct url *, char *); int fetchStat(struct url *, struct url_stat *, char *); struct url_ent *fetchList(struct url *, char *); +/* URL parsing */ +struct url *fetchParseURL(char *); +void fetchFreeURL(struct url *); + /* Last error code */ extern int fetchLastErrCode; extern int fetchTimeout; diff --git a/lib/libfetch/http.c b/lib/libfetch/http.c index ce57bfa56e73..cfc710243ddb 100644 --- a/lib/libfetch/http.c +++ b/lib/libfetch/http.c @@ -64,11 +64,13 @@ #include #include +#include #include #include #include #include #include +#include #include #include "fetch.h" @@ -292,25 +294,19 @@ _http_auth(char *usr, char *pwd) } /* - * Retrieve a file by HTTP + * Connect to server or proxy */ FILE * -fetchGetHTTP(struct url *URL, char *flags) +_http_connect(struct url *URL, char *flags) { - int sd = -1, e, i, enc = ENC_NONE, direct, verbose; - struct cookie *c; - char *ln, *p, *px, *q; - FILE *f, *cf; + int direct, sd = -1, verbose; size_t len; - off_t pos = 0; - + char *px; 
+ FILE *f; + direct = (flags && strchr(flags, 'd')); verbose = (flags && strchr(flags, 'v')); - /* allocate cookie */ - if ((c = calloc(1, sizeof *c)) == NULL) - return NULL; - /* check port */ if (!URL->port) { struct servent *se; @@ -374,20 +370,40 @@ fetchGetHTTP(struct url *URL, char *flags) /* reopen as stream */ if ((f = fdopen(sd, "r+")) == NULL) goto ouch; - c->real_f = f; + + return f; +ouch: + if (sd >= 0) + close(sd); + _http_seterr(999); /* XXX do this properly RSN */ + return NULL; +} + +/* + * Send a HEAD or GET request + */ +int +_http_request(FILE *f, char *op, struct url *URL, char *flags) +{ + int e, verbose; + char *ln, *p; + size_t len; + + verbose = (flags && strchr(flags, 'v')); + /* send request (proxies require absolute form, so use that) */ if (verbose) _fetch_info("requesting http://%s:%d%s", URL->host, URL->port, URL->doc); - _http_cmd(f, "GET http://%s:%d%s HTTP/1.1" ENDL, - URL->host, URL->port, URL->doc); + _http_cmd(f, "%s %s://%s:%d%s HTTP/1.1" ENDL, + op, URL->scheme, URL->host, URL->port, URL->doc); /* start sending headers away */ if (URL->user[0] || URL->pwd[0]) { char *auth_str = _http_auth(URL->user, URL->pwd); if (!auth_str) - goto fouch; + return 999; /* XXX wrong */ _http_cmd(f, "Authorization: Basic %s" ENDL, auth_str); free(auth_str); } @@ -399,7 +415,7 @@ fetchGetHTTP(struct url *URL, char *flags) /* get response */ if ((ln = fgetln(f, &len)) == NULL) - goto fouch; + return 999; DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n", (int)len-2, (int)len-2, ln)); @@ -410,9 +426,55 @@ fetchGetHTTP(struct url *URL, char *flags) while ((p < ln + len) && !isdigit(*p)) p++; if (!isdigit(*p)) - goto fouch; + return 999; + e = atoi(p); DEBUG(fprintf(stderr, "code: [\033[1m%d\033[m]\n", e)); + return e; +} + +/* + * Check a header line + */ +char * +_http_match(char *str, char *hdr) +{ + while (*str && *hdr && tolower(*str++) == tolower(*hdr++)) + /* nothing */; + if (*str || *hdr != ':') + return NULL; + while (*hdr && 
isspace(*++hdr)) + /* nothing */; + return hdr; +} + +/* + * Retrieve a file by HTTP + */ +FILE * +fetchGetHTTP(struct url *URL, char *flags) +{ + int e, enc = ENC_NONE, i, verbose; + struct cookie *c; + char *ln, *p, *q; + FILE *f, *cf; + size_t len; + off_t pos = 0; + + verbose = (flags && strchr(flags, 'v')); + + /* allocate cookie */ + if ((c = calloc(1, sizeof *c)) == NULL) + return NULL; + + /* connect */ + if ((f = _http_connect(URL, flags)) == NULL) { + free(c); + return NULL; + } + c->real_f = f; + + e = _http_request(f, "GET", URL, flags); /* add code to handle redirects later */ if (e != (URL->offset ? HTTP_PARTIAL : HTTP_OK)) { @@ -426,49 +488,33 @@ fetchGetHTTP(struct url *URL, char *flags) goto fouch; if ((ln[0] == '\r') || (ln[0] == '\n')) break; - DEBUG(fprintf(stderr, "header: [\033[1m%*.*s\033[m]\n", - (int)len-2, (int)len-2, ln)); -#define XFERENC "Transfer-Encoding:" - if (strncasecmp(ln, XFERENC, sizeof XFERENC - 1) == 0) { - p = ln + sizeof XFERENC - 1; - while ((p < ln + len) && isspace(*p)) - p++; - for (q = p; (q < ln + len) && !isspace(*q); q++) + while (isspace(ln[len-1])) + --len; + ln[len] = '\0'; /* XXX */ + DEBUG(fprintf(stderr, "header: [\033[1m%s\033[m]\n", ln)); + if ((p = _http_match("Transfer-Encoding", ln)) != NULL) { + for (q = p; *q && !isspace(*q); q++) /* VOID */ ; *q = 0; if (strcasecmp(p, "chunked") == 0) enc = ENC_CHUNKED; - DEBUG(fprintf(stderr, "xferenc: [\033[1m%s\033[m]\n", p)); -#undef XFERENC -#define CONTTYPE "Content-Type:" - } else if (strncasecmp(ln, CONTTYPE, sizeof CONTTYPE - 1) == 0) { - p = ln + sizeof CONTTYPE - 1; - while ((p < ln + len) && isspace(*p)) - p++; - for (i = 0; p < ln + len; p++) - if (i < HTTPCTYPELEN) - c->content_type[i++] = *p; + DEBUG(fprintf(stderr, "transfer encoding: [\033[1m%s\033[m]\n", p)); + } else if ((p = _http_match("Content-Type", ln)) != NULL) { + for (i = 0; *p && i < HTTPCTYPELEN; p++, i++) + c->content_type[i] = *p; do c->content_type[i--] = 0; while 
(isspace(c->content_type[i])); - DEBUG(fprintf(stderr, "conttype: [\033[1m%s\033[m]\n", + DEBUG(fprintf(stderr, "content type: [\033[1m%s\033[m]\n", c->content_type)); -#undef CONTTYPE -#define CONTRANGE "Content-Range:" -#define BYTES "bytes " - } else if (strncasecmp(ln, CONTRANGE, sizeof CONTRANGE - 1) == 0) { - p = ln + sizeof CONTRANGE - 1; - while ((p < ln + len) && isspace(*p)) - p++; - if (strncasecmp(p, BYTES, sizeof BYTES - 1) != 0 - || (p += 6) >= ln + len) + } else if ((p = _http_match("Content-Range", ln)) != NULL) { + if (strncasecmp(p, "bytes ", 6) != 0) goto fouch; - while ((p < ln + len) && isdigit(*p)) + p += 6; + while (*p && isdigit(*p)) pos = pos * 10 + (*p++ - '0'); /* XXX wouldn't hurt to be slightly more paranoid here */ - DEBUG(fprintf(stderr, "contrange: [\033[1m%lld-\033[m]\n", pos)); + DEBUG(fprintf(stderr, "content range: [\033[1m%lld-\033[m]\n", pos)); if (pos > URL->offset) goto fouch; -#undef BYTES -#undef CONTRANGE } } @@ -488,12 +534,6 @@ fetchGetHTTP(struct url *URL, char *flags) return cf; -ouch: - if (sd >= 0) - close(sd); - free(c); - _http_seterr(999); /* XXX do this properly RSN */ - return NULL; fouch: fclose(f); free(c); @@ -516,10 +556,61 @@ fetchPutHTTP(struct url *URL, char *flags) * Get an HTTP document's metadata */ int -fetchStatHTTP(struct url *url, struct url_stat *us, char *flags) +fetchStatHTTP(struct url *URL, struct url_stat *us, char *flags) { - warnx("fetchStatHTTP(): not implemented"); - return -1; + int e, verbose; + size_t len; + char *ln, *p; + FILE *f; + + verbose = (flags && strchr(flags, 'v')); + + /* connect */ + if ((f = _http_connect(URL, flags)) == NULL) + return -1; + + if ((e = _http_request(f, "HEAD", URL, flags)) != HTTP_OK) { + _http_seterr(e); + goto ouch; + } + + while (1) { + if ((ln = fgetln(f, &len)) == NULL) + goto fouch; + if ((ln[0] == '\r') || (ln[0] == '\n')) + break; + while (isspace(ln[len-1])) + --len; + ln[len] = '\0'; /* XXX */ + DEBUG(fprintf(stderr, "header: 
[\033[1m%s\033[m]\n", ln)); + if ((p = _http_match("Last-Modified", ln)) != NULL) { + struct tm tm; + char locale[64]; + + strncpy(locale, setlocale(LC_TIME, NULL), sizeof locale); + setlocale(LC_TIME, "C"); + strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm); + /* XXX should add support for date-2 and date-3 */ + setlocale(LC_TIME, locale); + us->atime = us->mtime = timegm(&tm); + DEBUG(fprintf(stderr, "last modified: [\033[1m%04d-%02d-%02d " + "%02d:%02d:%02d\033[m]\n", + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec)); + } else if ((p = _http_match("Content-Length", ln)) != NULL) { + us->size = 0; + while (*p && isdigit(*p)) + us->size = us->size * 10 + (*p++ - '0'); + DEBUG(fprintf(stderr, "content length: [\033[1m%lld\033[m]\n", us->size)); + } + } + + return 0; + ouch: + _http_seterr(999); /* XXX do this properly RSN */ + fouch: + fclose(f); + return -1; } /*