/*
 * routines for downloading a file using http
 *
 * error reporting: function return values
 * error messages: none
 *
 * debugging: output to stdio stream 'dfp' if non-null
 *
 * dependencies: string package, field package
 *
 * todo:
 * [ ] support IPv6
 * [ ] support HTTP 1.1
 */

#include "conf.h"

#include <stdio.h>
#ifdef STDC_HEADERS
#include <stdlib.h>
#include <string.h>
#else
char *strdup ();
#endif
#include <ctype.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>

#include "field.h"
#include "httpget.h"
#include "str.h"

#ifndef HAVE_MEMCPY
#  define memcpy(d, s, n) bcopy ((s), (d), (n))
#  define memmove(d, s, n) bcopy ((s), (d), (n))
#endif

#ifndef HAVE_STRDUP
char *strdup ();
#endif

/*
 * parse a URL of the form http://host[:port][/tail] into 'host',
 * 'port' and 'tail'
 *
 * host receives the NUL-terminated host name
 * port receives the port number; 80 if the URL does not name one
 *      (this includes an empty port, as in "http://host:/x")
 * tail receives the NUL-terminated remainder of the URL starting at
 *      the '/' that follows host[:port], or "/" if the URL ends there
 *
 * return 0 on success, -1 on failure (NULL url, URL not starting with
 * "http://", empty host name, port above 65535, or host[:port]
 * followed by something other than '/' or the end of the string)
 *
 * on failure the contents of host, port and tail are unspecified
 */

int
http_parse_url (url, host, port, tail)
char *url;
struct string *host;
int *port;
struct string *tail;
{
    long portnum;
    int ndigits;

    if (url == NULL || strncmp (url, "http://", 7) != 0)
	return -1;
    url += 7;

    /* an empty host name can never be looked up, so reject it here */
    if (*url == '\0' || *url == '/' || *url == ':')
	return -1;

    str_zero (host);
    while (*url && *url != '/' && *url != ':') {
	STR_APPEND (host, *url);
	++url;
    }
    STR_APPEND (host, '\0');

    *port = 80;
    if (*url == ':') {
	++url;
	portnum = 0;
	ndigits = 0;
	/*
	 * cast before calling isdigit(): a negative plain char is not
	 * a valid argument.  stop as soon as the value leaves the
	 * 16-bit port range so the accumulator cannot overflow.
	 */
	while (isdigit ((unsigned char) *url)) {
	    portnum = portnum * 10 + (*url - '0');
	    if (portnum > 65535)
		return -1;
	    ++url;
	    ++ndigits;
	}
	if (ndigits > 0)
	    *port = (int) portnum;
    }

    str_zero (tail);
    if (*url == '\0') {
	/* no path given: ask for the root */
	STR_APPEND (tail, '/');
    }
    else if (*url == '/') {
	while (*url) {
	    STR_APPEND (tail, *url);
	    ++url;
	}
    }
    else {
	return -1;
    }
    STR_APPEND (tail, '\0');
    return 0;
}

/*
 * return a pointer to the first character of 's' that isspace() does
 * not classify as whitespace; this is the terminating '\0' when 's'
 * consists only of whitespace.  a NULL 's' yields NULL.
 */

static char *
firstnonspace (s)
char *s;
{
    if (s == NULL)
	return NULL;
    /*
     * convert to unsigned char first: handing isspace() a negative
     * plain char (any byte >= 0x80 where char is signed) is undefined
     */
    while (isspace ((unsigned char) *s))
	++s;
    return s;
}

/*
 * stream that receives debugging output; no debugging output is
 * produced while this is NULL (its initial value)
 */
static FILE *dfp;

/*
 * select the stdio stream that debugging output is written to;
 * pass NULL to turn debugging output off
 *
 * the return type used to be an implicit int with no value returned;
 * it is now spelled out and the function always returns 0
 */

int
http_set_debug_output (fp)
FILE *fp;
{
    dfp = fp;
    return 0;
}

/*
 * write the caller-supplied header fields to 'fp'
 *
 * headers is a NULL-terminated array of strings, or NULL for "none";
 * each string is written on a line of its own, preceded by 'prefix'
 * (nothing if prefix is NULL) and followed by CRLF
 */

static void
send_headers (fp, headers, prefix)
FILE *fp;
char **headers;
char *prefix;
{
    char **field;
    char *lead;

    if (headers == NULL)
	return;

    lead = (prefix != NULL) ? prefix : "";
    for (field = headers; *field != NULL; ++field)
	fprintf (fp, "%s%s\r\n", lead, *field);
}

/*
 * write one HTTP/1.0 GET request for 'target' to 'fp'
 *
 * every line written starts with 'prefix': "" for the copy that goes
 * to the server, ">> " for the copy that goes to the debugging stream.
 * a Host: field is written only when 'hostname' is non-NULL.
 * 'headers' is handed to send_headers() unchanged.
 */

static void
send_request (fp, prefix, target, hostname, headers)
FILE *fp;
char *prefix;
char *target;
char *hostname;
char **headers;
{
    fprintf (fp, "%sGET %s HTTP/1.0\r\n", prefix, target);
    if (hostname != NULL)
	fprintf (fp, "%sHost: %s\r\n", prefix, hostname);
    send_headers (fp, headers, prefix);
    fprintf (fp, "%s\r\n", prefix);
}

/*
 * extract the status code from an HTTP status line
 *
 * the code is the second whitespace-separated token of 'line';
 * returns its value, or -1 if that token does not start with
 * three digits
 */

static int
status_code (line)
char *line;
{
    char *p = line;

    /*
     * skip the protocol version.  stop at '\0' as well as at
     * whitespace, so that a line containing no whitespace at all
     * cannot run the scan off the end of the buffer.
     */
    while (*p != '\0' && !isspace ((unsigned char) *p))
	++p;
    while (isspace ((unsigned char) *p))
	++p;
    if (!isdigit ((unsigned char) p[0])
	|| !isdigit ((unsigned char) p[1])
	|| !isdigit ((unsigned char) p[2]))
	return -1;
    return atoi (p);
}

/*
 * download a file using HTTP GET
 *
 * url is the URL to load
 * level is the recursion level (for redirects); the call fails once
 *       it exceeds 10
 * proxy is the URL of a local proxy (or NULL if not needed)
 * headers are extra request headers to send: a NULL-terminated array
 *       of strings without line terminators, or NULL for none
 * callback, if non-NULL, gets called as (*callback) (line, context)
 *       for the status line and for each response header field of
 *       every response received, including those of redirects;
 *       its return value is ignored
 * context is passed as an argument for callback
 *
 * each address of the host (or proxy) is tried in turn.  a redirect
 * or "use proxy" response is followed by a recursive call; a response
 * that looks like a server malfunction makes us fail over to the next
 * address; any other failure ends the operation.
 *
 * returns an open stream on success, or NULL on error.  the stream is
 * positioned just past the blank line that ends the response headers.
 * a 304 response to a request carrying an If-Modified-Since header
 * also counts as success.
 */

FILE *
http_get_generic (url, level, proxy, headers, callback, context)
char *url;
int level;
char *proxy;
char **headers;
int (*callback)();
void *context;
{
    struct string host, tail;
    int port;
    struct hostent *hp;
    struct sockaddr_in addr;
    char buf[10240];		/* response header buffer */
    char *location = NULL;	/* Location: response header */
    struct in_addr *addrs = NULL;	/* private copy of host addresses */
    FILE *result = NULL;	/* what 'done' returns */
    int fd, fd2;
    FILE *ifp, *ofp;
    int naddrs;
    int code;
    int i, j;

    if (dfp)
	fprintf (dfp, "## http_get_generic (%s, %d, %s)\n",
		 url, level, proxy ? proxy : "(null)");

    if (level > 10)
	return NULL;

    /*
     * from here on every exit goes through 'done', which releases
     * host, tail, addrs and location.
     * NOTE(review): this assumes str_free() accepts a string that has
     * only been through str_init() (http_parse_url can fail before it
     * touches host or tail) - confirm against the string package.
     */
    str_init (&host);
    str_init (&tail);

    /*
     * step 1: extract host, port, file
     * when a proxy is in use it is the proxy we connect to
     */
    if (http_parse_url (proxy ? proxy : url, &host, &port, &tail) < 0)
	goto done;

    /*
     * step 2: lookup host ip address
     * copy it to a private array so it won't get stomped on
     * during recursive calls
     */
    if ((hp = gethostbyname (host.base)) == NULL) {
	if (dfp)
	    fprintf (dfp, "## gethostbyname(%s) failed\n", host.base);
	goto done;
    }
    for (naddrs = 0; hp->h_addr_list[naddrs] != NULL; ++naddrs)
	;
    if (naddrs == 0)
	goto done;
    addrs = (struct in_addr *) malloc (naddrs * sizeof (struct in_addr));
    if (addrs == NULL)
	goto done;
    for (i = 0; i < naddrs; ++i)
	memcpy ((char *) &addrs[i], hp->h_addr_list[i],
		sizeof (struct in_addr));

    /* clear the padding and any platform-specific members once */
    memset ((char *) &addr, 0, sizeof (addr));

    /*
     * try each host address in sequence
     */
    for (i = 0; i < naddrs; ++i) {
	/*
	 * step 3: open up connection to the host
	 *
	 * XXX need to support IPv6
	 */
	addr.sin_family = AF_INET;
	addr.sin_port = htons (port);
	memcpy ((char *) &addr.sin_addr, (char *) &addrs[i],
		sizeof (addr.sin_addr));
	if (dfp)
	    fprintf (dfp, "## trying to connect to %s:%d\n",
		     inet_ntoa (addr.sin_addr), ntohs (addr.sin_port));

	if ((fd = socket (AF_INET, SOCK_STREAM, 0)) < 0)
	    goto done;
	if (connect (fd, (struct sockaddr *) &addr, sizeof (addr)) < 0) {
	    /* don't leak one descriptor per unreachable address */
	    close (fd);
	    continue;
	}

	/*
	 * open a read-only stdio stream on the socket so that we can
	 * use the buffered read routines, and a second, write-only
	 * stream on a duplicate descriptor so that we can use the
	 * formatted print routines.  closing either stream leaves the
	 * other one usable.
	 */
	if ((ifp = fdopen (fd, "r")) == NULL) {
	    close (fd);
	    goto done;
	}
	if ((fd2 = dup (fd)) < 0) {
	    fclose (ifp);
	    goto done;
	}
	if ((ofp = fdopen (fd2, "w")) == NULL) {
	    close (fd2);
	    fclose (ifp);
	    goto done;
	}

	/*
	 * step 4: send request
	 *
	 * a proxy is given the complete URL; an origin server is given
	 * the path plus a Host: field
	 *
	 * XXX need to handle %-encoding of URL?
	 * XXX need to support HTTP/1.1?
	 * XXX what other request header fields should we supply?
	 */
	if (proxy) {
	    send_request (ofp, "", url, (char *) NULL, headers);
	    if (dfp)
		send_request (dfp, ">> ", url, (char *) NULL, headers);
	}
	else {
	    send_request (ofp, "", tail.base, host.base, headers);
	    if (dfp)
		send_request (dfp, ">> ", tail.base, host.base, headers);
	}
	/*
	 * the write side is finished; fclose() flushes the request
	 * and releases only the duplicate descriptor.  a request that
	 * could not be written is this server's problem, so move on
	 * to the next address.
	 */
	if (fclose (ofp) == EOF) {
	    if (dfp)
		fprintf (dfp, "## treating as server failure\n");
	    fclose (ifp);
	    continue;
	}

	/*
	 * step 5: parse response header
	 */
	if (fgets (buf, sizeof (buf), ifp) == NULL) {
	    if (dfp)
		fprintf (dfp, "premature EOF on HTTP stream\n");
	    fclose (ifp);
	    goto done;
	}
	if (dfp)
	    fprintf (dfp, "<< %s", buf);
	if ((code = status_code (buf)) < 0) {
	    if (dfp)
		fprintf (dfp, "bad format for status line\n");
	    fclose (ifp);
	    goto done;
	}
	if (dfp)
	    fprintf (dfp, "## code = %d\n", code);
	if (callback)
	    (*callback) (buf, context);

	while (get_header_field ((unsigned char *) buf, sizeof (buf), ifp)) {
	    if (dfp)
		fprintf (dfp, "<< %s", buf);
	    if (isblankline (buf))
		break;
	    if (match_field_name ("location", (unsigned char *) buf)) {
		char *body;
		int len;

		body = firstnonspace (field_body ((unsigned char *) buf));
		/* a repeated Location field replaces the earlier one */
		if (location) {
		    free (location);
		    location = NULL;
		}
		if (body != NULL && (location = strdup (body)) != NULL) {
		    len = strlen (location);
		    while (len > 0
			   && isspace ((unsigned char) location[len-1]))
			location[--len] = '\0';
		    if (dfp)
			fprintf (dfp, "## location = %s\n", location);
		}
	    }
	    if (callback)
		(*callback) (buf, context);
	}

	/*
	 * step 6: do something based on response code
	 *
	 * every case either leaves through 'done' or breaks out of the
	 * switch to be treated as a server failure.  host and tail
	 * stay valid until 'done', because the next pass of this loop
	 * needs them to build its request.
	 */
	switch (code) {
	case 200:		/* OK */
	case 203:		/* Non-Authoritative Information */
	    if (dfp)
		fprintf (dfp, "## getting file\n");
	    result = ifp;
	    goto done;

	case 300:		/* Multiple Choices */
	    /*
	     * XXX not sure if anyone actually generates this -
	     * maybe implement it later.
	     * until then, treat as any other redirect
	     */
	case 301:		/* Moved Permanently */
	case 302:		/* Found */
	case 303:		/* See Other */
	case 307:		/* Temporary Redirect */
	    if (dfp)
		fprintf (dfp, "## processing redirect\n");
	    fclose (ifp);
	    if (location != NULL)
		result = http_get_generic (location, level+1, proxy,
					   headers, callback, context);
	    goto done;

	case 304:		/* Not Modified */
	    if (headers != NULL) {
		for (j = 0; headers[j] != NULL; ++j) {
		    if (match_field_name ("if-modified-since", headers[j])) {
			if (dfp)
			    fprintf (dfp, "## file not modified\n");
			result = ifp;
			goto done;
		    }
		}
	    }
	    /*
	     * we're not doing conditional GET, so we shouldn't have
	     * seen a 304 response code.  treat this condition as a
	     * server failure.
	     */
	    break;

	case 305:		/* Use Proxy */
	    if (dfp)
		fprintf (dfp, "## using proxy\n");
	    fclose (ifp);
	    if (location != NULL)
		result = http_get_generic (url, level+1, location,
					   headers, callback, context);
	    goto done;

	/*
	 * re handling of error conditions:
	 * some errors are treated as inaccessible resources
	 * and cause an immediate failure of the GET operation
	 */
	case 400:		/* Bad Request */
	case 401:		/* Unauthorized - not supported yet */
	case 402:		/* Payment Required */
	case 403:		/* Forbidden */
	case 404:		/* Not Found */
	case 405:		/* Method Not Allowed */
	case 407:		/* Proxy Authentication Required */
	case 409:		/* Conflict */
	case 410:		/* Gone */
	case 413:		/* Request Entity Too Large */
	    if (dfp)
		fprintf (dfp, "## failure code\n");
	    fclose (ifp);
	    goto done;

	/*
	 * "shouldn't happen" errors are treated as server failures;
	 * so we fail over to the next server address
	 * (another server might be working just fine)
	 */
	case 100:		/* Continue - no Expect: was sent */
	case 101:		/* Switching Protocols - no Upgrade: was sent */
	case 201:		/* Created */
	case 202:		/* Accepted */
	case 204:		/* No Content */
	case 205:		/* Reset Content */
	case 206:		/* Partial Content - no Range: was sent */
	case 306:		/* Unused (no longer used) */
	case 406:		/* Not Acceptable - no Accept: was sent */
	case 408:		/* Request Time-Out */
	case 411:		/* Length Required */
	case 412:		/* Precondition Failed */
	case 414:		/* Request URI Too Large */
	case 415:		/* Unsupported Media Type */
	case 416:		/* Requested range not available */
	case 417:		/* Expectation Failed */
	case 500:		/* Internal Server Error */
	case 501:		/* Not Implemented */
	case 502:		/* Bad Gateway */
	case 503:		/* Service Unavailable */
	case 504:		/* Gateway Time-Out */
	case 505:		/* HTTP Version not supported */
	default:
	    break;
	}

	/* server failure: drop this connection and try the next address */
	if (dfp)
	    fprintf (dfp, "## treating as server failure\n");
	if (location) {
	    free (location);
	    location = NULL;
	}
	fclose (ifp);
    }

    /*
     * falling out of the loop means no server hosts were reachable,
     * or all returned server errors; result is still NULL
     */

done:
    if (location)
	free (location);
    if (addrs)
	free (addrs);
    str_free (&host);
    str_free (&tail);
    return result;
}

/*
 * download a file using HTTP (simple version)
 *
 * same as http_get_generic() with no extra request headers,
 * no response-header callback and no callback context
 */

FILE *
http_get_internal (url, level, proxy)
char *url;
int level;
char *proxy;
{
    char **no_headers = (char **) NULL;
    int (*no_callback)() = (int (*)()) NULL;
    void *no_context = (void *) NULL;

    return http_get_generic (url, level, proxy,
			     no_headers, no_callback, no_context);
}

/*
 * callback used for http_get_if_modified_since()
 */

struct http_get_if_modified_since_context {
    int code;
    char *date;
};

/*
 * response-header callback for http_get_if_modified_since()
 *
 * buf is one line of the response; vctxt points to a
 * struct http_get_if_modified_since_context.
 * a line starting with "http/" (in any case) is taken to be the
 * status line: the number after its first space is stored in 'code'.
 * for a Last-Modified field, a strdup()ed copy of the field body is
 * stored in 'date'.  every other line is ignored.
 *
 * always returns 0
 */

static int
http_get_if_modified_since_cb (char *buf, void *vctxt)
{
    struct http_get_if_modified_since_context *ctx = vctxt;
    char *sp;

    if (strncasecmp (buf, "http/", 5) != 0) {
	/* not the status line, so it can only be a header field */
	if (match_field_name ("last-modified", buf))
	    ctx->date = strdup (field_body (buf));
	return 0;
    }

    sp = strchr (buf, ' ');
    if (sp != NULL)
	ctx->code = atoi (sp + 1);
    return 0;
}

/*
 * get a file using HTTP only if it was modified since 'datestr'
 *
 * dest_filename is the file that receives the response body
 * source_base_uri is the URL to fetch
 * datestr is the date to send in an If-Modified-Since header,
 *       or NULL to fetch unconditionally
 * newdatestr, if non-NULL, receives the body of the response's
 *       Last-Modified field after a successful download (the empty
 *       string if the response had none).  no size is passed in, so
 *       the caller's buffer has to be large enough for a header
 *       field body.
 *
 * the destination file is opened only after the server has answered
 * with a 2xx status.  a "not modified" answer or a failed request
 * therefore leaves an existing copy of the file untouched, instead of
 * truncating or removing it.  a download that fails part way through
 * removes the incomplete file.
 *
 * return:
 * - HTTP_SUCCESS if the download was successful,
 * - HTTP_NOT_MODIFIED if the file had not changed
 * - HTTP_ERROR on error
 * - -1 if the request itself failed
 *   NOTE(review): this literal -1 is kept from the original code;
 *   confirm whether it is meant to be the same value as HTTP_ERROR
 */

int
http_get_if_modified_since (char *dest_filename, char *source_base_uri,
			    char *datestr, char *newdatestr)
{
    static const char ims_prefix[] = "If-Modified-Since: ";
    FILE *out, *in;
    char *headers[2];
    char hdr[1024];		/* the If-Modified-Since request header */
    char buf[1024];		/* copy buffer for the response body */
    struct http_get_if_modified_since_context context;
    size_t nread;
    int status;

    /* the callback only fills in what the response contains */
    context.code = 0;
    context.date = NULL;

    headers[0] = NULL;
    headers[1] = NULL;
    if (datestr) {
	/* sizeof (ims_prefix) counts the terminating '\0' */
	if (strlen (datestr) + sizeof (ims_prefix) > sizeof (hdr))
	    return HTTP_ERROR;
	sprintf (hdr, "%s%s", ims_prefix, datestr);
	headers[0] = hdr;
    }

    in = http_get_generic (source_base_uri, 0, (char *) NULL,
			   datestr ? headers : (char **) NULL,
			   http_get_if_modified_since_cb,
			   (void *) &context);
    if (in == NULL) {
	if (context.date)
	    free (context.date);
	return -1;
    }

    if (context.code / 100 != 2) {
	/* nothing to store: leave the destination file alone */
	fclose (in);
	if (context.date)
	    free (context.date);
	return context.code == 304 ? HTTP_NOT_MODIFIED : HTTP_ERROR;
    }

    if ((out = fopen (dest_filename, "w")) == NULL) {
	fclose (in);
	if (context.date)
	    free (context.date);
	return HTTP_ERROR;
    }

    status = HTTP_SUCCESS;
    while ((nread = fread (buf, 1, sizeof (buf), in)) > 0) {
	if (fwrite (buf, 1, nread, out) != nread) {
	    status = HTTP_ERROR;
	    break;
	}
    }
    if (ferror (in))
	status = HTTP_ERROR;
    fclose (in);
    /* fclose() flushes, so this is where a full disk would show up */
    if (fclose (out) == EOF)
	status = HTTP_ERROR;

    if (status != HTTP_SUCCESS)
	unlink (dest_filename);
    else if (newdatestr) {
	if (context.date)
	    strcpy (newdatestr, context.date);
	else
	    newdatestr[0] = '\0';
    }
    if (context.date)
	free (context.date);
    return status;
}

#ifdef TEST
#include <sys/stat.h>
#include "arpadate.h"

/*
 * return the modification time of file 'fname' as reported by
 * stat(), or 0 if stat() fails
 */
time_t
modtime (char *fname)
{
    struct stat st;

    return (stat (fname, &st) < 0) ? 0 : st.st_mtime;
}

int verbosity = 1;

main (argc, argv)
int argc;
char **argv;
{
    int c;
    char newdatestr[1024];
    time_t x;
    int result;

    dfp = stderr;
    x = modtime (argv[2]);
    if ((result = http_get_if_modified_since (argv[2], argv[1],
					      x == 0 ? NULL : arpadate (&x),
					      newdatestr)) < 0) {
	fprintf (stderr, "http get failed\n");
	exit (1);
    }
    printf ("result is %d\n", result);
    printf ("new date is %s\n", newdatestr);
}
#endif
