/*
 * routines for maintaining a cache of HTTP-accessible resources
 *
 * error reporting: function return values
 * error messages: print to stderr
 *
 * dependencies: most of netbuild
 *
 * todo:
 * [ ] pass verbosity as an extra parameter instead of using a global
 */

#include "conf.h"

#if HAVE_LIBGEN_H
#include <libgen.h>
#endif
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include "basename.h"
#include "cache.h"
#include "field.h"
#include "global.h"
#include "httpget.h"
#include "md5.h"
#include "uri.h"
#include "pathnames.h"


/*
 * skip leading whitespace
 *
 * returns a pointer to the first character of 's' for which isspace()
 * is false (this may be the terminating NUL), or NULL if 's' is NULL.
 * no copy is made: the result points into the caller's buffer.
 *
 * the argument is 'unsigned char' so that each character can be handed
 * to isspace() directly.
 */

static unsigned char *
firstnonblank (unsigned char *s)
{
    if (s == NULL)
	return NULL;
    while (isspace (*s))
	++s;
    return s;
}

/*
 * get the contents of the last-modified field
 * from 822-style fields stored in 'filename'
 *
 * only the first matching field is used.  leading whitespace and one
 * trailing newline (LF, optionally preceded by CR) are removed from
 * the field body.
 *
 * returns NULL if the file cannot be opened, if it has no
 * last-modified field, or if memory runs out.
 *
 * NB: result is malloc'ed
 */

static char *
get_date_from_file (char *filename)
{
    FILE *fp;
    char buf[1024];
    char *result = NULL;

    if ((fp = fopen (filename, "r")) == NULL)
	return NULL;
    while (get_header_field (buf, sizeof (buf), fp) != NULL) {
	char *body;
	size_t len;

	if (!match_field_name ("last-modified", buf))
	    continue;

	/* defensive: don't hand a null body to strdup() */
	body = (char *) field_body (buf);
	if (body != NULL) {
	    while (isspace ((unsigned char) *body))
		++body;
	    result = strdup (body);
	}
	if (result != NULL) {
	    len = strlen (result);
	    if (len > 0 && result[len-1] == '\n')
		result[--len] = '\0';
	    if (len > 0 && result[len-1] == '\r')
		result[--len] = '\0';
	}
	break;			/* first match wins */
    }
    fclose (fp);
    return result;
}

/*
 * map a URI to a string that is safe to use as a file name
 *
 * the URI is passed through canonicalize_uri() first, so that
 * different spellings of equivalent URIs should produce the same hash;
 * the MD5 digest of the canonical form is then written out as 32
 * lower-case hex digits.
 *
 * XXX hex representation of the hash is not space-efficient but it works
 * on OSes where filenames are case-independent.
 *
 * NB: return value is static and overwritten on subsequent calls
 */

char *
hash_uri (char *uri)
{
    static char hexdigits[]= "0123456789abcdef";
    static char hexbuf[16 * 2 + 1];
    unsigned char digest[16];
    MD5_CTX ctx;
    char *canon;
    int canon_len;
    int n;

    /* digest the canonical spelling; the canonical copy is freed here */
    canon = canonicalize_uri (uri);
    canon_len = strlen (canon);
    MD5Init (&ctx);
    MD5Update (&ctx, canon, canon_len);
    MD5Final (digest, &ctx);
    free (canon);

    /* two hex digits per digest byte, high nibble first */
    for (n = 0; n < 16; ++n) {
	hexbuf[2 * n] = hexdigits[(digest[n] >> 4) & 0xf];
	hexbuf[2 * n + 1] = hexdigits[digest[n] & 0xf];
    }
    hexbuf[2 * 16] = '\0';
    return hexbuf;
}


/*
 * given a URI, determine the directory where it will be cached
 * (multiple files will be stored in that directory, not just
 * the resource associated with the URI but also extracted
 * metadata, signatures, etc.)
 *
 * the directory is
 *	$HOME/.netbuild/cache/<first two hash digits>/<hash>
 * where <hash> is hash_uri (uri).  if HOME is not set, "." is used
 * in its place rather than passing a null pointer to the formatter.
 * a name that does not fit in the buffer is truncated, never
 * written past the end.
 *
 * NB: result is statically allocated and will be overwritten on
 * subsequent calls
 */

static char *
cached_directory_name (char *uri) 
{
    static char result[1024];
    const char *home = getenv ("HOME");
    char *hash = hash_uri (uri);

    if (home == NULL)
	home = ".";
    snprintf (result, sizeof (result), "%s/.netbuild/cache/%2.2s/%s",
	      home, hash, hash);
    return result;
}

/*
 * return the suffix of a filename or uri
 *
 * the suffix is looked for in the last path component only (as
 * reported by basename()); it starts at the FIRST '.' of that
 * component and runs to the end, so "x/archive.tar.gz" yields
 * ".tar.gz".
 *
 * an empty string is returned if the last component has no '.',
 * or if basename() returns NULL.
 *
 * XXX duplicated elsewhere?
 */

static char *
get_suffix (char *uri)
{
    char *leaf = basename (uri);
    char *first_dot = (leaf != NULL) ? strchr (leaf, '.') : NULL;

    if (first_dot == NULL)
	return "";
    return first_dot;
}


extern int verbosity;

/*
 * move the file 'src' to 'dst' without involving the shell
 *
 * rename() is tried first; if that fails (for instance because the
 * two names are on different filesystems) the contents are copied
 * and 'src' is removed.
 *
 * returns 0 on success, -1 on failure.  on failure a partial 'dst'
 * may be left behind; the caller is expected to remove it.
 */

static int
cache_move_file (const char *src, const char *dst)
{
    FILE *in;
    FILE *out;
    char chunk[8192];
    size_t got;
    int ok = 1;

    if (rename (src, dst) == 0)
	return 0;

    if ((in = fopen (src, "rb")) == NULL)
	return -1;
    if ((out = fopen (dst, "wb")) == NULL) {
	fclose (in);
	return -1;
    }
    while ((got = fread (chunk, 1, sizeof (chunk), in)) > 0) {
	if (fwrite (chunk, 1, got, out) != got) {
	    ok = 0;
	    break;
	}
    }
    if (ferror (in))
	ok = 0;
    fclose (in);
    /* fclose flushes, so a failure here means the copy is incomplete */
    if (fclose (out) != 0)
	ok = 0;
    if (!ok)
	return -1;
    unlink (src);
    return 0;
}

/*
 * download a file from the net.
 * if we already have it in cache and the server says it is unchanged
 * (or cannot be asked right now), don't bother downloading it again.
 *
 * 'uri' may carry a '#fragment': the part before the last '#' is what
 * gets fetched, and the suffix of the cached file name is taken from
 * the fragment when there is one, otherwise from the URI itself.
 * 'uri' is not modified.
 *
 * 'libname' is only used in the verbose trace.
 *
 * the file is first downloaded to a temporary name in
 * $HOME/.netbuild/temp and then moved into the cache directory;
 * the temporary file is removed on every exit path.
 *
 * return the name of the cached copy of the file, or NULL on failure
 * (HOME unset, out of memory, path name too long, download error,
 * or failure to install the file in the cache).
 * NB: result is malloc'ed.
 */

char *
download_file_via_cache (char *uri, char *libname)
{
    char fn_file[1024];
    char fn_last_mod_date[1024];
    char fn_tempfile[1024];
    char newdatestr[1024];
    char hostname[1024];
    char tempdirname[1024];
    char *cachedir;
    char *home;
    char *hashmark;
    char *suffix;
    char *base_uri = NULL;
    char *suffix_src = NULL;
    char *datestr = NULL;
    char *result = NULL;
    int tempfile_named = 0;
    int n;
    time_t bindate;
    struct tm *tm;
    FILE *fp;

    if (uri == NULL)
	return NULL;

    if (verbosity > 0)
	fprintf (stderr, "[download_file_via_cache (%s, %s)]\n", uri,
		 libname ? libname : "(null)");

    /* every path below is rooted at $HOME */
    home = getenv ("HOME");
    if (home == NULL) {
	if (verbosity > 0)
	    fprintf (stderr, "[HOME is not set; cannot locate cache]\n");
	return NULL;
    }

    /*
     * separate URI into base and fragment portions
     *
     * the suffix is computed from a private copy ('suffix_src')
     * because basename() is allowed to modify its argument; neither
     * the caller's string nor the URI we download may be altered.
     */
    hashmark = strrchr (uri, '#');
    if (hashmark != NULL) {
	size_t baselen = (size_t) (hashmark - uri);

	base_uri = malloc (baselen + 1);
	if (base_uri != NULL) {
	    memcpy (base_uri, uri, baselen);
	    base_uri[baselen] = '\0';
	}
	suffix_src = strdup (hashmark + 1);
    }
    else {
	base_uri = strdup (uri);
	suffix_src = strdup (uri);
    }
    if (base_uri == NULL || suffix_src == NULL) {
	if (verbosity > 0)
	    fprintf (stderr, "[out of memory]\n");
	goto done;
    }
    /* 'suffix' may point into 'suffix_src', which lives until 'done' */
    suffix = get_suffix (suffix_src);

    /* 
     * figure out where the downloaded file will be stored
     * (the cache directory is keyed on the full URI, fragment included)
     */
    cachedir = cached_directory_name (uri);
    n = snprintf (fn_file, sizeof (fn_file), "%s/file%s", cachedir, suffix);
    if (n < 0 || (size_t) n >= sizeof (fn_file))
	goto name_too_long;
    n = snprintf (fn_last_mod_date, sizeof (fn_last_mod_date),
		  "%s/last-mod-date", cachedir);
    if (n < 0 || (size_t) n >= sizeof (fn_last_mod_date))
	goto name_too_long;

    /* 
     * concoct a temp file name in $HOME/.netbuild/temp
     * to be used to hold the file during download
     *
     * create the user's netbuild temp directory if necessary
     */
    time (&bindate);
    tm = gmtime (&bindate);
    if (tm == NULL) {
	if (verbosity > 0)
	    fprintf (stderr, "[cannot determine current time]\n");
	goto done;
    }
    if (gethostname (hostname, sizeof (hostname)) < 0)
	strcpy (hostname, "localhost");
    /* gethostname() need not terminate a truncated name */
    hostname[sizeof (hostname) - 1] = '\0';

    n = snprintf (tempdirname, sizeof (tempdirname), "%s/.netbuild/temp",
		  home);
    if (n < 0 || (size_t) n >= sizeof (tempdirname))
	goto name_too_long;
    n = snprintf (fn_tempfile, sizeof (fn_tempfile),
		  "%s/%s%04d%02d%02d%02d%02d%02d.%d",
		  tempdirname, hostname, tm->tm_year + 1900,
		  tm->tm_mon+1, tm->tm_mday, tm->tm_hour, tm->tm_min,
		  tm->tm_sec, (int) getpid ());
    if (n < 0 || (size_t) n >= sizeof (fn_tempfile))
	goto name_too_long;
    tempfile_named = 1;

    if (access (tempdirname, X_OK) < 0)
	mkdir_recursive (tempdirname, 0700);

    /*
     * if the file itself isn't present, it doesn't matter
     * whether the metadata has the right date or not
     */
    if (access (fn_file, R_OK) == 0)
	datestr = get_date_from_file (fn_last_mod_date);

    if (datestr != NULL) {
	if (verbosity > 0)
	    fprintf (stderr, "[cached %s last modified %s]\n", 
		     base_uri, datestr);
    }
    else {
	if (verbosity > 0) 
	    fprintf (stderr, "[%s: not in cache]\n", base_uri);
	mkdir_recursive (cachedir, 0700);
    }

    /*
     * download and extract only if cached version is stale
     *
     * 'datestr' is the last modified date of the cached file
     * 'newdatestr' gets the last modified date of the new file
     * (pre-cleared so that it is never read uninitialized below)
     */
    newdatestr[0] = '\0';
    switch (http_get_if_modified_since (fn_tempfile, base_uri, datestr,
					newdatestr)) {
    case HTTP_SUCCESS:
	break;
    case HTTP_NOT_MODIFIED:
	if (verbosity > 0) {
	    fprintf (stderr, "[%s unchanged; using cached version]\n",
		     base_uri);
	}
	goto use_cached_library;
    case HTTP_TEMPFAIL:
	if (verbosity > 0)
	    fprintf (stderr, "[couldn't verify current date of %s]\n",
		     base_uri);
	/* fall back on whatever copy is already in the cache */
	if (access (fn_file, R_OK) == 0) {
	    if (verbosity > 0) 
		fprintf (stderr, "[using previously cached copy]\n");
	    goto use_cached_library;
	}
	goto done;
    case HTTP_ERROR:
	if (verbosity > 0)
	    fprintf (stderr, "[cannot download %s]\n", base_uri);
	goto done;
    default:
	/*
	 * any other status is handled like success, as before;
	 * if nothing was downloaded the move below fails.
	 */
	break;
    }    

    /*
     * move downloaded file to new location
     */
    if (access (cachedir, W_OK|X_OK) < 0) 
 	mkdir_recursive (cachedir, 0711);
    if (cache_move_file (fn_tempfile, fn_file) != 0) {
	if (verbosity > 0)
	    fprintf (stderr, "[error copying downloaded file %s to cache]\n",
		     fn_tempfile);
	unlink (fn_file);
	unlink (fn_last_mod_date);
	goto done;
    }

    /*
     * update metadata
     *
     * without a usable date the old metadata is removed, so that a
     * stale date is never associated with the new file
     */
    if (*newdatestr && (fp = fopen (fn_last_mod_date, "w")) != NULL) {
	int wrote = fprintf (fp, "Last-Modified: %s\n", newdatestr) >= 0;

	if (fclose (fp) != 0 || !wrote)
	    unlink (fn_last_mod_date);
    }
    else {
	unlink (fn_last_mod_date);
    }

 use_cached_library:
    result = strdup (fn_file);
    goto done;

 name_too_long:
    if (verbosity > 0)
	fprintf (stderr, "[cache path name too long for %s]\n", uri);

 done:
    if (tempfile_named)
	unlink (fn_tempfile);
    free (datestr);
    free (suffix_src);
    free (base_uri);
    return result;
}

#ifdef TEST
int verbosity = 1;

/*
 * test driver: cache.test uri [libname]
 *
 * downloads 'uri' through the cache and prints the name of the
 * cached file on stderr (or, with the #if below flipped, prints
 * the hash of 'uri').
 *
 * exit status is 0 on success, 1 on usage error or failed download.
 */

int
main (int argc, char **argv)
{
    char *result;

    if (argc < 2) {
	fprintf (stderr, "usage: %s uri [libname]\n",
		 argc > 0 ? argv[0] : "cache");
	return 1;
    }

#if 1
    result = download_file_via_cache (argv[1], argc > 2 ? argv[2] : "");
    if (result == NULL) {
	fprintf (stderr, "download failed\n");
	return 1;
    }
    fprintf (stderr, "%s\n", result);
    free (result);		/* result is malloc'ed */
#else
    result = hash_uri (argv[1]);	/* static storage, not freed */
    fprintf (stderr, "%s\n", result);
#endif
    return 0;
}
#endif
