/*
 * tar reading routines
 *
 * modified for netbuild to:
 * - compute MD5 on-the-fly for each file
 * - allow extraction into a different directory
 * - return a list of files extracted
 * - call zlib for input
 *
 * error reporting: function return values
 * error messages: fprintf to stderr
 *
 * NB: these routines for reading tar files are intended to work only
 * on disk files and pipes.  in particular, they will not work on
 * tape devices that cannot read arbitrary numbers of bytes at a time.
 *
 * XXX remove netbuild dependencies: concat, misc.h
 * XXX generalize MD5 handling (via callback?)
 * XXX remove zlib dependencies - instead, pass a file handle and a
 *     function to do reading
 */

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <zlib.h>
#include "concat.h"
#include "md5.h"
#include "misc.h"
#include "tar.h"
#include "tarlib.h"

/*
 * one tar block (512 bytes) of zeros, for comparison against header
 * blocks read from the archive - two blocks of all zeros indicate the
 * end of the archive.  declared as bytes so that the array is exactly
 * one block long, and const because it is never written.
 */

static const unsigned char zeros[512] = { 0 };

/*
 * convert an ASCII octal number to binary notation.
 *
 * leading spaces are skipped, then octal digits ('0'..'7') are
 * accumulated.  conversion stops at the first character that is not an
 * octal digit (NUL, space, or anything else), so a malformed field
 * cannot make the loop spin forever.  's' must be NUL-terminated.
 * returns 0 if no digits are present.
 */

static unsigned long
o2b (char *s)
{
    /* unsigned accumulator: matches the return type and cannot hit
       signed-overflow undefined behavior on oversized input */
    unsigned long result = 0;

    while (*s == ' ')
	++s;
    while (*s >= '0' && *s <= '7')
	result = result * 8 + (unsigned long) (*s++ - '0');
    return result;
}

/*
 * check the checksum of a tar header block.
 *
 * the expected value is the ASCII octal number stored in the header's
 * chksum field.  the computed value is the sum of all 512 bytes of the
 * block, each taken as an unsigned quantity, with the 8 bytes of the
 * chksum field counted as spaces.
 *
 * returns 0 if the two agree; otherwise returns -1, which the unsigned
 * return type delivers to the caller as a nonzero value.
 */

static unsigned int
tar_cksum (unsigned char *buf)
{
    unsigned char scratch[512];
    struct tar_hdr *hdr = (struct tar_hdr *) scratch;
    unsigned int expected;
    unsigned int total = 0;
    unsigned char *p;

    expected = (unsigned int) o2b (((struct tar_hdr *) buf)->chksum);

    /* work on a copy so the caller's block is left untouched */
    memcpy (scratch, buf, sizeof (scratch));
    memset (hdr->chksum, ' ', 8);

    for (p = scratch; p < scratch + sizeof (scratch); ++p)
	total += (*p & 0xff);

    if (total != expected)
	return -1;
    return 0;
}

/*
 * read and decode a tar file header.
 * return -1 on any sort of read error, or if a name in the header
 *           does not fit in 'tarmem',
 *        -2 if the checksum wasn't valid,
 *         0 if the block was all zeros (or nothing could be read
 *           because the input is at end of file),
 *         1 if the block was valid
 *
 * text fields in a tar header are only NUL-terminated when they are
 * shorter than the field, so every field is read with an explicit
 * length bound rather than with strcpy/strcat.
 */

/* length of a header text field: up to the first NUL, at most 'max' */
static size_t
tar_field_len (const void *field, size_t max)
{
    const char *nul = memchr (field, '\0', max);

    return nul ? (size_t) (nul - (const char *) field) : max;
}

/* decode an octal header field that may lack a terminating NUL */
static unsigned long
tar_field_octal (const void *field, size_t len)
{
    char tmp[32];

    if (len > sizeof (tmp) - 1)
	len = sizeof (tmp) - 1;
    memcpy (tmp, field, len);
    tmp[len] = '\0';
    return o2b (tmp);
}

static int
tar_rd_hdr (gzFile gfd, struct tar_mem *tarmem)
{
    unsigned char buf[512];
    int x;
    struct tar_hdr *tar = (struct tar_hdr *) buf;
    size_t prefix_len = 0;
    size_t sep_len = 0;
    size_t name_len;
    size_t link_len;
    char *dst;

    x = gzread (gfd, buf, sizeof (buf));
    if (x == 0)
	return 0;
    if (x != (int) sizeof (buf))
	return -1;
    if (memcmp (buf, zeros, sizeof (buf)) == 0)
	return 0;
    if (tar_cksum (buf) != 0)
	return -2;

    /* the prefix field is only meaningful when the magic matches */
    if (memcmp (tar->magic, TMAGIC, TMAGLEN) == 0)
	prefix_len = tar_field_len (tar->prefix, sizeof (tar->prefix));
    name_len = tar_field_len (tar->name, sizeof (tar->name));
    link_len = tar_field_len (tar->linkname, sizeof (tar->linkname));

    /*
     * the ustar format stores the path as prefix and name with the
     * joining slash left out, so put it back - unless the prefix
     * already ends in one.
     */
    if (prefix_len > 0 && tar->prefix[prefix_len - 1] != '/')
	sep_len = 1;

    if (prefix_len + sep_len + name_len >= sizeof (tarmem->name)
	|| link_len >= sizeof (tarmem->linkname)) {
	fprintf (stderr, "tar_rd_hdr: name too long in tar header\n");
	return -1;
    }

    dst = tarmem->name;
    memcpy (dst, tar->prefix, prefix_len);
    dst += prefix_len;
    if (sep_len)
	*dst++ = '/';
    memcpy (dst, tar->name, name_len);
    dst[name_len] = '\0';

    tarmem->mode = (mode_t) tar_field_octal (tar->mode, sizeof (tar->mode));
    tarmem->uid = (uid_t) tar_field_octal (tar->uid, sizeof (tar->uid));
    tarmem->gid = (gid_t) tar_field_octal (tar->gid, sizeof (tar->gid));
    tarmem->size = (off_t) tar_field_octal (tar->size, sizeof (tar->size));
    tarmem->mtime = (time_t) tar_field_octal (tar->mtime, sizeof (tar->mtime));
    tarmem->typeflag = tar->typeflag;

    memcpy (tarmem->linkname, tar->linkname, link_len);
    tarmem->linkname[link_len] = '\0';
    return 1;
}

/*
 * smaller of two values.  defined here only if no earlier header has
 * already provided MIN.  this is a function-like macro: an argument
 * may be evaluated more than once, so arguments must not have side
 * effects.
 */
#ifndef MIN
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#endif

/*
 * skip 'skip_size' bytes in the input by reading them and discarding
 * them.  this works even on a pipe, or with calls to gzread().
 *
 * returns 0 once all the bytes have been consumed (or if 'skip_size'
 * is not positive), EOF if gzread() reports an error or the input
 * ends before 'skip_size' bytes have been read.
 */

static int
tar_skip_pipe (gzFile gfd, off_t skip_size)
{
    char buf[512*20];
    int size2read;
    int nread;

    while (skip_size > 0) {
	/* read at most one buffer's worth per call */
	if (skip_size < (off_t) sizeof (buf))
	    size2read = (int) skip_size;
	else
	    size2read = (int) sizeof (buf);

	nread = gzread (gfd, buf, size2read);
	if (nread <= 0)
	    return EOF;
	skip_size -= nread;
    }

    /* success must be reported explicitly: the caller tests for < 0 */
    return 0;
}

/*
 * skip over the data of an archive member that we don't want to
 * extract.  'size' is the member's length in bytes; when it is not a
 * multiple of 512 the amount consumed is padded out to the next
 * 512-byte boundary.
 *
 * seeking doesn't work with the gz* routines, so the data is always
 * read and discarded by tar_skip_pipe(), whose return value is passed
 * back to the caller.
 */

static int
tar_skip (gzFile gfd, off_t size)
{
    off_t partial = size % 512;
    off_t padded = size;

    if (partial != 0)
	padded = size + 512 - partial;

    return tar_skip_pipe (gfd, padded);
}

/*
 * extract the current file from the tar archive.  'memhdr' must
 * point to the most-recently-read decoded header block,
 * and the input 'gfd' must not have been moved (read or seeked)
 * since that header block was read.
 *
 * if dir is non-NULL, file is extracted into 'dir' rather than cwd
 * if md5 is non-NULL, md5 fingerprint of the extracted file is stored in 'md5'
 *
 * the caller must set umask to 0 before calling this routine if
 * it is desired to restore the original file permissions.
 *
 * returns 0 on success, EOF on any failure (cannot create the output
 * file, read error or premature end of the archive, write error,
 * close error).  the output file descriptor and the file name are
 * released on every path.
 */

/* write all 'len' bytes of 'data' to 'fd'; 0 on success, -1 on error */
static int
tar_write_all (int fd, const char *data, size_t len)
{
    while (len > 0) {
	int n = write (fd, data, len);

	if (n < 0 && errno == EINTR)
	    continue;
	if (n <= 0)
	    return -1;
	data += n;
	len -= (size_t) n;
    }
    return 0;
}

static int
tar_extract (gzFile gfd, struct tar_mem *memhdr, char *dir, unsigned char *md5)
{
    char buf[512*20];
    off_t size = memhdr->size;
    int size2read;
    int nread;
    int ofd;
    int result = EOF;
    MD5_CTX c;
    char *filename;

    /*
     * NOTE(review): memhdr->name comes straight from the archive and is
     * used unchanged, so an absolute name or one containing ".." can
     * create a file outside 'dir'.  confirm that callers only feed
     * trusted archives, or filter such names in the callback.
     */
    if (dir)
	filename = concat (dir, "/", memhdr->name, NULL);
    else
	filename = strdup (memhdr->name);
    if (filename == NULL) {
	fprintf (stderr, "tar_extract: out of memory\n");
	return EOF;
    }

    ofd = open (filename, O_WRONLY|O_CREAT|O_TRUNC, memhdr->mode & 0777);
    if (ofd < 0) {
	fprintf (stderr, "tar_extract: open(%s): %s\n",
		 filename, strerror (errno));
	free (filename);
	return EOF;
    }

    if (md5)
	MD5Init (&c);
    while (size > 0) {
	if (size < (off_t) sizeof (buf))
	    size2read = (int) size;
	else
	    size2read = (int) sizeof (buf);

	nread = gzread (gfd, buf, size2read);
	if (nread <= 0) {
	    fprintf (stderr, "tar_extract: %s: read error or premature "
		     "end of archive\n", filename);
	    goto done;
	}
	if (tar_write_all (ofd, buf, (size_t) nread) < 0) {
	    fprintf (stderr, "tar_extract: write(%s): %s\n",
		     filename, strerror (errno));
	    goto done;
	}
	if (md5)
	    MD5Update (&c, buf, (unsigned long) nread);
	size -= nread;
    }

    nread = close (ofd);
    ofd = -1;
    if (nread < 0) {
	fprintf (stderr, "tar_extract: close(%s): %s\n",
		 filename, strerror (errno));
	goto done;
    }
    if (md5)
	MD5Final (md5, &c);

    /*
     * consume the padding that rounds the file data up to a whole
     * 512-byte block.  a short read here can only mean end of input,
     * which the next header read will notice; only a hard error is
     * reported from this call.
     */
    if (memhdr->size % 512 != 0
	&& gzread (gfd, buf, (unsigned) (512 - (memhdr->size % 512))) < 0)
	goto done;

    result = 0;

 done:
    if (ofd >= 0)
	close (ofd);
    free (filename);
    return result;
}

/*
 * read a tar file.  call 'cb()' for each file encountered,
 * passing the decoded header of that file and 'param'.
 * if cb() returns 1 (any positive value), extract the file into 'dir'
 * (or the current directory if 'dir' is NULL); if it returns 0, skip
 * the file; if it returns a negative value, give up.
 *
 * returns a list of the files extracted (most recently extracted
 * first), each with its name and MD5 fingerprint, once two all-zero
 * blocks or end of input have been seen.  returns NULL on any error;
 * note that NULL is also what an archive from which nothing was
 * extracted yields.  on error, entries collected so far are freed.
 *
 * NOTE(review): the gzFile created by gzdopen() is never closed here,
 * so zlib's per-stream state is not released.  gzclose() would also
 * close the caller's 'fd'; the zlib manual suggests gzdopen(dup(fd))
 * when the descriptor must stay open.  confirm what callers expect
 * before changing this.
 */

/*
 * free a list of file_md5 entries together with their file names.
 * entries come from malloc_or_else() and strdup(); this assumes
 * malloc_or_else() memory can be released with free().
 */
static void
tar_free_list (struct file_md5 *list)
{
    while (list) {
	struct file_md5 *next = list->next;

	free (list->filename);
	free (list);
	list = next;
    }
}

struct file_md5 *
tar_rd (int fd, char *dir, int (*cb)(), void *param)
{
    int x;
    struct tar_mem memhdr;
    int last_hdr_was_zeros = 0;
    struct file_md5 *file_list = NULL;
    struct file_md5 *newfile;
    gzFile gfd;

    gfd = gzdopen (fd, "rb");
    if (gfd == NULL) {
	fprintf (stderr, "tar_rd: gzdopen failed\n");
	return NULL;
    }

    for (;;) {
	switch (tar_rd_hdr (gfd, &memhdr)) {
	case -2:		/* invalid header - try the next block */
	    last_hdr_was_zeros = 0;
	    continue;
	case -1:		/* error or (premature) end of file */
	    goto fail;
	case 0:			/* all zeros */
	    if (last_hdr_was_zeros == 1)
		return file_list;
	    last_hdr_was_zeros = 1;
	    break;
	default:		/* valid header */
	    last_hdr_was_zeros = 0;
	    x = cb (&memhdr, param);
	    if (x < 0)
		goto fail;
	    if (x == 0) {
		if (tar_skip (gfd, memhdr.size) < 0)
		    goto fail;
		break;
	    }

	    newfile = (struct file_md5 *)
		malloc_or_else (sizeof (struct file_md5));
	    newfile->filename = strdup (memhdr.name);
	    if (newfile->filename == NULL
		|| tar_extract (gfd, &memhdr, dir, newfile->md5) < 0) {
		/* not yet linked into the list, so release it here */
		free (newfile->filename);
		free (newfile);
		goto fail;
	    }

	    newfile->next = file_list;
	    file_list = newfile;
	    break;
	}
    }

 fail:
    tar_free_list (file_list);
    return NULL;
}
