/*
 * routines for getting information about the cpu of the link platform
 *
 * error reporting: function return values
 * error messages: none
 *
 * dependencies: netbuild conf.h, symtab
 *
 * todo: expand to other CPUs besides ia32
 */

#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <sys/types.h>

#include "conf.h"
#include "cpu.h"
#include "symtab.h"

#ifdef HAVE_SYS_SYSCTL_H
#include <sys/sysctl.h>
#endif

#ifdef CPU_alpha
/*
 * cpu_init for builds with CPU_alpha defined.
 *
 * the only thing recorded is the architecture name: the symtab
 * variable "target.cpu.arch" is set to "alpha".  no probing of the
 * processor is done.
 */
void
cpu_init ()
{
    /* cpu_init_sysctl (); */
    symtab_set_value_str ("target.cpu.arch", "alpha");
}
#endif

#ifdef CPU_ia32
/*
 * routines for getting specific attributes of ia32 processors
 *
 * XXX currently this module requires gcc
 */


#include <setjmp.h>
#include <signal.h>

#define EAX 0
#define EBX 1
#define ECX 2
#define EDX 3

/*
 * this routine cribbed from NetBSD/i386-current as of 19 Jan 2001
 * 
 * Copyright (c) 1996, 1997, 1998, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Charles M. Hannum and by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the NetBSD
 *      Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * execute the CPUID instruction with EAX set to 'which' and store the
 * resulting EAX, EBX, ECX and EDX values in regs[0], regs[1], regs[2]
 * and regs[3] respectively.  'regs' must point to at least four
 * unsigned ints.
 *
 * the four stores to 'regs' are done by the asm itself, so "memory"
 * is listed as a clobber: without it the compiler is entitled to
 * assume that this statement did not modify regs[] and may reuse
 * stale values or drop later loads.
 */
static void
do_cpuid (unsigned int which, unsigned int *regs)
{
        register unsigned int eax __asm("%eax") = which;

        __asm __volatile(
        "       cpuid                   ;"
        "       movl    %%eax,0(%2)     ;"
        "       movl    %%ebx,4(%2)     ;"
        "       movl    %%ecx,8(%2)     ;"
        "       movl    %%edx,12(%2)    ;"
        : "=a" (eax)
        : "0" (eax), "S" (regs)
        : "ebx", "ecx", "edx", "memory");
}

/*
 * return an indication of CPU type
 * 3 if the processor is a 386
 * 4 if the processor is a 486 that doesn't support CPUID
 * 5 if the processor is a 486 or better that does support CPUID
 *
 * XXX requires gcc
 */

static int
primitive_cpu_type ()
{
	static int cpu_type = 0;

	if (cpu_type != 0)
	    return cpu_type;

	__asm __volatile(
	"	pushf			; " /* push EFLAGS */
	"	popl %%eax		; " /* get EFLAGS in eax */
	"	movl %%eax, %%ecx	; " /* save original EFLAGS */
	"	xorl $0x4000, %%eax	; " /* toggle AC bit */
	"	pushl %%eax		; " /* push modified EFLAGS */
	"	popf			; " /* attempt to modify EFLAGS */
	"	pushf			; " /* duplicate new EFLAGS */
	"	popl %%eax		; " /* pop new EFLAGS into eax */
	"	xorl %%eax,%%ecx	; " /* see if EFLAGS was modified */
	"	movl $3,%0		; " /* store 3 in cpu_type */
	"	jz 0f			; " /* if EFLAGS was unchanged, done */
	"	pushl %%ecx		; " /* restore old EFLAGS */
	"	popf			; " /* pop EFLAGS into eax */
	"	movl $4,%0		; " /* store 4 in cpu_type */
	"	movl %%eax, %%ecx	; " /* save EFLAGS in ecx */
	"	xorl $0x200000, %%eax	; " /* toggle ID bit */
	"	pushl %%eax		; " /* push modified EFLAGS */
	"	popf			; " /* attempt to pop into EFLAGS */
	"	pushf			; " /* now get EFLAGS in eax */
	"	popl %%eax		; "
	"	xorl %%eax, %%ecx	; " /* see if they're unchanged */
	"	je 0f			; " /* if so, done */ 
	"	movl $5,%0		; " /* store 5 in cpu_type */
	"	pushl %%ecx		; " /* restore old EFLAGS */
	"	popf			; "
	"0:				  "
	: "=m" (cpu_type)		/* output operands */
	: 				/* input operands (none) */
	: "eax", "ecx"			/* clobber registers */
	);

	return cpu_type;
}

/* 
 * end NetBSD-copyrighted stuff 
 */

/*
 * return the vendor string if it can be found from the CPUID instruction,
 * else "unknown".
 *
 * the vendor string is the 12 bytes returned by CPUID function 0 in
 * EBX, EDX and ECX (in that order), e.g. "GenuineIntel".  the result
 * points either to a string literal or to a static buffer that is
 * overwritten by the next call; the caller must not modify or free it.
 */

static char *
ia32_vendor_str ()
{
    static char result[13];
    unsigned int regs[4];

    if (primitive_cpu_type() <= 4)
	return "unknown";
    do_cpuid (0, regs);
    /*
     * %s requires a pointer to char, so view each register as four
     * bytes explicitly; the precision keeps printf from reading past
     * the register.
     */
    snprintf (result, sizeof (result), "%.4s%.4s%.4s",
	      (const char *) &regs[EBX],
	      (const char *) &regs[EDX],
	      (const char *) &regs[ECX]);
    return result;
}

/*
 * process a cache descriptor on an Intel IA-32 family processor,
 * setting appropriate variables.
 *
 * 'foo' is one descriptor byte taken from the registers returned by
 * CPUID function 2.  'family' is the processor family; it is only
 * used for descriptor 0x40, which means "no L3 cache" on family 15
 * and later and "no L2 cache" on earlier families.
 *
 * TLB and trace cache descriptors are recognized but set nothing;
 * unknown descriptors are ignored.
 *
 * the sectored L1 data caches (0x66-0x68) used to publish their byte
 * size only under "...l1.dcache.entries".  they now also set
 * "...l1.dcache.size" like every other L1 data cache descriptor;
 * "entries" is still set so existing users keep working.
 *
 * XXX stuff to rationalize:
 * - unified cache versus separate I&D caches
 * - how to report size for segmented caches
 */

static void
intel_process_cache_descriptor (int foo, int family)
{
    switch (foo) {
    case 0x01:	/* instruction TLB: 4K-byte pages, 4-way, 32 entries */
    case 0x02:	/* instruction TLB: 4M-byte pages, 4-way, 2 entries */
    case 0x03:	/* data TLB: 4K-byte pages, 4-way, 64 entries */
    case 0x04:	/* data TLB: 4M-byte pages, 4-way, 8 entries */
	break;

    case 0x06:
	/* L1 icache: 8K bytes, 4-way, 32 byte line size */
	symtab_set_value_int ("target.cpu.ia32.l1.icache.size", 8*1024);
	symtab_set_value_int ("target.cpu.ia32.l1.icache.way", 4);
	symtab_set_value_int ("target.cpu.ia32.l1.icache.linesize", 32);
	break;
    case 0x08:
	/* L1 icache: 16K bytes, 4-way, 32 byte line size */
	symtab_set_value_int ("target.cpu.ia32.l1.icache.size", 16*1024);
	symtab_set_value_int ("target.cpu.ia32.l1.icache.way", 4);
	symtab_set_value_int ("target.cpu.ia32.l1.icache.linesize", 32);
	break;
    case 0x0A:
	/* L1 dcache: 8K bytes, 2-way, 32 byte line size */
	symtab_set_value_int ("target.cpu.ia32.l1.dcache.size", 8*1024);
	symtab_set_value_int ("target.cpu.ia32.l1.dcache.way", 2);
	symtab_set_value_int ("target.cpu.ia32.l1.dcache.linesize", 32);
	break;
    case 0x0c:
	/* L1 dcache: 16K bytes, 4-way, 32 byte line size */
	symtab_set_value_int ("target.cpu.ia32.l1.dcache.size", 16*1024);
	symtab_set_value_int ("target.cpu.ia32.l1.dcache.way", 4);
	symtab_set_value_int ("target.cpu.ia32.l1.dcache.linesize", 32);
	break;

    case 0x22:
	/* L3 cache: 512K bytes, 4-way, 64 byte line size */
	symtab_set_value_int ("target.cpu.ia32.l3.dcache.size", 512*1024);
	symtab_set_value_int ("target.cpu.ia32.l3.dcache.way", 4);
	symtab_set_value_int ("target.cpu.ia32.l3.dcache.linesize", 64);
	break;
    case 0x23:
	/* L3 cache: 1M bytes, 8-way, 64 byte line size */
	symtab_set_value_int ("target.cpu.ia32.l3.dcache.size", 1024*1024);
	symtab_set_value_int ("target.cpu.ia32.l3.dcache.way", 8);
	symtab_set_value_int ("target.cpu.ia32.l3.dcache.linesize", 64);
	break;
    case 0x25:
	/* L3 cache: 2M bytes, 8-way, 64 byte line size */
	symtab_set_value_int ("target.cpu.ia32.l3.dcache.size", 2*1024*1024);
	symtab_set_value_int ("target.cpu.ia32.l3.dcache.way", 8);
	symtab_set_value_int ("target.cpu.ia32.l3.dcache.linesize", 64);
	break;
    case 0x29:
	/* L3 cache: 4M bytes, 8-way, 64 byte line size */
	symtab_set_value_int ("target.cpu.ia32.l3.dcache.size", 4*1024*1024);
	symtab_set_value_int ("target.cpu.ia32.l3.dcache.way", 8);
	symtab_set_value_int ("target.cpu.ia32.l3.dcache.linesize", 64);
	break;

    case 0x40:
	/* "no cache": which level is missing depends on the family */
	if (family >= 15)
	    symtab_set_value_int ("target.cpu.ia32.l3.nocache", 1);
	else
	    symtab_set_value_int ("target.cpu.ia32.l2.nocache", 1);
	break;

    case 0x41:
	/* L2 cache: 128K bytes, 4-way, 32 byte line size */
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.linesize", 32);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.way", 4);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.size", 128*1024);
	break;
    case 0x42:
	/* L2 cache: 256K bytes, 4-way, 32 byte line size */
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.linesize", 32);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.way", 4);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.size", 256*1024);
	break;
    case 0x43:
	/* L2 cache: 512K bytes, 4-way, 32 byte line size */
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.linesize", 32);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.way", 4);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.size", 512*1024);
	break;
    case 0x44:
	/* L2 cache: 1M bytes, 4-way, 32 byte line size */
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.linesize", 32);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.way", 4);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.size", 1024*1024);
	break;
    case 0x45:
	/* L2 cache: 2M bytes, 4-way, 32 byte line size */
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.linesize", 32);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.way", 4);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.size", 2*1024*1024);
	break;

    case 0x50:	/* instruction TLB: 4K-byte pages, and 2/4M-byte pages, 64 entries */
    case 0x51:	/* instruction TLB: 4K-byte pages, and 2/4M-byte pages, 128 entries */
    case 0x52:	/* instruction TLB: 4K-byte pages, and 2/4M-byte pages, 128 entries */
    case 0x5B:	/* data TLB: 4K-byte and 4M-byte pages, 64 entries */
    case 0x5C:	/* data TLB: 4K-byte and 4M-byte pages, 128 entries */
    case 0x5D:	/* data TLB: 4K-byte and 4M-byte pages, 256 entries */
	break;

    case 0x66:
	/* L1 data cache: 8Kb, 4-way, 64 byte line size */
	symtab_set_value_int ("target.cpu.ia32.l1.dcache.linesize", 64);
	symtab_set_value_int ("target.cpu.ia32.l1.dcache.way", 4);
	symtab_set_value_int ("target.cpu.ia32.l1.dcache.size", 8*1024);
	symtab_set_value_int ("target.cpu.ia32.l1.dcache.entries", 8*1024);
	symtab_set_value_int ("target.cpu.ia32.l1.dcache.sectored", 1);
	break;
    case 0x67:
	/* L1 data cache: 16Kb, 4-way, 64 byte line size */
	symtab_set_value_int ("target.cpu.ia32.l1.dcache.linesize", 64);
	symtab_set_value_int ("target.cpu.ia32.l1.dcache.way", 4);
	symtab_set_value_int ("target.cpu.ia32.l1.dcache.size", 16*1024);
	symtab_set_value_int ("target.cpu.ia32.l1.dcache.entries", 16*1024);
	symtab_set_value_int ("target.cpu.ia32.l1.dcache.sectored", 1);
	break;
    case 0x68:
	/* L1 data cache: 32Kb, 4-way, 64 byte line size */
	symtab_set_value_int ("target.cpu.ia32.l1.dcache.linesize", 64);
	symtab_set_value_int ("target.cpu.ia32.l1.dcache.way", 4);
	symtab_set_value_int ("target.cpu.ia32.l1.dcache.size", 32*1024);
	symtab_set_value_int ("target.cpu.ia32.l1.dcache.entries", 32*1024);
	symtab_set_value_int ("target.cpu.ia32.l1.dcache.sectored", 1);
	break;

    case 0x70:	/* trace cache: 12k uops, 8-way */
    case 0x71:	/* trace cache: 24k uops, 8-way */
    case 0x72:	/* trace cache: 32k uops, 8-way */
	break;

    case 0x79:
	/* L2 cache: 128Kb, 8-way, sectored, 64-byte line size */
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.sectored", 1);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.linesize", 64);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.way", 8);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.size", 128*1024);
	break;
    case 0x7A:
	/* L2 cache: 256Kb, 8-way, sectored, 64-byte line size */
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.sectored", 1);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.linesize", 64);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.way", 8);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.size", 256*1024);
	break;
    case 0x7B:
	/* L2 cache: 512Kb, 8-way, sectored, 64-byte line size */
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.sectored", 1);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.linesize", 64);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.way", 8);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.size", 512*1024);
	break;
    case 0x7C:
	/* L2 cache: 1Mb, 8-way, sectored, 64-byte line size */
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.sectored", 1);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.linesize", 64);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.way", 8);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.size", 1024*1024);
	break;

    case 0x82:
	/* L2 cache: 256K, 8-way, 32 byte line size */
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.linesize", 32);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.way", 8);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.size", 256*1024);
	break;
    case 0x83:
	/* L2 cache: 512K, 8-way, 32 byte line size */
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.linesize", 32);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.way", 8);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.size", 512*1024);
	break;
    case 0x84:
	/* L2 cache: 1M, 8-way, 32 byte line size */
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.linesize", 32);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.way", 8);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.size", 1024*1024);
	break;
    case 0x85:
	/* L2 cache: 2M, 8-way, 32 byte line size */
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.linesize", 32);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.way", 8);
	symtab_set_value_int ("target.cpu.ia32.l2.dcache.size", 2*1024*1024);
	break;

    default:
	/* null (0x00) or unrecognized descriptor: nothing to record */
	break;
    }
}

/*
 * ia32_family() is defined further down in this file.  the forward
 * declaration lives at file scope because a block-scope function
 * declaration may not carry the 'static' storage class.
 */
static unsigned int ia32_family ();

/*
 * for an Intel processor, process cache descriptors from the registers
 * returned from cpuid instruction
 *
 * 'regs' holds the four registers (indexed by EAX..EDX) from one
 * execution of CPUID function 2.  a register with bit 31 set is
 * skipped entirely; otherwise each of its bytes is handed to
 * intel_process_cache_descriptor(), except the low byte of EAX,
 * which the caller uses as the repeat count rather than as a
 * descriptor.
 */

static void
intel_get_cache_info_from_regs (unsigned int *regs)
{
    unsigned int family = ia32_family ();
    int i;

    for (i = 0; i < 4; ++i) {
	if (regs[i] & 0x80000000)
	    continue;

	intel_process_cache_descriptor ((regs[i] >> 24) & 0xff, family);
	intel_process_cache_descriptor ((regs[i] >> 16) & 0xff, family);
	intel_process_cache_descriptor ((regs[i] >> 8) & 0xff, family);
	if (i != EAX)
	    intel_process_cache_descriptor (regs[i] & 0xff, family);
    }
}

/*
 * translate the 4-bit L2 associativity code reported by AMD
 * processors in CPUID function 0x80000006 (ECX bits 15:12) into a
 * number of ways.
 *
 * returns 0 for a disabled cache or a reserved code, and 0xff for a
 * fully associative cache (the value function 0x80000005 uses for
 * fully associative L1 caches).
 */

static unsigned int
amd_l2_assoc_ways (unsigned int code)
{
    switch (code & 0x0f) {
    case 0x1: return 1;		/* direct mapped */
    case 0x2: return 2;
    case 0x3: return 3;
    case 0x4: return 4;
    case 0x5: return 6;
    case 0x6: return 8;
    case 0x8: return 16;
    case 0xa: return 32;
    case 0xb: return 48;
    case 0xc: return 64;
    case 0xd: return 96;
    case 0xe: return 128;
    case 0xf: return 0xff;	/* fully associative */
    default:  return 0;		/* disabled or reserved */
    }
}

/*
 * extract cache information
 *
 * Intel: cache descriptors are read from CPUID function 2, which is
 * executed as many times as the low byte of EAX from the first
 * execution asks for.  function 2 is only queried if function 0
 * reports it as available; on older processors an out-of-range
 * function returns unrelated data that would otherwise be decoded as
 * descriptors.
 *
 * AMD: L1 information comes from function 0x80000005 and L2
 * information from function 0x80000006, when available.  the L2
 * associativity is published as a number of ways (decoded from the
 * 4-bit code), matching the literal way counts used for the two
 * processors whose CPUID data is overridden below.
 *
 * XXX for now, only Intel and AMD are supported
 */

static void
ia32_get_cache_info ()
{
    char *vendor = ia32_vendor_str ();
    unsigned int regs[4];

    if (strcmp (vendor, "GenuineIntel") == 0) {
	int more;

	do_cpuid (0, regs);
	if (regs[EAX] < 2)
	    return;			/* no cache descriptor function */

	do_cpuid (2, regs);
	intel_get_cache_info_from_regs (regs);
	more = regs[EAX] & 0xff;
	while (--more > 0) {
	    do_cpuid (2, regs);
	    intel_get_cache_info_from_regs (regs);
	}
    }
    else if (strcmp (vendor, "AuthenticAMD") == 0) {
	unsigned int max_function;
	unsigned int family, model, stepping;

	do_cpuid (0x00000001, regs);
	family = (regs[EAX] >> 8) & 0x0f;
	model = (regs[EAX] >> 4) & 0x0f;
	stepping = regs[EAX]  & 0x0f;

	do_cpuid (0x80000000, regs);
	max_function = regs[EAX];

	/* XXX add TLB symbols */

	if (max_function >= 0x80000005) {
	    do_cpuid (0x80000005, regs);
	    symtab_set_value_int ("target.cpu.ia32.l1.dcache.linesize", regs[ECX] & 0xff);
	    symtab_set_value_int ("target.cpu.ia32.l1.dcache.lines", (regs[ECX] >> 8) & 0xff);
	    symtab_set_value_int ("target.cpu.ia32.l1.dcache.way", (regs[ECX] >> 16) & 0xff);
	    symtab_set_value_int ("target.cpu.ia32.l1.dcache.size", ((regs[ECX] >> 24) & 0xff) * 1024);
	    symtab_set_value_int ("target.cpu.ia32.l1.icache.linesize", regs[EDX] & 0xff);
	    symtab_set_value_int ("target.cpu.ia32.l1.icache.lines", (regs[EDX] >> 8) & 0xff);
	    symtab_set_value_int ("target.cpu.ia32.l1.icache.way", (regs[EDX] >> 16) & 0xff);
	    symtab_set_value_int ("target.cpu.ia32.l1.icache.size", ((regs[EDX] >> 24) & 0xff) * 1024);
	}
	if (max_function >= 0x80000006) {
	    do_cpuid (0x80000006, regs);
	    /*
	     * apparently CPUID is buggy on some AMD chips:
	     * see  /usr/src/linux/arch/i386/kernel/setup.c
	     */
	    if (family == 6 && model == 3 && stepping == 0) {
		/* Duron Rev A0 */
		symtab_set_value_int ("target.cpu.ia32.l2.dcache.size", 64*1024);
		symtab_set_value_int ("target.cpu.ia32.l2.dcache.way", 16);
	    }
	    else if (family == 6 && model == 4 && (stepping == 1 || stepping == 0)) {
		/* Tbird rev A1 */
		symtab_set_value_int ("target.cpu.ia32.l2.dcache.size", 256*1024);
		symtab_set_value_int ("target.cpu.ia32.l2.dcache.way", 16);
	    }
	    else {
		symtab_set_value_int ("target.cpu.ia32.l2.dcache.size", ((regs[ECX] >> 16) & 0xffff) * 1024);
		symtab_set_value_int ("target.cpu.ia32.l2.dcache.way",
				      amd_l2_assoc_ways ((regs[ECX] >> 12) & 0x0f));
	    }
	    symtab_set_value_int ("target.cpu.ia32.l2.dcache.linesize", regs[ECX] & 0xff);
	    symtab_set_value_int ("target.cpu.ia32.l2.dcache.lines", (regs[ECX] >> 8) & 0xf);
	}
    }
}


/*
 * decode the unsigned integer starting at *strp, incrementing
 * *strp as a side-effect.  return the integer, or -1 on error.
 *
 * leading and trailing '.' characters and white space are skipped.
 * the integer may be
 * - a register name EAX, EBX, ECX or EDX (any case), which yields
 *   the corresponding register index
 * - a hexadecimal number with a 0x or 0X prefix
 * - a number with a leading 0, accumulated in base 8 (the digits 8
 *   and 9 are not rejected)
 * - a decimal number
 *
 * on error *strp is left unchanged.
 *
 * the value is accumulated as an unsigned int so that inputs with
 * bit 31 set, such as the CPUID function number 0x80000001, do not
 * overflow a signed int; such values come back as negative ints and
 * the caller is expected to convert them back to unsigned.
 * characters are converted to unsigned char before being handed to
 * the <ctype.h> classification functions.
 */

static int 
getnum (char **strp)
{
    char *str = *strp;
    unsigned int result = 0;

    while (*str == '.' || isspace ((unsigned char) *str))
	++str;
    if (strncasecmp (str, "EAX", 3) == 0) {
	result = EAX;
	str += 3;
    }
    else if (strncasecmp (str, "EBX", 3) == 0) {
	result = EBX;
	str += 3;
    }
    else if (strncasecmp (str, "ECX", 3) == 0) {
	result = ECX;
	str += 3;
    }
    else if (strncasecmp (str, "EDX", 3) == 0) {
	result = EDX;
	str += 3;
    }
    else if (*str == '0' && (str[1] == 'x' || str[1] == 'X')) {
	str += 2;
	while (isxdigit ((unsigned char) *str)) {
	    unsigned char c = (unsigned char) *str++;
	    unsigned int digit;

	    if (isupper (c))
		digit = c - 'A' + 10;
	    else if (islower (c))
		digit = c - 'a' + 10;
	    else
		digit = c - '0';
	    result = result * 16 + digit;
	}
    }
    else if (*str == '0') {
	while (isdigit ((unsigned char) *str))
	    result = result * 8 + (unsigned int) (*str++ - '0');
    }
    else if (isdigit ((unsigned char) *str)) {
	while (isdigit ((unsigned char) *str))
	    result = result * 10 + (unsigned int) (*str++ - '0');
    }
    else
	return -1;

    while (*str == '.' || isspace ((unsigned char) *str))
	++str;
    *strp = str;
    return (int) result;
}

/*
 * generate a mask containing 'x' one bits, for 0 <= x <= 32.
 *
 * the arithmetic is done on unsigned values so that x == 31 does not
 * overflow a signed int.  x == 32 is handled separately because
 * shifting by the full width of the type is undefined.
 * note that 'x' is evaluated more than once.
 */

#define MASK(x) ((x) == 32 ? 0xffffffffu : (1u << (x)) - 1u)

/*
 * invoke the CPUID instruction and return the bits requested by 'arg'
 *
 * 'arg' is an ASCII string containing a series of integers, separated
 * by white space or '.' characters.
 *
 * the first integer is the EAX argument to the CPUID instruction
 * subsequent integers are in sets of three:
 * register,msb,lsb
 * register specifies which register to look at
 * msb specifies the msb from that register to use
 * lsb specifies the lsb from that register to use
 *
 * the fields are concatenated in the order given, the first field
 * ending up in the most significant position of the result.
 * if a set of three is malformed or out of range (register not 0..3,
 * bit numbers not 0..31, msb < lsb) 0xdeadbeef is returned.
 * if no set of three is given the result is 0 and CPUID is not
 * executed.
 *
 * CPUID is executed once, before the first field is extracted, so
 * all fields of one request come from the same execution.  a field
 * covering all 32 bits is handled without shifting by 32, which
 * would be undefined.
 *
 * XXX on multiprocessor system, this assumes all CPUs are the same
 * (which might be bogus)
 */

static unsigned int
ia32_cpuid_int (char *arg)
{
    unsigned int cpuid_which;
    unsigned int regs[4];
    unsigned int result = 0;
    int have_regs = 0;

    cpuid_which = (unsigned int) getnum (&arg);
    while (*arg) {
	int reg = getnum (&arg);	/* which register */
	int msb = getnum (&arg);	/* MSB */
	int lsb = getnum (&arg);	/* LSB */
	unsigned int nbits, bits;

	if (reg < 0 || reg > 3 || msb < 0 || msb > 31 ||
	    lsb < 0 || lsb > 31 || msb < lsb)
	    return 0xdeadbeef;

	if (!have_regs) {
	    do_cpuid (cpuid_which, regs);
	    have_regs = 1;
	}

	nbits = (unsigned int) (msb - lsb) + 1;
	bits = regs[reg] >> lsb;
	if (nbits < 32) {
	    bits &= (1u << nbits) - 1u;
	    result = (result << nbits) | bits;
	}
	else {
	    /* the field is the whole register; it replaces everything */
	    result = bits;
	}
    }
    return result;
}

/*
 * return the family of the IA-32 processor
 *
 * without CPUID the result of primitive_cpu_type() (3 or 4) is
 * returned.  otherwise the family is taken from bits 11..8 of EAX
 * from CPUID function 1; if that field is 15, the extended family
 * (bits 27..20) is added to it.  the extended family is an offset to
 * be added to the base family, not a set of extra high-order digits.
 */

static unsigned int 
ia32_family ()
{
    int primitive;
    unsigned int family;

    if ((primitive = primitive_cpu_type ()) <= 4)
	return primitive;
    family = ia32_cpuid_int ("1.EAX.11.8");
    if (family != 0x0f)
	return family;
    return family + ia32_cpuid_int ("1.EAX.27.20");
}

/*
 * return the model of the IA-32 processor
 *
 * without CPUID the result is 0.  otherwise the model is taken from
 * bits 7..4 of EAX from CPUID function 1.  when the base family
 * (bits 11..8) is 6 or 15, the extended model (bits 19..16) supplies
 * four more high-order bits.  whether the extended model applies
 * depends on the family, not on the value of the base model.
 */

static unsigned int
ia32_model ()
{
    unsigned int family, model;

    if (primitive_cpu_type () <= 4)
	return 0;
    family = ia32_cpuid_int ("1.EAX.11.8");
    model = ia32_cpuid_int ("1.EAX.7.4");
    if (family == 0x06 || family == 0x0f)
	model |= ia32_cpuid_int ("1.EAX.19.16") << 4;
    return model;
}

/*
 * return the product name from an IA-32 processor, or ""
 * if it doesn't exist.
 *
 * the name is the (up to) 48 bytes returned in EAX, EBX, ECX and EDX
 * by CPUID functions 0x80000002, 0x80000003 and 0x80000004, read in
 * that order and cut at the first NUL byte.  the result points
 * either to a string literal or to a static buffer that is
 * overwritten by the next call.
 *
 * for Intel, only works for Pentium 4 and later - result may
 * contain leading spaces
 * for AMD, works for some K6 processors, Athlon/Duron and later
 */

static char *
ia32_product_str ()
{
    static char result[49];
    unsigned int regs[12];	/* three consecutive sets of four registers */

    if (primitive_cpu_type () < 5)
	return "";

    do_cpuid (0x80000000, regs);
    if (regs[EAX] < 0x80000004)
	return "";

    do_cpuid (0x80000002, &regs[0]);
    do_cpuid (0x80000003, &regs[4]);
    do_cpuid (0x80000004, &regs[8]);
    /*
     * the registers are stored back to back, so the name can be
     * copied as a single string; the precision stops printf after 48
     * bytes if the processor did not NUL-terminate it.
     */
    snprintf (result, sizeof (result), "%.48s", (const char *) regs);
    return result;
}

/*
 * pseudo-variable handler giving direct access to the CPUID
 * instruction.
 *
 * 'name' is the full variable name; whatever follows the
 * "target.cpu.ia32.cpuid." prefix is passed to ia32_cpuid_int() and
 * the resulting integer is stored in *value.  on processors without
 * CPUID, *value is left untouched.  'c' and 'arg' are not used.
 * always returns 0.
 */

static int
ia32_cpuid (int c, char *name, VAL *value, void *arg)
{
    char *request;

    if (primitive_cpu_type () > 4) {
	request = name + strlen("target.cpu.ia32.cpuid.");
	value->valtype = INT;
	value->intval = ia32_cpuid_int (request);
    }
    return 0;
}

/*
 * determine whether both the CPU and the OS support the instruction
 * set extensions MMX, SSE and SSE2
 *
 * XXX does this yield correct results for AMD processors?
 * XXX need to check OS support for 3DNow! also (only the CPUID
 * feature bits are consulted for it)
 */

/* set to 1 by exceptionhandler() just before it jumps back to 'jb' */
static volatile int intr = 0;
/* where exceptionhandler() resumes; must be armed with setjmp() first */
static jmp_buf jb;

/*
 * signal handler installed for SIGILL while probing instructions.
 * records that the signal arrived and resumes execution at the most
 * recent setjmp (jb), which then returns 1.  does not return.
 *
 * 'intr' has to be set before the jump: nothing after longjmp() is
 * ever executed.
 *
 * NOTE(review): longjmp() out of a signal handler does not restore
 * the signal mask on every platform, so SIGILL may stay blocked after
 * the first fault; sigsetjmp/siglongjmp may be needed -- confirm for
 * the supported platforms.
 */
static void
exceptionhandler (int sig)
{
    (void) sig;
    intr = 1;
    longjmp (jb, 1);
}

static void
ia32_test_for_features ()
{
    int have_mmx = 0;
    int have_sse = 0;
    int have_sse2 = 0;
    void (*oldsigill)();
    int x;

    if (primitive_cpu_type () < 5)
	return;

    oldsigill = signal (SIGILL, exceptionhandler);

    /* test for MMX */
    if (ia32_cpuid_int ("1.EDX.23.23")) {
	have_mmx = 1;
	if ((x = setjmp (jb)) == 0) {
	    __asm __volatile ("	emms	;": );
	}
	else {
	    /* emms failed */
	    have_mmx = 0;
	}
    }
    
    /* test for SSE */
    if (ia32_cpuid_int ("1.EDX.25.25")) {
	/* reset in case signal isn't reset when caught */
	signal (SIGILL, exceptionhandler);
	have_sse = 1;
	if ((x = setjmp (jb)) == 0) {
	    /* orps xmm0, xmm0 */
	    __asm __volatile ("	.byte 0x0f,0x56,0xc0 ;": );
	}
	else {
	    /* orps failed */
	    have_sse = 0;
	}
    }

    /* test for SSE2 */
    if (ia32_cpuid_int ("1.EDX.26.26")) {
	/* reset in case signal isn't reset when caught */
	signal (SIGILL, exceptionhandler);
	have_sse2 = 1;
	if ((x = setjmp (jb)) == 0) {
	    /* paddq xmm0, xmm0 */
	    __asm __volatile ("	.byte 0x66,0x0f,0xd4,0xc0 ;": );
	}
	else {
	    /* paddq failed */
	    have_sse2 = 0;
	}
    }

    signal (SIGILL, oldsigill);

    if (have_sse2)
	symtab_set_value_int ("target.cpu.ia32.sse2", 1);
    if (have_sse)
	symtab_set_value_int ("target.cpu.ia32.sse", 1);
    if (have_mmx)
	symtab_set_value_int ("target.cpu.ia32.mmx", 1);
    
    /*
     * XXX
     * if 3DNow! and extensions require OS support, this doesn't test
     * for it
     */
    if (ia32_cpuid_int ("0x80000001.EAX.31.31") == 1)
	symtab_set_value_int ("target.cpu.ia32.3dnow", 1);
    if (ia32_cpuid_int ("0x80000001.EAX.30.30") == 1)
	symtab_set_value_int ("target.cpu.ia32.3dnowext", 1);
}

/*
 * disabled code (compiled out with #if 0): map a family/model pair
 * to a descriptive processor name.
 *
 * XYZZY packs the pair into a single switch key, with the model in
 * the low four bits and the family above them.  pairs that are not
 * listed yield "unknown".
 *
 * NOTE(review): the parameters have no declared types, so this would
 * need updating before being re-enabled.  the TEST driver in this
 * file calls ia32_type_str(), which is not defined in the visible
 * source -- possibly this function under another name; confirm.
 */
#if 0
#define XYZZY(f,m) (((f) << 4) | (m))

static char *
cpu_type_str (family, model) {

    switch (XYZZY(family, model)) {
    case XYZZY(4,0):
    case XYZZY(4,1):
	return "486DX";
    case XYZZY(4,2):
	return "486SX";
    case XYZZY(4,3):
	return "486DX2";
    case XYZZY(4,4):
	return "486SL";
    case XYZZY(4,5):
	return "SX2";
    case XYZZY(4,7):
	return "write-back enhanced DX2";
    case XYZZY(4,8):
	return "DX4";
    case XYZZY(4,6):
    case XYZZY(4,9):
    case XYZZY(4,10):
    case XYZZY(4,11):
    case XYZZY(4,12):
    case XYZZY(4,13):
    case XYZZY(4,14):
    case XYZZY(4,15):
	return "AMD 5x86";
    case XYZZY(5,0):
	return "AMD 486 or 586";
    case XYZZY(5,1):
	return "Intel Pentium or AMD-K5 model 0";
    case XYZZY(5,2):
	return "Intel Pentium or AMD-K5 model 1";
    case XYZZY(5,3):
	return "Pentium OverDrive for 486 or AMD-K5 model 2";
    case XYZZY(5,4):
	return "Pentium with MMX";
    case XYZZY(5,6):
	return "AMD-K6 model 6";
    case XYZZY(5,7):
	return "AMD-K6 model 7";
    case XYZZY(5,8):
	return "AMD-K6 model 8";
    case XYZZY(5,9):
	return "AMD-K6 model 9";
    case XYZZY(6,1):
	return "Intel Pentium Pro or AMD Athlon model 1";
    case XYZZY(6,2):
	return "AMD Athlon model 2";
    case XYZZY(6,3):
	return "Pentium II model 3 or AMD Duron";
    case XYZZY(6,4):
	return "AMD Athlon model 4";
    case XYZZY(6,5):
	return "Pentium II or Xeon or Celeron model 5";
    case XYZZY(6,6):
	return "Celeron model 6";
    case XYZZY(6,7):
	return "Pentium III or Xeon model 7";
    case XYZZY(6,8):
	return "Pentium III or Xeon or Celeron model 8";
    case XYZZY(6,10):
	return "Pentium III Xeon model A";
    case XYZZY(15,0):
	return "Pentium 4";
    default:
	return "unknown";
    }
}
#endif

/*
 * return a pointer to the first character of 's' that is not a
 * space (' '), or to the terminating NUL if 's' contains nothing but
 * spaces.  only the space character is skipped, not tabs or other
 * white space.
 */
static char *
firstnonspace (char *s)
{
    char *p;

    for (p = s; *p == ' '; ++p)
	continue;
    return p;
}

/*
 * count the processors of the link platform and publish the count as
 * "target.ncpu".
 *
 * only implemented for OS_linux, where every line of /proc/cpuinfo
 * that starts with "processor<TAB>:" counts as one processor.  the
 * variable is left unset if the file cannot be opened or no such
 * line is found.  on other systems this function does nothing.
 */
void
cpu_count_cpus ()
{
#ifdef OS_linux
    char line[10240];
    int count = 0;
    FILE *cpuinfo = fopen ("/proc/cpuinfo", "r");

    if (cpuinfo == NULL)
	return;

    while (fgets (line, sizeof (line), cpuinfo) != NULL)
	count += (strncmp (line, "processor\t:", 11) == 0);

    if (count > 0)
	symtab_set_value_int ("target.ncpu", count);
    fclose (cpuinfo);
#endif
}

/*
 * cpu_init for builds with CPU_ia32 defined.
 *
 * publishes the architecture name, vendor string, product name
 * (with leading spaces removed), model and family; binds the
 * "target.cpu.ia32.cpuid.*" pseudo-variables to ia32_cpuid(); then
 * records cache information, instruction set extensions and the
 * processor count.
 */
void
cpu_init ()
{
    /* cpu_init_sysctl (); */
    symtab_set_value_str ("target.cpu.arch", "ia32");
    symtab_set_value_str ("target.cpu.ia32.vendor", ia32_vendor_str ());
    symtab_set_value_str ("target.cpu.ia32.product", firstnonspace (ia32_product_str ()));
    symtab_set_value_int ("target.cpu.ia32.model", ia32_model ());
    symtab_set_value_int ("target.cpu.ia32.family", ia32_family ());
    symtab_bind_pseudo ("target.cpu.ia32.cpuid.*", &ia32_cpuid, 0);
    ia32_get_cache_info ();
    ia32_test_for_features ();
    cpu_count_cpus ();
}

#ifdef TEST
/*
 * stand-alone test driver, built only when TEST is defined: print
 * what the probe routines report for the machine it runs on.
 *
 * the former "cpu.type" line called ia32_type_str(), which is not
 * defined in this file, so the driver could not be built; the
 * product name is printed in its place.
 */
int
main ()
{
    printf ("cpu type = %d\n", primitive_cpu_type ());
    printf ("cpu.vendor = %s\n", ia32_vendor_str ());
    printf ("cpu.family = %u\n", ia32_family ());
    printf ("cpu.model = %u\n", ia32_model ());
    printf ("cpu.product = %s\n", firstnonspace (ia32_product_str ()));
    return 0;
}
#endif
#endif /* CPU_ia32 */

#ifdef CPU_powerpc

/*
 * publish the number of processors as "target.ncpu".
 *
 * only implemented for OS_darwin, where the count is read from the
 * "hw.ncpu" sysctl; the variable is left unset if the sysctl fails.
 * on other systems this function does nothing.
 */
void
cpu_count_cpus ()
{
#ifdef OS_darwin
    int count;
    size_t len = sizeof (count);

    if (sysctlbyname ("hw.ncpu", &count, &len, NULL, 0) != 0)
	return;
    symtab_set_value_int ("target.ncpu", count);
#endif
}

#if OS_darwin
/*
 * pseudo-variable handler exposing integer sysctl values.
 *
 * 'name' is the full variable name; whatever follows the
 * "target.sysctl." prefix is used as the sysctl name.  on success
 * the value is stored in *value as an INT and 0 is returned; if the
 * sysctl fails, -1 is returned (value->valtype has already been set
 * to INT at that point).  'c' is not used.
 *
 * the length argument is a size_t because sysctlbyname() reads and
 * writes a size_t through that pointer; passing the address of an
 * int is wrong wherever the two types differ in size.
 *
 * NOTE(review): ia32_cpuid(), the other handler passed to
 * symtab_bind_pseudo() in this file, takes a fourth 'void *arg'
 * parameter -- confirm which signature symtab expects.
 *
 * XXX 
 * crude hack for now.  we should really extract the type
 * information from the sysctl mib and use that to determine
 * the result type, but for now we just need some of the hw.*
 * values and those are currently (macosx 10.3.1) all INTs
 * so for now this is restricted to hw.*
 */

static int
darwin_sysctl_hw (int c, char *name, VAL *value)
{
    int val;
    size_t oldlen = sizeof (val);

    value->valtype = INT;
    name += strlen("target.sysctl.");
    if (sysctlbyname (name, &val, &oldlen, NULL, 0) == 0) {
	value->intval = val;
	return 0;
    }
    return -1;
}

#include <mach-o/arch.h>

/*
 * publish the name reported by NXGetLocalArchInfo() as
 * "target.cpu.powerpc.model".  nothing is set if no architecture
 * information is available.
 */
static void
darwin_set_cpu ()
{
    NXArchInfo *info = (NXArchInfo *) NXGetLocalArchInfo ();

    if (info == NULL)
	return;

    symtab_set_value_str ("target.cpu.powerpc.model", (char *) info->name);
#if 0
    symtab_set_value_str ("target.cpu.powerpc.description", 
			  (char *) info->description);
#endif
}
#endif /* OS_darwin */

#if OS_aix
#include <sys/systemcfg.h>

/*
 * publish the processor model ("target.cpu.powerpc.model") and the
 * number of processors ("target.ncpu") from AIX's
 * _system_configuration structure.
 *
 * the model is left unset if neither the 'implementation' nor the
 * 'version' field holds a value known to this function.
 *
 * the function returns nothing, so the return type is spelled out as
 * void instead of relying on implicit int.
 */
void
aix_set_cpu ()
{
    char *foo = NULL;

    /* 
     * AIX returns both 'implementation' (presumably a generic cpu type)
     * and 'version' (presumably a more specific type).   Use 'version'
     * if we know how to decode it, else implementation.  Eventually
     * try to return both for all powerpc processors.
     */
    switch (_system_configuration.implementation) {
        case POWER_RS1: foo="rs1"; break;
        case POWER_RSC: foo="rsc"; break;
        case POWER_RS2: foo="rs2"; break;
        case POWER_601: foo="ppc601"; break;
        case POWER_603: foo="ppc603"; break;
        case POWER_604: foo="ppc604"; break;
        case POWER_620: foo="ppc620"; break;
        case POWER_630: foo="ppc630"; break;
        case POWER_A35: foo="ppca35"; break;
        case POWER_RS64II: foo="rs64-ii"; break;
        case POWER_RS64III: foo="rs64-iii"; break;
        /* NOTE(review): RS64IV is reported as "power4" -- confirm */
        case POWER_RS64IV: foo="power4"; break;
        case POWER_MPC7450: foo="mpc7450"; break;
        default: break;		/* unknown: leave foo alone */
    }
    /* a recognized 'version' overrides the 'implementation' name */
    switch (_system_configuration.version) {
        case PV_601:    foo="ppc601"; break;
        case PV_601a:   foo="ppc601a"; break;
        case PV_603:    foo="ppc603"; break;
        case PV_604:    foo="ppc604"; break;
        case PV_620:    foo="ppc620"; break;
        case PV_630:    foo="ppc630"; break;
        case PV_A35:    foo="ppca35"; break;
        case PV_RS64II: foo="ppcrs64ii"; break;
        case PV_RS64III: foo="ppcrs64iii"; break;
        case PV_4:      foo="ppc4"; break;
        case PV_MPC7450: foo="ppc7450"; break;
        case PV_4_2:    foo="ppc4_2"; break;
        case PV_RS2:    foo="rs2"; break;
        case PV_RS1:    foo="rs1"; break;
        case PV_RSC:    foo="rsc"; break;
        case PV_M1:     foo="ia64-m1"; break;
        case PV_M2:     foo="ia64-m2"; break;
        default: break;		/* unknown: keep the implementation name */
    }
    if (foo)
    	symtab_set_value_str ("target.cpu.powerpc.model", foo);
    symtab_set_value_int ("target.ncpu", _system_configuration.ncpus);
    /* 
     * XXX eventually fill in cache sizes 
     */
}
#endif

/*
 * cpu_init for builds with CPU_powerpc defined.
 *
 * with OS_darwin: records the processor count, binds the
 * "target.sysctl.hw.*" pseudo-variables to darwin_sysctl_hw() and
 * records the processor model.
 * with OS_aix: records the processor model and count via
 * aix_set_cpu().
 * in every case "target.cpu.arch" is set to "powerpc".
 *
 * NOTE(review): the Darwin and AIX helpers are compiled under
 * "#if OS_darwin" / "#if OS_aix" but called here under "#ifdef";
 * the two tests differ if the macro is defined as 0 or as empty --
 * confirm how conf.h defines them.
 */
void
cpu_init ()
{
    /* cpu_init_sysctl (); */
#ifdef OS_darwin
    cpu_count_cpus ();
    symtab_bind_pseudo ("target.sysctl.hw.*", &darwin_sysctl_hw, 0);
    darwin_set_cpu ();
#endif /* OS_darwin */
#ifdef OS_aix
    aix_set_cpu ();
#endif
    symtab_set_value_str ("target.cpu.arch", "powerpc");
}
#endif /* CPU_powerpc */

#ifdef CPU_sparc
/*
 * cpu_init for builds with CPU_sparc defined.
 *
 * calls cpu_count_cpus() and sets "target.cpu.arch" to "sparc".
 *
 * NOTE(review): the cpu_count_cpus() definitions visible in this
 * file are inside the CPU_ia32 and CPU_powerpc sections; presumably
 * a definition for sparc builds is provided elsewhere -- confirm.
 */
void
cpu_init ()
{
    /* cpu_init_sysctl (); */
    cpu_count_cpus ();
    symtab_set_value_str ("target.cpu.arch", "sparc");
}
#endif
