#!/bin/sh

# Print the gcc cpu specific options tailored for the current CPU

# License: LGPLv2
# Author:
#    http://www.pixelbeat.org/
# Notes:
#    This script currently supports Linux, FreeBSD, Cygwin and Mac OS X
#    This script is x86 (32 bit) specific
#    It should work on any gcc >= 2.95 at least
#    It only returns CPU specific options. You probably also want -O3 etc.
# Changes:
#    V0.1, 12 Mar 2003, Initial release
#    V0.2, 01 Jun 2005, Added support for 3.2>=gcc<=4.0
#    V0.3, 03 Jun 2005, Added support for pentium-m
#    V0.4, 03 Jun 2005, Fix silly bugs
#    V0.5, 07 Jun 2005, Clarify/Simplify confusing floating point expr usage
#                       Print warning when CPU only supported on a newer gcc
#    V0.6, 15 Dec 2006, Added support for Intel Core and Core 2 processors
#                       Added support for 4.1>=gcc<=4.3
#                       Added support for gcc -msse3 option
#                       Added support for new gcc -march=native option
#    V0.7, 18 Dec 2006, Changes from Conor McDermottroe
#                         Added support for FreeBSD
#                         Remove bash specific constructs
#                         Better error handling
#    V0.8, 19 Dec 2006, Give warnings and 32 bit -march on 64 bit platforms.
#                       Previously it just gave an invalid blank -march.
#                       Reported and tested by Ewan Oughton.
#    V0.9, 30 Apr 2007, Give error if compiler not present.
#                       Warn about rather than default to -march=native option.
#   V0.92, 08 Nov 2007, Change from Krzysztof Jankowski to support Cygwin.
#                       Added support for AMD family 10 processors.
#                       Add support for gcc -msse4 & -msse5 options.
#                       Use "prescott" rather than "pentium4" for all
#                       models >= 3 in intel family 15, not just model 3.
#   V0.93, 13 Nov 2007, Oops, actually not all intel family 15, model >= 3
#                       are prescotts. Use the sse3 flag to distinguish.
#   V0.94, 31 Dec 2007, Oops, actually all intel family 15, model >= 3
#                       are prescotts. This was indicated by Adam Drzewiecki.
#                       I was confused by a bug in older linux kernels where pni
#                       was not reported for my intel "F41" processor at least.
#   V0.95, 18 Jan 2008, Changes from Conor McDermottroe
#                         Support for Mac OS X
#                         Support for FreeBSD base system
#   V0.96, 17 Dec 2008, Following a report from Alfredo Pons, add support
#                       for newer intel core2 processors (models 23,26,29,...)
#                       by assuming models >= 15 are core2.
#                       Report details of unrecognised CPUs.
#   V0.97, 19 Dec 2008, Use prescott for Intel Atom (model 28).
#                       Ensure all errors are output to stderr.
#   V0.98, 24 Feb 2009, Actually Intel Atom (model 28) is core2 ISA compliant.
#                       It's an in-order core (like i586) so mtune as such.
#   V0.99, 30 Apr 2009, Intel Atom (model 28) is getting a corresponding "atom"
#                       option in GCC 4.5
#   V0.99.3, 11 Aug 2009, Support for AMD Geode LX processor.
#   V0.99.4, 24 Oct 2009, Following a report from Maxime de Roucy, add support
#                         for AMD family 17 (griffin).
#   V0.99.11, 04 Jul 2013
#     http://github.com/pixelb/scripts/commits/master/scripts/gcccpuopt

# Print the script version and stop when invoked as "<script> --version".
case "$1" in
    --version)
        echo "0.99.11" && exit
        ;;
esac

# This table shows when -march options were introduced into _official_ gcc releases.
# Note there are vendor deviations that complicate this.
# For e.g. Red Hat introduced the prescott option in 3.3-13.
#   gcc-2.95   = i386, i486, i586,pentium, i686,pentiumpro, k6
#   gcc-3.0   += athlon
#   gcc-3.1   += pentium-mmx, pentium2, pentium3, pentium4, k6-2, k6-3, athlon-{tbird, 4,xp,mp}
#   gcc-3.3   += winchip-c6, winchip2, c3
#   gcc-3.4.0 += k8,opteron,athlon64,athlon-fx, c3-2
#   gcc-3.4.1 += pentium-m, pentium3m, pentium4m, prescott, nocona
#   gcc-4.3   += core2, amdfam10, geode
#   gcc-4.5   += atom
#   gcc-4.6   += corei7, corei7-avx, bdver1, btver1
#   gcc-4.7   += core-avx-i, core-avx2, bdver2
#   gcc-4.8   += btver2

# Use gcc unless the caller already named a compiler in $CC.
: "${CC:=gcc}"

# try_gcc_options [option...]
#
# Check whether the compiler named by $CC accepts the given options by
# compiling /dev/null as C to assembly, discarding all output.
# Globals:   CC (read; left unquoted on purpose so it may carry extra words)
# Arguments: any number of compiler options, each passed through unchanged
# Returns:   the exit status of the compiler (0 when the options are accepted)
try_gcc_options() {
    # "$@" keeps every argument as exactly one word; the unquoted $* form
    # would re-split arguments on whitespace and expand glob characters.
    $CC "$@" -S -o /dev/null -xc /dev/null >/dev/null 2>&1
}

# Give up early when the compiler in $CC cannot even process an empty
# input without any extra options.
if ! try_gcc_options; then
    echo "Error: Couldn't execute your compiler ($CC)" >&2
    exit 1
fi

# When the compiler accepts -march=native, tell the user (on stderr) and
# show the -march ... -mtune options found in its verbose output.
if try_gcc_options -march=native; then
    echo "Warning: Your compiler supports the -march=native option which you may prefer" >&2
    # Ask the same compiler that was just probed ($CC) rather than a
    # hard-coded gcc, which may be a different compiler or not installed.
    $CC -march=native --verbose -xc /dev/null 2>&1 | sed -n 's|.*/dev/null \(-march.*-mtune[^ ]*\).*|\1|p' >&2
fi

#if ! try_gcc_options -march=i386; then
#    if ! try_gcc_options -m32 -march=i386; then
#        echo "Error: This script only supports 32 bit x86 architectures" >&2
#        exit 1
#    else
#        echo "Warning: The optimum *32 bit* architecture is reported" >&2
#        m32="-m32 "
#    fi
#fi

#try_line() {
#    skip=0
#    for arch in $1; do
#        if try_gcc_options $m32 -march=$arch; then
#            echo $arch
#            return
#        elif [ "$skip" = "0" ] && [ "$arch" != "native" ]; then
#            skip=1
#            echo "Warning: Newer versions of GCC better support your CPU with -march=$arch" >&2
#        fi
#    done
#    return 1
#}

# read_cpu_data_linux [cpuinfo_file]
#
# Parse "name : value" lines in the /proc/cpuinfo format and record the
# fields the rest of the script needs.
# Globals:   vendor_id, cpu_family, cpu_model, flags (written when the
#            corresponding line is present, otherwise left untouched)
# Arguments: $1 - optional file to parse, defaults to /proc/cpuinfo
# Returns:   1 (with a message on stderr) when the file is not readable
read_cpu_data_linux() {
    local _cpuinfo _name _value

    _cpuinfo="${1:-/proc/cpuinfo}"
    if [ ! -r "$_cpuinfo" ]; then
        echo "Error: $_cpuinfo is not readable" >&2
        return 1
    fi

    # IFS is set for the read command only, so the caller's IFS is never
    # modified; -r keeps backslashes in the data literal.
    while IFS=: read -r _name _value; do
        # Strip leading and trailing whitespace from both fields.
        _name=${_name#"${_name%%[![:space:]]*}"}
        _name=${_name%"${_name##*[![:space:]]}"}
        _value=${_value#"${_value%%[![:space:]]*}"}
        _value=${_value%"${_value##*[![:space:]]}"}

        case "$_name" in
            vendor_id)
                vendor_id=${_value%%[[:space:]]*} #take first word
                ;;
            "cpu family")
                cpu_family=$_value
                ;;
            model)
                cpu_model=$_value
                ;;
            flags)
                flags=$_value
                break #flags last so break early
                ;;
        esac
    done < "$_cpuinfo"
}

# read_cpu_data_freebsd [dmesg_file]
#
# Extract CPU details from the "CPU: " entry of a FreeBSD boot log: the
# "Origin" line gives the vendor and the raw CPUID signature ("Id"), the
# "Features=" line gives the feature flags. Both the "Origin = " and the
# "Origin=" spellings are accepted. If several entries are present the
# last one wins.
# Globals:   vendor_id, cpu_family, cpu_model, flags (written when the
#            corresponding line is found, otherwise left untouched)
# Arguments: $1 - optional log file, defaults to /var/run/dmesg.boot
# Exits:     with status 1 when the log file is not readable
read_cpu_data_freebsd() {
    local _dmesg _cpu_lines _origin _features _cpu_id _base_family _ext_family

    _dmesg="${1:-/var/run/dmesg.boot}"
    if [ ! -r "$_dmesg" ]; then
        echo "Error: $_dmesg does not exist!" >&2
        exit 1
    fi

    # The CPU line plus the two lines following it.
    _cpu_lines=$(grep -A2 '^CPU: ' "$_dmesg")
    _origin=$(printf '%s\n' "$_cpu_lines" | grep '^  Origin *= *' | tail -n 1)
    _features=$(printf '%s\n' "$_cpu_lines" | grep '^  Features=' | tail -n 1)

    if [ -n "$_origin" ]; then
        # Drop the prefix and opening quote, then everything from the
        # closing quote onwards.
        vendor_id=$(printf '%s\n' "$_origin" |
            sed -e 's/^  Origin *= *.//' -e 's/[^A-Za-z0-9].*$//')
        _cpu_id=$(printf '%s\n' "$_origin" |
            sed -e 's/^.*Id *= *//' -e 's/ .*$//' -e 'y/abcdef/ABCDEF/')

        case "$_cpu_id" in
            0[xX] | 0[xX]*[!0-9A-Fa-f]*)
                echo "Warning: could not parse CPU Id '$_cpu_id'" >&2
                ;;
            0[xX]*)
                # NOTE: this relies on the shell accepting 0x constants in
                # arithmetic (FreeBSD 5.0 sh reportedly does not).
                # CPUID signature layout: model in bits 4-7, family in
                # bits 8-11, extended model in bits 16-19, extended family
                # in bits 20-27. The extended family is added only when the
                # base family is 15; without it AMD family 16 and later
                # would all be reported as family 15.
                _base_family=$(( ($_cpu_id >> 8) & 0xF ))
                _ext_family=$(( ($_cpu_id >> 20) & 0xFF ))
                if [ "$_base_family" -eq 15 ]; then
                    cpu_family=$(( _base_family + _ext_family ))
                else
                    cpu_family=$_base_family
                fi
                cpu_model=$(( (($_cpu_id & 0xF0) >> 4) + (($_cpu_id & 0xF0000) >> 12) ))
                ;;
            *)
                echo "Warning: could not parse CPU Id '$_cpu_id'" >&2
                ;;
        esac
    fi

    if [ -n "$_features" ]; then
        # Keep what is between < and >, space separated and lower case.
        flags=$(printf '%s\n' "$_features" |
            sed -e 's/^.*<//' -e 's/>.*//' -e 's/,/ /g' | tr 'A-Z' 'a-z')
    fi
}

# read_cpu_data_darwin
#
# Fill vendor_id, cpu_family, cpu_model and flags from the machdep.cpu.*
# values reported by /usr/sbin/sysctl. The feature list is lower-cased.
read_cpu_data_darwin() {
    vendor_id=$(/usr/sbin/sysctl -n machdep.cpu.vendor)
    cpu_family=$(/usr/sbin/sysctl -n machdep.cpu.family)
    cpu_model=$(/usr/sbin/sysctl -n machdep.cpu.model)
    flags=$(/usr/sbin/sysctl -n machdep.cpu.features | tr 'A-Z' 'a-z')
}

# read_cpu_data
#
# Reset the CPU description globals to their "unknown" values, then let the
# reader matching the running operating system (as reported by uname)
# overwrite them. Any other operating system is a fatal error.
# Globals: vendor_id, cpu_family, cpu_model, flags (written)
# Exits:   with status 1 on an unsupported operating system
read_cpu_data() {
    # Values kept when the platform reader cannot determine a field.
    flags=""
    cpu_model="-1"
    cpu_family="-1"
    vendor_id="NotFound"

    case "$(uname)" in
        Darwin)
            read_cpu_data_darwin
            ;;
        FreeBSD)
            read_cpu_data_freebsd
            ;;
        Linux | CYGWIN*)
            read_cpu_data_linux
            ;;
        *)
            echo "Error: $(uname) is not a supported operating system" >&2
            exit 1
            ;;
    esac
}

# Collect vendor_id, cpu_family, cpu_model and flags for this machine.
read_cpu_data

# Map the vendor/family/model (and in a few places the feature flags) onto
# gcc -march values. Each recognised CPU sets exactly one of:
#   _CFLAGS - a complete "-march=..." option, used when there is a single
#             suitable value
#   line    - a space separated list of candidate -march values, preferred
#             entries first (the commented-out try_line helper picks the
#             first one the compiler accepts)
# CPUs matched by no arm leave both variables unset.
# NOTE(review): try_line and its call site are commented out in this file,
# so nothing currently turns `line` into _CFLAGS - confirm that is intended.
if [ "$vendor_id" = "AuthenticAMD" ]; then
    if [ $cpu_family -eq 4 ]; then
        _CFLAGS="-march=i486"
    elif [ $cpu_family -eq 5 ]; then
        if [ $cpu_model -lt 4 ]; then
            _CFLAGS="-march=pentium"
        elif [ \( $cpu_model -eq 6 \) -o \( $cpu_model -eq 7 \) ]; then
            _CFLAGS="-march=k6"
        elif [ \( $cpu_model -eq 8 \) -o \( $cpu_model -eq 12 \) ]; then
            line="k6-2 k6"
        elif [ \( $cpu_model -eq 9 \) -o \( $cpu_model -eq 13 \) ]; then
            line="k6-3 k6-2 k6"
        elif [ $cpu_model -eq 10 ]; then #geode LX
            line="geode k6-2 k6"
            #The LX supports 3dnowext in addition to the k6-2 instructions,
            #however gcc doesn't support explicitly selecting that.
        fi
    elif [ $cpu_family -eq 6 ]; then
        # Model 5 is matched by none of the arms below.
        if [ $cpu_model -le 3 ]; then
            line="athlon k6-3 k6-2 k6"
        elif [ $cpu_model -eq 4 ]; then
            line="athlon-tbird athlon k6-3 k6-2 k6"
        elif [ $cpu_model -ge 6 ]; then #athlon-{4,xp,mp} (also geode NX)
            line="athlon-4 athlon k6-3 k6-2 k6"
        fi
    elif [ $cpu_family -eq 15 ]; then #k8,opteron,athlon64,athlon-fx
        line="k8 athlon-4 athlon k6-3 k6-2 k6"
    elif [ $cpu_family -eq 16 ] ||    #barcelona,amdfam10
         [ $cpu_family -eq 17 ] ||    #griffin
         [ $cpu_family -eq 18 ]; then #llano
        line="amdfam10 k8 athlon-4 athlon k6-3 k6-2 k6"
    elif [ $cpu_family -eq 20 ]; then #bobcat
        line="btver1 amdfam10 k8 athlon-4 athlon k6-3 k6-2 k6"
    elif [ $cpu_family -eq 21 ]; then #bulldozer
        # The grep is a substring match on the whole flags string.
        if echo "$flags" | grep -q bmi; then #piledriver
            line="bdver2 bdver1 btver1 amdfam10 k8 athlon-4 athlon k6-3 k6-2 k6"
        else
            line="bdver1 btver1 amdfam10 k8 athlon-4 athlon k6-3 k6-2 k6"
        fi
    elif [ $cpu_family -eq 22 ]; then #jaguar
        line="btver2 btver1 amdfam10 k8 athlon-4 athlon k6-3 k6-2 k6"
    fi
elif [ "$vendor_id" = "CentaurHauls" ]; then
    if [ $cpu_family -eq 5 ]; then
        if [ $cpu_model -eq 4 ]; then
            line="winchip-c6 pentium"
        elif [ $cpu_model -eq 8 ]; then
            line="winchip2 winchip-c6 pentium"
        elif [ $cpu_model -ge 9 ]; then
            line="winchip2 winchip-c6 pentium" #actually winchip3 but gcc doesn't support this currently
        fi
    elif [ $cpu_family -eq 6 ]; then
        # Pick the last-resort entry of the candidate list depending on
        # whether the CPU reports cmov.
        if echo "$flags" | grep -q cmov; then
            fallback=pentiumpro
        else
            fallback=pentium #gcc incorrectly assumes i686 always has cmov
        fi
        if [ $cpu_model -eq 6 ]; then
            _CFLAGS="-march=pentium" # ? Cyrix 3 (samuel)
        elif [ $cpu_model -eq 7 ] || [ $cpu_model -eq 8 ]; then
            line="c3 winchip2 winchip-c6 $fallback"
        elif [ $cpu_model -ge 9 ]; then
            line="c3-2 c3 winchip2 winchip-c6 $fallback"
        fi
    fi
elif [ "$vendor_id" = "GenuineIntel" ]; then
    if [ $cpu_family -eq 3 ]; then
        _CFLAGS="-march=i386"
    elif [ $cpu_family -eq 4 ]; then
        _CFLAGS="-march=i486"
    elif [ $cpu_family -eq 5 ]; then
        if [ $cpu_model -ne 4 ]; then
            _CFLAGS="-march=pentium"
        else
            line="pentium-mmx pentium" #No overlap with other vendors
        fi
    elif [ $cpu_family -eq 6 ]; then
        # The order of the arms matters: model 9 is claimed by the
        # "centrino" arm, so the closing 7..11 range effectively covers
        # models 7, 8, 10 and 11 only.
        if [ \( $cpu_model -eq 0 \) -o \( $cpu_model -eq 1 \) ]; then
            _CFLAGS="-march=pentiumpro"
        elif [ \( $cpu_model -ge 2 \) -a \( $cpu_model -le 6 \) ]; then #4=TM5600 at least
            line="pentium2 pentiumpro pentium-mmx pentium i486 i386"
            if [ \( $cpu_model -eq 2 \) ]; then #qemu generic
                echo "\
Warning: Unspecified QEMU CPU model.
Please consider upgrading QEMU or configuring it to use a specific model." >&2
            fi
        elif [ \( $cpu_model -eq 9 \) -o \( $cpu_model -eq 13 \) ]; then #centrino
            line="pentium-m pentium4 pentium3 pentium2 pentiumpro pentium-mmx pentium i486 i386"
        elif [ \( $cpu_model -eq 14 \) ]; then #Core
            line="prescott pentium-m pentium4 pentium3 pentium2 pentiumpro pentium-mmx pentium i486 i386"
        elif [ $cpu_model -eq 28 ] || # pine
             [ $cpu_model -eq 38 ] || # oak
             [ $cpu_model -eq 54 ]; then # cedar
            line="atom core2 pentium-m pentium4 pentium3 pentium2 pentiumpro pentium-mmx pentium i486 i386"
        elif [ $cpu_model -eq 26 ] ||
             [ $cpu_model -eq 30 ] ||
             [ $cpu_model -eq 31 ] ||
             [ $cpu_model -eq 46 ] ||
             # ^ Nehalem ^
             [ $cpu_model -eq 37 ] ||
             [ $cpu_model -eq 44 ] ||
             [ $cpu_model -eq 47 ]; then
             # ^ Westmere ^
            line="corei7 core2 pentium-m pentium4 pentium3 pentium2 pentiumpro pentium-mmx pentium i486 i386"
        elif [ $cpu_model -eq 42 ] || #Sandy Bridge
             [ $cpu_model -eq 45 ]; then #Sandy Bridge E
            line="corei7-avx corei7 core2 pentium-m pentium4 pentium3 pentium2 pentiumpro pentium-mmx pentium i486 i386"
        elif [ $cpu_model -eq 58 ]; then #Ivy Bridge
            line="core-avx-i corei7-avx corei7 core2 pentium-m pentium4 pentium3 pentium2 pentiumpro pentium-mmx pentium i486 i386"
        elif [ $cpu_model -eq 60 ]; then #Haswell
            line="core-avx2 core-avx-i corei7-avx corei7 core2 pentium-m pentium4 pentium3 pentium2 pentiumpro pentium-mmx pentium i486 i386"
        # NOTE(review): unlike the model 60 list, the model 69 list below
        # omits corei7-avx and corei7 - confirm whether that is intentional.
        elif [ $cpu_model -eq 69 ]; then #Haswell/Lynnfield/Clarkdale/Sandy Bridge
            line="core-avx2 core-avx-i core2 pentium-m pentium4 pentium3 pentium2 pentiumpro pentium-mmx pentium i486 i386"
        elif [ $cpu_model -eq 15 ] ||
             # ^ Merom ^
             [ $cpu_model -eq 22 ] ||
             # ^ Conroe-L ^
             [ $cpu_model -eq 23 ] ||
             [ $cpu_model -eq 29 ]; then
             # ^ Penryn ^
            line="core2 pentium-m pentium4 pentium3 pentium2 pentiumpro pentium-mmx pentium i486 i386"
        elif [ \( $cpu_model -ge 7 \) -a \( $cpu_model -le 11 \) ]; then
            line="pentium3 pentium2 pentiumpro pentium-mmx pentium i486 i386"
        fi
    elif [ $cpu_family -eq 15 ]; then
        line="pentium4 pentium3 pentium2 pentiumpro pentium-mmx pentium i486 i386"
        # Models 3 and above get prescott put in front of the pentium4 list.
        if [ $cpu_model -ge 3 ]; then
            line="prescott $line"
        fi
    fi
elif [ "$vendor_id" = "Geode" ]; then #by NSC
    if [ $cpu_family -eq 5 ]; then
        if [ \( $cpu_model -eq 4 \) -o \( $cpu_model -eq 9 \) ]; then
            # Note both models 4 and 9 have cmov.
            # In addition, model 9 has cxmmx.
            # Note also, the "geode" gcc arch is for newer AMD geode cores
            # and is not appropriate for this older core.
            line="pentium-mmx pentium"
        fi
    fi
fi

# Neither a fixed option (_CFLAGS) nor a candidate list (line) is set, so
# the CPU is unknown: report what was detected on stderr and fail.
if [ -z "$_CFLAGS" ] && [ -z "$line" ]; then
    {
        echo "Unrecognised CPU. Please email the following to: P@draigBrady.com"
        echo "  Vendor:$vendor_id family:$cpu_family model:$cpu_model"
        echo "  flags:$flags"
    } >&2
    exit 1
fi

#[ -z "$_CFLAGS" ] && _CFLAGS="-march=`try_line "$line"`"

#The Atom CPU supports the full core2 instruction set,
#but it's an in-order core, the last one of those being the i586.
#Therefore if gcc hasn't explicit support for the Atom,
#tune it for the i586 architecture.

# tune_in_order_atom
#
# Append "-mtune=pentium" to _CFLAGS when the CPU is on the "atom" candidate
# list but the chosen -march is an out-of-order architecture.
# The decision is keyed on the candidate list itself rather than on model 28
# alone, so every model that is given the "atom ..." list is treated alike.
# Globals: line (read), _CFLAGS (read, possibly extended)
tune_in_order_atom() {
    case "$_CFLAGS" in
        *atom*) return 0 ;; #gcc has specific Atom support
    esac
    case "$line" in
        "atom "*core2*) ;; #atom ISA line
        *) return 0 ;;
    esac
    # "pentium[^ ]" needs one more character after "pentium", so a plain
    # "-march=pentium" at the end of _CFLAGS is left alone.
    if echo "$_CFLAGS" | grep -E -q "(core2|pentium[^ ])"; then #gcc chose out of order arch
        _CFLAGS="$_CFLAGS -mtune=pentium" #tune for last in-order core
    fi
}

tune_in_order_atom

#SSE is not used for floating point by default in gcc 32 bit
#so turn that on here.
#if echo "$flags" | grep -q "sse"; then
#    if try_gcc_options "-mfpmath=sse"; then #gcc >= 3.1
#        _CFLAGS="$_CFLAGS -mfpmath=sse"
#    fi
#fi

#The SSE options are mostly selected automatically
#when a particular march option is selected.
#There are a few exceptions unfortunately, which we handle here.
#Note the sse instruction lines are:
#   intel: [sse4.2] [sse4.1] ssse3 sse3 sse2 sse ...
#   amd:   [sse5] sse4a [sse3] sse2 sse ...
# The bracketed ones are only available on some cpus
# in a particular family and so need to be added explicitly.
#if echo "$_CFLAGS" | grep -q "amdfam10"; then
#    if echo "$flags" | grep -q "sse5"; then
#        if try_gcc_options "-msse5"; then #gcc >= 4.3
#            _CFLAGS="$_CFLAGS -msse5"
#        fi
#    fi
#elif echo "$_CFLAGS" | grep -E -q "(k8|c3-2)"; then
#    if echo "$flags" | grep -E -q "(sse3|pni)"; then
#        if try_gcc_options "-msse3"; then #gcc >= 3.3.3
#            _CFLAGS="$_CFLAGS -msse3"
#        fi
#    fi
#elif echo "$_CFLAGS" | grep -q "core2"; then
#    if echo "$flags" | grep -q "sse4_2"; then
#        if try_gcc_options "-msse4"; then #gcc >= 4.3
#            _CFLAGS="$_CFLAGS -msse4"
#        fi
#    elif echo "$flags" | grep -q "sse4_1"; then
#        if try_gcc_options "-msse4.1"; then #gcc >= 4.3
#            _CFLAGS="$_CFLAGS -msse4.1"
#        fi
#    fi
#fi

# Emit the selected options on stdout; this is the script's only result.
printf '%s\n' "$_CFLAGS"
