| 1 | /******************************************************************** |
| 2 | * * |
| 3 | * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * |
| 4 | * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * |
| 5 | * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * |
| 6 | * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * |
| 7 | * * |
| 8 | * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * |
| 9 | * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * |
| 10 | * * |
| 11 | ******************************************************************** |
| 12 | |
| 13 | CPU capability detection for x86 processors. |
| 14 | Originally written by Rudolf Marek. |
| 15 | |
| 16 | function: |
| 17 | last mod: $Id$ |
| 18 | |
| 19 | ********************************************************************/ |
| 20 | |
| 21 | #include "x86cpu.h" |
| 22 | |
| 23 | #if !defined(OC_X86_ASM) |
| 24 | ogg_uint32_t oc_cpu_flags_get(void){ |
| 25 | return 0; |
| 26 | } |
| 27 | #else |
| 28 | # if defined(__amd64__)||defined(__x86_64__) |
/*On x86-64, gcc seems to be able to figure out how to save %rbx for us when
   compiling with -fPIC.
  Executes the cpuid instruction with _op loaded into %eax, and stores the
   resulting %eax, %ebx, %ecx, and %edx values into the lvalues _eax, _ebx,
   _ecx, and _edx, respectively.
  Only %eax is supplied as an input; %ecx is not set before the instruction
   executes.*/
# define cpuid(_op,_eax,_ebx,_ecx,_edx) \
  __asm__ __volatile__( \
   "cpuid\n\t" \
   :[eax]"=a"(_eax),[ebx]"=b"(_ebx),[ecx]"=c"(_ecx),[edx]"=d"(_edx) \
   :"a"(_op) \
   :"cc" \
  )
| 38 | # else |
/*On x86-32, not so much.
  Executes the cpuid instruction with _op loaded into %eax, and stores the
   resulting %eax, %ebx, %ecx, and %edx values into the lvalues _eax, _ebx,
   _ecx, and _edx, respectively.
  The %ebx result is returned through a compiler-chosen register ("=r")
   instead of a "b" constraint: %ebx is exchanged with that register before
   cpuid executes and exchanged back afterwards, so %ebx holds its original
   value again when the asm statement ends, while the value cpuid produced
   is left in _ebx.
  Only %eax is supplied as an input; %ecx is not set before the instruction
   executes.*/
# define cpuid(_op,_eax,_ebx,_ecx,_edx) \
  __asm__ __volatile__( \
   "xchgl %%ebx,%[ebx]\n\t" \
   "cpuid\n\t" \
   "xchgl %%ebx,%[ebx]\n\t" \
   :[eax]"=a"(_eax),[ebx]"=r"(_ebx),[ecx]"=c"(_ecx),[edx]"=d"(_edx) \
   :"a"(_op) \
   :"cc" \
  )
| 49 | # endif |
| 50 | |
| 51 | static ogg_uint32_t oc_parse_intel_flags(ogg_uint32_t _edx,ogg_uint32_t _ecx){ |
| 52 | ogg_uint32_t flags; |
| 53 | /*If there isn't even MMX, give up.*/ |
| 54 | if(!(_edx&0x00800000))return 0; |
| 55 | flags=OC_CPU_X86_MMX; |
| 56 | if(_edx&0x02000000)flags|=OC_CPU_X86_MMXEXT|OC_CPU_X86_SSE; |
| 57 | if(_edx&0x04000000)flags|=OC_CPU_X86_SSE2; |
| 58 | if(_ecx&0x00000001)flags|=OC_CPU_X86_PNI; |
| 59 | if(_ecx&0x00000100)flags|=OC_CPU_X86_SSSE3; |
| 60 | if(_ecx&0x00080000)flags|=OC_CPU_X86_SSE4_1; |
| 61 | if(_ecx&0x00100000)flags|=OC_CPU_X86_SSE4_2; |
| 62 | return flags; |
| 63 | } |
| 64 | |
| 65 | static ogg_uint32_t oc_parse_amd_flags(ogg_uint32_t _edx,ogg_uint32_t _ecx){ |
| 66 | ogg_uint32_t flags; |
| 67 | /*If there isn't even MMX, give up.*/ |
| 68 | if(!(_edx&0x00800000))return 0; |
| 69 | flags=OC_CPU_X86_MMX; |
| 70 | if(_edx&0x00400000)flags|=OC_CPU_X86_MMXEXT; |
| 71 | if(_edx&0x80000000)flags|=OC_CPU_X86_3DNOW; |
| 72 | if(_edx&0x40000000)flags|=OC_CPU_X86_3DNOWEXT; |
| 73 | if(_ecx&0x00000040)flags|=OC_CPU_X86_SSE4A; |
| 74 | if(_ecx&0x00000800)flags|=OC_CPU_X86_SSE5; |
| 75 | return flags; |
| 76 | } |
| 77 | |
| 78 | ogg_uint32_t oc_cpu_flags_get(void){ |
| 79 | ogg_uint32_t flags; |
| 80 | ogg_uint32_t eax; |
| 81 | ogg_uint32_t ebx; |
| 82 | ogg_uint32_t ecx; |
| 83 | ogg_uint32_t edx; |
| 84 | # if !defined(__amd64__)&&!defined(__x86_64__) |
| 85 | /*Not all x86-32 chips support cpuid, so we have to check.*/ |
| 86 | __asm__ __volatile__( |
| 87 | "pushfl\n\t" |
| 88 | "pushfl\n\t" |
| 89 | "popl %[a]\n\t" |
| 90 | "movl %[a],%[b]\n\t" |
| 91 | "xorl $0x200000,%[a]\n\t" |
| 92 | "pushl %[a]\n\t" |
| 93 | "popfl\n\t" |
| 94 | "pushfl\n\t" |
| 95 | "popl %[a]\n\t" |
| 96 | "popfl\n\t" |
| 97 | :[a]"=r" (eax),[b]"=r" (ebx) |
| 98 | : |
| 99 | :"cc" |
| 100 | ); |
| 101 | /*No cpuid.*/ |
| 102 | if(eax==ebx)return 0; |
| 103 | # endif |
| 104 | cpuid(0,eax,ebx,ecx,edx); |
| 105 | /* l e t n I e n i u n e G*/ |
| 106 | if(ecx==0x6C65746E&&edx==0x49656E69&&ebx==0x756E6547|| |
| 107 | /* 6 8 x M T e n i u n e G*/ |
| 108 | ecx==0x3638784D&&edx==0x54656E69&&ebx==0x756E6547){ |
| 109 | int family; |
| 110 | int model; |
| 111 | /*Intel, Transmeta (tested with Crusoe TM5800):*/ |
| 112 | cpuid(1,eax,ebx,ecx,edx); |
| 113 | flags=oc_parse_intel_flags(edx,ecx); |
| 114 | family=(eax>>8)&0xF; |
| 115 | model=(eax>>4)&0xF; |
| 116 | /*The SSE unit on the Pentium M and Core Duo is much slower than the MMX |
| 117 | unit, so don't use it.*/ |
| 118 | if(family==6&&(model==9||model==13||model==14)){ |
| 119 | flags&=~(OC_CPU_X86_SSE2|OC_CPU_X86_PNI); |
| 120 | } |
| 121 | } |
| 122 | /* D M A c i t n e h t u A*/ |
| 123 | else if(ecx==0x444D4163&&edx==0x69746E65&&ebx==0x68747541|| |
| 124 | /* C S N y b e d o e G*/ |
| 125 | ecx==0x43534e20&&edx==0x79622065&&ebx==0x646f6547){ |
| 126 | /*AMD, Geode:*/ |
| 127 | cpuid(0x80000000,eax,ebx,ecx,edx); |
| 128 | if(eax<0x80000001)flags=0; |
| 129 | else{ |
| 130 | cpuid(0x80000001,eax,ebx,ecx,edx); |
| 131 | flags=oc_parse_amd_flags(edx,ecx); |
| 132 | } |
| 133 | /*Also check for SSE.*/ |
| 134 | cpuid(1,eax,ebx,ecx,edx); |
| 135 | flags|=oc_parse_intel_flags(edx,ecx); |
| 136 | } |
| 137 | /*Technically some VIA chips can be configured in the BIOS to return any |
| 138 | string here the user wants. |
| 139 | There is a special detection method that can be used to identify such |
| 140 | processors, but in my opinion, if the user really wants to change it, they |
| 141 | deserve what they get.*/ |
| 142 | /* s l u a H r u a t n e C*/ |
| 143 | else if(ecx==0x736C7561&&edx==0x48727561&&ebx==0x746E6543){ |
| 144 | /*VIA:*/ |
| 145 | /*I only have documentation for the C7 (Esther) and Isaiah (forthcoming) |
| 146 | chips (thanks to the engineers from Centaur Technology who provided it). |
| 147 | These chips support Intel-like cpuid info. |
| 148 | The C3-2 (Nehemiah) cores appear to, as well.*/ |
| 149 | cpuid(1,eax,ebx,ecx,edx); |
| 150 | flags=oc_parse_intel_flags(edx,ecx); |
| 151 | if(eax>=0x80000001){ |
| 152 | /*The (non-Nehemiah) C3 processors support AMD-like cpuid info. |
| 153 | We need to check this even if the Intel test succeeds to pick up 3DNow! |
| 154 | support on these processors. |
| 155 | Unlike actual AMD processors, we cannot _rely_ on this info, since |
| 156 | some cores (e.g., the 693 stepping of the Nehemiah) claim to support |
| 157 | this function, yet return edx=0, despite the Intel test indicating |
| 158 | MMX support. |
| 159 | Therefore the features detected here are strictly added to those |
| 160 | detected by the Intel test.*/ |
| 161 | /*TODO: How about earlier chips?*/ |
| 162 | cpuid(0x80000001,eax,ebx,ecx,edx); |
| 163 | /*Note: As of the C7, this function returns Intel-style extended feature |
| 164 | flags, not AMD-style. |
| 165 | Currently, this only defines bits 11, 20, and 29 (0x20100800), which |
| 166 | do not conflict with any of the AMD flags we inspect. |
| 167 | For the remaining bits, Intel tells us, "Do not count on their value", |
| 168 | but VIA assures us that they will all be zero (at least on the C7 and |
| 169 | Isaiah chips). |
| 170 | In the (unlikely) event a future processor uses bits 18, 19, 30, or 31 |
| 171 | (0xC0C00000) for something else, we will have to add code to detect |
| 172 | the model to decide when it is appropriate to inspect them.*/ |
| 173 | flags|=oc_parse_amd_flags(edx,ecx); |
| 174 | } |
| 175 | } |
| 176 | else{ |
| 177 | /*Implement me.*/ |
| 178 | flags=0; |
| 179 | } |
| 180 | return flags; |
| 181 | } |
| 182 | #endif |
| 183 | |