diff --git a/build/cmake/cpu.cmake b/build/cmake/cpu.cmake
index acebe20..8c67d89 100644
--- a/build/cmake/cpu.cmake
+++ b/build/cmake/cpu.cmake
@@ -120,6 +120,19 @@ elseif("${AOM_TARGET_CPU}" MATCHES "^x86")
     set(RTCD_ARCH_X86_64 "yes")
   endif()
 
+  # AVX2 requires __m256i definition starting v3.9.0
+
+  if(ENABLE_AVX2)
+    aom_check_source_compiles("x86_64_avx2_m256i_available" "
+#include <emmintrin.h>
+#ifndef __m256i
+#error 1
+#endif" HAVE_AVX2_M256I)
+    if(HAVE_AVX2_M256I EQUAL 0)
+      set(ENABLE_AVX2 0)
+    endif()
+  endif()
+
   set(X86_FLAVORS "MMX;SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;AVX;AVX2")
   foreach(flavor ${X86_FLAVORS})
     if(ENABLE_${flavor} AND NOT disable_remaining_flavors)
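The hunk above gates AVX2 on a compile probe: a tiny source snippet is handed to the compiler, and if it fails to build (no usable __m256i), ENABLE_AVX2 is cleared before the flavor loop expands the SIMD flavors. The patch calls libaom's aom_check_source_compiles helper; as a rough stand-alone sketch of the same gate, an equivalent with CMake's stock CheckCSourceCompiles module might look like the snippet below (variable names mirror the patch; the trailing main() is there only because check_c_source_compiles builds and links a full program).

include(CheckCSourceCompiles)

if(ENABLE_AVX2)
  # Same probe source as the patch, plus a main() so the check can link.
  check_c_source_compiles("
#include <emmintrin.h>
#ifndef __m256i
#error 1
#endif
int main(void) { return 0; }" HAVE_AVX2_M256I)
  if(NOT HAVE_AVX2_M256I)
    # Probe failed to compile, so drop AVX2 before the flavor loop runs.
    set(ENABLE_AVX2 0)
  endif()
endif()
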
diff --git a/aom_dsp/x86/synonyms.h b/aom_dsp/x86/synonyms.h
index 0d51cdf..6744ec5 100644
--- a/aom_dsp/x86/synonyms.h
+++ b/aom_dsp/x86/synonyms.h
@@ -46,13 +46,6 @@ static INLINE __m128i xx_loadu_128(const void *a) {
   return _mm_loadu_si128((const __m128i *)a);
 }
 
-// Load 64 bits from each of hi and low, and pack into an SSE register
-// Since directly loading as `int64_t`s and using _mm_set_epi64 may violate
-// the strict aliasing rule, this takes a different approach
-static INLINE __m128i xx_loadu_2x64(const void *hi, const void *lo) {
-  return _mm_unpacklo_epi64(_mm_loadu_si64(lo), _mm_loadu_si64(hi));
-}
-
 static INLINE void xx_storel_32(void *const a, const __m128i v) {
   const int val = _mm_cvtsi128_si32(v);
   memcpy(a, &val, sizeof(val));
diff --git a/aom_dsp/x86/synonyms_avx2.h b/aom_dsp/x86/synonyms_avx2.h
index d4e8f69..45be17e 100644
--- a/aom_dsp/x86/synonyms_avx2.h
+++ b/aom_dsp/x86/synonyms_avx2.h
@@ -25,6 +25,13 @@
  * Intrinsics prefixed with yy_ operate on or return 256bit YMM registers.
  */
 
+// Load 64 bits from each of hi and low, and pack into an SSE register
+// Since directly loading as `int64_t`s and using _mm_set_epi64 may violate
+// the strict aliasing rule, this takes a different approach
+static INLINE __m128i xx_loadu_2x64(const void *hi, const void *lo) {
+  return _mm_unpacklo_epi64(_mm_loadu_si64(lo), _mm_loadu_si64(hi));
+}
+
 // Loads and stores to do away with the tedium of casting the address
 // to the right type.
 static INLINE __m256i yy_load_256(const void *a) {
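The comment that moves into synonyms_avx2.h carries the rationale for xx_loadu_2x64's shape: reading the two halves through int64_t pointers and handing them to _mm_set_epi64x would access memory through an incompatible lvalue type. Below is a minimal stand-alone sketch of that contrast; the helper names load_2x64_aliasing and load_2x64_safe are illustrative only (not part of libaom), and an SSE2-capable compiler with _mm_loadu_si64 in its headers is assumed.

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

/* Aliasing-unsafe variant the moved comment warns about: if hi/lo actually
 * point at uint8_t data, dereferencing them as int64_t violates C's strict
 * aliasing rule, so the optimizer is free to misbehave. */
static __m128i load_2x64_aliasing(const void *hi, const void *lo) {
  return _mm_set_epi64x(*(const int64_t *)hi, *(const int64_t *)lo);
}

/* The form xx_loadu_2x64 keeps: _mm_loadu_si64 does an untyped 64-bit load
 * and _mm_unpacklo_epi64 packs lo into the low half and hi into the high
 * half, so no object is read through an incompatible type. */
static __m128i load_2x64_safe(const void *hi, const void *lo) {
  return _mm_unpacklo_epi64(_mm_loadu_si64(lo), _mm_loadu_si64(hi));
}

int main(void) {
  uint8_t row0[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
  uint8_t row1[8] = { 9, 10, 11, 12, 13, 14, 15, 16 };
  uint8_t out[16];
  const __m128i a = load_2x64_safe(row1, row0);
  const __m128i b = load_2x64_aliasing(row1, row0);
  /* Both put row0 in bytes 0..7 and row1 in bytes 8..15 here; only the
   * safe variant is well-defined C for arbitrary pointer types. */
  printf("equal: %d\n", _mm_movemask_epi8(_mm_cmpeq_epi8(a, b)) == 0xFFFF);
  _mm_storeu_si128((__m128i *)out, a);
  for (int i = 0; i < 16; ++i) printf("%u ", out[i]);
  printf("\n");
  return 0;
}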
|