High Performance Plasticity  0.5.0
simdUtils.h
Go to the documentation of this file.
1 
6 #ifndef HPP_SIMDUTILS_H
7 #define HPP_SIMDUTILS_H
8 
9 #include "immintrin.h"
10 #include "emmintrin.h"
11 #include "smmintrin.h"
12 #include <hpp/config.h>
13 
// Intrinsics that are part of Intel's documented AVX intrinsic set, but not
// implemented in GCC as of 4.9.3. Each macro expands to a single expression
// (no trailing semicolon) so it can be used anywhere the real intrinsic could,
// e.g. as a function argument or inside a larger expression.

// Build a __m256i from two __m128i halves: `lo` -> bits 0..127, `hi` -> bits 128..255.
#define _mm256_set_m128i(hi, lo) _mm256_insertf128_si256(_mm256_castsi128_si256((lo)), (hi), 1)

// Build a __m256 from two __m128 halves: `lo` -> bits 0..127, `hi` -> bits 128..255.
#define _mm256_set_m128(hi, lo) _mm256_insertf128_ps(_mm256_castps128_ps256((lo)), (hi), 1)

// Load two doubles from `lo` into the lower half and two doubles from `hi`
// into the upper half of a __m256d, using unaligned loads for both halves.
#define _mm256_loadu2_m128d(hi, lo) _mm256_insertf128_pd(_mm256_castpd128_pd256(_mm_loadu_pd((lo))), _mm_loadu_pd((hi)), 1)
18 
19 // Convenience defines in this library only
20 #ifdef HPP_USE_SSE
/**
 * @brief Gather two pairs of floats from separate addresses into one __m128.
 *
 * The two floats at @p loaddr fill the lower 64 bits of the result and the
 * two floats at @p hiaddr fill the upper 64 bits, i.e. the result is
 * {loaddr[0], loaddr[1], hiaddr[0], hiaddr[1]}.
 *
 * Declared `static inline` so that the header can be included from several
 * translation units (in both C and C++) without needing a separate
 * out-of-line definition.
 *
 * @param hiaddr pointer to the two floats that go in the upper half
 * @param loaddr pointer to the two floats that go in the lower half
 * @return the combined 4-float vector
 */
static inline __m128 _mm128_loadu2_m64(float const *hiaddr, float const *loaddr) {
    // Start from a defined value: both halves are overwritten below, but
    // passing an uninitialized vector into the intrinsics is a read of an
    // indeterminate value and previously had to be hidden with pragmas.
    __m128 val = _mm_setzero_ps();
    val = _mm_loadl_pi(val, (__m64 const *)loaddr);
    val = _mm_loadh_pi(val, (__m64 const *)hiaddr);
    return val;
}
30 #endif
31 
32 #ifdef HPP_USE_AVX
33 inline __m256 _mm256_loadu4_m64(float const *p3, float const *p2, float const *p1, float const *p0){
34  return _mm256_set_m128(_mm128_loadu2_m64(p3, p2), _mm128_loadu2_m64(p1, p0));
35 };
36 #endif
37 
38 #endif /* HPP_SIMDUTILS_H */
#define _mm256_set_m128(hi, lo)
Definition: simdUtils.h:16