/* H2Lib 3.0 -- simd.h */
1 #ifndef SIMD_H_
2 #define SIMD_H_
3 
4 #ifdef USE_SIMD
5 
6 #include <immintrin.h>
7 
8 #ifdef __AVX__
9 #include "simd_avx.h"
10 #else
11 #ifdef __SSE2__
12 #include "simd_sse2.h"
13 #else
14 #ifdef __SSE__
15 #include "simd_sse.h"
16 #endif
17 #endif
18 #endif
19 
/****************************************************
 * Define vector sizes
 *
 * VREAL  aliases VFLOAT when USE_FLOAT is defined and
 *        VDOUBLE otherwise.  Neither constant is
 *        defined in this file; they are expected to
 *        come from the selected backend header.
 * VFIELD equals VREAL for real fields and half of it
 *        when USE_COMPLEX is defined (presumably one
 *        complex entry occupies two real slots --
 *        confirm against the backend).
 ****************************************************/

#ifdef USE_FLOAT
#define VREAL VFLOAT
#else
#define VREAL VDOUBLE
#endif

#ifdef USE_COMPLEX
/* Operand is parenthesized so an expression-valued size still halves correctly. */
#define VFIELD ((VREAL) / 2)
#else
#define VFIELD VREAL
#endif
35 
36 /****************************************************
37  * Define vector types
38  ****************************************************/
39 
40 #ifdef USE_FLOAT
41 typedef vecf vreal;
42 #else
43 typedef vecd vreal;
44 #endif
45 
46 typedef vreal vfield;
47 
/****************************************************
 * Define arithmetic operations for real data-type
 *
 * Every generic name below is a plain alias: it
 * resolves to the `_ps` routine when USE_FLOAT is
 * defined and to the `_pd` routine otherwise.  The
 * aliased routines are not defined in this file.
 ****************************************************/

#ifdef USE_FLOAT
#define vadd    vadd_ps
#define vsub    vsub_ps
#define vmul    vmul_ps
#define vdiv    vdiv_ps
#define vsqrt   vsqrt_ps
#define vrsqrt  vrsqrt_ps
#define vfmadd  vfmadd_ps
#define vfmsub  vfmsub_ps
#define vfnmadd vfnmadd_ps
#define vfnmsub vfnmsub_ps
#else
#define vadd    vadd_pd
#define vsub    vsub_pd
#define vmul    vmul_pd
#define vdiv    vdiv_pd
#define vsqrt   vsqrt_pd
#define vrsqrt  vrsqrt_pd
#define vfmadd  vfmadd_pd
#define vfmsub  vfmsub_pd
#define vfnmadd vfnmadd_pd
#define vfnmsub vfnmsub_pd
#endif
75 
/****************************************************
 * Define advanced arithmetic operations for real data-type
 *
 * Plain aliases: each name resolves to its `_ps`
 * counterpart when USE_FLOAT is defined and to its
 * `_pd` counterpart otherwise.  The counterparts are
 * not defined in this file.
 ****************************************************/

#ifdef USE_FLOAT
#define vsin    vsin_ps
#define vcos    vcos_ps
#define vsincos vsincos_ps
#define vexp    vexp_ps
#else
#define vsin    vsin_pd
#define vcos    vcos_pd
#define vsincos vsincos_pd
#define vexp    vexp_pd
#endif
91 
/****************************************************
 * Define load/store operations for real data-type
 *
 * Plain aliases: each name resolves to its `_ps`
 * counterpart when USE_FLOAT is defined and to its
 * `_pd` counterpart otherwise.  The counterparts are
 * not defined in this file.
 ****************************************************/

#ifdef USE_FLOAT
#define vload    vload_ps
#define vload1   vload1_ps
#define vloadu   vloadu_ps
#define vset1    vset1_ps
#define vsetzero vsetzero_ps
#define vstore   vstore_ps
#define vstoreu  vstoreu_ps
#else
#define vload    vload_pd
#define vload1   vload1_pd
#define vloadu   vloadu_pd
#define vset1    vset1_pd
#define vsetzero vsetzero_pd
#define vstore   vstore_pd
#define vstoreu  vstoreu_pd
#endif
113 
/****************************************************
 * Define compare operations for real data-type
 *
 * Unlike the other sections these are function-like
 * macros: each takes two arguments (a, b) and forwards
 * them unchanged to the `_ps` routine when USE_FLOAT is
 * defined and to the `_pd` routine otherwise.  The
 * forwarded-to routines are not defined in this file.
 ****************************************************/

#ifdef USE_FLOAT
#define vcmpeq(a, b)  vcmpeq_ps(a, b)
#define vcmpneq(a, b) vcmpneq_ps(a, b)
#define vcmpge(a, b)  vcmpge_ps(a, b)
#define vcmpgt(a, b)  vcmpgt_ps(a, b)
#define vcmpnge(a, b) vcmpnge_ps(a, b)
#define vcmpngt(a, b) vcmpngt_ps(a, b)
#define vcmple(a, b)  vcmple_ps(a, b)
#define vcmplt(a, b)  vcmplt_ps(a, b)
#define vcmpnle(a, b) vcmpnle_ps(a, b)
#define vcmpnlt(a, b) vcmpnlt_ps(a, b)
#else
#define vcmpeq(a, b)  vcmpeq_pd(a, b)
#define vcmpneq(a, b) vcmpneq_pd(a, b)
#define vcmpge(a, b)  vcmpge_pd(a, b)
#define vcmpgt(a, b)  vcmpgt_pd(a, b)
#define vcmpnge(a, b) vcmpnge_pd(a, b)
#define vcmpngt(a, b) vcmpngt_pd(a, b)
#define vcmple(a, b)  vcmple_pd(a, b)
#define vcmplt(a, b)  vcmplt_pd(a, b)
#define vcmpnle(a, b) vcmpnle_pd(a, b)
#define vcmpnlt(a, b) vcmpnlt_pd(a, b)
#endif
141 
/****************************************************
 * Definitions of bit operations for real data-type
 *
 * Plain aliases: each name resolves to its `_ps`
 * counterpart when USE_FLOAT is defined and to its
 * `_pd` counterpart otherwise.  The counterparts are
 * not defined in this file.
 ****************************************************/

#ifdef USE_FLOAT
#define vand    vand_ps
#define vandnot vandnot_ps
#define vor     vor_ps
#define vxor    vxor_ps
#else
#define vand    vand_pd
#define vandnot vandnot_pd
#define vor     vor_pd
#define vxor    vxor_pd
#endif
157 
/****************************************************
 * Define reductions of vector registers for real data-type
 *
 * vreduce resolves to vreduce_ps when USE_FLOAT is
 * defined and to vreduce_pd otherwise.  Neither routine
 * is defined in this file.
 ****************************************************/

#ifdef USE_FLOAT
#define vreduce vreduce_ps
#else
#define vreduce vreduce_pd
#endif
167 
/****************************************************
 * Definition of little helper functions for real-type
 *
 * vdot3 resolves to vdot3_ps when USE_FLOAT is defined
 * and to vdot3_pd otherwise.  Neither routine is
 * defined in this file.
 ****************************************************/

#ifdef USE_FLOAT
#define vdot3 vdot3_ps
#else
#define vdot3 vdot3_pd
#endif
177 
178 
179 #endif
180 
181 #endif /* SIMD_H_ */