CNORXZ
Container with Native Operation Routines and Expressions
Loading...
Searching...
No Matches
src
include
operation
extensions
avx.cc.h
Go to the documentation of this file.
1
// -*- C++ -*-
12
#ifndef __cxz_avx_cc_h__
13
#define __cxz_avx_cc_h__
14
15
#include "
avx.h
"
16
17
namespace
CNORXZ
18
{
19
/*=====================+
 |   PlusCC / PlusCX   |
 +=====================*/
22
23
inline
decltype
(
auto
)
24
PlusCC<Double,Double,AVX::ND>::eval
(
const
Consecutive<Double,AVX::ND>
& a,
25
const
Consecutive<Double,AVX::ND>
&
b
)
26
{
27
Consecutive<Double,AVX::ND>
o;
28
__m256d
av
=
_mm256_load_pd
(a.
mD
);
29
__m256d
bv
=
_mm256_load_pd
(
b
.mD);
30
__m256d
ov
=
_mm256_add_pd
(
av
,
bv
);
31
_mm256_store_pd
(o.
mD
,
ov
);
32
return
o;
33
}
34
35
inline
decltype
(
auto
)
36
PlusCC<Double,Double,AVX::ND>::aeval
(
Consecutive<Double,AVX::ND>
& a,
37
const
Consecutive<Double,AVX::ND>
&
b
)
38
{
39
__m256d
av
=
_mm256_load_pd
(a.
mD
);
40
__m256d
bv
=
_mm256_load_pd
(
b
.mD);
41
__m256d
ov
=
_mm256_add_pd
(
av
,
bv
);
42
_mm256_store_pd
(a.
mD
,
ov
);
43
return
a;
44
}
45
46
template
<
typename
X>
47
inline
decltype
(
auto
)
48
PlusCX<Double,X,AVX::ND>::eval
(
const
Consecutive<Double,AVX::ND>
& a,
49
const
X
&
b
)
50
{
51
Consecutive<Double,AVX::ND>
o;
52
__m256d
av
=
_mm256_load_pd
(a.
mD
);
53
__m256d
bv
=
_mm256_set1_pd
(
static_cast<
Double
>
(
b
) );
54
__m256d
ov
=
_mm256_add_pd
(
av
,
bv
);
55
_mm256_store_pd
(o.
mD
,
ov
);
56
return
o;
57
}
58
59
template
<
typename
X>
60
inline
decltype
(
auto
)
61
PlusCX<Double,X,AVX::ND>::aeval
(
Consecutive<Double,AVX::ND>
& a,
const
X
&
b
)
62
{
63
__m256d
av
=
_mm256_load_pd
(a.
mD
);
64
__m256d
bv
=
_mm256_set1_pd
(
static_cast<
Double
>
(
b
) );
65
__m256d
ov
=
_mm256_add_pd
(
av
,
bv
);
66
_mm256_store_pd
(a.
mD
,
ov
);
67
return
a;
68
}
69
70
template
<
typename
X>
71
inline
decltype
(
auto
)
72
PlusCX<Double,X,AVX::ND>::eval
(
const
X
& a,
73
const
Consecutive<Double,AVX::ND>
&
b
)
74
{
75
Consecutive<Double,AVX::ND>
o;
76
__m256d
av
=
_mm256_set1_pd
(
static_cast<
Double
>
(a) );
77
__m256d
bv
=
_mm256_load_pd
(
b
.mD);
78
__m256d
ov
=
_mm256_add_pd
(
av
,
bv
);
79
_mm256_store_pd
(o.
mD
,
ov
);
80
return
o;
81
}
82
83
/*=======================+
 |   MinusCC / MinusCX   |
 +=======================*/
86
87
inline
decltype
(
auto
)
88
MinusCC<Double,Double,AVX::ND>::eval
(
const
Consecutive<Double,AVX::ND>
& a,
89
const
Consecutive<Double,AVX::ND>
&
b
)
90
{
91
Consecutive<Double,AVX::ND>
o;
92
__m256d
av
=
_mm256_load_pd
(a.
mD
);
93
__m256d
bv
=
_mm256_load_pd
(
b
.mD);
94
__m256d
ov
=
_mm256_sub_pd
(
av
,
bv
);
95
_mm256_store_pd
(o.
mD
,
ov
);
96
return
o;
97
}
98
99
inline
decltype
(
auto
)
100
MinusCC<Double,Double,AVX::ND>::aeval
(
Consecutive<Double,AVX::ND>
& a,
101
const
Consecutive<Double,AVX::ND>
&
b
)
102
{
103
__m256d
av
=
_mm256_load_pd
(a.
mD
);
104
__m256d
bv
=
_mm256_load_pd
(
b
.mD);
105
__m256d
ov
=
_mm256_sub_pd
(
av
,
bv
);
106
_mm256_store_pd
(a.
mD
,
ov
);
107
return
a;
108
}
109
110
template
<
typename
X>
111
inline
decltype
(
auto
)
112
MinusCX<Double,X,AVX::ND>::eval
(
const
Consecutive<Double,AVX::ND>
& a,
113
const
X
&
b
)
114
{
115
Consecutive<Double,AVX::ND>
o;
116
__m256d
av
=
_mm256_load_pd
(a.
mD
);
117
__m256d
bv
=
_mm256_set1_pd
(
static_cast<
Double
>
(
b
) );
118
__m256d
ov
=
_mm256_sub_pd
(
av
,
bv
);
119
_mm256_store_pd
(o.
mD
,
ov
);
120
return
o;
121
}
122
123
template
<
typename
X>
124
inline
decltype
(
auto
)
125
MinusCX<Double,X,AVX::ND>::aeval
(
Consecutive<Double,AVX::ND>
& a,
const
X
&
b
)
126
{
127
__m256d
av
=
_mm256_load_pd
(a.
mD
);
128
__m256d
bv
=
_mm256_set1_pd
(
static_cast<
Double
>
(
b
) );
129
__m256d
ov
=
_mm256_sub_pd
(
av
,
bv
);
130
_mm256_store_pd
(a.
mD
,
ov
);
131
return
a;
132
}
133
134
template
<
typename
X>
135
inline
decltype
(
auto
)
136
MinusCX<Double,X,AVX::ND>::eval
(
const
X
& a,
137
const
Consecutive<Double,AVX::ND>
&
b
)
138
{
139
Consecutive<Double,AVX::ND>
o;
140
__m256d
av
=
_mm256_set1_pd
(
static_cast<
Double
>
(a) );
141
__m256d
bv
=
_mm256_load_pd
(
b
.mD);
142
__m256d
ov
=
_mm256_sub_pd
(
av
,
bv
);
143
_mm256_store_pd
(o.
mD
,
ov
);
144
return
o;
145
}
146
147
/*=================================+
 |   MultipliesCC / MultipliesCX   |
 +=================================*/
150
151
inline
decltype
(
auto
)
152
MultipliesCC<Double,Double,AVX::ND>::eval
(
const
Consecutive<Double,AVX::ND>
& a,
153
const
Consecutive<Double,AVX::ND>
&
b
)
154
{
155
Consecutive<Double,AVX::ND>
o;
156
__m256d
av
=
_mm256_load_pd
(a.
mD
);
157
__m256d
bv
=
_mm256_load_pd
(
b
.mD);
158
__m256d
ov
=
_mm256_mul_pd
(
av
,
bv
);
159
_mm256_store_pd
(o.
mD
,
ov
);
160
return
o;
161
}
162
163
inline
decltype
(
auto
)
164
MultipliesCC<Double,Double,AVX::ND>::aeval
(
Consecutive<Double,AVX::ND>
& a,
165
const
Consecutive<Double,AVX::ND>
&
b
)
166
{
167
__m256d
av
=
_mm256_load_pd
(a.
mD
);
168
__m256d
bv
=
_mm256_load_pd
(
b
.mD);
169
__m256d
ov
=
_mm256_mul_pd
(
av
,
bv
);
170
_mm256_store_pd
(a.
mD
,
ov
);
171
return
a;
172
}
173
174
template
<
typename
X>
175
inline
decltype
(
auto
)
176
MultipliesCX<Double,X,AVX::ND>::eval
(
const
Consecutive<Double,AVX::ND>
& a,
177
const
X
&
b
)
178
{
179
Consecutive<Double,AVX::ND>
o;
180
__m256d
av
=
_mm256_load_pd
(a.
mD
);
181
__m256d
bv
=
_mm256_set1_pd
(
static_cast<
Double
>
(
b
) );
182
__m256d
ov
=
_mm256_mul_pd
(
av
,
bv
);
183
_mm256_store_pd
(o.
mD
,
ov
);
184
return
o;
185
}
186
187
template
<
typename
X>
188
inline
decltype
(
auto
)
189
MultipliesCX<Double,X,AVX::ND>::aeval
(
Consecutive<Double,AVX::ND>
& a,
190
const
X
&
b
)
191
{
192
__m256d
av
=
_mm256_load_pd
(a.
mD
);
193
__m256d
bv
=
_mm256_set1_pd
(
static_cast<
Double
>
(
b
) );
194
__m256d
ov
=
_mm256_mul_pd
(
av
,
bv
);
195
_mm256_store_pd
(a.
mD
,
ov
);
196
return
a;
197
}
198
199
template
<
typename
X>
200
inline
decltype
(
auto
)
201
MultipliesCX<Double,X,AVX::ND>::eval
(
const
X
& a,
202
const
Consecutive<Double,AVX::ND>
&
b
)
203
{
204
Consecutive<Double,AVX::ND>
o;
205
__m256d
av
=
_mm256_set1_pd
(
static_cast<
Double
>
(a) );
206
__m256d
bv
=
_mm256_load_pd
(
b
.mD);
207
__m256d
ov
=
_mm256_mul_pd
(
av
,
bv
);
208
_mm256_store_pd
(o.
mD
,
ov
);
209
return
o;
210
}
211
212
213
/*===========================+
 |   DividesCC / DividesCX   |
 +===========================*/
216
217
inline
decltype
(
auto
)
218
DividesCC<Double,Double,AVX::ND>::eval
(
const
Consecutive<Double,AVX::ND>
& a,
219
const
Consecutive<Double,AVX::ND>
&
b
)
220
{
221
Consecutive<Double,AVX::ND>
o;
222
__m256d
av
=
_mm256_load_pd
(a.
mD
);
223
__m256d
bv
=
_mm256_load_pd
(
b
.mD);
224
__m256d
ov
=
_mm256_div_pd
(
av
,
bv
);
225
_mm256_store_pd
(o.
mD
,
ov
);
226
return
o;
227
}
228
229
inline
decltype
(
auto
)
230
DividesCC<Double,Double,AVX::ND>::aeval
(
Consecutive<Double,AVX::ND>
& a,
231
const
Consecutive<Double,AVX::ND>
&
b
)
232
{
233
__m256d
av
=
_mm256_load_pd
(a.
mD
);
234
__m256d
bv
=
_mm256_load_pd
(
b
.mD);
235
__m256d
ov
=
_mm256_div_pd
(
av
,
bv
);
236
_mm256_store_pd
(a.
mD
,
ov
);
237
return
a;
238
}
239
240
template
<
typename
X>
241
inline
decltype
(
auto
)
242
DividesCX<Double,X,AVX::ND>::eval
(
const
Consecutive<Double,AVX::ND>
& a,
243
const
X
&
b
)
244
{
245
Consecutive<Double,AVX::ND>
o;
246
__m256d
av
=
_mm256_load_pd
(a.
mD
);
247
__m256d
bv
=
_mm256_set1_pd
(
static_cast<
Double
>
(
b
) );
248
__m256d
ov
=
_mm256_div_pd
(
av
,
bv
);
249
_mm256_store_pd
(o.
mD
,
ov
);
250
return
o;
251
}
252
253
template
<
typename
X>
254
inline
decltype
(
auto
)
255
DividesCX<Double,X,AVX::ND>::aeval
(
Consecutive<Double,AVX::ND>
& a,
const
X
&
b
)
256
{
257
__m256d
av
=
_mm256_load_pd
(a.
mD
);
258
__m256d
bv
=
_mm256_set1_pd
(
static_cast<
Double
>
(
b
) );
259
__m256d
ov
=
_mm256_div_pd
(
av
,
bv
);
260
_mm256_store_pd
(a.
mD
,
ov
);
261
return
a;
262
}
263
264
template
<
typename
X>
265
inline
decltype
(
auto
)
266
DividesCX<Double,X,AVX::ND>::eval
(
const
X
& a,
267
const
Consecutive<Double,AVX::ND>
&
b
)
268
{
269
Consecutive<Double,AVX::ND>
o;
270
__m256d
av
=
_mm256_set1_pd
(
static_cast<
Double
>
(a) );
271
__m256d
bv
=
_mm256_load_pd
(
b
.mD);
272
__m256d
ov
=
_mm256_div_pd
(
av
,
bv
);
273
_mm256_store_pd
(o.
mD
,
ov
);
274
return
o;
275
}
276
277
}
278
279
#endif
avx.h
Register type specialization for AVX.
CNORXZ
Definition
aindex.cc.h:18
CNORXZ::Double
double Double
Definition
types.h:39
CNORXZ::rangeCast
Sptr< Range > rangeCast(const RangePtr r)
Definition
range_base.cc.h:53
CNORXZ::Consecutive
Definition
reg.h:24
CNORXZ::Consecutive::mD
T mD[N]
Definition
reg.h:25
CNORXZ::DividesCC
Definition
reg.h:236
CNORXZ::DividesCX
Definition
reg.h:246
CNORXZ::MinusCC
Definition
reg.h:142
CNORXZ::MinusCX
Definition
reg.h:152
CNORXZ::MultipliesCC
Definition
reg.h:189
CNORXZ::MultipliesCX
Definition
reg.h:199
CNORXZ::PlusCC
Definition
reg.h:95
CNORXZ::PlusCX
Definition
reg.h:105
Generated by
1.10.0