// Source listing of set_macros-inl.h (Grok 10.0.5).
// Copyright 2020 Google LLC
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Sets macros based on HWY_TARGET.

// This include guard is toggled by foreach_target, so avoid the usual _H_
// suffix to prevent copybara from renaming it.
//
// The guarded region contains nothing but the toggle itself: whenever the
// guard's defined-ness equals that of HWY_TARGET_TOGGLE, flip the guard.
#if defined(HWY_SET_MACROS_PER_TARGET) == defined(HWY_TARGET_TOGGLE)
#ifdef HWY_SET_MACROS_PER_TARGET
#undef HWY_SET_MACROS_PER_TARGET
#else
#define HWY_SET_MACROS_PER_TARGET
#endif

#endif  // HWY_SET_MACROS_PER_TARGET matches HWY_TARGET_TOGGLE

29#include "hwy/detect_targets.h"
30
31#undef HWY_NAMESPACE
32#undef HWY_ALIGN
33#undef HWY_MAX_BYTES
34#undef HWY_LANES
35
36#undef HWY_HAVE_SCALABLE
37#undef HWY_HAVE_INTEGER64
38#undef HWY_HAVE_FLOAT16
39#undef HWY_HAVE_FLOAT64
40#undef HWY_MEM_OPS_MIGHT_FAULT
41#undef HWY_NATIVE_FMA
42#undef HWY_CAP_GE256
43#undef HWY_CAP_GE512
44
45#undef HWY_TARGET_STR
46
// Building blocks for the x86 HWY_TARGET_STR values. Each optional feature
// group collapses to an empty string when its HWY_DISABLE_* macro is defined,
// so adjacent-literal concatenation still yields a valid comma-separated list.
#if defined(HWY_DISABLE_PCLMUL_AES)
#define HWY_TARGET_STR_PCLMUL_AES ""
#else
#define HWY_TARGET_STR_PCLMUL_AES ",pclmul,aes"
#endif

#if defined(HWY_DISABLE_BMI2_FMA)
#define HWY_TARGET_STR_BMI2_FMA ""
#else
#define HWY_TARGET_STR_BMI2_FMA ",bmi,bmi2,fma"
#endif

#if defined(HWY_DISABLE_F16C)
#define HWY_TARGET_STR_F16C ""
#else
#define HWY_TARGET_STR_F16C ",f16c"
#endif

#define HWY_TARGET_STR_SSSE3 "sse2,ssse3"

#define HWY_TARGET_STR_SSE4 \
  HWY_TARGET_STR_SSSE3 ",sse4.1,sse4.2" HWY_TARGET_STR_PCLMUL_AES
// Include previous targets, which are the half-vectors of the next target.
#define HWY_TARGET_STR_AVX2 \
  HWY_TARGET_STR_SSE4 ",avx,avx2" HWY_TARGET_STR_BMI2_FMA HWY_TARGET_STR_F16C
#define HWY_TARGET_STR_AVX3 \
  HWY_TARGET_STR_AVX2 ",avx512f,avx512vl,avx512dq,avx512bw"

// Everything below is deliberately not protected by the include guard, so
// HWY_TARGET_STR and the other per-target macros are redefined on each
// include, governed by the current HWY_TARGET.

// Per-target macro table: exactly one branch of this chain is taken, chosen by
// HWY_TARGET, and defines HWY_NAMESPACE, HWY_ALIGN, HWY_MAX_BYTES, HWY_LANES,
// the HWY_HAVE_*/HWY_CAP_* capability flags, HWY_MEM_OPS_MIGHT_FAULT,
// HWY_NATIVE_FMA and, where applicable, HWY_TARGET_STR.

//-----------------------------------------------------------------------------
// SSSE3
#if HWY_TARGET == HWY_SSSE3

#define HWY_NAMESPACE N_SSSE3
#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 1
#define HWY_NATIVE_FMA 0
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_TARGET_STR HWY_TARGET_STR_SSSE3

//-----------------------------------------------------------------------------
// SSE4
#elif HWY_TARGET == HWY_SSE4

#define HWY_NAMESPACE N_SSE4
#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 1
#define HWY_NATIVE_FMA 0
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_TARGET_STR HWY_TARGET_STR_SSE4

//-----------------------------------------------------------------------------
// AVX2
#elif HWY_TARGET == HWY_AVX2

#define HWY_NAMESPACE N_AVX2
#define HWY_ALIGN alignas(32)
#define HWY_MAX_BYTES 32
#define HWY_LANES(T) (32 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 1

// FMA is part of the optional BMI2/FMA group of the AVX2 target string.
#ifdef HWY_DISABLE_BMI2_FMA
#define HWY_NATIVE_FMA 0
#else
#define HWY_NATIVE_FMA 1
#endif

#define HWY_CAP_GE256 1
#define HWY_CAP_GE512 0

#define HWY_TARGET_STR HWY_TARGET_STR_AVX2

//-----------------------------------------------------------------------------
// AVX3[_DL]
#elif HWY_TARGET == HWY_AVX3 || HWY_TARGET == HWY_AVX3_DL

#define HWY_ALIGN alignas(64)
#define HWY_MAX_BYTES 64
#define HWY_LANES(T) (64 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 0
#define HWY_NATIVE_FMA 1
#define HWY_CAP_GE256 1
#define HWY_CAP_GE512 1

#if HWY_TARGET == HWY_AVX3

#define HWY_NAMESPACE N_AVX3
#define HWY_TARGET_STR HWY_TARGET_STR_AVX3

#elif HWY_TARGET == HWY_AVX3_DL

#define HWY_NAMESPACE N_AVX3_DL
// Request the AVX-512 (EVEX) VNNI extension, consistent with the other
// avx512* features listed here. "avxvnni" names the separate VEX-encoded
// AVX-VNNI extension, which is a different CPU feature.
// NOTE(review): confirm this matches the runtime CPU check for AVX3_DL.
#define HWY_TARGET_STR                                               \
  HWY_TARGET_STR_AVX3                                                \
  ",vpclmulqdq,avx512vbmi,avx512vbmi2,vaes,avx512vnni,avx512bitalg," \
  "avx512vpopcntdq"

#else
#error "Logic error"
#endif  // HWY_TARGET == HWY_AVX3_DL

//-----------------------------------------------------------------------------
// PPC8
#elif HWY_TARGET == HWY_PPC8

#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 0
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 1
#define HWY_NATIVE_FMA 1
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_PPC8

#define HWY_TARGET_STR "altivec,vsx"

//-----------------------------------------------------------------------------
// NEON
#elif HWY_TARGET == HWY_NEON

#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1

#if HWY_ARCH_ARM_A64
#define HWY_HAVE_FLOAT64 1
#else
#define HWY_HAVE_FLOAT64 0
#endif

#define HWY_MEM_OPS_MIGHT_FAULT 1

#if defined(__ARM_VFPV4__) || HWY_ARCH_ARM_A64
#define HWY_NATIVE_FMA 1
#else
#define HWY_NATIVE_FMA 0
#endif

#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_NEON

// Can use pragmas instead of -march compiler flag
#if HWY_HAVE_RUNTIME_DISPATCH
#if HWY_ARCH_ARM_V7
#define HWY_TARGET_STR "+neon-vfpv4"
#else
#define HWY_TARGET_STR "+crypto"
#endif  // HWY_ARCH_ARM_V7
#else
// HWY_TARGET_STR remains undefined
#endif

//-----------------------------------------------------------------------------
// SVE[2]
#elif HWY_TARGET == HWY_SVE2 || HWY_TARGET == HWY_SVE || \
    HWY_TARGET == HWY_SVE_256 || HWY_TARGET == HWY_SVE2_128

// SVE only requires lane alignment, not natural alignment of the entire vector.
#define HWY_ALIGN alignas(8)

// Value ensures MaxLanes() is the tightest possible upper bound to reduce
// overallocation. HWY_MAX_BYTES is defined further down; that is fine because
// it is only expanded when HWY_LANES is used.
#define HWY_LANES(T) ((HWY_MAX_BYTES) / sizeof(T))

#define HWY_HAVE_SCALABLE 1
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 0
#define HWY_NATIVE_FMA 1
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#if HWY_TARGET == HWY_SVE2
#define HWY_NAMESPACE N_SVE2
#define HWY_MAX_BYTES 256
#elif HWY_TARGET == HWY_SVE_256
#define HWY_NAMESPACE N_SVE_256
#define HWY_MAX_BYTES 32
#elif HWY_TARGET == HWY_SVE2_128
#define HWY_NAMESPACE N_SVE2_128
#define HWY_MAX_BYTES 16
#else
#define HWY_NAMESPACE N_SVE
#define HWY_MAX_BYTES 256
#endif

// Can use pragmas instead of -march compiler flag
#if HWY_HAVE_RUNTIME_DISPATCH
#if HWY_TARGET == HWY_SVE2 || HWY_TARGET == HWY_SVE2_128
#define HWY_TARGET_STR "+sve2-aes"
#else
#define HWY_TARGET_STR "+sve"
#endif
#else
// HWY_TARGET_STR remains undefined
#endif

//-----------------------------------------------------------------------------
// WASM
#elif HWY_TARGET == HWY_WASM

#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 0
#define HWY_MEM_OPS_MIGHT_FAULT 1
#define HWY_NATIVE_FMA 0
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_WASM

#define HWY_TARGET_STR "simd128"

//-----------------------------------------------------------------------------
// WASM_EMU256
#elif HWY_TARGET == HWY_WASM_EMU256

#define HWY_ALIGN alignas(32)
#define HWY_MAX_BYTES 32
#define HWY_LANES(T) (32 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 0
#define HWY_MEM_OPS_MIGHT_FAULT 1
#define HWY_NATIVE_FMA 0
#define HWY_CAP_GE256 1
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_WASM_EMU256

#define HWY_TARGET_STR "simd128"

//-----------------------------------------------------------------------------
// RVV
#elif HWY_TARGET == HWY_RVV

// RVV only requires lane alignment, not natural alignment of the entire vector,
// and the compiler already aligns builtin types, so nothing to do here.
#define HWY_ALIGN

// The spec requires VLEN <= 2^16 bits, so the limit is 2^16 bytes (LMUL=8).
#define HWY_MAX_BYTES 65536

// = HWY_MAX_BYTES divided by max LMUL=8 because MaxLanes includes the actual
// LMUL. This is the tightest possible upper bound.
#define HWY_LANES(T) (8192 / sizeof(T))

#define HWY_HAVE_SCALABLE 1
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 0
#define HWY_NATIVE_FMA 1
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#if defined(__riscv_zvfh)
#define HWY_HAVE_FLOAT16 1
#else
#define HWY_HAVE_FLOAT16 0
#endif

#define HWY_NAMESPACE N_RVV

// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.
// (rv64gcv is not a valid target)

//-----------------------------------------------------------------------------
// EMU128
#elif HWY_TARGET == HWY_EMU128

#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 1
#define HWY_NATIVE_FMA 0
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_EMU128

// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.

//-----------------------------------------------------------------------------
// SCALAR
#elif HWY_TARGET == HWY_SCALAR

#define HWY_ALIGN
#define HWY_MAX_BYTES 8
#define HWY_LANES(T) 1

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 0
#define HWY_NATIVE_FMA 0
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_SCALAR

// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.

#else
#pragma message("HWY_TARGET does not match any known target")
#endif  // HWY_TARGET

// Override this to 1 in asan/msan builds, which will still fault. This
// replaces whatever value the per-target section chose.
#if HWY_IS_ASAN || HWY_IS_MSAN
#undef HWY_MEM_OPS_MIGHT_FAULT
#define HWY_MEM_OPS_MIGHT_FAULT 1
#endif

// Clang <9 requires this be invoked at file scope, before any namespace.
// When a target string exists, expands to HWY_PUSH_ATTRIBUTES for it; in both
// cases ends with a static_assert so the invocation needs a trailing semicolon.
#undef HWY_BEFORE_NAMESPACE
#if defined(HWY_TARGET_STR)
#define HWY_BEFORE_NAMESPACE()        \
  HWY_PUSH_ATTRIBUTES(HWY_TARGET_STR) \
  static_assert(true, "For requiring trailing semicolon")
#else
// avoids compiler warning if no HWY_TARGET_STR
#define HWY_BEFORE_NAMESPACE() \
  static_assert(true, "For requiring trailing semicolon")
#endif

// Clang <9 requires any namespaces be closed before this macro.
// When a target string exists, expands to HWY_POP_ATTRIBUTES; in both cases
// ends with a static_assert so the invocation needs a trailing semicolon.
#undef HWY_AFTER_NAMESPACE
#if defined(HWY_TARGET_STR)
#define HWY_AFTER_NAMESPACE() \
  HWY_POP_ATTRIBUTES          \
  static_assert(true, "For requiring trailing semicolon")
#else
// avoids compiler warning if no HWY_TARGET_STR
#define HWY_AFTER_NAMESPACE() \
  static_assert(true, "For requiring trailing semicolon")
#endif

439#undef HWY_ATTR
440#if defined(HWY_TARGET_STR) && HWY_HAS_ATTRIBUTE(target)
441#define HWY_ATTR __attribute__((target(HWY_TARGET_STR)))
442#else
443#define HWY_ATTR
444#endif