70 #ifndef INCLUDED_volk_32f_s32f_normalize_a_H
71 #define INCLUDED_volk_32f_s32f_normalize_a_H
77 #include <immintrin.h>
81 unsigned int num_points)
83 unsigned int number = 0;
84 float* inputPtr = vecBuffer;
86 const float invScalar = 1.0 / scalar;
87 __m256 vecScalar = _mm256_set1_ps(invScalar);
91 const uint64_t eighthPoints = num_points / 8;
92 for (; number < eighthPoints; number++) {
94 input1 = _mm256_load_ps(inputPtr);
96 input1 = _mm256_mul_ps(input1, vecScalar);
98 _mm256_store_ps(inputPtr, input1);
103 number = eighthPoints * 8;
104 for (; number < num_points; number++) {
105 *inputPtr *= invScalar;
112 #include <xmmintrin.h>
116 unsigned int num_points)
118 unsigned int number = 0;
119 float* inputPtr = vecBuffer;
121 const float invScalar = 1.0 / scalar;
122 __m128 vecScalar = _mm_set_ps1(invScalar);
126 const uint64_t quarterPoints = num_points / 4;
127 for (; number < quarterPoints; number++) {
129 input1 = _mm_load_ps(inputPtr);
131 input1 = _mm_mul_ps(input1, vecScalar);
133 _mm_store_ps(inputPtr, input1);
138 number = quarterPoints * 4;
139 for (; number < num_points; number++) {
140 *inputPtr *= invScalar;
146 #ifdef LV_HAVE_GENERIC
150 unsigned int num_points)
152 unsigned int number = 0;
153 float* inputPtr = vecBuffer;
154 const float invScalar = 1.0 / scalar;
155 for (number = 0; number < num_points; number++) {
156 *inputPtr *= invScalar;
164 extern void volk_32f_s32f_normalize_a_orc_impl(
float* dst,
167 unsigned int num_points);
168 static inline void volk_32f_s32f_normalize_u_orc(
float* vecBuffer,
170 unsigned int num_points)
172 float invscalar = 1.0 / scalar;
173 volk_32f_s32f_normalize_a_orc_impl(vecBuffer, vecBuffer, invscalar, num_points);
179 #ifndef INCLUDED_volk_32f_s32f_normalize_u_H
180 #define INCLUDED_volk_32f_s32f_normalize_u_H
182 #include <inttypes.h>
185 #include <immintrin.h>
189 unsigned int num_points)
191 unsigned int number = 0;
192 float* inputPtr = vecBuffer;
194 const float invScalar = 1.0 / scalar;
195 __m256 vecScalar = _mm256_set1_ps(invScalar);
199 const uint64_t eighthPoints = num_points / 8;
200 for (; number < eighthPoints; number++) {
202 input1 = _mm256_loadu_ps(inputPtr);
204 input1 = _mm256_mul_ps(input1, vecScalar);
206 _mm256_storeu_ps(inputPtr, input1);
211 number = eighthPoints * 8;
212 for (; number < num_points; number++) {
213 *inputPtr *= invScalar;