BitMagic-C++
bmutil.h
Go to the documentation of this file.
1 #ifndef BMUTIL__H__INCLUDED__
2 #define BMUTIL__H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 
21 /*! \file bmutil.h
22  \brief Bit manipulation primitives (internal)
23 */
24 
25 #include "bmdef.h"
26 #include "bmconst.h"
27 
28 #if defined(_M_AMD64) || defined(_M_X64)
29 #include <intrin.h>
30 #elif defined(BMSSE2OPT) || defined(BMSSE42OPT)
31 #include <emmintrin.h>
32 #elif defined(BMAVX2OPT)
33 #include <emmintrin.h>
34 #include <avx2intrin.h>
35 #endif
36 
37 #ifdef __GNUG__
38 #pragma GCC diagnostic push
39 #pragma GCC diagnostic ignored "-Wconversion"
40 #endif
41 
42 #ifdef _MSC_VER
43 #pragma warning( push )
44 #pragma warning( disable : 4146)
45 #endif
46 
47 
48 namespace bm
49 {
50 
51  /**
52  bit-block array wrapped into union for correct interpretation of
53  32-bit vs 64-bit access vs SIMD
54  @internal
55  */
56  struct bit_block_t
57  {
58  union bunion_t
59  {
62 
63 #if defined(BMAVX512OPT)
65 #endif
66 #if defined(BMAVX2OPT)
68 #endif
69 #if defined(BMSSE2OPT) || defined(BMSSE42OPT)
71 #endif
72  } b_;
73 
74  operator bm::word_t*() { return &(b_.w32[0]); }
75  operator const bm::word_t*() const { return &(b_.w32[0]); }
76  explicit operator bm::id64_t*() { return &b_.w64[0]; }
77  explicit operator const bm::id64_t*() const { return &b_.w64[0]; }
78 #ifdef BMAVX512OPT
79  explicit operator __m512i*() { return &b_.w512[0]; }
80  explicit operator const __m512i*() const { return &b_.w512[0]; }
81 #endif
82 #ifdef BMAVX2OPT
83  explicit operator __m256i*() { return &b_.w256[0]; }
84  explicit operator const __m256i*() const { return &b_.w256[0]; }
85 #endif
86 #if defined(BMSSE2OPT) || defined(BMSSE42OPT)
87  explicit operator __m128i*() { return &b_.w128[0]; }
88  explicit operator const __m128i*() const { return &b_.w128[0]; }
89 #endif
90 
91  const bm::word_t* begin() const { return (b_.w32 + 0); }
92  bm::word_t* begin() { return (b_.w32 + 0); }
93  const bm::word_t* end() const { return (b_.w32 + bm::set_block_size); }
94  bm::word_t* end() { return (b_.w32 + bm::set_block_size); }
95  };
96 
97 /**
98  Get minimum of 2 values
99 */
100 template<typename T>
101 T min_value(T v1, T v2) BMNOEXCEPT
102 {
103  return v1 < v2 ? v1 : v2;
104 }
105 
106 /**
107  \brief ad-hoc conditional expressions
108  \internal
109 */
110 template <bool b> struct conditional
111 {
112  static bool test() { return true; }
113 };
114 template <> struct conditional<false>
115 {
116  static bool test() { return false; }
117 };
118 
119 
120 /**
121  Fast loop-less function to find LOG2
122 */
123 template<typename T>
125 {
126  unsigned int l = 0;
127 
128  if (x >= 1<<16) { x = (T)(x >> 16); l |= 16; }
129  if (x >= 1<<8) { x = (T)(x >> 8); l |= 8; }
130  if (x >= 1<<4) { x = (T)(x >> 4); l |= 4; }
131  if (x >= 1<<2) { x = (T)(x >> 2); l |= 2; }
132  if (x >= 1<<1) l |=1;
133  return (T)l;
134 }
135 
136 template<>
138 {
139  unsigned int l = 0;
140  if (x >= 1<<8) { x = (bm::gap_word_t)(x >> 8); l |= 8; }
141  if (x >= 1<<4) { x = (bm::gap_word_t)(x >> 4); l |= 4; }
142  if (x >= 1<<2) { x = (bm::gap_word_t)(x >> 2); l |= 2; }
143  if (x >= 1<<1) l |=1;
144  return (bm::gap_word_t)l;
145 }
146 
147 /**
148  Mini auto-pointer for internal memory management
149  @internal
150 */
151 template<class T>
153 {
154 public:
155  ptr_guard(T* p) BMNOEXCEPT : ptr_(p) {}
156  ~ptr_guard() { delete ptr_; }
157 private:
158  ptr_guard(const ptr_guard<T>& p);
159  ptr_guard& operator=(const ptr_guard<T>& p);
160 private:
161  T* ptr_;
162 };
163 
164 /**
165  Portable LZCNT with (uses minimal LUT)
166  @ingroup bitfunc
167  @internal
168 */
169 inline unsigned count_leading_zeros(unsigned x) BMNOEXCEPT
170 {
171  unsigned n =
172  (x >= (1U << 16)) ?
173  ((x >= (1U << 24)) ? ((x >= (1 << 28)) ? 28u : 24u) : ((x >= (1U << 20)) ? 20u : 16u))
174  :
175  ((x >= (1U << 8)) ? ((x >= (1U << 12)) ? 12u : 8u) : ((x >= (1U << 4)) ? 4u : 0u));
176  return unsigned(bm::lzcnt_table<true>::_lut[x >> n]) - n;
177 }
178 
179 /**
180  Portable TZCNT with (uses 37-LUT)
181  @ingroup bitfunc
182  @internal
183 */
184 inline
185 unsigned count_trailing_zeros(unsigned v) BMNOEXCEPT
186 {
187  // (v & -v) isolates the last set bit
188  return unsigned(bm::tzcnt_table<true>::_lut[(-v & v) % 37]);
189 }
190 
191 /**
192  Lookup table based integer LOG2
193 */
194 template<typename T>
196 {
197  unsigned l = 0;
198  if (x & 0xffff0000)
199  {
200  l += 16; x >>= 16;
201  }
202 
203  if (x & 0xff00)
204  {
205  l += 8; x >>= 8;
206  }
207  return l + T(first_bit_table<true>::_idx[x]);
208 }
209 
210 /**
211  Lookup table based short integer LOG2
212 */
213 template<>
214 inline bm::gap_word_t ilog2_LUT<bm::gap_word_t>(bm::gap_word_t x) BMNOEXCEPT
215 {
216  bm::gap_word_t l = 0;
217  if (x & 0xff00)
218  {
219  l = bm::gap_word_t( + 8u);
220  x = bm::gap_word_t(x >> 8u);
221  }
223 }
224 
225 
226 // if we are running on x86 CPU we can use inline ASM
227 
228 #ifdef BM_x86
229 #ifdef __GNUG__
230 
232 unsigned bsf_asm32(unsigned int v) BMNOEXCEPT
233 {
234  unsigned r;
235  asm volatile(" bsfl %1, %0": "=r"(r): "rm"(v) );
236  return r;
237 }
238 
240 unsigned bsr_asm32(unsigned int v) BMNOEXCEPT
241 {
242  unsigned r;
243  asm volatile(" bsrl %1, %0": "=r"(r): "rm"(v) );
244  return r;
245 }
246 
247 #endif // __GNUG__
248 
249 #ifdef _MSC_VER
250 
251 #if defined(_M_AMD64) || defined(_M_X64) // inline assembly not supported
252 
254 unsigned int bsr_asm32(unsigned int value) BMNOEXCEPT
255 {
256  unsigned long r;
257  _BitScanReverse(&r, value);
258  return r;
259 }
260 
262 unsigned int bsf_asm32(unsigned int value) BMNOEXCEPT
263 {
264  unsigned long r;
265  _BitScanForward(&r, value);
266  return r;
267 }
268 
269 #else
270 
272 unsigned int bsr_asm32(unsigned int value) BMNOEXCEPT
273 {
274  __asm bsr eax, value
275 }
276 
278 unsigned int bsf_asm32(unsigned int value) BMNOEXCEPT
279 {
280  __asm bsf eax, value
281 }
282 
283 #endif
284 
285 #endif // _MSC_VER
286 
287 #endif // BM_x86
288 
289 
290 // From:
291 // http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.37.8562
292 //
293 template<typename T>
295 {
296  return
297  DeBruijn_bit_position<true>::_multiply[(((v & -v) * 0x077CB531U)) >> 27];
298 }
299 
300 inline
301 unsigned bit_scan_reverse32(unsigned value) BMNOEXCEPT
302 {
303  BM_ASSERT(value);
304 #if defined(BM_USE_GCC_BUILD)
305  return (unsigned) (31 - __builtin_clz(value));
306 #else
307 # if defined(BM_x86) && (defined(__GNUG__) || defined(_MSC_VER))
308  return bm::bsr_asm32(value);
309 # else
310  return bm::ilog2_LUT<unsigned int>(value);
311 # endif
312 #endif
313 }
314 
315 inline
316 unsigned bit_scan_forward32(unsigned value) BMNOEXCEPT
317 {
318  BM_ASSERT(value);
319 #if defined(BM_USE_GCC_BUILD)
320  return (unsigned) __builtin_ctz(value);
321 #else
322 # if defined(BM_x86) && (defined(__GNUG__) || defined(_MSC_VER))
323  return bm::bsf_asm32(value);
324 # else
325  return bit_scan_fwd(value);
326 # endif
327 #endif
328 }
329 
330 
332 unsigned long long bmi_bslr_u64(unsigned long long w) BMNOEXCEPT
333 {
334 #if defined(BMAVX2OPT) || defined (BMAVX512OPT)
335  return _blsr_u64(w);
336 #else
337  return w & (w - 1);
338 #endif
339 }
340 
342 unsigned long long bmi_blsi_u64(unsigned long long w)
343 {
344 #if defined(BMAVX2OPT) || defined (BMAVX512OPT)
345  return _blsi_u64(w);
346 #else
347  return w & (-w);
348 #endif
349 }
350 
351 /// 64-bit bit-scan reverse
352 inline
354 {
355  BM_ASSERT(w);
356 #if defined(BMAVX2OPT) || defined (BMAVX512OPT)
357  return (unsigned)_lzcnt_u64(w);
358 #else
359  #if defined(BM_USE_GCC_BUILD)
360  return (unsigned) __builtin_clzll(w);
361  #else
362  unsigned z;
363  unsigned w1 = unsigned(w >> 32);
364  if (!w1)
365  {
366  z = 32;
367  w1 = unsigned(w);
368  z += 31 - bm::bit_scan_reverse32(w1);
369  }
370  else
371  {
372  z = 31 - bm::bit_scan_reverse32(w1);
373  }
374  return z;
375  #endif
376 #endif
377 }
378 
379 /// 64-bit bit-scan fwd
380 inline
382 {
383  BM_ASSERT(w);
384 
385 #if defined(BMAVX2OPT) || defined (BMAVX512OPT)
386  return (unsigned)_tzcnt_u64(w);
387 #else
388  #if defined(BM_USE_GCC_BUILD)
389  return (unsigned) __builtin_ctzll(w);
390  #else
391  unsigned z;
392  unsigned w1 = unsigned(w);
393  if (!w1)
394  {
395  z = 32;
396  w1 = unsigned(w >> 32);
397  z += bm::bit_scan_forward32(w1);
398  }
399  else
400  {
401  z = bm::bit_scan_forward32(w1);
402  }
403  return z;
404  #endif
405 #endif
406 }
407 
408 
409 
410 /*!
411  Returns BSR value
412  @ingroup bitfunc
413 */
414 template <class T>
415 unsigned bit_scan_reverse(T value) BMNOEXCEPT
416 {
417  BM_ASSERT(value);
418 
419  if (bm::conditional<sizeof(T)==8>::test())
420  {
421  #if defined(BM_USE_GCC_BUILD)
422  return (unsigned) (63 - __builtin_clzll(value));
423  #else
424  bm::id64_t v8 = value;
425  v8 >>= 32;
426  unsigned v = (unsigned)v8;
427  if (v)
428  {
429  v = bm::bit_scan_reverse32(v);
430  return v + 32;
431  }
432  #endif
433  }
434  return bm::bit_scan_reverse32((unsigned)value);
435 }
436 
437 
438 #ifdef __GNUG__
439 #pragma GCC diagnostic pop
440 #endif
441 #ifdef _MSC_VER
442 #pragma warning( pop )
443 #endif
444 
445 
446 } // bm
447 
448 #endif
BM_VECT_ALIGN
#define BM_VECT_ALIGN
Definition: bmdef.h:359
bm::bmi_blsi_u64
BMFORCEINLINE unsigned long long bmi_blsi_u64(unsigned long long w)
Definition: bmutil.h:342
bmconst.h
Constants, tables and typedefs.
bm::bit_block_t::bunion_t
Definition: bmutil.h:58
bm::bit_block_t::bunion_t::BM_VECT_ALIGN_ATTR
bm::id64_t BM_VECT_ALIGN w64[bm::set_block_size/2] BM_VECT_ALIGN_ATTR
Definition: bmutil.h:61
bm::set_block_size
const unsigned set_block_size
Definition: bmconst.h:54
bm::count_trailing_zeros_u64
unsigned count_trailing_zeros_u64(bm::id64_t w) BMNOEXCEPT
64-bit bit-scan fwd
Definition: bmutil.h:381
bm::bit_scan_forward32
unsigned bit_scan_forward32(unsigned value) BMNOEXCEPT
Definition: bmutil.h:316
bm::ilog2
T ilog2(T x) BMNOEXCEPT
Fast loop-less function to find LOG2.
Definition: bmutil.h:124
bm::tzcnt_table
Structure for TZCNT constants.
Definition: bmconst.h:324
bm::bit_block_t
bit-block array wrapped into union for correct interpretation of 32-bit vs 64-bit access vs SIMD
Definition: bmutil.h:56
bm::id64_t
unsigned long long int id64_t
Definition: bmconst.h:34
bm::first_bit_table
Structure keeps index of first right 1 bit for every byte.
Definition: bmconst.h:255
bm::count_trailing_zeros
unsigned count_trailing_zeros(unsigned v) BMNOEXCEPT
Portable TZCNT with (uses 37-LUT)
Definition: bmutil.h:185
bm::bit_block_t::end
bm::word_t * end()
Definition: bmutil.h:94
bm::bit_block_t::bunion_t::BM_VECT_ALIGN_ATTR
bm::word_t BM_VECT_ALIGN w32[bm::set_block_size] BM_VECT_ALIGN_ATTR
Definition: bmutil.h:60
bm::count_leading_zeros
unsigned count_leading_zeros(unsigned x) BMNOEXCEPT
Portable LZCNT with (uses minimal LUT)
Definition: bmutil.h:169
bm::ilog2_LUT
T ilog2_LUT(T x) BMNOEXCEPT
Lookup table based integer LOG2.
Definition: bmutil.h:195
bm::ptr_guard::ptr_guard
ptr_guard(T *p) BMNOEXCEPT
Definition: bmutil.h:155
BMNOEXCEPT
#define BMNOEXCEPT
Definition: bmdef.h:79
bm::gap_word_t
unsigned short gap_word_t
Definition: bmconst.h:77
bm::ptr_guard
Mini auto-pointer for internal memory management.
Definition: bmutil.h:152
bm::conditional
ad-hoc conditional expressions
Definition: bmutil.h:110
BM_ASSERT
#define BM_ASSERT
Definition: bmdef.h:130
bm::conditional< false >::test
static bool test()
Definition: bmutil.h:116
bm::conditional::test
static bool test()
Definition: bmutil.h:112
bmdef.h
Definitions(internal)
bm::bit_block_t::end
const bm::word_t * end() const
Definition: bmutil.h:93
bm::bit_scan_reverse32
unsigned bit_scan_reverse32(unsigned value) BMNOEXCEPT
Definition: bmutil.h:301
bm::bit_block_t::b_
union bm::bit_block_t::bunion_t b_
bm::bit_block_t::begin
const bm::word_t * begin() const
Definition: bmutil.h:91
bm::count_leading_zeros_u64
unsigned count_leading_zeros_u64(bm::id64_t w) BMNOEXCEPT
64-bit bit-scan reverse
Definition: bmutil.h:353
bm::min_value
T min_value(T v1, T v2) BMNOEXCEPT
Get minimum of 2 values.
Definition: bmutil.h:101
BMFORCEINLINE
#define BMFORCEINLINE
Definition: bmdef.h:203
bm
Definition: bm.h:76
bm::bit_scan_reverse
unsigned bit_scan_reverse(T value) BMNOEXCEPT
Definition: bmutil.h:415
bm::ptr_guard::~ptr_guard
~ptr_guard()
Definition: bmutil.h:156
bm::DeBruijn_bit_position
DeBruijn majic table.
Definition: bmconst.h:240
bm::word_t
unsigned int word_t
Definition: bmconst.h:38
bm::bit_block_t::begin
bm::word_t * begin()
Definition: bmutil.h:92
bm::bit_scan_fwd
T bit_scan_fwd(T v) BMNOEXCEPT
Definition: bmutil.h:294
bm::bmi_bslr_u64
BMFORCEINLINE unsigned long long bmi_bslr_u64(unsigned long long w) BMNOEXCEPT
Definition: bmutil.h:332
bm::lzcnt_table
Structure for LZCNT constants (4-bit)
Definition: bmconst.h:309
bm::bit_block_t::bunion_t::BM_VECT_ALIGN_ATTR
__m128i BM_VECT_ALIGN w128[bm::set_block_size/4] BM_VECT_ALIGN_ATTR
Definition: bmutil.h:70