Turi Create  4.0
fast_integer_power.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_FAST_INTEGER_POWER_H_
7 #define TURI_FAST_INTEGER_POWER_H_
8 
9 #include <iostream>
10 #include <array>
11 #include <algorithm>
12 #include <core/util/bitops.hpp>
13 #include <core/util/code_optimization.hpp>
14 
15 namespace turi {
16 
17 /**
18  * \ingroup util
19  * A class that gives highly optimized version of the power function
20  * for when a^b needs to be computed for many different values of b.
21  * b must be a non-negative integer (type size_t).
22  *
23  * Usage:
24  *
25  * fast_integer_power fip(a); // Slower than std::pow by a factor
26  // of 10 or so.
27  *
28  * fip.pow(b); // Very fast, returns std::pow(a, b);
29  *
30  * Internals:
31  *
32  * Let the size_t value x be laid out in 4 bit blocks on disk.
33  *
34  * | b0 b1 b2 b3 | b4 b5 b6 b7 | ... |
35  *
36  * +-------------+-------------+ ...
37  * block 1 block 2
38  *
39  * First note that a^b can be computed for by looking at all the bits of
40  * b that are 1, e.g. if b0, b4, and b5, are 1 (so b = 2^0 + 2^4 + 2^5),
41  * then a^b = a^(2^0) * a^(2^4) * a^(2^5).
42  *
43  * Thus caching the values for each of these powers of 2 gives us the
44  * final value of a.
45  *
46  * The method used internally employs this technique, but treats each
47  * block as a unit. Thus we actually compute
48  *
49  * a^b = a^(lookup1[block1 bits]) * a^(lookup2[block2 bits]) * ...
50  *
51  * with each lookup having 8 bits and thus 256 possible values.
52  */
54  public:
55 
56  /** Constructs lookup tables to return std::pow(a, b).
57  */
58  fast_integer_power(double a = 1.0) {
59  set_base(a);
60  }
61 
62  /** Sets the base of the power function.
63  */
64  void set_base(double a) {
65  _setup_block_lookups(a);
66  }
67 
68  /** Returns a^b, where a is given in the constructor.
69  */
70  inline double pow(size_t b) const GL_HOT_INLINE_FLATTEN;
71 
72  private:
73 
74  // Must be greater than 2
75  static constexpr size_t bits_per_block = 8;
76  static constexpr size_t bit_selector = (1 << bits_per_block) - 1;
77  static constexpr size_t first_level_size = (bitsizeof(size_t) + bits_per_block - 1) / bits_per_block;
78 
79  std::array<std::array<double, (1 << bits_per_block)>, first_level_size> block_lookups;
80 
81  /** Set up all lookups.
82  */
83  inline void _setup_block_lookups(double v);
84 
85 };
86 
87 ////////////////////////////////////////////////////////////////////////////////
88 // All the functions definitions of the stuff above
89 
90 
91 /** Calculate out a^n.
92  */
93 inline double fast_integer_power::pow(size_t n) const {
94  double v = 1.0;
95  for (size_t i = 0; i < bitsizeof(size_t) / bits_per_block; ++i) {
96  v *= block_lookups[i][n & bit_selector];
97  n >>= bits_per_block;
98  if (n == 0) break;
99  }
100 
101  return v;
102 }
103 
104 
105 /** Set up all lookups.
106  */
107 inline void fast_integer_power::_setup_block_lookups(double v) {
108 
109  std::array<double, bitsizeof(size_t)> power_lookup;
110 
111  // First set up a lookup table by all powers of 2 in the mix.
112  power_lookup[0] = v;
113  for(size_t i = 1; i < bitsizeof(v); ++i)
114  power_lookup[i] = power_lookup[i - 1] * power_lookup[i - 1];
115 
116  // Now build in the block-style lookup tables.
117  for(size_t main_level = 0; main_level < first_level_size; ++main_level) {
118  size_t offset = main_level * bits_per_block;
119 
120  for(size_t second_level = 0; second_level < (1 << bits_per_block); ++second_level) {
121 
122  double vp = 1.0;
123 
124  for(size_t bit_idx = 0;
125  bit_idx < bits_per_block && offset + bit_idx < bitsizeof(size_t);
126  ++bit_idx) {
127 
128  if(second_level & (1 << bit_idx) )
129  vp *= power_lookup[offset + bit_idx];
130  }
131 
132  block_lookups[main_level][second_level] = vp;
133  }
134  }
135 }
136 
137 }
138 
139 #endif /* TURI_FAST_INTEGER_POWER_H_ */
double pow(size_t b) const GL_HOT_INLINE_FLATTEN
#define GL_HOT_INLINE_FLATTEN