Turi Create  4.0
code_optimization.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_CODE_OPTIMIZATION_H_
7 #define TURI_CODE_OPTIMIZATION_H_
8 
/*
 * GL_OPT_ATTR(...) wraps a list of function attributes.
 *
 *  - NDEBUG not defined: the requested list is discarded and the function
 *    is only marked `hot`.  The "cold" macros have their own attribute
 *    definition and are not routed through this macro.
 *  - NDEBUG defined: the list is forwarded verbatim to __attribute__.
 */
#ifndef NDEBUG
#define GL_OPT_ATTR(...) __attribute__((hot))
#else
#define GL_OPT_ATTR(...) __attribute__((__VA_ARGS__))
#endif
15 
/*
 * GL_GCC_ONLY(...) emits its arguments unchanged when the compiler is not
 * clang, and expands to nothing when __clang__ is defined.
 */
#ifndef __clang__
#define GL_GCC_ONLY(...) __VA_ARGS__
#else
#define GL_GCC_ONLY(...)
#endif
21 
// Attributes that enable aggressive optimizations for math-heavy
// functions.
//
// _GL_GCC_HOT_FUNCTION_FLAGS is the GCC attribute list: `hot` plus a set
// of per-function optimize("...") options.
//
// _GL_HOT_FUNCTION_FLAGS is the list used by the GL_HOT* macros for the
// current compiler.  clang does not implement GCC's `optimize` function
// attribute (it warns about it and ignores it), so under clang only `hot`
// is used; every other compiler gets the full GCC list.

#define _GL_GCC_HOT_FUNCTION_FLAGS                \
  hot,                                            \
  optimize("tree-loop-im"),                       \
  optimize("tree-vectorize"),                     \
  optimize("inline-limit=10000"),                 \
  optimize("unsafe-math-optimizations"),          \
  optimize("fast-math"),                          \
  optimize("unroll-loops"),                       \
  optimize("peel-loops"),                         \
  optimize("variable-expansion-in-unroller")

#ifdef __clang__
#define _GL_HOT_FUNCTION_FLAGS hot
#else
#define _GL_HOT_FUNCTION_FLAGS _GL_GCC_HOT_FUNCTION_FLAGS
#endif
37 
/**
 * \ingroup util
 * Attribute list for a hot function that should always be inlined:
 * `always_inline` followed by the hot-function flags.  The list is passed
 * through GL_OPT_ATTR, which decides what is actually emitted.
 */
#define GL_HOT_INLINE GL_OPT_ATTR(always_inline, _GL_HOT_FUNCTION_FLAGS)
46 
/**
 * \ingroup util
 * Attribute list for a hot function that should always be inlined and
 * flattened: `always_inline`, `flatten`, then the hot-function flags.  The
 * list is passed through GL_OPT_ATTR, which decides what is actually
 * emitted.
 */
#define GL_HOT_INLINE_FLATTEN \
  GL_OPT_ATTR(always_inline, flatten, _GL_HOT_FUNCTION_FLAGS)
56 
/**
 * \ingroup util
 * Attribute list for a hot function that should be flattened: `flatten`
 * followed by the hot-function flags.  The list is passed through
 * GL_OPT_ATTR, which decides what is actually emitted.
 */
#define GL_HOT_FLATTEN GL_OPT_ATTR(flatten, _GL_HOT_FUNCTION_FLAGS)
65 
/**
 * \ingroup util
 * Attribute list for a hot function: just the hot-function flags, passed
 * through GL_OPT_ATTR.
 */
#define GL_HOT GL_OPT_ATTR(_GL_HOT_FUNCTION_FLAGS)
68 
/**
 * \ingroup util
 * Attribute list for a hot function that should not be inlined:
 * `noinline` followed by the hot-function flags, passed through
 * GL_OPT_ATTR.
 */
#define GL_HOT_NOINLINE GL_OPT_ATTR(noinline, _GL_HOT_FUNCTION_FLAGS)
73 
/**
 * \ingroup util
 * Attribute list for a hot function that should not itself be inlined but
 * should be flattened: `noinline`, `flatten`, then the hot-function flags,
 * passed through GL_OPT_ATTR.
 */
#define GL_HOT_NOINLINE_FLATTEN \
  GL_OPT_ATTR(noinline, flatten, _GL_HOT_FUNCTION_FLAGS)
79 
/**
 * \ingroup util
 * Marks a function with the `cold` and `noinline` attributes.  The
 * attribute is written out directly rather than through GL_OPT_ATTR, so it
 * does not depend on NDEBUG.
 */
#define GL_COLD_NOINLINE __attribute__((cold, noinline))
82 
83 
/**
 * \ingroup util
 * Attribute set for the error paths of the various assertion routines:
 * `cold`, `noinline` and `noreturn`.  A function carrying it never
 * returns; the `noreturn` part is what silences "warning: control reaches
 * end of non-void function" at call sites that end in such a call.
 */
#define GL_COLD_NOINLINE_ERROR __attribute__((cold, noinline, noreturn))
92 
93 #ifdef __SSE2__
94 #include <xmmintrin.h>
95 #endif
96 
// Configure the SSE control/status register (MXCSR) so that denormal
// floating point numbers are flushed to zero.  This gives improved
// performance on most sgd things.
//
// Does nothing when __SSE2__ is not defined.
static inline void set_denormal_are_zero() {
#ifdef __SSE2__
  _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);

  // 0x8040 = flush-to-zero (0x8000) | denormals-are-zero (0x0040).
  unsigned int csr = _mm_getcsr();
  csr |= 0x8040;
  _mm_setcsr(csr);
#endif
}
105 
// Clear the flush-to-zero / denormals-are-zero bits in the SSE
// control/status register (MXCSR), i.e. the bits that
// set_denormal_are_zero() turns on.  The bits are cleared unconditionally;
// no earlier register state is remembered or restored.
//
// Does nothing when __SSE2__ is not defined.
static inline void unset_denormal_are_zero() {
#ifdef __SSE2__
  _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_OFF);

  // Build the mask in the register's own type so the complement has the
  // right width.
  auto csr = _mm_getcsr();
  csr &= ~static_cast<decltype(csr)>(0x8040);
  _mm_setcsr(csr);
#endif
}
114 
// Branch hints built on __builtin_expect.  Each macro evaluates to 1 when
// x is truthy and to 0 otherwise; LIKELY tells the compiler the expected
// outcome is 1, UNLIKELY that it is 0.
#define LIKELY(x)   (__builtin_expect((x) ? 1 : 0, 1))
#define UNLIKELY(x) (__builtin_expect((x) ? 1 : 0, 0))
117 
118 
/** A macro to make sure that a vector is sized sufficiently to hold
 *  the index idx.  If not, resize.
 *
 *  When v.size() <= idx, v is resized to (5 * (idx + 4)) / 4 elements,
 *  i.e. about 25% more than idx requires.  The resize call is issued from
 *  a lambda that is tagged GL_GCC_ONLY(GL_COLD_NOINLINE) -- presumably to
 *  keep the rarely-taken growth code out of the caller's fast path.
 *
 *  Both arguments are evaluated exactly once, so expressions with side
 *  effects (e.g. `i++`) behave as written.
 *
 *  \param v    Container exposing size() and resize(n).
 *  \param idx  Index that v must be large enough to hold.
 */
#define FAST_CHECK_VECTOR_BUFFER_SIZE(v, idx)                             \
  do {                                                                    \
    /* Bind each argument once; the lambda below captures these. */       \
    auto&& gl_fcvbs_vec_ = (v);                                           \
    const auto gl_fcvbs_idx_ = (idx);                                     \
    if(UNLIKELY(gl_fcvbs_vec_.size() <= gl_fcvbs_idx_)) {                 \
      auto gl_fcvbs_grow_ = [&]() GL_GCC_ONLY(GL_COLD_NOINLINE) {         \
        gl_fcvbs_vec_.resize((5 * (gl_fcvbs_idx_ + 4)) / 4);              \
      };                                                                  \
      gl_fcvbs_grow_();                                                   \
    }                                                                     \
  } while(false)
133 
134 
135 
136 #endif /* TURI_CODE_OPTIMIZATION_H_ */