#ifndef TURI_2D_SPARSE_PARALLEL_ARRAY
#define TURI_2D_SPARSE_PARALLEL_ARRAY

#include <array>
#include <mutex>
#include <utility>
#include <vector>

#include <core/util/bitops.hpp>
#include <core/util/cityhash_tc.hpp>
#include <core/parallel/pthread_tools.hpp>
#include <core/parallel/atomic.hpp>
#include <core/parallel/lambda_omp.hpp>
#include <sparsehash/dense_hash_set>

namespace turi {

/** A sparse 2D array of T, indexed by (row, column) pairs.  Writes are
 *  lock-striped across a fixed grid of small hash maps so that many
 *  threads can update mostly-disjoint cells with little contention.
 */
template <typename T>
class sparse_parallel_2d_array {
 public:
  sparse_parallel_2d_array(size_t n_rows = 0, size_t n_cols = 0) {
    // One scratch key/value container per thread avoids reallocating a
    // container on every apply() call.
    kv_temp_container_v.resize(thread::cpu_count());
    resize(n_rows, n_cols);
  }
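  // Usage sketch (illustrative only; `n`, `i`, `j`, and the lambdas are
  // hypothetical, not part of this header):
  //
  //   sparse_parallel_2d_array<size_t> counts(n, n);
  //
  //   // Safe to call from many threads at once; see apply() below.
  //   counts.apply(i, j, [](size_t& c) { ++c; });
  //
  //   // Visit every stored cell; see apply_all() below.
  //   counts.apply_all([](size_t i, size_t j, const size_t& c) {
  //     /* consume (i, j, c) */
  //   });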
  size_t rows() const { return n_rows; }
  size_t cols() const { return n_cols; }
  /** Applies apply_f to the value at (i, j), default constructing the
   *  value if it is not yet present.  Thread safe: only the one sub-map
   *  that (i, j) hashes to is locked for the duration of the call.
   */
  template <typename ApplyFunction>
  GL_HOT void apply(size_t i, size_t j, ApplyFunction&& apply_f) {
    DASSERT_LT(i, rows());
    DASSERT_LT(j, cols());

    size_t thread_idx = thread::thread_id();
    kv_container& kv = kv_temp_container_v[thread_idx];
    kv.set_key(i, j, n_col_bits);

    size_t base_idx = get_first_level_hash(i, j, kv);

    auto& hs = hash_maps[base_idx];

    std::lock_guard<simple_spinlock> lg(hs.access_lock);

    auto ret = hs.hash_map.insert(std::move(kv));

    apply_f(ret.first->value);
  }
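  // Note on the insert above: dense_hash_set::insert is a no-op when the
  // key is already present, so apply_f always receives the stored value,
  // default-constructed on first touch.  A sketch (`w` is hypothetical):
  //
  //   arr.apply(i, j, [&](double& v) { v += w; });   // v starts at 0.0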
  /** Direct element access, default constructing the value if it is not
   *  yet present.  Not thread safe; intended for single-threaded phases.
   */
  GL_HOT_INLINE_FLATTEN T& operator()(size_t i, size_t j) {
    DASSERT_LT(i, rows());
    DASSERT_LT(j, cols());

    kv_container& kv = kv_temp_container_v[0];
    kv.set_key(i, j, n_col_bits);

    size_t base_idx = get_first_level_hash(i, j, kv);

    auto& hs = hash_maps[base_idx];
    auto ret = hs.hash_map.insert(std::move(kv));

    return ret.first->value;
  }
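  // Sketch of a single-threaded fill via operator() (no lock is taken, so
  // this is only safe while no other thread touches the container; assumes
  // a numeric T):
  //
  //   for (size_t i = 0; i < arr.rows() && i < arr.cols(); ++i)
  //     arr(i, i) = T(1);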
  /** Calls apply_f(i, j, value) on every stored element, in parallel.
   *  Worker threads claim blocks of sub-maps off a shared atomic counter,
   *  so the visit order is unspecified.
   */
  template <typename ApplyFunction>
  void apply_all(ApplyFunction&& apply_f) const {
    atomic<size_t> current_block_idx = 0;

    in_parallel([&](size_t thread_idx, size_t num_threads)
                    GL_GCC_ONLY(GL_HOT_NOINLINE_FLATTEN) {
      while (true) {
        // Claim the next unprocessed block of sub-maps.
        size_t block_idx = (++current_block_idx) - 1;

        if (block_idx >= n_thread_blocks) {
          break;
        }

        size_t start_idx = n_levels_per_block * block_idx;
        size_t end_idx = n_levels_per_block * (block_idx + 1);

        for (size_t i = start_idx; i < end_idx; ++i) {
          const hash_block& hb = hash_maps[i];

          for (auto it = hb.hash_map.begin(); it != hb.hash_map.end(); ++it) {
            const kv_container& kv = *it;
            const auto idx_pair = kv.get_indices(n_col_bits);
            const auto& value = kv.value;
            apply_f(idx_pair.first, idx_pair.second, value);
          }
        }
      }
    });
  }
  /** Non-const overload of the above: apply_f(i, j, value) may modify each
   *  value in place (value is declared mutable, so this is well defined
   *  through the set's const iterators).
   */
  template <typename ApplyFunction>
  void apply_all(ApplyFunction&& apply_f) {
    atomic<size_t> current_block_idx = 0;

    in_parallel([&](size_t thread_idx, size_t num_threads)
                    GL_GCC_ONLY(GL_HOT_NOINLINE_FLATTEN) {
      while (true) {
        size_t block_idx = (++current_block_idx) - 1;

        if (block_idx >= n_thread_blocks) {
          break;
        }

        size_t start_idx = n_levels_per_block * block_idx;
        size_t end_idx = n_levels_per_block * (block_idx + 1);

        for (size_t i = start_idx; i < end_idx; ++i) {
          const hash_block& hb = hash_maps[i];

          for (auto it = hb.hash_map.begin(); it != hb.hash_map.end(); ++it) {
            const kv_container& kv = *it;
            const auto idx_pair = kv.get_indices(n_col_bits);
            apply_f(idx_pair.first, idx_pair.second, kv.value);
          }
        }
      }
    });
  }
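  // Sketch: draining the array into coordinate-format triplets.  apply_f
  // runs on several threads and the visit order is hash order, so the
  // per-thread buffers below (hypothetical) avoid a shared lock:
  //
  //   std::vector<std::vector<std::tuple<size_t, size_t, T>>>
  //       out(thread::cpu_count());
  //   arr.apply_all([&](size_t i, size_t j, const T& v) {
  //     out[thread::thread_id()].emplace_back(i, j, v);
  //   });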
  /** Removes all stored elements; the dimensions are unchanged. */
  void clear() {
    parallel_for(size_t(0), hash_maps.size(), [&](size_t i) {
      hash_maps[i].hash_map.clear();
    });
  }
  /** Clears the array and resizes it to _n_rows by _n_cols. */
  void resize(size_t _n_rows, size_t _n_cols) {
    clear();
    n_rows = _n_rows;
    n_cols = _n_cols;
    // Enough bits to hold any column index j < n_cols, so that the
    // (i << n_col_bits) | j packing below is unique.
    n_col_bits = (n_cols == 0) ? 0 : bitwise_log2_ceil(n_cols);
  }
 private:
  /** The key/value cell stored in each hash map.  The (i, j) index pair is
   *  packed into a single hashed key; value is mutable so it can be updated
   *  in place through the set's const iterators.
   */
  struct kv_container {
    size_t key = 0;
    mutable T value = T();

    /** The reserved empty-slot sentinel required by dense_hash_set: an
     *  all-ones key that set_key never produces for a valid index pair. */
    static kv_container as_empty() {
      kv_container kv;
      kv.key = ~size_t(0);
      return kv;
    }

    inline bool operator==(const kv_container& kv) const {
      return key == kv.key;
    }

    void set_key(size_t i, size_t j, size_t n_col_bits) {
      key = index_hash((i << n_col_bits) | j);

#ifndef NDEBUG
      // index_hash is invertible, so the round trip must be exact.
      auto p = get_indices(n_col_bits);
      DASSERT_EQ(p.first, i);
      DASSERT_EQ(p.second, j);
#endif
    }

    std::pair<size_t, size_t> get_indices(size_t n_col_bits) const {
      size_t idx = reverse_index_hash(key);
      return std::pair<size_t, size_t>{
          (idx >> n_col_bits),
          idx & bit_mask<size_t>(static_cast<int>(n_col_bits))};
    }
  };

  kv_container empty_container;
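  // Worked example of the packing (illustrative numbers): with
  // n_col_bits = 10, (i, j) = (3, 7) packs to idx = (3 << 10) | 7 = 3079;
  // get_indices() then recovers (3079 >> 10, 3079 & 0x3FF) = (3, 7) once
  // reverse_index_hash has undone index_hash.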
  static constexpr size_t n_thread_block_bits = 6;
  static constexpr size_t n_levels_per_block_bits = 5;
  static constexpr size_t n_thread_blocks = (size_t(1) << n_thread_block_bits);
  static constexpr size_t n_levels_per_block = (size_t(1) << n_levels_per_block_bits);
  static constexpr size_t n_level_bits = n_thread_block_bits + n_levels_per_block_bits;
  static constexpr size_t n_levels = (size_t(1) << n_level_bits);
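  // The grid these constants encode: 2^6 = 64 thread blocks, each holding
  // 2^5 = 32 sub-maps, for 2^11 = 2048 independently lockable hash blocks
  // in total.  A compile-time restatement of that invariant:
  static_assert(n_levels == n_thread_blocks * n_levels_per_block,
                "the block grid must cover all levels exactly");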
  /** Maps (i, j) to the index of the sub-map that owns it. */
  inline size_t get_first_level_hash(size_t i, size_t j,
                                     const kv_container& kv) const {
    // First level: the low bits of the row index, so consecutive rows land
    // in different thread blocks.
    size_t first_idx = i & bit_mask<size_t>(n_thread_block_bits);
    DASSERT_LT(first_idx, n_thread_blocks);

    // Second level: the top bits of the already well-mixed hashed key.
    size_t second_idx = kv.key >> (bitsizeof(size_t) - n_levels_per_block_bits);
    DASSERT_LT(second_idx, n_levels_per_block);

    size_t base_idx = first_idx * n_levels_per_block + second_idx;
    DASSERT_LT(base_idx, n_levels);

    return base_idx;
  }
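  // Illustrative numbers: with i = 70, first_idx = 70 & 63 = 6; if the top
  // five bits of the 64-bit key happen to be 18, then
  // base_idx = 6 * 32 + 18 = 210, one of the 2048 sub-maps.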
  size_t n_rows = 0, n_cols = 0;
  size_t n_col_bits = 0;

  /** One lockable hash map; the array below holds n_levels of these. */
  struct hash_block {
    hash_block() {
      hash_map.set_empty_key(kv_container::as_empty());
    }

    simple_spinlock access_lock;

    /** The key is already a mixed hash, so rehashing it is unnecessary. */
    struct trivial_kv_container_hash {
      size_t operator()(const kv_container& kv) const { return kv.key; }
    };

    google::dense_hash_set<kv_container, trivial_kv_container_hash> hash_map;
  };
  std::array<hash_block, n_levels> hash_maps;

  /** Per-thread scratch containers, sized in the constructor. */
  std::vector<kv_container> kv_temp_container_v;
};

}  // namespace turi

#endif  // TURI_2D_SPARSE_PARALLEL_ARRAY