/**
 * Standard-library hash containers and the project's sframe header, followed
 * by the join_type_t enumeration. INNER_JOIN is explicitly 0; LEFT_JOIN,
 * RIGHT_JOIN and FULL_JOIN take the next consecutive values (1, 2, 3).
 *
 * NOTE(review): the leading integers on this and later lines ("7", "8", "10",
 * "13", ...) look like line numbers carried over from a source listing rather
 * than C++ tokens -- confirm against the real header.
 */
7 #include <unordered_set> 8 #include <unordered_map> 10 #include <core/storage/sframe_data/sframe.hpp> 13 enum join_type_t {INNER_JOIN = 0, LEFT_JOIN, RIGHT_JOIN, FULL_JOIN};
/**
 * Declaration only: hash_key takes a vector of flexible_type by const
 * reference and returns a size_t.
 *
 * NOTE(review): presumably the result is a hash of the cells in `key`; the
 * definition is not visible here -- confirm.
 */
29 size_t hash_key(
const std::vector<flexible_type>& key);
/**
 * Declaration only: compute_hash_from_row takes a row (vector of
 * flexible_type) and a vector of size_t positions, both by const reference,
 * and returns a size_t.
 *
 * NOTE(review): presumably only the cells of `row` at the indices listed in
 * `positions` contribute to the hash -- confirm against the definition.
 */
30 size_t compute_hash_from_row(
const std::vector<flexible_type> &row,
31 const std::vector<size_t> &positions);
/// A collection of rows; each row is itself a vector of flexible_type cells.
/// NOTE(review): the enclosing type is not visible in this listing --
/// presumably hash_join_row_t; confirm.
34 std::vector<std::vector<flexible_type>> rows;
/// Alias: value_type names hash_join_row_t. The iterator accessors, the
/// _hash_table member and empty_vt in this file are all spelled in terms of
/// value_type.
47 typedef hash_join_row_t value_type;
/**
 * Declaration only: add_row takes one row (a vector of flexible_type) by
 * const reference and returns a bool.
 *
 * NOTE(review): what the bool result signifies is not shown here -- presumably
 * whether the row was stored; confirm against the definition.
 */
60 bool add_row(
const std::vector<flexible_type> &row);
72 const std::vector<size_t> &hash_positions,
73 bool mark_match=
true);
/// Declaration only: cbegin returns a const_iterator over an
/// unordered_map<size_t, list<value_type>>, the same type as the _hash_table
/// member declared in this file.
/// NOTE(review): presumably the begin iterator of _hash_table -- confirm.
79 std::unordered_map<size_t, std::list<value_type>>::const_iterator cbegin();
/// Declaration only: cend returns a const_iterator over an
/// unordered_map<size_t, list<value_type>>, the same type as the _hash_table
/// member declared in this file.
/// NOTE(review): presumably the past-the-end iterator of _hash_table -- confirm.
80 std::unordered_map<size_t, std::list<value_type>>::const_iterator cend();
/**
 * Declaration only: join_values_equal takes two rows (`row` and `other`, each
 * a vector of flexible_type) plus a vector of size_t `hash_positions`, all by
 * const reference, and returns a bool.
 *
 * NOTE(review): presumably true when the two rows agree on the join columns;
 * how `hash_positions` indexes into each row is not visible here -- confirm
 * against the definition.
 */
87 bool join_values_equal(
const std::vector<flexible_type> &row,
88 const std::vector<flexible_type> &other,
89 const std::vector<size_t> &hash_positions);
/// Map from a size_t key to a list of value_type entries.
/// NOTE(review): presumably keyed by the row hash, with the list holding
/// entries whose hashes collide -- confirm.
95 std::unordered_map<size_t, std::list<value_type>> _hash_table;
/// Vector of size_t positions.
/// NOTE(review): presumably the column positions that take part in hashing --
/// confirm.
97 std::vector<size_t> _hash_positions;
/// Static const value_type instance.
/// NOTE(review): the name suggests an empty placeholder value; where it is
/// used is not visible in this listing -- confirm.
98 const static value_type empty_vt;
113 const std::vector<size_t> &left_join_positions,
114 const std::vector<size_t> &right_join_positions,
115 join_type_t join_type,
116 const std::map<std::string,std::string>& alter_names,
117 size_t max_buffer_size);
// Data members; the enclosing class header is not visible in this listing.

/// size_t positions for the left input.
/// NOTE(review): presumably set from the like-named `left_join_positions`
/// parameter that appears earlier in the file -- confirm.
127 std::vector<size_t> _left_join_positions;
/// size_t positions for the right input.
/// NOTE(review): presumably set from the like-named `right_join_positions`
/// parameter that appears earlier in the file -- confirm.
128 std::vector<size_t> _right_join_positions;
/// A size_t buffer limit.
/// NOTE(review): the unit (cells, rows, bytes) is not shown here -- confirm.
129 size_t _max_buffer_size;
/// size_t -> size_t lookup.
/// NOTE(review): presumably right join-column position to the matching left
/// position -- confirm.
132 std::unordered_map<size_t,size_t> _right_to_left_join_positions;
/// size_t -> size_t lookup.
/// NOTE(review): presumably left join-column position to the matching right
/// position -- confirm.
133 std::unordered_map<size_t,size_t> _left_to_right_join_positions;
/// Column names, as strings.
/// NOTE(review): presumably the names of the result frame's columns -- confirm.
134 std::vector<std::string> _output_column_names;
/// Column types, as flex_type_enum values.
/// NOTE(review): presumably parallel to _output_column_names -- confirm.
135 std::vector<flex_type_enum> _output_column_types;
/// Boolean flag.
/// NOTE(review): presumably set when the output column order must be swapped
/// back -- confirm.
136 bool _reverse_output_column_order;
/// size_t -> size_t lookup.
/// NOTE(review): presumably maps a reversed column index to its original index
/// -- confirm.
137 std::unordered_map<size_t, size_t> _reverse_to_original;
/// Boolean flag.
/// NOTE(review): presumably records whether the input frames have already been
/// partitioned -- confirm.
138 bool _frames_partitioned;
/// Ordered string -> string map.
/// NOTE(review): presumably renames applied to right-hand columns -- confirm.
139 std::map<std::string, std::string> _alter_names_right;
/// Declaration only: grace_partition_frames takes no arguments and returns a
/// pair of shared_ptr<sframe>.
/// NOTE(review): the name suggests GRACE hash-join partitioning of the two
/// input frames; which pair element is left and which is right is not shown
/// here -- confirm.
145 std::pair<std::shared_ptr<sframe>,std::shared_ptr<sframe>> grace_partition_frames();
/**
 * Declaration only: grace_partition_frame takes an sframe and a vector of
 * size_t `join_col_nums` by const reference, plus a size_t `num_partitions`,
 * and returns a shared_ptr<sframe>.
 *
 * NOTE(review): presumably `sf` is split into `num_partitions` pieces
 * according to the columns in `join_col_nums` -- confirm against the
 * definition.
 */
152 std::shared_ptr<sframe> grace_partition_frame(
const sframe &sf,
const std::vector<size_t> &join_col_nums,
size_t num_partitions);
/// Declaration only: get_num_cells takes an sframe by const reference and
/// returns a size_t.
/// NOTE(review): presumably rows times columns of `sf` -- confirm against the
/// definition.
157 size_t get_num_cells(
const sframe &sf);
/// Declaration only: choose_number_of_grace_partitions takes an sframe by
/// const reference and returns a size_t.
/// NOTE(review): presumably the partition count is derived from the size of
/// `sf` and the buffer limit; the policy is not visible here -- confirm.
164 size_t choose_number_of_grace_partitions(
const sframe &sf);
/// Declaration only: init_result_frame takes an sframe by non-const reference
/// and returns nothing, so the caller's frame can be modified in place.
/// NOTE(review): presumably opens `result_frame` for writing with the output
/// column names and types -- confirm against the definition.
170 void init_result_frame(
sframe &result_frame);
/**
 * Declaration only: merge_rows_for_output takes the result sframe by
 * non-const reference and two collections of rows (`left_rows`, `right_rows`)
 * by const reference; it returns nothing.
 *
 * NOTE(review): the listing numbers jump from 181 to 183, so a parameter line
 * may be missing between `result_frame` and `left_rows` -- confirm against the
 * real header.
 * NOTE(review): presumably the left and right rows are combined and written
 * into `result_frame` -- confirm against the definition.
 */
181 void merge_rows_for_output(
sframe &result_frame,
183 const std::vector<std::vector<flexible_type>> &left_rows,
184 const std::vector<std::vector<flexible_type>> &right_rows);
/// Declaration only: unpack_row takes a std::string `val` by value (the
/// argument is copied or moved into the call) and a size_t `num_cols`, and
/// returns a vector of flexible_type.
/// NOTE(review): presumably `val` is a serialized row that is decoded into
/// `num_cols` cells -- confirm against the definition.
186 std::vector<flexible_type> unpack_row(std::string val,
size_t num_cols);
// Signature text for get_matching_rows: returns const value_type&, takes a row
// and a vector of hash positions by const reference, and defaults mark_match
// to true. The line has no terminating ';' or body.
// NOTE(review): this reads like an index entry carried over from a source
// listing rather than a declaration; it appears to be the head of the
// (hash_positions, mark_match) parameter list seen earlier in the file --
// confirm.
const value_type & get_matching_rows(const std::vector< flexible_type > &row, const std::vector< size_t > &hash_positions, bool mark_match=true)
// Signature text for add_row with the same return type and parameter as the
// add_row declaration earlier in the file. The line has no terminating ';' or
// body.
// NOTE(review): this reads like an index entry carried over from a source
// listing rather than a second declaration -- confirm.
bool add_row(const std::vector< flexible_type > &row)
// Constructor-style signature for join_hash_table taking a vector of size_t
// `hp` by value. The line has no terminating ';' or body.
// NOTE(review): presumably `hp` holds the hash positions kept in
// _hash_positions; this also reads like an index entry carried over from a
// source listing -- confirm.
join_hash_table(std::vector< size_t > hp)