6 #ifndef TURI_FP_RULE_MINING_H 7 #define TURI_FP_RULE_MINING_H 16 #include <core/data/sframe/gl_sframe.hpp> 17 #include <core/data/sframe/gl_sarray.hpp> 19 #include <toolkits/feature_engineering/topk_indexer.hpp> 20 #include <toolkits/feature_engineering/statistics_tracker.hpp> 22 #include <toolkits/pattern_mining/fp_node.hpp> 23 #include <toolkits/pattern_mining/fp_results_tree.hpp> 26 namespace pattern_mining {
// Integer identifiers for the available rule-scoring functions.
// CONF_SCORE is the default `score_type` of the top-k rule queries declared
// in this header; get_score_function_type_from_name() maps user-facing
// names onto these values.
constexpr size_t CONF_SCORE = 0;
constexpr size_t LIFT_SCORE = 1;
constexpr size_t ALL_CONF_SCORE = 2;
constexpr size_t MAX_CONF_SCORE = 3;
constexpr size_t KULC_SCORE = 4;
constexpr size_t COSINE_SCORE = 5;
constexpr size_t CONVICTION_SCORE = 6;
52 std::vector<size_t> LHS;
53 std::vector<size_t> RHS;
57 size_t num_transactions;
66 std::vector<rule> rules;
67 size_t num_transactions;
69 void add_rule(
const rule& new_rule) {rules.push_back(new_rule);}
70 std::vector<size_t> get_LHS_supports()
const;
71 std::vector<size_t> get_RHS_supports()
const;
72 std::vector<size_t> get_total_supports()
const;
73 inline size_t size()
const {
return rules.size(); };
76 void append_rule_list(
const rule_list& other_list);
80 const std::shared_ptr<topk_indexer>& indexer =
nullptr)
const;
83 flex_list to_flex_list(std::vector<double> scores, \
84 const std::shared_ptr<topk_indexer>& indexer =
nullptr)
const;
97 flex_list get_top_k_rules(
const size_t& top_k = TOP_K_MAX, \
98 const size_t& score_type = CONF_SCORE,\
99 const std::shared_ptr<topk_indexer>& indexer =
nullptr)
const;
101 std::vector<double> score_rules(
const size_t& score_type)
const;
107 std::ostream& operator<<(std::ostream& out,
const rule_list& my_rules);
133 rule_list extract_relevant_rules(
const std::vector<size_t>& my_itemset, \
149 flex_list extract_top_k_rules(
const std::vector<size_t>& my_itemset, \
151 const size_t& top_k = TOP_K_MAX, \
152 const size_t& score_type = CONF_SCORE, \
153 const std::shared_ptr<topk_indexer>& indexer =
nullptr);
160 std::vector<size_t> RHS;
161 std::vector<std::vector<size_t>> LHS_list;
162 std::vector<size_t> LHS_support_list;
163 std::vector<std::vector<size_t>> itemset_list;
168 rule_miner(
const std::vector<size_t>& sorted_itemset,\
171 void extract_relevant_rules_helper(std::shared_ptr<fp_node>& node);
173 rule_list get_rule_list() {
return my_rules;}
174 std::vector<size_t> get_itemset() {
return itemset_list.front();}
183 inline bool operator()(
const std::pair<rule, double> & left, \
184 const std::pair<rule, double> &right){
185 return left.second > right.second;
194 std::priority_queue<std::pair<rule, double>, \
195 std::vector<std::pair<rule, double>>, \
196 rule_score_compare> \
201 void add_rule_score_pair(
const std::pair<rule, double>& rule_score_pair);
202 std::vector<std::pair<rule, double>> convert_to_sorted_vector();
204 flex_list rules_to_flex_list(std::vector<std::pair<rule, double>> rule_score_pairs,
205 const std::shared_ptr<topk_indexer>& indexer);
248 inline size_t get_score_function_type_from_name(
const std::string& score_function_name){
249 if (score_function_name ==
"confidence") {
251 }
else if (score_function_name ==
"lift") {
253 }
else if (score_function_name ==
"all_confidence") {
254 return ALL_CONF_SCORE;
255 }
else if (score_function_name ==
"max_confidence") {
256 return MAX_CONF_SCORE;
257 }
else if (score_function_name ==
"kulczynski") {
259 }
else if (score_function_name ==
"cosine") {
261 }
else if (score_function_name ==
"conviction"){
262 return CONVICTION_SCORE;
264 log_and_throw(
"Internal error. No such scoring function exists.");
270 std::function<double (const rule&)> get_score_function(
const size_t& score_type, \
271 const size_t& num_transactions);
// Rule-scoring functions. All seven share one signature: three doubles
// (LHS_support, RHS_support, total_support) in, one double score out.
// NOTE(review): each presumably implements the association-rule measure it
// is named after; the formulas live in the implementation file and are not
// visible in this header - confirm there.
double confidence_score(const double& LHS_support,
                        const double& RHS_support,
                        const double& total_support);
double lift_score(const double& LHS_support,
                  const double& RHS_support,
                  const double& total_support);
double all_confidence_score(const double& LHS_support,
                            const double& RHS_support,
                            const double& total_support);
double max_confidence_score(const double& LHS_support,
                            const double& RHS_support,
                            const double& total_support);
double kulc_score(const double& LHS_support,
                  const double& RHS_support,
                  const double& total_support);
double cosine_score(const double& LHS_support,
                    const double& RHS_support,
                    const double& total_support);
double conviction_score(const double& LHS_support,
                        const double& RHS_support,
                        const double& total_support);
std::vector< flexible_type > flex_list