6 #ifndef TURI_TEXT_TOPICMODEL_H_ 7 #define TURI_TEXT_TOPICMODEL_H_ 10 #include <core/storage/sframe_data/sarray.hpp> 11 #include <core/storage/sframe_data/sframe.hpp> 14 #include <core/storage/fileio/temp_files.hpp> 18 #include <model_server/lib/unity_base_types.hpp> 19 #include <core/util/hash_value.hpp> 20 #include <model_server/lib/flex_dict_view.hpp> 21 #include <toolkits/ml_data_2/ml_data.hpp> 22 #include <toolkits/ml_data_2/metadata.hpp> 25 #include <model_server/lib/extensions/ml_model.hpp> 29 #include <core/export.hpp> 63 typedef Eigen::Matrix<int, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> count_matrix_type; // int matrix, dynamic rows and columns, row-major storage
// Integer row vector: exactly one row, dynamic column count, row-major storage.
64 typedef Eigen::Matrix<int, 1, Eigen::Dynamic, Eigen::RowMajor> count_vector_type;
/**
 * Compile-time version constant for the topic model; currently 1.
 *
 * NOTE(review): presumably the value that get_version() implementations
 * report and that load_version() receives for compatibility handling --
 * confirm in the implementation file.
 */
66 static constexpr
size_t TOPIC_MODEL_VERSION = 1;
// Map from size_t keys to size_t values.
// NOTE(review): presumably filled by set_associations(); the meaning of the
// key and value indices is not visible in this header excerpt -- confirm.
73 std::map<size_t, size_t> associations;
// Shared handle to the v2 ml_data metadata object.
// NOTE(review): presumably the metadata consumed by
// create_ml_data_using_metadata() -- confirm.
80 std::shared_ptr<v2::ml_metadata> metadata;
// Integer count matrix (count_matrix_type).
// NOTE(review): the name suggests one axis indexes words and the other
// topics; which is rows vs. columns is not shown here -- confirm.
83 count_matrix_type word_topic_counts;
// Shared handles to two flexible_type arrays.
// NOTE(review): presumably the two halves of a validation set (the part used
// for estimation and the held-out part) -- confirm against the caller.
90 std::shared_ptr<sarray<flexible_type>> validation_train;
91 std::shared_ptr<sarray<flexible_type>> validation_test;
/**
 * Pure virtual override: every concrete model class must provide its own
 * implementation.
 *
 * \param _opts Map from string keys to flexible_type values.
 *              NOTE(review): presumably option name -> option value -- confirm.
 */
115 virtual void init_options(
const std::map<std::string,flexible_type>& _opts)
override = 0;
/**
 * Pure virtual, const override that returns a version number as size_t.
 *
 * NOTE(review): presumably expected to return TOPIC_MODEL_VERSION in concrete
 * subclasses -- confirm.
 */
121 virtual size_t get_version()
const override = 0;
/**
 * Pure virtual override: concrete model classes must implement it.
 *
 * \param iarc    Serialization input archive to read from.
 * \param version Version number passed alongside the archive.
 *                NOTE(review): presumably the version the archived model was
 *                saved with, so older layouts can be handled -- confirm.
 */
131 virtual void load_version(
turi::iarchive& iarc,
size_t version)
override = 0;
/**
 * Returns a vector of strings.
 *
 * NOTE(review): presumably the names of the fields that can be queried on
 * this model -- confirm in the implementation.
 */
146 std::vector<std::string> list_fields();
162 v2::ml_data create_ml_data_using_metadata(
/**
 * Accepts an sframe of associations by const reference; returns nothing.
 *
 * \param associations Input sframe.
 *                     NOTE(review): the expected column schema is not visible
 *                     in this header excerpt; presumably used to fill the
 *                     `associations` member map -- confirm.
 */
176 void set_associations(
const sframe& associations);
/**
 * Returns a pair of parallel-typed vectors for one topic: a vector of
 * flexible_type values and a vector of doubles.
 * NOTE(review): presumably the topic's top words and their scores -- confirm.
 *
 * \param topic_id   Index of the topic to query.
 * \param num_words  Defaults to 5.
 *                   NOTE(review): presumably the maximum number of words to
 *                   return -- confirm.
 * \param cdf_cutoff Defaults to 1.0.
 *                   NOTE(review): presumably a cumulative-probability
 *                   threshold limiting how many words are returned -- confirm.
 */
210 std::pair<std::vector<flexible_type>, std::vector<double>>
211 get_topic(
size_t topic_id,
size_t num_words=5,
double cdf_cutoff=1.0);
226 std::shared_ptr<sarray<flexible_type>>
/**
 * Returns an integer count matrix (count_matrix_type) computed for a dataset.
 * NOTE(review): presumably per-document topic counts -- the matrix
 * orientation is not shown here; confirm.
 *
 * \param dataset    Shared handle to an array of flexible_type values.
 * \param num_burnin NOTE(review): presumably the number of burn-in sampling
 *                   iterations to run before counts are taken -- confirm.
 */
234 count_matrix_type predict_counts(std::shared_ptr<
sarray<flexible_type> > dataset,
size_t num_burnin);
/**
 * Returns a shared handle to an array of flexible_type values.
 * NOTE(review): presumably the topics matrix exported one entry per row or
 * per word -- the element layout is not visible here; confirm.
 */
240 std::shared_ptr<sarray<flexible_type>> get_topics_matrix();
/**
 * Returns a shared handle to an array of flexible_type values.
 * NOTE(review): presumably the model's vocabulary, one word per element --
 * confirm.
 */
245 std::shared_ptr<sarray<flexible_type>> get_vocabulary();
263 const count_matrix_type& doc_topic_counts,
264 const count_matrix_type& word_topic_counts);
The serialization input archive object. Given a reference to an istream, it reads from that stream to provide deserialization capabilities.
The serialization output archive object. Given a reference to an ostream, it writes to that stream to provide serialization capabilities.