6 #ifndef TURI_TEXT_SCVB_H_ 7 #define TURI_TEXT_SCVB_H_ 10 #include <core/storage/sframe_data/sarray.hpp> 11 #include <core/storage/sframe_data/sframe.hpp> 14 #include <core/storage/fileio/temp_files.hpp> 19 #include <model_server/lib/variant.hpp> 20 #include <model_server/lib/unity_base_types.hpp> 21 #include <core/data/flexible_type/flexible_type.hpp> 22 #include <core/util/hash_value.hpp> 23 #include <model_server/lib/flex_dict_view.hpp> 27 #include <core/random/random.hpp> 37 scvb0_solver(topic_model* _model) {
// Declaration only; the definition of train() is not in this part of the file.
//   data    - shared pointer to an sarray of flexible_type values.
//   verbose - boolean flag; presumably toggles progress output -- confirm
//             against the definition.
61 void train(std::shared_ptr<sarray<flexible_type>> data,
bool verbose);
// Solver state matrices (descriptions limited to how this file uses them).
// N_theta_j: reset to a (num_topics x 1) zero column by initialize_N_theta_j
// and indexed by topic k in compute_gamma.
74 Eigen::MatrixXd N_theta_j;
// N_phi: indexed as (word, topic) in compute_gamma; blended with N_phi_hat
// in update_N_phi.
75 Eigen::MatrixXd N_phi;
// N_phi_hat: accumulator indexed as (word, topic) in update_N_phi_hat.
76 Eigen::MatrixXd N_phi_hat;
// N_Z_hat: accumulator that update_N_Z_hat adds scaled gamma_ij columns to;
// blended into N_Z by update_N_Z.
77 Eigen::MatrixXd N_Z_hat;
// Resets N_theta_j to a (num_topics x 1) column of zeros, then iterates C_j
// times, each time drawing a topic index ix via
// random::fast_uniform<size_t>(0, num_topics - 1).
// NOTE(review): the rest of the loop body is not visible in this excerpt;
// presumably it increments N_theta_j(ix) -- confirm against the full file.
84 void initialize_N_theta_j(
size_t C_j) {
85 N_theta_j = Eigen::MatrixXd::Zero(model->num_topics, 1);
86 for (
size_t i = 0; i < C_j; ++i) {
87 size_t ix = random::fast_uniform<size_t>(0, model->num_topics-1);
// Builds a (num_topics x 1) column gamma_ij for word index w_ij. For each
// topic k the entry is
//   (N_phi(w_ij, k) + beta) * (N_theta_j(k) + alpha)
//     / (N_Z(k) + beta * vocab_size)
// using alpha, beta and vocab_size from the model. The column is then
// normalized in place. The return statement is not visible in this excerpt.
101 Eigen::MatrixXd compute_gamma(
size_t w_ij) {
104 Eigen::MatrixXd gamma_ij(model->num_topics, 1);
105 for (
size_t k = 0; k < model->num_topics; ++k) {
106 gamma_ij(k, 0) = (N_phi(w_ij, k) + model->beta) *
107 (N_theta_j(k) + model->alpha) /
108 (N_Z(k) + model->beta * model->vocab_size);
// NOTE(review): Eigen's normalize() divides by the Euclidean (Frobenius)
// norm, so the entries will not sum to 1. If gamma_ij is meant to be a
// distribution over topics, dividing by gamma_ij.sum() would be needed --
// confirm the intended normalization.
110 gamma_ij.normalize();
// Updates N_theta_j as a weighted blend of its current value and
// C_j * gamma_ij:
//   N_theta_j = a * N_theta_j + (1 - a) * C_j * gamma_ij,  a = (1 - rho)^count_ij
// The local `alpha` below is this blend weight; it is a separate variable
// from model->alpha, which compute_gamma reads.
// NOTE(review): the declarations of count_ij, rho and C_j are not visible in
// this excerpt (part of the parameter list is missing).
118 void update_N_theta_j(
const Eigen::MatrixXd& gamma_ij,
122 double alpha = std::pow(1 - rho, count_ij);
123 N_theta_j = alpha * N_theta_j + C_j * gamma_ij * (1 - alpha);
// Adds gamma_ij scaled by C / M into the N_Z_hat accumulator.
// The expression groups as (gamma_ij * C) / M, so the division is applied to
// the double-valued matrix rather than being an integer division of C by M.
129 void update_N_Z_hat(
const Eigen::MatrixXd& gamma_ij,
130 size_t M,
size_t C) {
131 N_Z_hat += gamma_ij * C / M;
// Blends N_Z toward N_Z_hat with weight rho:
//   N_Z = (1 - rho) * N_Z + rho * N_Z_hat
137 void update_N_Z(
double rho) {
138 N_Z = (1 - rho) * N_Z + rho * N_Z_hat;
// Blends N_phi toward N_phi_hat with weight rho:
//   N_phi = (1 - rho) * N_phi + rho * N_phi_hat
144 void update_N_phi(
double rho) {
145 N_phi = (1 - rho) * N_phi + rho * N_phi_hat;
// For every topic k, adds gamma_ij(k) * C / M to N_phi_hat(word_ij, k).
// The scalar expression evaluates left to right in double arithmetic
// (gamma_ij(k) * C first), so C / M is not an integer division.
// NOTE(review): the declaration of word_ij is not visible in this excerpt
// (a parameter line is missing).
151 void update_N_phi_hat(
const Eigen::MatrixXd& gamma_ij,
153 size_t M,
size_t C) {
154 for (
size_t k = 0; k < model->num_topics; ++k) {
155 N_phi_hat(word_ij, k) += gamma_ij(k) * C / M;
// Returns s / (tau + t)^kappa as a double.
//   t     - added to tau to form the base of the power.
//   s     - numerator, default 10.
//   tau   - offset added to t, default 1000.
//   kappa - exponent, default 0.9.
// tau + t is summed as size_t before std::pow is applied.
166 double compute_rho(
size_t t,
size_t s = 10,
size_t tau=1000,
double kappa = .9) {
167 return (
double) s / std::pow(tau + t, kappa);