Turi Create  4.0
perplexity.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_TEXT_PERPLEXITY_H_
7 #define TURI_TEXT_PERPLEXITY_H_
8 
9 #include <core/storage/sframe_interface/unity_sarray.hpp>
10 
11 namespace turi {
12 namespace text {
13 
14 /**
15  * Compute perplexity, a measure of the likelihood of data given
16  * the current parameters of the model.
17  *
18  * For each word in each document, we compute
19  * \f[ \Pr(word | \theta[doc\_id,:], \phi[word\_id,:]) =
20  * \sum_k \theta[doc\_id, k] * \phi[word\_id, k] \f]
21  *
22  * We compute the log-likelihood to be:
23  * \f[l(D) = \sum_{i \in D} \sum_{j \in D_i} count_{i,j} *
24  * \log \Pr(word_{i,j} | \theta, \phi)\f]
25  *
26  * and perplexity to be
27  * \f[\exp \{ - l(D) / \sum_i \sum_j count_{i,j} \}\f]
28  *
29  * For more information, see http://en.wikipedia.org/wiki/Perplexity.
30  */
31 double perplexity(std::shared_ptr<sarray<flexible_type>> documents,
32  const std::shared_ptr<sarray<flexible_type>> doc_topic_prob,  // presumably theta in the formulas above -- confirm against the definition
33  const std::shared_ptr<sarray<flexible_type>> word_topic_prob,  // presumably phi in the formulas above -- confirm against the definition
34  const std::shared_ptr<sarray<flexible_type>> vocabulary);
35 
36 } // text
37 } // turi
38 
39 #endif