Turi Create 4.0
optimization_interface.hpp
/* Copyright © 2017 Apple Inc. All rights reserved.
 *
 * Use of this source code is governed by a BSD-3-clause license that can
 * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 */
#ifndef TURI_OPTIMIZATION_INTERFACE_H_
#define TURI_OPTIMIZATION_INTERFACE_H_

#include <map>
#include <string>
#include <core/data/flexible_type/flexible_type.hpp>
#include <Eigen/Core>
#include <Eigen/SparseCore>
#include <core/storage/sframe_data/sframe.hpp>

// TODO: List of todo's for this file
//------------------------------------------------------------------------------
//
namespace turi {

// Typedefs for matrices and vectors
typedef Eigen::Matrix<double, Eigen::Dynamic, 1> DenseVector;
typedef Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic> DenseMatrix;
typedef Eigen::DiagonalMatrix<double, Eigen::Dynamic> DiagonalMatrix;
typedef Eigen::SparseVector<double> SparseVector;
typedef Eigen::SparseMatrix<double> SparseMatrix;

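/**
 * Editor's sketch (not part of the original header): how these typedefs are
 * typically used together. Eigen lets sparse and dense types mix, so a sparse
 * data row can be applied to a dense coefficient vector directly:
 *
 * \code
 * DenseVector coefs(3);
 * coefs << 1.0, -2.0, 0.5;             // dense coefficients
 *
 * SparseVector row(3);                 // one sparse data row
 * row.insert(0) = 4.0;
 * row.insert(2) = 2.0;
 *
 * double prediction = row.dot(coefs);  // 4*1.0 + 2*0.5 = 5.0
 * \endcode
 */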
namespace optimization {


/**
 * \ingroup group_optimization
 * \addtogroup optimization_config Optimization Model Types And Config
 * \{
 */

/// Default solver options. Solver options are stored as a map from option
/// name (string) to flexible_type value.
const std::map<std::string, flexible_type> default_solver_options {
  {"convergence_threshold", 1e-2},
  {"step_size", 1.0},
  {"lbfgs_memory_level", 3},
  {"mini_batch_size", 1000},
  {"max_iterations", 10},
  {"auto_tuning", true},
};

/// Types of the solver options.
const std::map<std::string, flex_type_enum> default_solver_option_types {
  {"convergence_threshold", flex_type_enum::FLOAT},
  {"step_size", flex_type_enum::FLOAT},
  {"lbfgs_memory_level", flex_type_enum::INTEGER},
  {"mini_batch_size", flex_type_enum::INTEGER},
  {"max_iterations", flex_type_enum::INTEGER},
  {"auto_tuning", flex_type_enum::INTEGER},
};
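/**
 * Editor's sketch (not part of the original header): a caller would normally
 * copy default_solver_options, override a few entries, and validate option
 * names against default_solver_option_types before handing the map to a
 * solver:
 *
 * \code
 * std::map<std::string, flexible_type> opts = default_solver_options;
 * opts["max_iterations"] = 100;            // run longer than the default
 * opts["convergence_threshold"] = 1e-4;    // and to a tighter tolerance
 *
 * for (const auto& kv : opts) {
 *   if (default_solver_option_types.count(kv.first) == 0) {
 *     throw std::invalid_argument("Unknown solver option: " + kv.first);
 *   }
 * }
 * \endcode
 */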


// Structures & Enums
// ---------------------------------------------------------------------------

/// Optimization status
enum class OPTIMIZATION_STATUS {
  OPT_UNSET = 0,            ///< Optimizer wasn't called.
  OPT_LOADED = 1,           ///< Model was loaded but the solution was not found.
  OPT_OPTIMAL = 2,          ///< Optimal solution found.
  OPT_ITERATION_LIMIT = 3,  ///< Iteration limit reached.
  OPT_TIME_LIMIT = 4,       ///< Time limit reached.
  OPT_INTERRUPTED = 5,      ///< Optimization terminated by user.
  OPT_NUMERIC_ERROR = 6,    ///< Numerical underflow (not enough progress).
  OPT_NUMERIC_OVERFLOW = 7, ///< Numerical overflow. Step size parameter may be too large.
  OPT_LS_FAILURE = 8,       ///< Line search iteration limit hit.
  OPT_IN_PROGRESS = 9,      ///< Optimization is still in progress.
};



// Global constants for the optimization library
const double OPTIMIZATION_INFTY = 1.0e20;  ///< Optimization method infinity.
const double OPTIMIZATION_ZERO = 1.0e-10;  ///< Optimization method zero.

// Line search parameters
const double LS_INFTY = 1.0e20;        ///< No steps that are too large.
const double LS_ZERO = 1.0e-9;         ///< Smallest allowable step length.
const double LS_C1 = 1.0e-4;           ///< Line search sufficient decrease parameter.
const double LS_C2 = 0.7;              ///< Line search curvature condition parameter.
const int LS_MAX_ITER = 20;            ///< Num func evals before a failed line search.
const double LS_SAFE_GUARD = 5.0e-2;   ///< Safeguarding tolerance for line search.
const double LS_MAX_STEP_SIZE = 25.0;  ///< Max allowable step size.
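/**
 * Editor's sketch (not part of the original header): LS_C1 and LS_C2 are the
 * classic Wolfe line search constants. For a candidate step size alpha along
 * a descent direction d from x, with gTd = gradient(x).dot(d), the (weak)
 * Wolfe conditions a line search would test look like:
 *
 * \code
 * // f_new   = f(x + alpha * d)
 * // gTd_new = gradient(x + alpha * d).dot(d)
 * bool wolfe_conditions_hold(double f_x, double f_new, double gTd,
 *                            double gTd_new, double alpha) {
 *   bool sufficient_decrease = (f_new <= f_x + LS_C1 * alpha * gTd); // Armijo
 *   bool curvature = (gTd_new >= LS_C2 * gTd);
 *   return sufficient_decrease && curvature;
 * }
 * \endcode
 */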

/// Finite difference parameters (required for gradient checking)
const double FINITE_DIFFERENCE_EPSILON = 1e-5;
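/**
 * Editor's sketch (not part of the original header): FINITE_DIFFERENCE_EPSILON
 * is the perturbation size for a central-difference gradient check, comparing
 * each analytic gradient entry against
 * (f(x + eps * e_i) - f(x - eps * e_i)) / (2 * eps):
 *
 * \code
 * template <typename Func>
 * bool check_gradient(Func f, const DenseVector& x,
 *                     const DenseVector& analytic_grad, double tol = 1e-4) {
 *   const double eps = FINITE_DIFFERENCE_EPSILON;
 *   DenseVector xp = x, xm = x;
 *   for (Eigen::Index i = 0; i < x.size(); ++i) {
 *     xp(i) += eps; xm(i) -= eps;
 *     double numeric = (f(xp) - f(xm)) / (2 * eps);
 *     xp(i) = x(i); xm(i) = x(i);  // restore before the next coordinate
 *     if (std::abs(numeric - analytic_grad(i)) > tol) return false;
 *   }
 *   return true;
 * }
 * \endcode
 */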

/**
 * Solver return type structure.
 * \note The number of passes over the data need not be the same thing
 * as the number of iterations. Each iteration could require multiple passes
 * over the data (e.g. for line search).
*/
struct _solver_return {

  int iters = -1;          /*!< Iterations taken */
  double solve_time = -1;  /*!< Wall clock time (s) */
  DenseVector solution;    /*!< Solution */
  DenseVector gradient;    /*!< Terminal gradient */
  DenseMatrix hessian;     /*!< Terminal hessian */
  double residual;         /*!< Residual norm */
  double func_value;       /*!< Function value */
  int func_evals = 0;      /*!< Function evals */
  int gradient_evals = 0;  /*!< Gradient evals */
  int num_passes = 0;      /*!< Number of passes */
  OPTIMIZATION_STATUS status = OPTIMIZATION_STATUS::OPT_UNSET; /*!< Status */
  sframe progress_table = sframe();

};
typedef struct _solver_return solver_return;
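/**
 * Editor's sketch (not part of the original header): typical inspection of a
 * solver_return after a solve (the solver call itself is hypothetical):
 *
 * \code
 * solver_return stats = run_solver(model, init_point, opts);  // hypothetical
 * if (stats.status == OPTIMIZATION_STATUS::OPT_OPTIMAL) {
 *   // stats.solution is usable; stats.residual gives the final residual norm.
 * } else if (stats.status == OPTIMIZATION_STATUS::OPT_ITERATION_LIMIT) {
 *   // Raise "max_iterations" and warm-start from stats.solution.
 * }
 * \endcode
 */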

/**
 * Line search return type.
*/
struct _ls_return {

  double step_size = 1.0;  /*!< Step size found */
  bool status = false;     /*!< Line search status */
  int func_evals = 0;      /*!< Function evals */
  int gradient_evals = 0;  /*!< Gradient evals */
  int num_passes = 0;      /*!< Number of passes */

};
typedef struct _ls_return ls_return;


// Classes
// ---------------------------------------------------------------------------

/**
 * The interface to inherit from to describe a first order optimization model.
 *
 * This model must implement methods to compute the mini-batch gradient and
 * function value.
 */
class first_order_opt_interface {

 public:

  /**
   * Default destructor.
   */
  virtual ~first_order_opt_interface();



  /**
   * Get the number of examples in the dataset (required for SGD).
   *
   * \returns Number of examples in the data.
   */
  virtual size_t num_examples() const = 0;

  /**
   * Get the number of variables in the optimization problem.
   *
   * \returns Number of variables (features) in the optimization.
   *
   * \note Statisticians beware. Bias terms are variables in the
   * optimization problem.
   */
  virtual size_t num_variables() const = 0;

  /**
   * Compute first order statistics at the given point (gradient & function
   * value).
   *
   * \param[in]  point          Point at which we are computing the stats.
   * \param[out] gradient       Dense gradient
   * \param[out] function_value Function value
   * \param[in]  mbStart        Minibatch start index
   * \param[in]  mbSize         Minibatch size (-1 implies all)
   *
   * \warning DO NOT USE this if your model is truly sparse.
   */
  virtual void compute_first_order_statistics(const DenseVector& point,
      DenseVector& gradient, double& function_value, const size_t mbStart = 0,
      const size_t mbSize = -1) = 0;

  /**
   * Optimizations for performance reasons.
   * -------------------------------------------------------------------------
   */

  /**
   * Compute the function value at a given point.
   *
   * \param[in] point   Point at which we are computing the function value.
   * \param[in] mbStart Minibatch start index
   * \param[in] mbSize  Minibatch size (-1 implies all)
   * \returns Function value at "point"
   */
  virtual double compute_function_value(const DenseVector& point,
      const size_t mbStart = 0, const size_t mbSize = -1);

  /**
   * Compute a gradient at the given point.
   *
   * \param[in]  point    Point at which we are computing the gradient.
   * \param[out] gradient Dense gradient
   * \param[in]  mbStart  Minibatch start index
   * \param[in]  mbSize   Minibatch size (-1 implies all)
   *
   * \warning See the warning on the "sparse" compute_gradient about the
   * potential for an infinite loop if you don't implement at least one of
   * the "dense" or "sparse" versions of compute_gradient.
   *
   * \warning DO NOT USE this if your model is truly sparse.
   */
  virtual void compute_gradient(const DenseVector& point,
      DenseVector& gradient, const size_t mbStart = 0,
      const size_t mbSize = -1);


  /**
   * Reset the state of the model's "randomness" source.
   *
   * \param[in] seed Seed that is the source of randomness.
   */
  virtual void reset(int seed);

  /**
   * Get the strings needed to print the header of the progress table.
   *
   * \param[in] stats A vector of strings to print at the beginning of the
   * header.
   */
  virtual std::vector<std::pair<std::string, size_t>>
      get_status_header(const std::vector<std::string>& stats);

  /**
   * Get the strings needed to print a row of the progress table.
   *
   * \param[in] coefs A vector of model coefficients.
   * \param[in] stats A vector of stats to print at the beginning of each row.
   */
  virtual std::vector<std::string> get_status(const DenseVector& coefs,
      const std::vector<std::string>& stats);

};
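/**
 * Editor's sketch (not part of the original header): a minimal least-squares
 * model f(w) = 0.5 * ||Xw - y||^2 implementing the pure virtuals above. The
 * mini-batch gradient over rows [mbStart, mbStart + mbSize) is
 * X_b' * (X_b * w - y_b):
 *
 * \code
 * class least_squares_model : public first_order_opt_interface {
 *  public:
 *   least_squares_model(DenseMatrix X, DenseVector y)
 *       : X_(std::move(X)), y_(std::move(y)) {}
 *
 *   size_t num_examples() const override { return X_.rows(); }
 *   size_t num_variables() const override { return X_.cols(); }
 *
 *   void compute_first_order_statistics(const DenseVector& point,
 *       DenseVector& gradient, double& function_value,
 *       const size_t mbStart = 0, const size_t mbSize = -1) override {
 *     // mbSize = -1 (i.e. size_t(-1)) means "use all remaining rows".
 *     size_t n = (mbSize == size_t(-1)) ? X_.rows() - mbStart : mbSize;
 *     DenseVector r = X_.middleRows(mbStart, n) * point
 *                     - y_.segment(mbStart, n);
 *     function_value = 0.5 * r.squaredNorm();
 *     gradient = X_.middleRows(mbStart, n).transpose() * r;
 *   }
 *
 *  private:
 *   DenseMatrix X_;
 *   DenseVector y_;
 * };
 * \endcode
 */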


/**
 * The interface to inherit from to describe a second order optimization model.
 *
 * This model must implement methods to compute the hessian as well as the
 * gradient.
 */
class second_order_opt_interface : public first_order_opt_interface {

 public:

  /**
   * Default destructor.
   */
  virtual ~second_order_opt_interface();

  /**
   * Compute second order statistics at the given point (hessian, gradient,
   * & function value).
   *
   * \param[in]  point          Point at which we are computing the stats.
   * \param[out] hessian        Hessian computation (dense)
   * \param[out] gradient       Dense gradient
   * \param[out] function_value Function value
   */
  virtual void compute_second_order_statistics(const DenseVector& point,
      DenseMatrix& hessian, DenseVector& gradient,
      double& function_value) = 0;


  /**
   * Optimizations for performance reasons.
   * -------------------------------------------------------------------------
   */

  /**
   * Compute the hessian at the given point.
   *
   * \param[in]  point   Point at which we are computing the hessian.
   * \param[out] hessian Returns a dense hessian matrix.
   *
   * \note Hessians are always dense matrices. Even if they are sparse, it is
   * still very hard to factorize them while keeping the factors sparse.
   */
  virtual void compute_hessian(const DenseVector& point, DenseMatrix& hessian);

};
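/**
 * Editor's sketch (not part of the original header): a single (undamped)
 * Newton step driven by compute_second_order_statistics. Since hessians are
 * dense (see the note above), a dense LDLT factorization is a natural
 * default; it needs <Eigen/Cholesky>:
 *
 * \code
 * void newton_step(second_order_opt_interface& model, DenseVector& point) {
 *   DenseMatrix hessian;
 *   DenseVector gradient;
 *   double function_value = 0;
 *   model.compute_second_order_statistics(point, hessian, gradient,
 *                                         function_value);
 *
 *   // Solve H * d = -g and take the full Newton step; a real solver would
 *   // scale the step with a line search using the LS_* constants above.
 *   DenseVector direction = hessian.ldlt().solve(-gradient);
 *   point += direction;
 * }
 * \endcode
 */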

/// \}

} // namespace optimization

} // namespace turi

#endif