Turi Create 4.0 — lbfgs.hpp (API reference source listing)
/* Copyright © 2017 Apple Inc. All rights reserved.
 *
 * Use of this source code is governed by a BSD-3-clause license that can
 * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 */
6 #ifndef TURI_LBFGS_2_H_
7 #define TURI_LBFGS_2_H_
8 
9 #include <ml/optimization/optimization_interface.hpp>
10 #include <core/data/flexible_type/flexible_type.hpp>
11 #include <core/storage/sframe_data/sframe.hpp>
12 
13 #include <ml/optimization/utils.hpp>
14 #include <ml/optimization/optimization_interface.hpp>
15 #include <ml/optimization/regularizer_interface.hpp>
16 #include <ml/optimization/line_search-inl.hpp>
17 #include <Eigen/Core>
18 
19 typedef Eigen::VectorXd DenseVector;
20 typedef Eigen::MatrixXd DenseMatrix;
21 
22 namespace turi {
23 
24 namespace optimization {
25 
26 /**
27  * Solver status.
28  */
29 struct solver_status {
30  size_t iteration = 0; /*!< Iterations taken */
31  double solver_time = 0; /*!< Wall clock time (s) */
32  DenseVector solution; /*!< Current Solution */
33  DenseVector gradient; /*!< Current gradient */
34  DenseMatrix hessian; /*!< Current hessian */
35  double residual = NAN; /*!< Residual norm */
36  double function_value = NAN; /*!< Function value */
37  size_t num_function_evaluations = 0; /*!< Function evals */
38  size_t num_gradient_evaluations = 0; /*!< Gradient evals */
39  double step_size = 0; /*!< Current step size */
40 
42 };
43 
44 
/**
 * \ingroup group_optimization
 * \addtogroup LBFGS LBFGS
 * \{
 */
50 
/**
 *
 * Solve a first_order_opt_interface model with an LBFGS
 * implementation.
 *
 * The implementation is based on Algorithm 7.4 (pg 178) of [1].
 *
 * This subroutine solves an unconstrained minimization problem
 * using the limited memory BFGS method. The routine is especially
 * effective on problems involving a large number of variables. In
 * a typical iteration of this method an approximation Hk to the
 * inverse of the Hessian is obtained by applying M BFGS updates to
 * a diagonal matrix Hk0, using information from the previous M steps.
 * The user specifies the number M, which determines the amount of
 * storage required by the routine. The user may also provide the
 * diagonal matrices Hk0 if not satisfied with the default choice.
 * The algorithm is described in [2].
 *
 * The user is required to calculate the function value and its
 * gradient.
 *
 * The steplength is determined at each iteration by means of the
 * line search routine MCVSRCH, which is a slight modification of
 * the routine CSRCH written by More' and Thuente.
 *
 *
 * References:
 *
 * (1) Wright S.J and J. Nocedal. Numerical optimization. Vol. 2.
 * New York: Springer, 1999.
 *
 * (2) "On the limited memory BFGS method for large scale optimization", by D.
 * Liu and J. Nocedal, Mathematical Programming B 45 (1989) 503-528.
 *
 * \param[in] model Model with first order optimization interface.
 * \param[in] init_point Starting point for the solver.
 * \param[in] opts Solver options.
 * \param[in] reg Shared ptr to an interface to a smooth regularizer.
 * \returns stats Solver return stats.
 * \tparam Vector Sparse or dense gradient representation.
 *
 */
93 class lbfgs_solver {
94  public:
95 
96  /** Construct the solver around a specific model interface.
97  *
98  * \param[in] model Model with first order optimization interface.
99  */
100  lbfgs_solver(std::shared_ptr<first_order_opt_interface> _model)
101  : model(_model) {}
102 
103  /** Sets up (or resets) the solver.
104  *
105  * \param[in] init_point Starting point for the solver.
106  * \param[in] opts Solver options. Options are "lbfgs_memory_level" and
107  * "convergence_threshold". If not given, defaults are
108  * taken from the table in optimization_interface.hpp.
109  *
110  * \param[in] reg Shared ptr to an interface to a smooth regularizer.
111  */
112  void setup(const DenseVector& init_point,
113  const std::map<std::string, flexible_type>& opts,
114  const std::shared_ptr<smooth_regularizer_interface>& reg = nullptr);
115 
116  /** Perform the next update of the solution.
117  *
118  * Call this method repeatedly to perform the optimization.
119  * Each iteration updates the solution point with one step.
120  */
121  bool next_iteration();
122 
123  /** The status after a given iteration.
124  *
125  *
126  * The best solution so far is given by status().solution.
127  *
128  */
129  const solver_status& status() const { return m_status; }
130 
131  private:
132 
133  timer compute_timer;
134 
135  // The model used in the optimization.
136  std::shared_ptr<first_order_opt_interface> model;
137 
138  std::shared_ptr<smooth_regularizer_interface> reg;
139 
140  size_t num_variables = 0;
141  size_t lbfgs_memory_level = 0;
142  double function_value = NAN, function_scaling_factor = 1.0;
143 
144  solver_status m_status;
145 
146  // LBFGS storage
147  // The search steps and gradient differences are stored in a order
148  // controlled by the start point.
149 
150  // Step difference (prev m iters)
151  Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor> y;
152 
153  // Gradient difference (prev m iters)
154  Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor> s;
155 
156  DenseVector q; // Storage required for the 2-loop recursion
157  DenseVector rho; // Scaling factors (prev m iters)
158  DenseVector alpha; // Step sizes (prev m iters)
159 
160  // Buffers used internally. The function value and gradient here is scaled my
161  // m_status.function_scaling_value for numerical stability.
162  DenseVector delta_point, gradient, delta_grad, previous_gradient;
163 
164  double convergence_threshold = 0;
165 };
166 
167 // Old version for backwards compatibility with the previous interface.
168 // Includes printing.
169 solver_return lbfgs_compat(
170  std::shared_ptr<first_order_opt_interface> model,
171  const DenseVector& init_point,
172  const std::map<std::string, flexible_type>& opts,
173  const std::shared_ptr<smooth_regularizer_interface>& reg = nullptr);
174 
175 /** Solves lbgfgs problem end-to-end.
176  *
177  * This class wraps the above iterative solver in a convenience function,
178  * iterating the solution until completion.
179  *
180  * \param model The implementation of first_order_opt_interface used in the
181  * optimization.
182  *
183  * \param init_point The initial point at which the optimization starts.
184  *
185  * \param opts The options. Uses all the options given to setup() in the
186  * lbfgs_solver class, plus "max_iterations" to terminate the optimization after
187  * a given number of iterations.
188  *
189  * \param reg Optional regularization interface.
190  *
191  */
193  std::shared_ptr<first_order_opt_interface> model,
194  const DenseVector& init_point,
195  const std::map<std::string, flexible_type>& opts,
196  const std::shared_ptr<smooth_regularizer_interface>& reg = nullptr);
197 
198 
199 
200 } // namespace optimization
201 
202 /// \}
203 } // namespace turi
204 
205 #endif
Cross-references from the documentation index (preserved for reference):
- `OPTIMIZATION_STATUS status` — optimization status member of solver_status; defined at lbfgs.hpp:41 in the upstream source (missing from the listing above).
- `solver_status lbfgs(std::shared_ptr<first_order_opt_interface> model, const DenseVector& init_point, const std::map<std::string, flexible_type>& opts, const std::shared_ptr<smooth_regularizer_interface>& reg = nullptr)` — full signature of the end-to-end solver.
- `lbfgs_solver(std::shared_ptr<first_order_opt_interface> _model)` — constructor; defined at lbfgs.hpp:100.
- `const solver_status& status() const` — status accessor; defined at lbfgs.hpp:129.
- `timer` — a simple class that can be used for benchmarking/timing up to microsecond resolution; defined at timer.hpp:59.