Turi Create 4.0
gradient_descent-inl.hpp
/* Copyright © 2017 Apple Inc. All rights reserved.
 *
 * Use of this source code is governed by a BSD-3-clause license that can
 * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 */
#ifndef TURI_GRADIENT_DESCENT_H_
#define TURI_GRADIENT_DESCENT_H_

#include <core/data/flexible_type/flexible_type.hpp>
#include <Eigen/Core>

#include <ml/optimization/utils.hpp>
#include <ml/optimization/optimization_interface.hpp>
#include <ml/optimization/regularizer_interface.hpp>
#include <ml/optimization/line_search-inl.hpp>
#include <core/logging/table_printer/table_printer.hpp>


// TODO: List of TODOs for this file
//------------------------------------------------------------------------------
// 1. Constant line search tuning?

namespace turi {

namespace optimization {


/**
 * \ingroup group_optimization
 * \addtogroup gradient_descent Gradient Descent
 * \{
 */

/**
 *
 * Solve a first_order_opt_interface model with the gradient descent
 * method.
 *
 * \param[in,out] model Model with first order optimization interface.
 * \param[in] init_point Starting point for the solver.
 * \param[in,out] opts Solver options.
 * \param[in] reg Shared pointer to an interface to a regularizer.
 * \returns stats Solver return stats.
 * \tparam Vector Sparse or dense gradient representation.
 *
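 * Example (a minimal sketch, not part of this header: `my_model` stands for
 * any class implementing first_order_opt_interface, the option values are
 * illustrative, and num_variables() is assumed to report the problem size):
 *
 * \code
 * std::map<std::string, flexible_type> opts = {
 *     {"max_iterations", 10},
 *     {"convergence_threshold", 1e-5},
 *     {"step_size", 1.0},
 * };
 * DenseVector init_point(my_model.num_variables());  // hypothetical model
 * init_point.setZero();
 * solver_return stats = gradient_descent(my_model, init_point, opts);
 * \endcode
 *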
*/
template <typename Vector = DenseVector>
inline solver_return gradient_descent(first_order_opt_interface& model,
    const DenseVector& init_point,
    std::map<std::string, flexible_type>& opts,
    const std::shared_ptr<regularizer_interface> reg=NULL) {

  // Benchmarking utils.
  timer t;
  double start_time = t.current_time();

  logprogress_stream << "Starting Gradient Descent" << std::endl;
  logprogress_stream << "--------------------------------------------------------" << std::endl;
  std::stringstream ss;
  ss.str("");

  // Step 1: Algorithm option init
  // ------------------------------------------------------------------------
  // Check that all solver options are present, then load them.
  size_t iter_limit = opts["max_iterations"];
  double convergence_threshold = opts["convergence_threshold"];
  double step_size = opts["step_size"];
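  // Note: "step_size" only seeds the very first search; the line search
  // below retunes it on every iteration.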
  size_t iters = 1;
  solver_return stats;

  // Print progress
  table_printer printer(
      model.get_status_header({"Iteration", "Passes", "Step size", "Elapsed Time"}));
  printer.print_header();


  // First compute the residual. Sometimes the starting point is already the
  // solution; in that case we don't want to waste time performing a full
  // step of the algorithm.
  DenseVector point = init_point;
  Vector gradient(point.size());
  double func_value;
  model.compute_first_order_statistics(point, gradient, func_value);
  double residual = compute_residual(gradient);
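  // This residual is a norm of the gradient, so it vanishes exactly at a
  // stationary point; it doubles as the convergence measure below.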

  stats.func_evals++;
  stats.gradient_evals++;

  // We need to keep the previous point around to measure progress.
  DenseVector delta_point = point;
  delta_point.setZero();

  // The first iteration will take longer. Warn the user.
  logprogress_stream << "Tuning step size. First iteration could take longer"
                     << " than subsequent iterations." << std::endl;


  // NaN checking!
  if (!std::isfinite(residual)) {
    stats.status = OPTIMIZATION_STATUS::OPT_NUMERIC_OVERFLOW;
  }

  // Step 2: Algorithm starts here
  // ------------------------------------------------------------------------
  // While not converged
  while ((residual >= convergence_threshold) && (iters <= iter_limit)) {


    // Line search for step size.
    ls_return ls_stats;

    // Pick a line search based on the regularizer.
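    // With a (possibly non-smooth) regularizer we take a proximal-gradient
    // style step: a backtracking (Armijo) search on the smooth part, followed
    // by the regularizer's proximal operator after the update below. Without
    // a regularizer, More-Thuente can enforce the strong Wolfe conditions
    // directly.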
    if (reg != NULL) {
      step_size *= 2;
      ls_stats = backtracking(model,
                              step_size,
                              func_value,
                              point,
                              gradient,
                              -gradient,
                              reg);
    } else {
      ls_stats = more_thuente(model,
                              step_size,
                              func_value,
                              point,
                              gradient,
                              -gradient);
    }


    // Add info from the line search.
    stats.func_evals += ls_stats.func_evals;
    stats.gradient_evals += ls_stats.gradient_evals;
    step_size = ls_stats.step_size;

    // Line search failed
    if (ls_stats.status == false) {
      stats.status = OPTIMIZATION_STATUS::OPT_LS_FAILURE;
      break;
    }

    // \delta x_k = x_{k} - x_{k-1}
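    // Gradient-descent update: x_{k+1} = x_k - \alpha_k \nabla f(x_k), where
    // \alpha_k is the step size chosen by the line search above.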
    delta_point = point;
    point = point - step_size * gradient;
    if (reg != NULL)
      reg->apply_proximal_operator(point, step_size);
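      // (e.g. an L2 penalty's proximal operator rescales the point toward
      //  zero, while an L1 penalty soft-thresholds each coordinate.)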
    delta_point = point - delta_point;

    // Numerical error: insufficient progress.
    if (delta_point.norm() <= OPTIMIZATION_ZERO) {
      stats.status = OPTIMIZATION_STATUS::OPT_NUMERIC_ERROR;
      break;
    }
    // Numerical error: numerical overflow. (Step size was too large.)
    if (!delta_point.array().isFinite().all()) {
      stats.status = OPTIMIZATION_STATUS::OPT_NUMERIC_OVERFLOW;
      break;
    }

    // Compute the residual norm (to check for convergence).
    model.compute_first_order_statistics(point, gradient, func_value);
    stats.num_passes++;
    residual = compute_residual(gradient);
    iters++;

    // Print progress
    auto stat_info = {std::to_string(iters),
                      std::to_string(stats.num_passes),
                      std::to_string(step_size),
                      std::to_string(t.current_time())};

    auto row = model.get_status(point, stat_info);
    printer.print_progress_row_strs(iters, row);
  }

  printer.print_footer();

  // Step 3: Return optimization model status.
  // ------------------------------------------------------------------------
  if (stats.status == OPTIMIZATION_STATUS::OPT_UNSET) {
    if (iters < iter_limit) {
      stats.status = OPTIMIZATION_STATUS::OPT_OPTIMAL;
    } else {
      stats.status = OPTIMIZATION_STATUS::OPT_ITERATION_LIMIT;
    }
  }
  stats.iters = static_cast<int>(iters);
  stats.residual = residual;
  stats.gradient = gradient;
  stats.func_value = func_value;
  stats.solve_time = t.current_time() - start_time;
  stats.solution = point;
  stats.progress_table = printer.get_tracked_table();

  // Display solver stats.
  log_solver_summary_stats(stats);

  return stats;
}


} // namespace optimization

/// \}
} // namespace turi

#endif