// Turi Create 4.0
// regularizers-inl.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_REGULARIZER_H_
7 #define TURI_REGULARIZER_H_
8 
9 #include <string>
10 #include <core/data/flexible_type/flexible_type.hpp>
11 
12 // Eigen
13 #include <Eigen/Core>
14 #include <Eigen/SparseCore>
15 
// Optimization
17 #include <ml/optimization/optimization_interface.hpp>
18 #include <ml/optimization/regularizer_interface.hpp>
19 
20 // TODO: List of todo's for this file
21 //------------------------------------------------------------------------------
22 //
23 
24 namespace turi {
25 
26 namespace optimization {
27 
28 
29 /**
30  * \ingroup group_optimization
31  * \addtogroup regularizers Regularizers
32  * \{
33  */
34 
35 
36 /**
37  * Interface for the regularizer (Scaled L2-norm)
38  *
39  * f(x) = \sum_{i} lambda_i * x_i^2
40  *
41  */
43 
44  protected:
45 
46  DenseVector lambda; /**< Penalty on the regularizer */
47  size_t variables; /**< # Variables in the problem */
48 
49  public:
50 
51 
52  /**
53  * Default constructor.
54  */
55  l2_norm(const DenseVector& _lambda){
56  lambda= _lambda;
57  variables = _lambda.size();
58  }
59 
60  /**
61  * Default desctuctor. Do nothing.
62  */
64  }
65 
66  /**
67  * Compute the hessian of the regularizer at a given point.
68  * \param[in] point Point at which we are computing the gradient.
69  * \param[in,out] hessian Diagonal matrix as the hessian gradient.
70  *
71  */
72  inline void compute_hessian(const DenseVector &point, DiagonalMatrix
73  &hessian) const {
74  hessian = 2 * lambda.asDiagonal();
75  }
76 
77  /**
78  * Compute the function value of the regularizer at a given point.
79  * \param[in] point Point at which we are computing the gradient.
80  *
81  */
82  inline double compute_function_value(const DenseVector &point) const{
83  DASSERT_EQ(variables, point.size());
84  return lambda.dot(point.cwiseAbs2());
85  }
86 
87 
88  /**
89  * Compute the gradient (or subgradient) at the given point.
90  *
91  * \param[in] point Point at which we are computing the gradient.
92  * \param[out] gradient Dense gradient
93  *
94  */
95  inline void compute_gradient(const DenseVector &point, DenseVector& gradient)
96  const{
97  DASSERT_EQ(variables, point.size());
98  gradient = 2 * lambda.cwiseProduct(point);
99  }
100 
101  /**
102  * Compute the proximal operator for the l2-regularizer
103  *
104  * \param[in,out] point Point at which we are computing the gradient.
105  * \param[in] penalty Penalty
106  *
107  * \note The proximal operator for lambda * ||x||^2 at the point v is
108  * given by
109  * v/(1 + 2*lambda*penalty)
110  *
111  */
112  inline void apply_proximal_operator(DenseVector &point, const double&
113  _penalty=0)const{
114  DASSERT_EQ(variables, point.size());
115  for(size_t i = 0; i < variables; i++)
116  point[i] = point[i] / (1 + 2*_penalty*lambda[i]);
117  }
118 
119 
120 };
121 
122 
123 /**
124  * Interface for the regularizer (Scaled L1-norm)
125  *
126  * f(x) = \sum_{i} lambda_i * |x_i|
127  *
128  */
130 
131  protected:
132 
133  DenseVector lambda; /**< Penalty on the regularizer */
134  size_t variables; /**< # Variables in the problem */
135 
136  public:
137 
138  /**
139  * Default constructor.
140  */
141  l1_norm(const DenseVector& _lambda){
142  lambda= _lambda;
143  variables = _lambda.size();
144  }
145 
146  /**
147  * Default desctuctor. Do nothing.
148  */
150  }
151 
152  /**
153  * Compute the function value of the regularizer at a given point.
154  * \param[in] point Point at which we are computing the gradient.
155  *
156  */
157  inline double compute_function_value(const DenseVector &point) const{
158  DASSERT_EQ(variables, point.size());
159  return lambda.dot(point.cwiseAbs());
160  }
161 
162 
163  /**
164  * Compute the subgradient at the given point.
165  *
166  * \param[in] point Point at which we are computing the gradient.
167  * \param[out] gradient Dense sub-gradient
168  *
169  */
170  inline void compute_gradient(const DenseVector &point, DenseVector& gradient) const{
171  DASSERT_EQ(variables, point.size());
172  gradient.setZero();
173  for(size_t i = 0; i < variables; i++)
174  if (gradient[i] > OPTIMIZATION_ZERO)
175  gradient[i] = lambda[i];
176  else if (gradient[i] < - OPTIMIZATION_ZERO)
177  gradient[i] = - lambda[i];
178  }
179 
180  /**
181  * Compute the proximal operator for the l2-regularizer
182  *
183  * \param[in,out] point Point at which we are computing the gradient.
184  * \param[in] penalty Penalty
185  *
186  * \note The proximal operator for lambda * ||x||_1 at the point v is
187  * given by
188  * soft(x, lambda) = (x - lambda)_+ - (-x - lambda)_+
189  *
190  */
191  inline void apply_proximal_operator(DenseVector &point, const double&
192  _penalty=0)const{
193  DASSERT_EQ(variables, point.size());
194  for(size_t i = 0; i < variables; i++)
195  point[i] = std::max(point[i] - _penalty*lambda[i], 0.0) -
196  std::max(-point[i] - _penalty*lambda[i], 0.0);
197  }
198 
199 
200 };
201 
202 
203 /**
204  * Interface for the elastic net regularizer (Scaled L1-norm)
205  *
206  * f(x) = \sum_{i} alpha_i * |x_i| + \sum_{i} beta_i * x_i^2
207  *
208  */
210 
211  protected:
212 
213  DenseVector alpha; /**< Penalty on the l1-regularizer */
214  DenseVector beta; /**< Penalty on the l2-regularizer */
215  size_t variables; /**< # Variables in the problem */
216 
217  public:
218 
219 
220  /**
221  * Default constructor.
222  */
223  elastic_net(const DenseVector& _alpha, const DenseVector& _beta){
224  DASSERT_EQ(_alpha.size(), _beta.size());
225  alpha = _alpha;
226  beta = _beta;
227  variables = _alpha.size();
228  }
229 
230  /**
231  * Default desctuctor. Do nothing.
232  */
234  }
235 
236  /**
237  * Compute the function value of the regularizer at a given point.
238  * \param[in] point Point at which we are computing the gradient.
239  *
240  */
241  inline double compute_function_value(const DenseVector &point) const{
242  DASSERT_EQ(variables, point.size());
243  return alpha.dot(point.cwiseAbs()) + beta.dot(point.cwiseAbs2());
244  }
245 
246 
247  /**
248  * Compute the subgradient at the given point.
249  *
250  * \param[in] point Point at which we are computing the gradient.
251  * \param[out] gradient Dense sub-gradient
252  *
253  */
254  inline void compute_gradient(const DenseVector &point, DenseVector& gradient) const{
255  DASSERT_EQ(variables, point.size());
256  gradient = 2 * beta.cwiseProduct(point);
257  for(size_t i = 0; i < variables; i++)
258  if (gradient[i] > OPTIMIZATION_ZERO)
259  gradient[i] += alpha[i];
260  else if (gradient[i] < - OPTIMIZATION_ZERO)
261  gradient[i] += -alpha[i];
262  }
263 
264  /**
265  * Compute the proximal operator for the elastic-regularizer
266  *
267  * \param[in,out] point Point at which we are computing the gradient.
268  * \param[in] penalty Penalty
269  *
270  * \note The proximal operator for alpha||x||_1 + beta||x||_2^2 at
271  * y = soft(x, alpha) = (x - alpha)_+ - (-x - alpha)_+
272  * x = y / (1 + 2 beta)
273  *
274  * \note Do not swap the order.
275  *
276  */
277  inline void apply_proximal_operator(DenseVector &point, const double&
278  _penalty=0)const{
279  DASSERT_EQ(variables, point.size());
280  for(size_t i = 0; i < variables; i++){
281  point[i] = std::max(point[i] - _penalty*alpha[i], 0.0) -
282  std::max(-point[i] - _penalty*alpha[i], 0.0);
283  point[i] = point[i] / (1 + 2*_penalty*beta[i]);
284  }
285  }
286 
287 
288 };
289 
290 /// \}
291 
292 } // optimization
293 } // turicreate
294 
295 #endif