6 #ifndef TURI_LINE_SEARCH_H_ 7 #define TURI_LINE_SEARCH_H_ 10 #include <core/data/flexible_type/flexible_type.hpp> 13 #include <ml/optimization/optimization_interface.hpp> 14 #include <ml/optimization/regularizer_interface.hpp> 16 #include <core/logging/assertions.hpp> 28 namespace optimization {
76 inline bool cstep(
double &stx,
double &fx,
double &dx,
77 double &sty,
double &fy,
double &dy,
78 double &stp,
double &fp,
double &dp,
79 bool &brackt,
double stpmin,
double stpmax){
82 const double p66 = 0.66;
87 if ((brackt && (stp <= std::min(stx,sty) || stp >= std::max(stx,sty)))
88 || (dx*(stp-stx) >=
LS_ZERO) || (stpmax < stpmin)){
94 double sgnd = dp*(dx/std::abs(dx));
98 double theta, s,
gamma, p, q, r, stpc, stpq, stpf;
109 theta = 3*(fx - fp)/(stp - stx) + dx + dp;
110 s = std::max(std::abs(theta), std::max(std::abs(dx),std::abs(dp)));
111 gamma = s*sqrt(pow(theta/s,2) - (dx/s)*(dp/s));
116 p = (gamma - dx) + theta;
117 q = ((gamma - dx) + gamma) + dp;
119 stpc = stx + r*(stp - stx);
120 stpq = stx + ((dx/((fx-fp)/(stp-stx)+dx))/2)*(stp - stx);
122 if (std::abs(stpc-stx) < std::abs(stpq-stx)){
125 stpf = stpc + (stpq - stpc)/2;
142 theta = 3*(fx - fp)/(stp - stx) + dx + dp;
143 s = std::max(std::abs(theta), std::max(std::abs(dx),std::abs(dp)));
144 gamma = s*sqrt(pow(theta/s,2) - (dx/s)*(dp/s));
149 p = (gamma - dp) + theta;
150 q = ((gamma - dp) + gamma) + dx;
152 stpc = stp + r*(stx - stp);
153 stpq = stp + (dp/(dp-dx))*(stx - stp);
154 if (std::abs(stpc-stp) > std::abs(stpq-stp)){
170 else if (std::abs(dp) < std::abs(dx)){
174 theta = 3*(fx - fp)/(stp - stx) + dx + dp;
175 s = std::max(std::abs(theta), std::max(std::abs(dx),std::abs(dp)));
180 gamma = s*sqrt(std::max(0., pow((theta/s),2) - (dx/s)*(dp/s)));
185 p = (gamma - dp) + theta;
186 q = (gamma + (dx - dp)) +
gamma;
189 stpc = stp + r*(stx - stp);
190 }
else if (stp > stx){
196 stpq = stp + (dp/(dp-dx))*(stx - stp);
198 if (std::abs(stp-stpc) < std::abs(stp-stpq)){
204 if (std::abs(stp-stpc) > std::abs(stp-stpq)){
221 theta = 3*(fp - fy)/(sty - stp) + dy + dp;
222 s = std::max(std::abs(theta), std::max(std::abs(dy),std::abs(dp)));
223 gamma = s*sqrt(pow(theta/s,2) - (dy/s)*(dp/s));
228 p = (gamma - dp) + theta;
229 q = ((gamma - dp) + gamma) + dy;
231 stpc = stp + r*(sty - stp);
233 }
else if (stp > stx){
259 stpf = std::min(stpmax,stpf);
260 stpf = std::max(stpmin,stpf);
262 if (brackt && bound){
264 stp = std::min(stx+p66*(sty-stx),stp);
266 stp = std::max(stx+p66*(sty-stx),stp);
273 <<
" Unable to interpolate step size intervals." 328 template <
typename Vector>
332 double init_func_value,
335 DenseVector direction,
336 double function_scaling = 1.0,
337 const std::shared_ptr<smooth_regularizer_interface> reg=NULL,
347 <<
" \nInitial step step less than "<<
LS_ZERO 356 DenseVector x0 = point;
357 double Dphi0 = gradient.dot(direction);
360 <<
" \nDetected numerical difficulties." << std::endl;
376 double fx = init_func_value;
380 double fy = init_func_value;
383 double stp = init_step;
384 double f = init_func_value;
387 DenseVector reg_gradient(gradient.size());
397 double wolfe_func_dec =
LS_C1*Dphi0;
398 double wolfe_curvature =
LS_C2*Dphi0;
399 double width = stmax - stmin;
400 double width2 = 2*width;
405 const double p5 = 0.5;
406 const double p66 = 0.66;
407 const double xtrapf = 4;
415 stmin = std::min(stx,sty);
416 stmax = std::max(stx,sty);
419 stmax = stp + xtrapf*(stp - stx);
428 if ( (infoc ==
false)
429 || (brackt && (stmax-stmin <=
LS_ZERO))){
432 <<
" Unusual termination criterion reached." 433 <<
"\nReturning the best step found so far." 434 <<
" This typically happens when the number of features is much" 435 <<
" larger than the number of training samples. Consider pruning" 436 <<
" features manually or increasing the regularization value." 442 if (
size_t(stats.
func_evals) >= max_function_evaluations){
450 point = x0 + stp * direction;
456 reg->compute_gradient(point, reg_gradient);
457 f += reg->compute_function_value(point);
461 if(function_scaling != 1.0) {
462 f *= function_scaling;
463 g *= function_scaling;
466 dg = g.dot(direction);
468 double ftest = init_func_value + stp*wolfe_func_dec;
475 if ( (brackt && ((stp <= stmin) || (stp >= stmax))) || (infoc ==
false)){
477 <<
" prevent further progress. \nThere may not be a step which" 478 <<
" satisfies the sufficient decrease and curvature conditions." 479 <<
" \nTolerances may be too small or dataset may be poorly scaled." 480 <<
" This typically happens when the number of features is much" 481 <<
" larger than the number of training samples. Consider pruning" 482 <<
" features manually or increasing the regularization value." 499 if ((stp <=
LS_ZERO) && ((f > ftest) || (dg >= wolfe_func_dec))){
501 <<
" Cannot proceed anymore." 511 if (brackt && (stmax-stmin <=
LS_ZERO)){
513 <<
"lower than step size limit." << std::endl;
519 if ((f <= ftest) && (std::abs(dg) <= -wolfe_curvature)){
527 if ( stage1 && (f <= ftest) && (dg >= wolfe_curvature)){
537 if (stage1 && (f <= fx) && (f > ftest)){
540 double fm = f - stp*wolfe_func_dec;
541 double fxm = fx - stx*wolfe_func_dec;
542 double fym = fy - sty*wolfe_func_dec;
543 double dgm = dg - wolfe_func_dec;
544 double dgxm = dgx - wolfe_func_dec;
545 double dgym = dgy - wolfe_func_dec;
549 infoc =
cstep(stx,fxm, dgxm,
556 fx = fxm + stx*wolfe_func_dec;
557 fy = fym + sty*wolfe_func_dec;
558 dgx = dgxm + wolfe_func_dec;
559 dgy = dgym + wolfe_func_dec;
565 infoc =
cstep(stx,fx, dgx,
576 if (std::abs(sty-stx) >= p66*width2){
577 stp = stx + p5*(sty - stx);
581 width = std::abs(sty-stx);
614 template <
typename Vector>
618 double init_func_value,
621 DenseVector direction){
627 double sufficient_decrease =
LS_C1*(gradient.dot(direction));
629 double step_size = init_step;
630 DenseVector new_point = point;
637 new_point = point + step_size * direction;
639 + step_size * sufficient_decrease){
648 stats.func_evals += 1;
680 template <
typename Vector>
684 double init_func_value,
687 DenseVector direction,
688 const std::shared_ptr<regularizer_interface> reg=NULL){
693 double step_size = init_step;
694 DenseVector delta_point(point.size());;
695 DenseVector new_point(point.size());;
702 new_point = point + step_size * direction;
704 reg->apply_proximal_operator(new_point, step_size);
707 delta_point = new_point - point;
709 init_func_value + gradient.dot(delta_point)
710 + 0.5 * delta_point.squaredNorm()/step_size){
745 double g1,
double g2) {
752 DASSERT_LE(g1, 1e-4);
753 DASSERT_GE(g2, -1e-4);
756 DASSERT_LE(f2 - g2, f1 + 1e-4);
757 DASSERT_LE(f1 + g1, f2 + 1e-4);
778 const double a = -2*(f2 - f1) + (g2 + g1);
779 const double b = 3*(f2 - f1) - g2 - 2*g1;
789 double best_value = std::min(f1, f2);
790 double best_loc = f1 < f2 ? 0 : 1;
793 for(
size_t m_iter = 0; m_iter < 32; ++m_iter) {
795 double t = 0.5 * (right + left);
798 double v = d + c*t + b*t*t + a*t*t*t;
806 if(right - left < 1e-6) {
810 double vp = c + 2*b*t + 3*a*t*t;
820 return dist * best_loc;
double gradient_bracketed_linesearch(double dist, double f1, double f2, double g1, double g2)
const double LS_MAX_STEP_SIZE
Max allowable step size.
ls_return backtracking(first_order_opt_interface &model, double init_step, double init_func_value, DenseVector point, Vector gradient, DenseVector direction, const std::shared_ptr< regularizer_interface > reg=NULL)
const double LS_C2
Line search curvature approximation.
const double OPTIMIZATION_ZERO
Optimization method zero.
ls_return more_thuente(first_order_opt_interface &model, double init_step, double init_func_value, DenseVector point, Vector gradient, DenseVector direction, double function_scaling=1.0, const std::shared_ptr< smooth_regularizer_interface > reg=NULL, size_t max_function_evaluations=LS_MAX_ITER)
bool cstep(double &stx, double &fx, double &dx, double &sty, double &fy, double &dy, double &stp, double &fp, double &dp, bool &brackt, double stpmin, double stpmax)
const double LS_C1
Line search sufficient decrease parameters.
const int LS_MAX_ITER
Num func evals before a failed line search.
#define logprogress_stream
double gamma(const double alpha=double(1))
virtual double compute_function_value(const DenseVector &point, const size_t mbStart=0, const size_t mbSize=-1)
const double LS_ZERO
Smallest allowable step length.
virtual void compute_first_order_statistics(const DenseVector &point, DenseVector &gradient, double &function_value, const size_t mbStart=0, const size_t mbSize=-1)=0
ls_return armijo_backtracking(first_order_opt_interface &model, double init_step, double init_func_value, DenseVector point, Vector gradient, DenseVector direction)