SHOGUN v2.0.0
LibLinear.cpp
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2007-2010 Soeren Sonnenburg
00008  * Copyright (c) 2007-2009 The LIBLINEAR Project.
00009  * Copyright (C) 2007-2010 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 #include <shogun/lib/config.h>
00012 
00013 #ifdef HAVE_LAPACK
00014 #include <shogun/io/SGIO.h>
00015 #include <shogun/lib/Signal.h>
00016 #include <shogun/lib/Time.h>
00017 #include <shogun/base/Parameter.h>
00018 #include <shogun/classifier/svm/LibLinear.h>
00019 #include <shogun/optimization/liblinear/tron.h>
00020 #include <shogun/features/DotFeatures.h>
00021 #include <shogun/labels/BinaryLabels.h>
00022 
00023 using namespace shogun;
00024 
00025 CLibLinear::CLibLinear()
00026 : CLinearMachine()
00027 {
00028     init();
00029 }
00030 
00031 CLibLinear::CLibLinear(LIBLINEAR_SOLVER_TYPE l)
00032 : CLinearMachine()
00033 {
00034     init();
00035     liblinear_solver_type=l;
00036 }
00037 
00038 CLibLinear::CLibLinear(
00039     float64_t C, CDotFeatures* traindat, CLabels* trainlab)
00040 : CLinearMachine()
00041 {
00042     init();
00043     C1=C;
00044     C2=C;
00045     use_bias=true;
00046 
00047     set_features(traindat);
00048     set_labels(trainlab);
00049     init_linear_term();
00050 }
00051 
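The constructors above are normally used together with train() and apply_binary() inherited from CMachine. The following is a minimal, illustrative usage sketch, not part of this file; it assumes the Shogun 2.x dense-feature/binary-label API (CDenseFeatures, CBinaryLabels, init_shogun_with_defaults), and exact header paths or method names may differ slightly between releases.

    #include <shogun/base/init.h>
    #include <shogun/features/DenseFeatures.h>
    #include <shogun/labels/BinaryLabels.h>
    #include <shogun/classifier/svm/LibLinear.h>

    using namespace shogun;

    int main()
    {
        init_shogun_with_defaults();

        // toy data: 2 features x 4 examples (one example per column), labels in {-1,+1}
        SGMatrix<float64_t> train_dat(2, 4);
        SGVector<float64_t> train_lab(4);
        for (int32_t i=0; i<4; i++)
        {
            train_dat(0, i) = (i < 2) ? -1.0 : +1.0;
            train_dat(1, i) = 0.5*i;
            train_lab[i]    = (i < 2) ? -1.0 : +1.0;
        }

        CDenseFeatures<float64_t>* feats = new CDenseFeatures<float64_t>(train_dat);
        CBinaryLabels* labels = new CBinaryLabels(train_lab);

        CLibLinear* svm = new CLibLinear(1.0, feats, labels);   // C1=C2=1, bias enabled
        svm->set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL);
        svm->set_epsilon(1e-4);
        svm->train();

        CBinaryLabels* out = svm->apply_binary(feats);           // predictions on the training set
        SG_UNREF(out);
        SG_UNREF(svm);                                            // also releases the features/labels it holds

        exit_shogun();
        return 0;
    }
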
00052 void CLibLinear::init()
00053 {
00054     liblinear_solver_type=L2R_L1LOSS_SVC_DUAL;
00055     use_bias=false;
00056     C1=1;
00057     C2=1;
00058     set_max_iterations();
00059     epsilon=1e-5;
00060 
00061     SG_ADD(&C1, "C1", "C Cost constant 1.", MS_AVAILABLE);
00062     SG_ADD(&C2, "C2", "C Cost constant 2.", MS_AVAILABLE);
00063     SG_ADD(&use_bias, "use_bias", "Indicates if bias is used.",
00064             MS_NOT_AVAILABLE);
00065     SG_ADD(&epsilon, "epsilon", "Convergence precision.", MS_NOT_AVAILABLE);
00066     SG_ADD(&max_iterations, "max_iterations", "Max number of iterations.",
00067             MS_NOT_AVAILABLE);
00068     SG_ADD(&m_linear_term, "linear_term", "Linear Term", MS_NOT_AVAILABLE);
00069     SG_ADD((machine_int_t*) &liblinear_solver_type, "liblinear_solver_type",
00070             "Type of LibLinear solver.", MS_NOT_AVAILABLE);
00071 }
00072 
00073 CLibLinear::~CLibLinear()
00074 {
00075 }
00076 
00077 bool CLibLinear::train_machine(CFeatures* data)
00078 {
00079     CSignal::clear_cancel();
00080     ASSERT(m_labels);
00081     ASSERT(m_labels->get_label_type() == LT_BINARY);
00082 
00083     if (data)
00084     {
00085         if (!data->has_property(FP_DOT))
00086             SG_ERROR("Specified features are not of type CDotFeatures\n");
00087 
00088         set_features((CDotFeatures*) data);
00089     }
00090     ASSERT(features);
00091 
00092 
00093     int32_t num_train_labels=m_labels->get_num_labels();
00094     int32_t num_feat=features->get_dim_feature_space();
00095     int32_t num_vec=features->get_num_vectors();
00096 
00097     if (liblinear_solver_type == L1R_L2LOSS_SVC ||
00098             (liblinear_solver_type == L1R_LR) )
00099     {
00100         if (num_feat!=num_train_labels)
00101         {
00102             SG_ERROR("L1 methods require the data to be transposed: "
00103                     "number of features %d does not match number of "
00104                     "training labels %d\n",
00105                     num_feat, num_train_labels);
00106         }
00107         CMath::swap(num_feat, num_vec);
00108 
00109     }
00110     else
00111     {
00112         if (num_vec!=num_train_labels)
00113         {
00114             SG_ERROR("number of vectors %d does not match "
00115                     "number of training labels %d\n",
00116                     num_vec, num_train_labels);
00117         }
00118     }
00119     if (use_bias)
00120         w=SGVector<float64_t>(SG_MALLOC(float64_t, num_feat+1), num_feat); // one element beyond vlen holds the bias, read back as w[w.vlen] after training
00121     else
00122         w=SGVector<float64_t>(num_feat);
00123 
00124     problem prob;
00125     if (use_bias)
00126     {
00127         prob.n=w.vlen+1;
00128         memset(w.vector, 0, sizeof(float64_t)*(w.vlen+1));
00129     }
00130     else
00131     {
00132         prob.n=w.vlen;
00133         memset(w.vector, 0, sizeof(float64_t)*w.vlen);
00134     }
00135     prob.l=num_vec;
00136     prob.x=features;
00137     prob.y=SG_MALLOC(double, prob.l);
00138     float64_t* Cs=SG_MALLOC(double, prob.l);
00139     prob.use_bias=use_bias;
00140     double Cp=C1;
00141     double Cn=C2;
00142 
00143     for (int32_t i=0; i<prob.l; i++)
00144     {
00145         prob.y[i]=((CBinaryLabels*) m_labels)->get_int_label(i);
00146         if (prob.y[i] == +1)
00147             Cs[i]=C1;
00148         else if (prob.y[i] == -1)
00149             Cs[i]=C2;
00150         else
00151             SG_ERROR("labels should be +1/-1 only\n");
00152     }
00153 
00154     int pos = 0;
00155     int neg = 0;
00156     for(int i=0;i<prob.l;i++)
00157     {
00158         if(prob.y[i]==+1)
00159             pos++;
00160     }
00161     neg = prob.l - pos;
00162 
00163     SG_INFO("%d training points %d dims\n", prob.l, prob.n);
00164 
00165     function *fun_obj=NULL;
00166     switch (liblinear_solver_type)
00167     {
00168         case L2R_LR:
00169         {
00170             fun_obj=new l2r_lr_fun(&prob, Cs);
00171             CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
00172             SG_DEBUG("starting L2R_LR training via tron\n");
00173             tron_obj.tron(w.vector, m_max_train_time);
00174             SG_DEBUG("done with tron\n");
00175             delete fun_obj;
00176             break;
00177         }
00178         case L2R_L2LOSS_SVC:
00179         {
00180             fun_obj=new l2r_l2_svc_fun(&prob, Cs);
00181             CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
00182             tron_obj.tron(w.vector, m_max_train_time);
00183             delete fun_obj;
00184             break;
00185         }
00186         case L2R_L2LOSS_SVC_DUAL:
00187             solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
00188             break;
00189         case L2R_L1LOSS_SVC_DUAL:
00190             solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
00191             break;
00192         case L1R_L2LOSS_SVC:
00193         {
00194             //ASSUME FEATURES ARE TRANSPOSED ALREADY
00195             solve_l1r_l2_svc(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
00196             break;
00197         }
00198         case L1R_LR:
00199         {
00200             //ASSUME FEATURES ARE TRANSPOSED ALREADY
00201             solve_l1r_lr(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
00202             break;
00203         }
00204         case L2R_LR_DUAL:
00205         {
00206             solve_l2r_lr_dual(&prob, epsilon, Cp, Cn);
00207             break;
00208         }
00209         default:
00210             SG_ERROR("Error: unknown solver_type\n");
00211             break;
00212     }
00213 
00214     if (use_bias)
00215         set_bias(w[w.vlen]);
00216     else
00217         set_bias(0);
00218 
00219     SG_FREE(prob.y);
00220     SG_FREE(Cs);
00221 
00222     return true;
00223 }
00224 
00225 // A coordinate descent algorithm for
00226 // L1-loss and L2-loss SVM dual problems
00227 //
00228 //  min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
00229 //    s.t.      0 <= alpha_i <= upper_bound_i,
00230 //
00231 //  where Qij = yi yj xi^T xj and
00232 //  D is a diagonal matrix
00233 //
00234 // In L1-SVM case:
00235 //      upper_bound_i = Cp if y_i = 1
00236 //      upper_bound_i = Cn if y_i = -1
00237 //      D_ii = 0
00238 // In L2-SVM case:
00239 //      upper_bound_i = INF
00240 //      D_ii = 1/(2*Cp) if y_i = 1
00241 //      D_ii = 1/(2*Cn) if y_i = -1
00242 //
00243 // Given:
00244 // x, y, Cp, Cn
00245 // eps is the stopping tolerance
00246 //
00247 // solution will be put in w
00248 
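For orientation, the per-coordinate step carried out inside solve_l2r_l1l2_svc below can be summarized as follows; this is a restatement of the code in the notation of the comment above, not an addition to it.

// Per-coordinate update (coordinate i, current iterate w, dual variable alpha_i):
//
//   G_i      = y_i w^T x_i - 1 + D_ii alpha_i     (dual gradient; the constant -1
//                                                   becomes p_i if a linear term p is set)
//   Qbar_ii  = x_i^T x_i + D_ii
//   alpha_i <- min(max(alpha_i - G_i / Qbar_ii, 0), upper_bound_i)
//   w       <- w + (alpha_i_new - alpha_i_old) y_i x_i
//
// The projected gradient PG of G_i is used both for shrinking the active set
// and for the stopping criterion (PGmax - PGmin <= eps).
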
00249 #undef GETI
00250 #define GETI(i) (y[i]+1)
00251 // To support weights for instances, use GETI(i) (i)
00252 
00253 void CLibLinear::solve_l2r_l1l2_svc(
00254             const problem *prob, double eps, double Cp, double Cn, LIBLINEAR_SOLVER_TYPE st)
00255 {
00256     int l = prob->l;
00257     int w_size = prob->n;
00258     int i, s, iter = 0;
00259     double C, d, G;
00260     double *QD = SG_MALLOC(double, l);
00261     int *index = SG_MALLOC(int, l);
00262     double *alpha = SG_MALLOC(double, l);
00263     int32_t *y = SG_MALLOC(int32_t, l);
00264     int active_size = l;
00265 
00266     // PG: projected gradient, for shrinking and stopping
00267     double PG;
00268     double PGmax_old = CMath::INFTY;
00269     double PGmin_old = -CMath::INFTY;
00270     double PGmax_new, PGmin_new;
00271 
00272     // default solver_type: L2R_L2LOSS_SVC_DUAL
00273     double diag[3] = {0.5/Cn, 0, 0.5/Cp};
00274     double upper_bound[3] = {CMath::INFTY, 0, CMath::INFTY};
00275     if(st == L2R_L1LOSS_SVC_DUAL)
00276     {
00277         diag[0] = 0;
00278         diag[2] = 0;
00279         upper_bound[0] = Cn;
00280         upper_bound[2] = Cp;
00281     }
00282 
00283     int n = prob->n;
00284 
00285     if (prob->use_bias)
00286         n--;
00287 
00288     for(i=0; i<w_size; i++)
00289         w[i] = 0;
00290 
00291     for(i=0; i<l; i++)
00292     {
00293         alpha[i] = 0;
00294         if(prob->y[i] > 0)
00295         {
00296             y[i] = +1;
00297         }
00298         else
00299         {
00300             y[i] = -1;
00301         }
00302         QD[i] = diag[GETI(i)];
00303 
00304         QD[i] += prob->x->dot(i, prob->x,i);
00305         index[i] = i;
00306     }
00307 
00308 
00309     CTime start_time;
00310     while (iter < max_iterations && !CSignal::cancel_computations())
00311     {
00312         if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
00313           break;
00314 
00315         PGmax_new = -CMath::INFTY;
00316         PGmin_new = CMath::INFTY;
00317 
00318         for (i=0; i<active_size; i++)
00319         {
00320             int j = i+rand()%(active_size-i);
00321             CMath::swap(index[i], index[j]);
00322         }
00323 
00324         for (s=0;s<active_size;s++)
00325         {
00326             i = index[s];
00327             int32_t yi = y[i];
00328 
00329             G = prob->x->dense_dot(i, w.vector, n);
00330             if (prob->use_bias)
00331                 G+=w.vector[n];
00332 
00333             if (m_linear_term.vector)
00334                 G = G*yi + m_linear_term.vector[i];
00335             else
00336                 G = G*yi-1;
00337 
00338             C = upper_bound[GETI(i)];
00339             G += alpha[i]*diag[GETI(i)];
00340 
00341             PG = 0;
00342             if (alpha[i] == 0)
00343             {
00344                 if (G > PGmax_old)
00345                 {
00346                     active_size--;
00347                     CMath::swap(index[s], index[active_size]);
00348                     s--;
00349                     continue;
00350                 }
00351                 else if (G < 0)
00352                     PG = G;
00353             }
00354             else if (alpha[i] == C)
00355             {
00356                 if (G < PGmin_old)
00357                 {
00358                     active_size--;
00359                     CMath::swap(index[s], index[active_size]);
00360                     s--;
00361                     continue;
00362                 }
00363                 else if (G > 0)
00364                     PG = G;
00365             }
00366             else
00367                 PG = G;
00368 
00369             PGmax_new = CMath::max(PGmax_new, PG);
00370             PGmin_new = CMath::min(PGmin_new, PG);
00371 
00372             if(fabs(PG) > 1.0e-12)
00373             {
00374                 double alpha_old = alpha[i];
00375                 alpha[i] = CMath::min(CMath::max(alpha[i] - G/QD[i], 0.0), C);
00376                 d = (alpha[i] - alpha_old)*yi;
00377 
00378                 prob->x->add_to_dense_vec(d, i, w.vector, n);
00379 
00380                 if (prob->use_bias)
00381                     w.vector[n]+=d;
00382             }
00383         }
00384 
00385         iter++;
00386         float64_t gap=PGmax_new - PGmin_new;
00387         SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6);
00388 
00389         if(gap <= eps)
00390         {
00391             if(active_size == l)
00392                 break;
00393             else
00394             {
00395                 active_size = l;
00396                 PGmax_old = CMath::INFTY;
00397                 PGmin_old = -CMath::INFTY;
00398                 continue;
00399             }
00400         }
00401         PGmax_old = PGmax_new;
00402         PGmin_old = PGmin_new;
00403         if (PGmax_old <= 0)
00404             PGmax_old = CMath::INFTY;
00405         if (PGmin_old >= 0)
00406             PGmin_old = -CMath::INFTY;
00407     }
00408 
00409     SG_DONE();
00410     SG_INFO("optimization finished, #iter = %d\n",iter);
00411     if (iter >= max_iterations)
00412     {
00413         SG_WARNING("reaching max number of iterations\nUsing -s 2 may be faster "
00414                 "(also see liblinear FAQ)\n\n");
00415     }
00416 
00417     // calculate objective value
00418 
00419     double v = 0;
00420     int nSV = 0;
00421     for(i=0; i<w_size; i++)
00422         v += w.vector[i]*w.vector[i];
00423     for(i=0; i<l; i++)
00424     {
00425         v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
00426         if(alpha[i] > 0)
00427             ++nSV;
00428     }
00429     SG_INFO("Objective value = %lf\n",v/2);
00430     SG_INFO("nSV = %d\n",nSV);
00431 
00432     SG_FREE(QD);
00433     SG_FREE(alpha);
00434     SG_FREE(y);
00435     SG_FREE(index);
00436 }
00437 
00438 // A coordinate descent algorithm for
00439 // L1-regularized L2-loss support vector classification
00440 //
00441 //  min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
00442 //
00443 // Given:
00444 // x, y, Cp, Cn
00445 // eps is the stopping tolerance
00446 //
00447 // solution will be put in w
00448 
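As a reading aid, the coordinate-wise step implemented in solve_l1r_l2_svc below can be written out as follows; this restates what the code computes and adds nothing new.

// For coordinate j, with G the loss derivative and H a positive curvature bound,
// the direction d minimizes the model
//
//   |w_j + d| - |w_j| + G*d + 0.5*H*d^2
//
// whose closed-form solution is
//
//   d = -(G+1)/H   if G+1 <= H*w_j
//   d = -(G-1)/H   if G-1 >= H*w_j
//   d = -w_j       otherwise.
//
// A backtracking line search (d <- d/2, delta <- delta/2) then runs until the
// sufficient decrease condition with sigma = 0.01 holds; b[i] = 1 - y_i w^T x_i
// is kept up to date so the loss terms can be re-evaluated cheaply.
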
00449 #undef GETI
00450 #define GETI(i) (y[i]+1)
00451 // To support weights for instances, use GETI(i) (i)
00452 
00453 void CLibLinear::solve_l1r_l2_svc(
00454     problem *prob_col, double eps, double Cp, double Cn)
00455 {
00456     int l = prob_col->l;
00457     int w_size = prob_col->n;
00458     int j, s, iter = 0;
00459     int active_size = w_size;
00460     int max_num_linesearch = 20;
00461 
00462     double sigma = 0.01;
00463     double d, G_loss, G, H;
00464     double Gmax_old = CMath::INFTY;
00465     double Gmax_new;
00466     double Gmax_init=0;
00467     double d_old, d_diff;
00468     double loss_old=0, loss_new;
00469     double appxcond, cond;
00470 
00471     int *index = SG_MALLOC(int, w_size);
00472     int32_t *y = SG_MALLOC(int32_t, l);
00473     double *b = SG_MALLOC(double, l); // b = 1-ywTx
00474     double *xj_sq = SG_MALLOC(double, w_size);
00475 
00476     CDotFeatures* x = prob_col->x;
00477     void* iterator;
00478     int32_t ind;
00479     float64_t val;
00480 
00481     double C[3] = {Cn,0,Cp};
00482 
00483     int n = prob_col->n;
00484     if (prob_col->use_bias)
00485         n--;
00486 
00487     for(j=0; j<l; j++)
00488     {
00489         b[j] = 1;
00490         if(prob_col->y[j] > 0)
00491             y[j] = 1;
00492         else
00493             y[j] = -1;
00494     }
00495 
00496     for(j=0; j<w_size; j++)
00497     {
00498         w.vector[j] = 0;
00499         index[j] = j;
00500         xj_sq[j] = 0;
00501 
00502         if (use_bias && j==n)
00503         {
00504             for (ind=0; ind<l; ind++)
00505                 xj_sq[n] += C[GETI(ind)];
00506         }
00507         else
00508         {
00509             iterator=x->get_feature_iterator(j);
00510             while (x->get_next_feature(ind, val, iterator))
00511                 xj_sq[j] += C[GETI(ind)]*val*val;
00512             x->free_feature_iterator(iterator);
00513         }
00514     }
00515 
00516 
00517     CTime start_time;
00518     while (iter < max_iterations && !CSignal::cancel_computations())
00519     {
00520         if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
00521           break;
00522 
00523         Gmax_new  = 0;
00524 
00525         for(j=0; j<active_size; j++)
00526         {
00527             int i = j+rand()%(active_size-j);
00528             CMath::swap(index[i], index[j]);
00529         }
00530 
00531         for(s=0; s<active_size; s++)
00532         {
00533             j = index[s];
00534             G_loss = 0;
00535             H = 0;
00536 
00537             if (use_bias && j==n)
00538             {
00539                 for (ind=0; ind<l; ind++)
00540                 {
00541                     if(b[ind] > 0)
00542                     {
00543                         double tmp = C[GETI(ind)]*y[ind];
00544                         G_loss -= tmp*b[ind];
00545                         H += tmp*y[ind];
00546                     }
00547                 }
00548             }
00549             else
00550             {
00551                 iterator=x->get_feature_iterator(j);
00552 
00553                 while (x->get_next_feature(ind, val, iterator))
00554                 {
00555                     if(b[ind] > 0)
00556                     {
00557                         double tmp = C[GETI(ind)]*val*y[ind];
00558                         G_loss -= tmp*b[ind];
00559                         H += tmp*val*y[ind];
00560                     }
00561                 }
00562                 x->free_feature_iterator(iterator);
00563             }
00564 
00565             G_loss *= 2;
00566 
00567             G = G_loss;
00568             H *= 2;
00569             H = CMath::max(H, 1e-12);
00570 
00571             double Gp = G+1;
00572             double Gn = G-1;
00573             double violation = 0;
00574             if(w.vector[j] == 0)
00575             {
00576                 if(Gp < 0)
00577                     violation = -Gp;
00578                 else if(Gn > 0)
00579                     violation = Gn;
00580                 else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
00581                 {
00582                     active_size--;
00583                     CMath::swap(index[s], index[active_size]);
00584                     s--;
00585                     continue;
00586                 }
00587             }
00588             else if(w.vector[j] > 0)
00589                 violation = fabs(Gp);
00590             else
00591                 violation = fabs(Gn);
00592 
00593             Gmax_new = CMath::max(Gmax_new, violation);
00594 
00595             // obtain Newton direction d
00596             if(Gp <= H*w.vector[j])
00597                 d = -Gp/H;
00598             else if(Gn >= H*w.vector[j])
00599                 d = -Gn/H;
00600             else
00601                 d = -w.vector[j];
00602 
00603             if(fabs(d) < 1.0e-12)
00604                 continue;
00605 
00606             double delta = fabs(w.vector[j]+d)-fabs(w.vector[j]) + G*d;
00607             d_old = 0;
00608             int num_linesearch;
00609             for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
00610             {
00611                 d_diff = d_old - d;
00612                 cond = fabs(w.vector[j]+d)-fabs(w.vector[j]) - sigma*delta;
00613 
00614                 appxcond = xj_sq[j]*d*d + G_loss*d + cond;
00615                 if(appxcond <= 0)
00616                 {
00617                     if (use_bias && j==n)
00618                     {
00619                         for (ind=0; ind<l; ind++)
00620                             b[ind] += d_diff*y[ind];
00621                         break;
00622                     }
00623                     else
00624                     {
00625                         iterator=x->get_feature_iterator(j);
00626                         while (x->get_next_feature(ind, val, iterator))
00627                             b[ind] += d_diff*val*y[ind];
00628 
00629                         x->free_feature_iterator(iterator);
00630                         break;
00631                     }
00632                 }
00633 
00634                 if(num_linesearch == 0)
00635                 {
00636                     loss_old = 0;
00637                     loss_new = 0;
00638 
00639                     if (use_bias && j==n)
00640                     {
00641                         for (ind=0; ind<l; ind++)
00642                         {
00643                             if(b[ind] > 0)
00644                                 loss_old += C[GETI(ind)]*b[ind]*b[ind];
00645                             double b_new = b[ind] + d_diff*y[ind];
00646                             b[ind] = b_new;
00647                             if(b_new > 0)
00648                                 loss_new += C[GETI(ind)]*b_new*b_new;
00649                         }
00650                     }
00651                     else
00652                     {
00653                         iterator=x->get_feature_iterator(j);
00654                         while (x->get_next_feature(ind, val, iterator))
00655                         {
00656                             if(b[ind] > 0)
00657                                 loss_old += C[GETI(ind)]*b[ind]*b[ind];
00658                             double b_new = b[ind] + d_diff*val*y[ind];
00659                             b[ind] = b_new;
00660                             if(b_new > 0)
00661                                 loss_new += C[GETI(ind)]*b_new*b_new;
00662                         }
00663                         x->free_feature_iterator(iterator);
00664                     }
00665                 }
00666                 else
00667                 {
00668                     loss_new = 0;
00669                     if (use_bias && j==n)
00670                     {
00671                         for (ind=0; ind<l; ind++)
00672                         {
00673                             double b_new = b[ind] + d_diff*y[ind];
00674                             b[ind] = b_new;
00675                             if(b_new > 0)
00676                                 loss_new += C[GETI(ind)]*b_new*b_new;
00677                         }
00678                     }
00679                     else
00680                     {
00681                         iterator=x->get_feature_iterator(j);
00682                         while (x->get_next_feature(ind, val, iterator))
00683                         {
00684                             double b_new = b[ind] + d_diff*val*y[ind];
00685                             b[ind] = b_new;
00686                             if(b_new > 0)
00687                                 loss_new += C[GETI(ind)]*b_new*b_new;
00688                         }
00689                         x->free_feature_iterator(iterator);
00690                     }
00691                 }
00692 
00693                 cond = cond + loss_new - loss_old;
00694                 if(cond <= 0)
00695                     break;
00696                 else
00697                 {
00698                     d_old = d;
00699                     d *= 0.5;
00700                     delta *= 0.5;
00701                 }
00702             }
00703 
00704             w.vector[j] += d;
00705 
00706             // recompute b[] if line search takes too many steps
00707             if(num_linesearch >= max_num_linesearch)
00708             {
00709                 SG_INFO("#");
00710                 for(int i=0; i<l; i++)
00711                     b[i] = 1;
00712 
00713                 for(int i=0; i<n; i++)
00714                 {
00715                     if(w.vector[i]==0)
00716                         continue;
00717 
00718                     iterator=x->get_feature_iterator(i);
00719                     while (x->get_next_feature(ind, val, iterator))
00720                         b[ind] -= w.vector[i]*val*y[ind];
00721                     x->free_feature_iterator(iterator);
00722                 }
00723 
00724                 if (use_bias && w.vector[n])
00725                 {
00726                     for (ind=0; ind<l; ind++)
00727                         b[ind] -= w.vector[n]*y[ind];
00728                 }
00729             }
00730         }
00731 
00732         if(iter == 0)
00733             Gmax_init = Gmax_new;
00734         iter++;
00735 
00736         SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new),
00737                 -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);
00738 
00739         if(Gmax_new <= eps*Gmax_init)
00740         {
00741             if(active_size == w_size)
00742                 break;
00743             else
00744             {
00745                 active_size = w_size;
00746                 Gmax_old = CMath::INFTY;
00747                 continue;
00748             }
00749         }
00750 
00751         Gmax_old = Gmax_new;
00752     }
00753 
00754     SG_DONE();
00755     SG_INFO("optimization finished, #iter = %d\n", iter);
00756     if(iter >= max_iterations)
00757         SG_WARNING("\nWARNING: reaching max number of iterations\n");
00758 
00759     // calculate objective value
00760 
00761     double v = 0;
00762     int nnz = 0;
00763     for(j=0; j<w_size; j++)
00764     {
00765         if(w.vector[j] != 0)
00766         {
00767             v += fabs(w.vector[j]);
00768             nnz++;
00769         }
00770     }
00771     for(j=0; j<l; j++)
00772         if(b[j] > 0)
00773             v += C[GETI(j)]*b[j]*b[j];
00774 
00775     SG_INFO("Objective value = %lf\n", v);
00776     SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size);
00777 
00778     SG_FREE(index);
00779     SG_FREE(y);
00780     SG_FREE(b);
00781     SG_FREE(xj_sq);
00782 }
00783 
00784 // A coordinate descent algorithm for
00785 // L1-regularized logistic regression problems
00786 //
00787 //  min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
00788 //
00789 // Given:
00790 // x, y, Cp, Cn
00791 // eps is the stopping tolerance
00792 //
00793 // solution will be put in w
00794 
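The quantities accumulated in the inner loop of solve_l1r_lr below correspond to the following; this is a restatement of the code, in which exp_wTx[i] caches exp(w^T x_i).

// Gradient and (diagonal) Hessian of the logistic loss with respect to w_j:
//
//   G_j = -sum_i C_i x_ij / (1 + exp(w^T x_i)) + sum_{i: y_i = -1} C_i x_ij
//   H_j =  sum_i C_i x_ij^2 exp(w^T x_i) / (1 + exp(w^T x_i))^2
//
// The same soft-thresholded Newton direction as in solve_l1r_l2_svc is then
// combined with a backtracking line search on the exact objective.
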
00795 #undef GETI
00796 #define GETI(i) (y[i]+1)
00797 // To support weights for instances, use GETI(i) (i)
00798 
00799 void CLibLinear::solve_l1r_lr(
00800     const problem *prob_col, double eps,
00801     double Cp, double Cn)
00802 {
00803     int l = prob_col->l;
00804     int w_size = prob_col->n;
00805     int j, s, iter = 0;
00806     int active_size = w_size;
00807     int max_num_linesearch = 20;
00808 
00809     double x_min = 0;
00810     double sigma = 0.01;
00811     double d, G, H;
00812     double Gmax_old = CMath::INFTY;
00813     double Gmax_new;
00814     double Gmax_init=0;
00815     double sum1, appxcond1;
00816     double sum2, appxcond2;
00817     double cond;
00818 
00819     int *index = SG_MALLOC(int, w_size);
00820     int32_t *y = SG_MALLOC(int32_t, l);
00821     double *exp_wTx = SG_MALLOC(double, l);
00822     double *exp_wTx_new = SG_MALLOC(double, l);
00823     double *xj_max = SG_MALLOC(double, w_size);
00824     double *C_sum = SG_MALLOC(double, w_size);
00825     double *xjneg_sum = SG_MALLOC(double, w_size);
00826     double *xjpos_sum = SG_MALLOC(double, w_size);
00827 
00828     CDotFeatures* x = prob_col->x;
00829     void* iterator;
00830     int ind;
00831     double val;
00832 
00833     double C[3] = {Cn,0,Cp};
00834 
00835     int n = prob_col->n;
00836     if (prob_col->use_bias)
00837         n--;
00838 
00839     for(j=0; j<l; j++)
00840     {
00841         exp_wTx[j] = 1;
00842         if(prob_col->y[j] > 0)
00843             y[j] = 1;
00844         else
00845             y[j] = -1;
00846     }
00847     for(j=0; j<w_size; j++)
00848     {
00849         w.vector[j] = 0;
00850         index[j] = j;
00851         xj_max[j] = 0;
00852         C_sum[j] = 0;
00853         xjneg_sum[j] = 0;
00854         xjpos_sum[j] = 0;
00855 
00856         if (use_bias && j==n)
00857         {
00858             for (ind=0; ind<l; ind++)
00859             {
00860                 x_min = CMath::min(x_min, 1.0);
00861                 xj_max[j] = CMath::max(xj_max[j], 1.0);
00862                 C_sum[j] += C[GETI(ind)];
00863                 if(y[ind] == -1)
00864                     xjneg_sum[j] += C[GETI(ind)];
00865                 else
00866                     xjpos_sum[j] += C[GETI(ind)];
00867             }
00868         }
00869         else
00870         {
00871             iterator=x->get_feature_iterator(j);
00872             while (x->get_next_feature(ind, val, iterator))
00873             {
00874                 x_min = CMath::min(x_min, val);
00875                 xj_max[j] = CMath::max(xj_max[j], val);
00876                 C_sum[j] += C[GETI(ind)];
00877                 if(y[ind] == -1)
00878                     xjneg_sum[j] += C[GETI(ind)]*val;
00879                 else
00880                     xjpos_sum[j] += C[GETI(ind)]*val;
00881             }
00882             x->free_feature_iterator(iterator);
00883         }
00884     }
00885 
00886     CTime start_time;
00887     while (iter < max_iterations && !CSignal::cancel_computations())
00888     {
00889         if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
00890           break;
00891 
00892         Gmax_new = 0;
00893 
00894         for(j=0; j<active_size; j++)
00895         {
00896             int i = j+rand()%(active_size-j);
00897             CMath::swap(index[i], index[j]);
00898         }
00899 
00900         for(s=0; s<active_size; s++)
00901         {
00902             j = index[s];
00903             sum1 = 0;
00904             sum2 = 0;
00905             H = 0;
00906 
00907             if (use_bias && j==n)
00908             {
00909                 for (ind=0; ind<l; ind++)
00910                 {
00911                     double exp_wTxind = exp_wTx[ind];
00912                     double tmp1 = 1.0/(1+exp_wTxind);
00913                     double tmp2 = C[GETI(ind)]*tmp1;
00914                     double tmp3 = tmp2*exp_wTxind;
00915                     sum2 += tmp2;
00916                     sum1 += tmp3;
00917                     H += tmp1*tmp3;
00918                 }
00919             }
00920             else
00921             {
00922                 iterator=x->get_feature_iterator(j);
00923                 while (x->get_next_feature(ind, val, iterator))
00924                 {
00925                     double exp_wTxind = exp_wTx[ind];
00926                     double tmp1 = val/(1+exp_wTxind);
00927                     double tmp2 = C[GETI(ind)]*tmp1;
00928                     double tmp3 = tmp2*exp_wTxind;
00929                     sum2 += tmp2;
00930                     sum1 += tmp3;
00931                     H += tmp1*tmp3;
00932                 }
00933                 x->free_feature_iterator(iterator);
00934             }
00935 
00936             G = -sum2 + xjneg_sum[j];
00937 
00938             double Gp = G+1;
00939             double Gn = G-1;
00940             double violation = 0;
00941             if(w.vector[j] == 0)
00942             {
00943                 if(Gp < 0)
00944                     violation = -Gp;
00945                 else if(Gn > 0)
00946                     violation = Gn;
00947                 else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
00948                 {
00949                     active_size--;
00950                     CMath::swap(index[s], index[active_size]);
00951                     s--;
00952                     continue;
00953                 }
00954             }
00955             else if(w.vector[j] > 0)
00956                 violation = fabs(Gp);
00957             else
00958                 violation = fabs(Gn);
00959 
00960             Gmax_new = CMath::max(Gmax_new, violation);
00961 
00962             // obtain Newton direction d
00963             if(Gp <= H*w.vector[j])
00964                 d = -Gp/H;
00965             else if(Gn >= H*w.vector[j])
00966                 d = -Gn/H;
00967             else
00968                 d = -w.vector[j];
00969 
00970             if(fabs(d) < 1.0e-12)
00971                 continue;
00972 
00973             d = CMath::min(CMath::max(d,-10.0),10.0);
00974 
00975             double delta = fabs(w.vector[j]+d)-fabs(w.vector[j]) + G*d;
00976             int num_linesearch;
00977             for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
00978             {
00979                 cond = fabs(w.vector[j]+d)-fabs(w.vector[j]) - sigma*delta;
00980 
00981                 if(x_min >= 0)
00982                 {
00983                     double tmp = exp(d*xj_max[j]);
00984                     appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j];
00985                     appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j];
00986                     if(CMath::min(appxcond1,appxcond2) <= 0)
00987                     {
00988                         if (use_bias && j==n)
00989                         {
00990                             for (ind=0; ind<l; ind++)
00991                                 exp_wTx[ind] *= exp(d);
00992                         }
00993 
00994                         else
00995                         {
00996                             iterator=x->get_feature_iterator(j);
00997                             while (x->get_next_feature(ind, val, iterator))
00998                                 exp_wTx[ind] *= exp(d*val);
00999                             x->free_feature_iterator(iterator);
01000                         }
01001                         break;
01002                     }
01003                 }
01004 
01005                 cond += d*xjneg_sum[j];
01006 
01007                 int i = 0;
01008 
01009                 if (use_bias && j==n)
01010                 {
01011                     for (ind=0; ind<l; ind++)
01012                     {
01013                         double exp_dx = exp(d);
01014                         exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
01015                         cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
01016                         i++;
01017                     }
01018                 }
01019                 else
01020                 {
01021 
01022                     iterator=x->get_feature_iterator(j);
01023                     while (x->get_next_feature(ind, val, iterator))
01024                     {
01025                         double exp_dx = exp(d*val);
01026                         exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
01027                         cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
01028                         i++;
01029                     }
01030                     x->free_feature_iterator(iterator);
01031                 }
01032 
01033                 if(cond <= 0)
01034                 {
01035                     i = 0;
01036                     if (use_bias && j==n)
01037                     {
01038                         for (ind=0; ind<l; ind++)
01039                         {
01040                             exp_wTx[ind] = exp_wTx_new[i];
01041                             i++;
01042                         }
01043                     }
01044                     else
01045                     {
01046                         iterator=x->get_feature_iterator(j);
01047                         while (x->get_next_feature(ind, val, iterator))
01048                         {
01049                             exp_wTx[ind] = exp_wTx_new[i];
01050                             i++;
01051                         }
01052                         x->free_feature_iterator(iterator);
01053                     }
01054                     break;
01055                 }
01056                 else
01057                 {
01058                     d *= 0.5;
01059                     delta *= 0.5;
01060                 }
01061             }
01062 
01063             w.vector[j] += d;
01064 
01065             // recompute exp_wTx[] if line search takes too many steps
01066             if(num_linesearch >= max_num_linesearch)
01067             {
01068                 SG_INFO("#");
01069                 for(int i=0; i<l; i++)
01070                     exp_wTx[i] = 0;
01071 
01072                 for(int i=0; i<w_size; i++)
01073                 {
01074                     if(w.vector[i]==0) continue;
01075 
01076                     if (use_bias && i==n)
01077                     {
01078                         for (ind=0; ind<l; ind++)
01079                             exp_wTx[ind] += w.vector[i];
01080                     }
01081                     else
01082                     {
01083                         iterator=x->get_feature_iterator(i);
01084                         while (x->get_next_feature(ind, val, iterator))
01085                             exp_wTx[ind] += w.vector[i]*val;
01086                         x->free_feature_iterator(iterator);
01087                     }
01088                 }
01089 
01090                 for(int i=0; i<l; i++)
01091                     exp_wTx[i] = exp(exp_wTx[i]);
01092             }
01093         }
01094 
01095         if(iter == 0)
01096             Gmax_init = Gmax_new;
01097         iter++;
01098         SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);
01099 
01100         if(Gmax_new <= eps*Gmax_init)
01101         {
01102             if(active_size == w_size)
01103                 break;
01104             else
01105             {
01106                 active_size = w_size;
01107                 Gmax_old = CMath::INFTY;
01108                 continue;
01109             }
01110         }
01111 
01112         Gmax_old = Gmax_new;
01113     }
01114 
01115     SG_DONE();
01116     SG_INFO("optimization finished, #iter = %d\n", iter);
01117     if(iter >= max_iterations)
01118         SG_WARNING("\nWARNING: reaching max number of iterations\n");
01119 
01120     // calculate objective value
01121 
01122     double v = 0;
01123     int nnz = 0;
01124     for(j=0; j<w_size; j++)
01125         if(w.vector[j] != 0)
01126         {
01127             v += fabs(w.vector[j]);
01128             nnz++;
01129         }
01130     for(j=0; j<l; j++)
01131         if(y[j] == 1)
01132             v += C[GETI(j)]*log(1+1/exp_wTx[j]);
01133         else
01134             v += C[GETI(j)]*log(1+exp_wTx[j]);
01135 
01136     SG_INFO("Objective value = %lf\n", v);
01137     SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size);
01138 
01139     SG_FREE(index);
01140     SG_FREE(y);
01141     SG_FREE(exp_wTx);
01142     SG_FREE(exp_wTx_new);
01143     SG_FREE(xj_max);
01144     SG_FREE(C_sum);
01145     SG_FREE(xjneg_sum);
01146     SG_FREE(xjpos_sum);
01147 }
01148 
01149 // A coordinate descent algorithm for 
01150 // the dual of L2-regularized logistic regression problems
01151 //
01152 //  min_\alpha  0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i - \alpha_i) log (upper_bound_i - \alpha_i),
01153 //    s.t.      0 <= \alpha_i <= upper_bound_i,
01154 // 
01155 //  where Qij = yi yj xi^T xj and 
01156 //  upper_bound_i = Cp if y_i = 1
01157 //  upper_bound_i = Cn if y_i = -1
01158 //
01159 // Given: 
01160 // x, y, Cp, Cn
01161 // eps is the stopping tolerance
01162 //
01163 // solution will be put in w
01164 //
01165 // See Algorithm 5 of Yu et al., MLJ 2010
01166 
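For reference, the one-dimensional sub-problem solved for each instance by the inner Newton iteration in solve_l2r_lr_dual is collected here; it restates the code comments below in one place.

// With a = x_i^T x_i (+1 if a bias is used), b = y_i w^T x_i, s = +/-1 and
// C the upper bound for instance i, each coordinate solves
//
//   min_{0 < z < C}  g(z) = z log(z) + (C - z) log(C - z)
//                           + 0.5 a (z - alpha_old)^2 + s b (z - alpha_old)
//
// by a safeguarded Newton iteration:
//
//   g'(z)  = a (z - alpha_old) + s b + log(z / (C - z))
//   g''(z) = a + C / ((C - z) z)
//   z     <- z - g'(z)/g''(z),  shrinking z by eta = 0.1 whenever the step
//                               would leave (0, C).
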
01167 #undef GETI
01168 #define GETI(i) (y[i]+1)
01169 // To support weights for instances, use GETI(i) (i)
01170 
01171 void CLibLinear::solve_l2r_lr_dual(const problem *prob, double eps, double Cp, double Cn)
01172 {
01173     int l = prob->l;
01174     int w_size = prob->n;
01175     int i, s, iter = 0;
01176     double *xTx = new double[l];
01177     int max_iter = 1000;
01178     int *index = new int[l];        
01179     double *alpha = new double[2*l]; // store alpha and C - alpha
01180     int32_t *y = SG_MALLOC(int32_t, l);
01181     int max_inner_iter = 100; // for inner Newton
01182     double innereps = 1e-2; 
01183     double innereps_min = CMath::min(1e-8, eps);
01184     double upper_bound[3] = {Cn, 0, Cp};
01185     double Gmax_init = 0;
01186 
01187     for(i=0; i<l; i++)
01188     {
01189         if(prob->y[i] > 0)
01190         {
01191             y[i] = +1; 
01192         }
01193         else
01194         {
01195             y[i] = -1;
01196         }
01197     }
01198         
01199     // Initial alpha can be set here. Note that
01200     // 0 < alpha[i] < upper_bound[GETI(i)]
01201     // alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)]
01202     for(i=0; i<l; i++)
01203     {
01204         alpha[2*i] = CMath::min(0.001*upper_bound[GETI(i)], 1e-8);
01205         alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];
01206     }
01207 
01208     for(i=0; i<w_size; i++)
01209         w[i] = 0;
01210     for(i=0; i<l; i++)
01211     {
01212         xTx[i] = prob->x->dot(i, prob->x,i);
01213         prob->x->add_to_dense_vec(y[i]*alpha[2*i], i, w.vector, w_size);
01214 
01215         if (prob->use_bias)
01216         {
01217             w.vector[w_size]+=y[i]*alpha[2*i];
01218             xTx[i]+=1;
01219         }
01220         index[i] = i;
01221     }
01222 
01223     while (iter < max_iter)
01224     {
01225         for (i=0; i<l; i++)
01226         {
01227             int j = i+rand()%(l-i);
01228             CMath::swap(index[i], index[j]);
01229         }
01230         int newton_iter = 0;
01231         double Gmax = 0;
01232         for (s=0; s<l; s++)
01233         {
01234             i = index[s];
01235             int32_t yi = y[i];
01236             double C = upper_bound[GETI(i)];
01237             double ywTx = 0, xisq = xTx[i];
01238 
01239             ywTx = prob->x->dense_dot(i, w.vector, w_size);
01240             if (prob->use_bias)
01241                 ywTx+=w.vector[w_size];
01242 
01243             ywTx *= y[i];
01244             double a = xisq, b = ywTx;
01245 
01246             // Decide to minimize g_1(z) or g_2(z)
01247             int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
01248             if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0) 
01249             {
01250                 ind1 = 2*i+1;
01251                 ind2 = 2*i;
01252                 sign = -1;
01253             }
01254 
01255             //  g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
01256             double alpha_old = alpha[ind1];
01257             double z = alpha_old;
01258             if(C - z < 0.5 * C) 
01259                 z = 0.1*z;
01260             double gp = a*(z-alpha_old)+sign*b+CMath::log(z/(C-z));
01261             Gmax = CMath::max(Gmax, CMath::abs(gp));
01262 
01263             // Newton method on the sub-problem
01264             const double eta = 0.1; // xi in the paper
01265             int inner_iter = 0;
01266             while (inner_iter <= max_inner_iter) 
01267             {
01268                 if(fabs(gp) < innereps)
01269                     break;
01270                 double gpp = a + C/(C-z)/z;
01271                 double tmpz = z - gp/gpp;
01272                 if(tmpz <= 0) 
01273                     z *= eta;
01274                 else // tmpz in (0, C)
01275                     z = tmpz;
01276                 gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
01277                 newton_iter++;
01278                 inner_iter++;
01279             }
01280 
01281             if(inner_iter > 0) // update w
01282             {
01283                 alpha[ind1] = z;
01284                 alpha[ind2] = C-z;
01285 
01286                 prob->x->add_to_dense_vec(sign*(z-alpha_old)*yi, i, w.vector, w_size);
01287 
01288                 if (prob->use_bias)
01289                     w.vector[w_size]+=sign*(z-alpha_old)*yi;
01290             }
01291         }
01292 
01293         if(iter == 0)
01294             Gmax_init = Gmax;
01295         iter++;
01296 
01297         SG_SABS_PROGRESS(Gmax, -CMath::log10(Gmax), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);
01298 
01299         if(Gmax < eps) 
01300             break;
01301 
01302         if(newton_iter <= l/10) 
01303             innereps = CMath::max(innereps_min, 0.1*innereps);
01304 
01305     }
01306 
01307     SG_DONE();
01308     SG_INFO("optimization finished, #iter = %d\n",iter);
01309     if (iter >= max_iter)
01310         SG_WARNING("reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n");
01311 
01312     // calculate objective value
01313     
01314     double v = 0;
01315     for(i=0; i<w_size; i++)
01316         v += w[i] * w[i];
01317     v *= 0.5;
01318     for(i=0; i<l; i++)
01319         v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1]) 
01320             - upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
01321     SG_INFO("Objective value = %lf\n", v);
01322 
01323     delete [] xTx;
01324     delete [] alpha;
01325     SG_FREE(y);
01326     delete [] index;
01327 }
01328 
01329 
01330 void CLibLinear::set_linear_term(const SGVector<float64_t> linear_term)
01331 {
01332     if (!m_labels)
01333         SG_ERROR("Please assign labels first!\n");
01334 
01335     int32_t num_labels=m_labels->get_num_labels();
01336 
01337     if (num_labels!=linear_term.vlen)
01338     {
01339         SG_ERROR("Number of labels (%d) does not match number"
01340                 " of entries (%d) in linear term \n", num_labels,
01341                 linear_term.vlen);
01342     }
01343 
01344     m_linear_term=linear_term;
01345 }
01346 
01347 SGVector<float64_t> CLibLinear::get_linear_term()
01348 {
01349     if (!m_linear_term.vlen || !m_linear_term.vector)
01350         SG_ERROR("Please assign linear term first!\n");
01351 
01352     return m_linear_term;
01353 }
01354 
01355 void CLibLinear::init_linear_term()
01356 {
01357     if (!m_labels)
01358         SG_ERROR("Please assign labels first!\n");
01359 
01360     m_linear_term=SGVector<float64_t>(m_labels->get_num_labels());
01361     SGVector<float64_t>::fill_vector(m_linear_term.vector, m_linear_term.vlen, -1.0);
01362 }
01363 
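init_linear_term() fills the linear term with -1, which reproduces the standard hinge-loss dual solved by solve_l2r_l1l2_svc. The following is a small illustrative sketch of overriding it before training; it is not part of this file, the variables feats and labels are hypothetical placeholders, and only the dual SVC solvers above actually consume the linear term.

    // assuming feats (CDotFeatures*) and labels (CBinaryLabels*) already exist
    CLibLinear* svm = new CLibLinear(1.0, feats, labels);
    svm->set_liblinear_solver_type(L2R_L1LOSS_SVC_DUAL);

    SGVector<float64_t> p(labels->get_num_labels());
    SGVector<float64_t>::fill_vector(p.vector, p.vlen, -1.0);  // -1 everywhere = standard SVM dual
    p[0] = -0.5;                                                // e.g. weaken the margin requirement of example 0
    svm->set_linear_term(p);

    svm->train();
    SG_UNREF(svm);
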
01364 #endif //HAVE_LAPACK