SHOGUN
v2.0.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2012 Chiyuan Zhang 00008 * Copyright (C) 2012 Chiyuan Zhang 00009 */ 00010 00011 #include <algorithm> 00012 #include <limits> 00013 00014 #include <shogun/multiclass/ecoc/ECOCRandomDenseEncoder.h> 00015 #include <shogun/multiclass/ecoc/ECOCUtil.h> 00016 00017 using namespace shogun; 00018 00019 CECOCRandomDenseEncoder::CECOCRandomDenseEncoder(int32_t maxiter, int32_t codelen, float64_t pposone) 00020 :m_maxiter(maxiter), m_codelen(codelen), m_pposone(pposone) 00021 { 00022 if (!check_probability(pposone)) 00023 SG_ERROR("invalid probability of +1"); 00024 00025 init(); 00026 } 00027 00028 void CECOCRandomDenseEncoder::init() 00029 { 00030 SG_ADD(&m_maxiter, "maxiter", "max number of iterations", MS_NOT_AVAILABLE); 00031 SG_ADD(&m_codelen, "codelen", "code length", MS_NOT_AVAILABLE); 00032 SG_ADD(&m_pposone, "pposone", "probability of +1", MS_NOT_AVAILABLE); 00033 } 00034 00035 void CECOCRandomDenseEncoder::set_probability(float64_t pposone) 00036 { 00037 if (!check_probability(pposone)) 00038 SG_ERROR("probability of 0, +1 and -1 must sum to one"); 00039 00040 m_pposone = pposone; 00041 } 00042 00043 SGMatrix<int32_t> CECOCRandomDenseEncoder::create_codebook(int32_t num_classes) 00044 { 00045 int32_t codelen = m_codelen; 00046 if (codelen <= 0) 00047 codelen = get_default_code_length(num_classes); 00048 00049 00050 SGMatrix<int32_t> best_codebook(codelen, num_classes, true); 00051 int32_t best_dist = 0; 00052 00053 SGMatrix<int32_t> codebook(codelen, num_classes); 00054 int32_t n_iter = 0; 00055 while (true) 00056 { 00057 // fill codebook 00058 codebook.zero(); 00059 for (int32_t i=0; i < codelen; ++i) 00060 { 00061 for (int32_t j=0; j < num_classes; ++j) 00062 { 00063 float64_t randval = CMath::random(0.0, 1.0); 00064 if (randval > m_pposone) 00065 codebook(i, j) = -1; 00066 else 00067 codebook(i, j) = +1; 00068 } 00069 } 00070 00071 bool valid = true; 00072 for (int32_t i=0; i < codelen; ++i) 00073 { 00074 bool p1_occur = false, n1_occur = false; 00075 for (int32_t j=0; j < num_classes; ++j) 00076 if (codebook(i, j) == 1) 00077 p1_occur = true; 00078 else if (codebook(i, j) == -1) 00079 n1_occur = true; 00080 00081 if (!p1_occur || !n1_occur) 00082 { 00083 valid = false; 00084 break; 00085 } 00086 } 00087 00088 if (valid) 00089 { 00090 // see if this is a better codebook 00091 // compute the minimum pairwise code distance 00092 int32_t min_dist = std::numeric_limits<int32_t>::max(); 00093 for (int32_t i=0; i < num_classes; ++i) 00094 { 00095 for (int32_t j=i+1; j < num_classes; ++j) 00096 { 00097 int32_t dist = CECOCUtil::hamming_distance(codebook.get_column_vector(i), 00098 codebook.get_column_vector(j), codelen); 00099 if (dist < min_dist) 00100 min_dist = dist; 00101 } 00102 } 00103 00104 if (min_dist > best_dist) 00105 { 00106 best_dist = min_dist; 00107 std::copy(codebook.matrix, codebook.matrix + codelen*num_classes, 00108 best_codebook.matrix); 00109 } 00110 } 00111 00112 if (++n_iter >= m_maxiter) 00113 if (best_dist > 0) // already obtained a good codebook 00114 break; 00115 } 00116 00117 return best_codebook; 00118 }