angel  mercurial changeset:
graph_generator.cpp
Go to the documentation of this file.
00001 // $Id: graph_generator.cpp,v 1.6 2008/02/28 14:57:33 gottschling Exp $
00002 /*
00003 #############################################################
00004 # This file is part of angel released under the BSD license #
00005 # The full COPYRIGHT notice can be found in the top         #
00006 # level directory of the angel distribution                 #
00007 #############################################################
00008 */
00009 
00010 
00011 #include "angel/include/graph_generator.hpp"
00012 
00013 #include <vector>
00014 #include <cstdlib>
00015 #include <numeric>
00016 
00017 #include "angel/include/angel_exceptions.hpp"
00018 #include "angel/include/angel_types.hpp"
00019 #include "angel/include/angel_tools.hpp"
00020 
00021 #ifndef NDEBUG
00022 #include "angel/include/angel_io.hpp"
00023 #endif
00024 
00025 namespace angel {
00026 
00027 using namespace std;
00028 using namespace boost;
00029 
00030 // -------------------------------------------------------------------------
00031 
00032 void random_statement (int inputs, int expr, const vector<double>& p,
00033                        c_graph_t& statement) {
00034 
00035   typedef c_graph_t::vertex_t    vertex_t;
00036 
00037   int maxar= int (p.size()); // maximal arity of expressions
00038 
00039   // assert (inputs + expr - 1 <= expr * maxar); // all inputs usable
00040   THROW_DEBUG_EXCEPT_MACRO (inputs + expr - 1 > expr * maxar, consistency_exception,
00041                          "Un-usable inputs");
00042   
00043 
00044   // how many inputs each expression shall have
00045   // (statement inputs have zero)
00046   int V= inputs + expr; // number of vertices
00047   vector<int> expi (V);
00048   for (int i= inputs; i < V; i++)
00049     expi[i]= angel::random (p) + 1;
00050 
00051   // if expression inputs aren't enough then p will be ignored, 
00052   // i.e. expi increased randomly
00053   for (int m= inputs + expr - 1 - accumulate (expi.begin(), expi.end(), 0);
00054        m > 0; m--)
00055     for (;;) {
00056       int e= angel::random (inputs, V-1);
00057       if (expi[e] < maxar) {expi[e]++; break; }}
00058 
00059   // build graph with last vertex as dependent
00060   vector<vertex_t> deps (1);
00061   deps[0]= V - 1;
00062   c_graph_t gtmp (V, inputs, deps);
00063   
00064   // random successor for each input and intermediate expr.
00065   for (int i= V - 2; i >= 0; i--) {
00066     vertex_t v= vertex (i, gtmp);
00067     for (;;) {
00068       int s= angel::random (max (i+1, inputs), V-1);
00069       vertex_t vs= vertex (s, gtmp);
00070       if (int (in_degree (vs, gtmp)) < expi[s]) {
00071         add_edge (v, vs, gtmp); break;}
00072     }}
00073 
00074   // if some expression desires more inputs 
00075   // then reuse statement inputs
00076   for (int i= inputs; i < V; i++) {
00077     vertex_t v= vertex (i, gtmp);
00078     for (; int (in_degree (v, gtmp)) < expi[i];) {
00079       int pr= angel::random (0, inputs-1);
00080       vertex_t vpr= vertex (pr, gtmp);
00081       // c_graph_t::edge_t e;
00082       // bool found;
00083       // boost::tie (e, found)= edge (vpr, v, gtmp);
00084       // if (!found) add_edge (vpr, v, gtmp);
00085       add_edge (vpr, v, gtmp); // double arcs allowed e.g. x*x
00086     }
00087   }
00088 
00089   statement.swap (gtmp);
00090 }
00091 
00092 // -------------------------------------------------------------------------
00093 
00094 void random_statement_vector (int max_expr, double unary, 
00095                               std::vector<c_graph_t>& statement_vector) {
00096   // assert (unary >= 0.0 && unary <= 1.0);
00097   THROW_DEBUG_EXCEPT_MACRO (unary < 0.0 || unary > 1.0, consistency_exception,
00098                          "unary must be between 0 and 1"); 
00099   std::vector<double> p (2); // probabilities of arities
00100   p[0]= unary; p[1]= 1.0;
00101 
00102   // assert (statement_vector.size() > 0);
00103   THROW_DEBUG_EXCEPT_MACRO (statement_vector.size() == 0, consistency_exception,
00104                          "statement vector must be non-empty"); 
00105   for (size_t i= 0; i < statement_vector.size(); i++) {
00106     int expr= angel::random (1, max_expr); // number of expr.
00107     // #inputs in [1, expr+1], average most probable, good for unary=0.5
00108     int inputs= (angel::random (expr) + angel::random (expr)) / 2 + 1;
00109     random_statement (inputs, expr, p, statement_vector[i]);
00110 #ifndef NDEBUG
00111     write_graph ("random_statement", statement_vector[i]);
00112 #endif
00113   }
00114 }
00115 
00116 // -------------------------------------------------------------------------
00117 
00118 void stats2block (int inputs, int outputs, const std::vector<c_graph_t>& stats, 
00119                   c_graph_t& block) {
00120   typedef c_graph_t::vertex_t          vertex_t;
00121   typedef c_graph_t::oei_t             oei_t;
00122 
00123   int nstats= int (stats.size());
00124   // assert (nstats >= outputs); // not more outputs than statements
00125   THROW_DEBUG_EXCEPT_MACRO (nstats < outputs, consistency_exception,
00126                          "More outputs than statements"); 
00127 
00128 #ifndef NDEBUG
00129   for (int i= 0; i < nstats; i++) { // really statements ?
00130     const c_graph_t& stat= stats[i];
00131     // assert (stat.dependents.size() == 1);
00132     THROW_DEBUG_EXCEPT_MACRO (stat.dependents.size() != 1, consistency_exception,
00133                            "Statements must have 1 output"); 
00134     // assert (stat.dependents[0] == vertex (stat.v()-1, stat)); 
00135     THROW_DEBUG_EXCEPT_MACRO (stat.dependents[0] != vertex (stat.v()-1, stat), consistency_exception,
00136                            "Statements output must be last vertex"); }
00137 #endif
00138 
00139   int V= inputs; // number of vertices (in block)
00140   // difference of vertex number between block and statement
00141   vector<int> soffset (nstats); 
00142   for (int i= 0; i < nstats; i++) {
00143     soffset[i]= V - stats[i].x();
00144     V+= stats[i].v() - stats[i].x(); }
00145 
00146   // build graph without dependents
00147   vector<vertex_t> deps;
00148   c_graph_t gtmp (V, inputs, deps);
00149 
00150   // copy statements into block (without edges from statement inputs)
00151   int en= 0;
00152   for (int i= 0; i < nstats; i++) {
00153     const c_graph_t& stat= stats[i];
00154     int so= soffset[i];
00155     for (int iv= stat.x(); iv < stat.v(); iv++)
00156       take_over_successors (vertex (iv, stat), vertex (iv+so, gtmp), 
00157                             so, stat, en, gtmp); }
00158 
00159   // each output can be set to some statement output
00160   // since vertex 0 is always input it can be used to mark output as unset
00161   deps.resize (outputs); 
00162 
00163   // list of potential successors (each used once)
00164   // initialized with block outputs
00165   typedef pair<int,int> succ_t;
00166   vector<succ_t> sl;
00167   for (int i= 0; i < outputs; i++)
00168     sl.push_back (make_pair (-1, i));
00169 
00170   // find successor for each statement output 
00171   for (int i= nstats-1; i >= 0; i--) {
00172     vertex_t vb= vertex (soffset[i] + stats[i].v() - 1, gtmp); // stat. output
00173     int si= angel::random (int (sl.size()));
00174     succ_t succ= sl[si];
00175     int succs= succ.first, succi= succ.second;
00176     if (succs == -1)           // block output
00177       deps[succi]= vb;
00178     else {                     // statement input
00179       const c_graph_t& sstat= stats[succs];     // succ. statement
00180       vertex_t ivs= vertex (succi, sstat);       // input vertex in sstat
00181       // connect vb with successors of ivs (their equivalents in block)
00182       take_over_successors (ivs, vb, soffset[succs], sstat, en, gtmp);
00183     }
00184     // remove succ from list, maybe more efficient with index
00185     remove (sl.begin(), sl.end(), succ); sl.resize (sl.size()-1);
00186     // add current statement's input to successor list
00187     for (int j= 0; j < stats[i].x(); j++) 
00188       sl.push_back (make_pair (i, j));
00189   }
00190 
00191   // remaining outputs are assigned to some statement (not yet used as output)
00192   while (sl.size() > 0 && sl[0].first == -1) {
00193     int output= sl[0].second;
00194     // remove this output from successor list
00195     remove (sl.begin(), sl.end(), sl[0]); sl.resize (sl.size()-1);
00196     vertex_t vb;
00197     do {                                // find non-output statement
00198       int ss= angel::random (nstats);
00199       vb= vertex (soffset[ss] + stats[ss].v() - 1, gtmp); // stat. output
00200     } while (find (deps.begin(), deps.end(), vb) != deps.end());
00201     deps[output]= vb;
00202   }
00203 
00204   // each block input is unified with some free statement input
00205   for (int i= 0; i < inputs; i++) {
00206     // assert (sl.size() > 0); // enough free statement inputs ?
00207     THROW_EXCEPT_MACRO (sl.size() == 0, consistency_exception, "Not enough inputs"); 
00208     vertex_t vi= vertex (i, gtmp);
00209     succ_t succ= sl[angel::random (int (sl.size()))];
00210     int succs= succ.first, succi= succ.second;
00211     const c_graph_t& sstat= stats[succs];     // statement
00212     vertex_t ivs= vertex (succi, sstat);       // substituted vertex
00213     // connect block input with successors of ivs (their equivalents in block)
00214     take_over_successors (ivs, vi, soffset[succs], sstat, en, gtmp);
00215     // remove succ from list, maybe more efficient with index
00216     remove (sl.begin(), sl.end(), succ); sl.resize (sl.size()-1);
00217   }
00218 
00219   // remaining free statement inputs are unified 
00220   // with some block input or statement output (smaller statement)
00221   // potential predecessors stored in pl
00222   vector<vertex_t> pl;
00223   for (int i= 0; i < inputs; i++) {
00224     vertex_t v= vertex (i, gtmp); pl.push_back (v); }
00225   for (int i= 0; i < nstats; i++) {
00226     vertex_t v= vertex (soffset[i] + stats[i].v() - 1, gtmp); // stat. output
00227     pl.push_back (v); }
00228   while (sl.size() > 0) {
00229     succ_t succ= sl[sl.size()-1];              // last one easier to delete 
00230     int succs= succ.first, succi= succ.second;
00231     const c_graph_t& sstat= stats[succs];     // statement
00232     vertex_t ivs= vertex (succi, sstat);       // substituted vertex
00233     vertex_t vp= pl[angel::random (inputs+succs)];
00234     take_over_successors (ivs, vp, soffset[succs], sstat, en, gtmp);
00235     sl.resize (sl.size()-1);
00236   }
00237 
00238   gtmp.dependents.swap (deps);
00239 
00240   block.swap (gtmp);
00241 }
00242 
00243 // -------------------------------------------------------------------------
00244 
00245 void random_block (int inputs, int outputs, int stats, int max_exp, double unary,
00246                    c_graph_t& block) {
00247 
00248   std::vector<c_graph_t> statement_vector (stats);
00249   random_statement_vector (max_exp, unary, statement_vector);
00250   stats2block (inputs, outputs, statement_vector, block);
00251 
00252 }
00253 
00254 // -------------------------------------------------------------------------
00255 
00256 // will be confused if some vertex in block is both dependent and independent
00257 void block2loop (const c_graph_t& block, int loops,
00258                  c_graph_t& loop) {
00259 
00260 
00261   typedef c_graph_t::vertex_t vertex_t;
00262   typedef c_graph_t::oei_t    oei_t;
00263 
00264   // some vertex sizes
00265   int binputs= block.x(), boutputs= block.y(), bv= block.v(),
00266       linputs= binputs <= boutputs ? binputs : binputs + (loops-1)*(binputs-boutputs),
00267       // loutputs= binputs >= boutputs ? boutputs : boutputs + (loops-1)*(boutputs-binputs),
00268       isize= min (binputs, boutputs), // size of interface between blocks
00269       lv= loops * block.v() - (loops-1) * isize;
00270   vector<vertex_t> dependents;
00271   c_graph_t gtmp (lv, linputs, dependents);
00272 
00273   // input vertices of loop, initialized with input of block
00274   vector<vertex_t> input (binputs);
00275   for (int c= 0; c < binputs; c++) input[c]= c;
00276 
00277   // first block is simply copied
00278   int en= 0; // counter for edge_number
00279   c_graph_t::ei_t      ei, e_end;
00280   for (tie (ei, e_end)= edges (block); ei != e_end; ++ei)
00281     add_edge (source (*ei, block), target (*ei, block), en++, gtmp);
00282 
00283   for (int l= 1; l < loops; l++) {
00284     // vertex_t goffset= l * (lv - min (binputs, boutputs)); // first vertex in new block
00285     vertex_t goffset= l * (bv - isize), // corresponds to first vertex of new block in loop
00286              lgoffset= goffset - bv + isize;  // first vertex of previous block
00287 
00288     // unify output of last block with input of current block (first isize vertices resp.)
00289     for (int i= 0; i < isize; i++) {
00290       vertex_t bv= vertex (i, block); // vertex in block
00291       // vertex_t lv= vertex (lgoffset + block.dependents[i], gtmp);
00292       vertex_t lv= lgoffset + block.dependents[i]; // corresponds to vertex in loop
00293       oei_t      oei, oe_end;
00294       for (tie (oei, oe_end)= out_edges (bv, block); oei != oe_end; ++oei) {
00295         add_edge (lv, goffset + target (*oei, block), en++, gtmp); }
00296     }
00297 
00298     // look for loop inputs and outputs
00299     for (int i= isize; i < binputs; i++)
00300       input.push_back (goffset + vertex_t (i));
00301     for (int i= isize; i < boutputs; i++)
00302       dependents.push_back (lgoffset + block.dependents[i]);
00303 
00304     // copy the remainder of the block
00305     for (int i= isize; i < bv; i++) {
00306       vertex_t bv= vertex (i, block), // vertex in block
00307                lv= goffset + vertex_t (i);
00308       oei_t    oei, oe_end;
00309       for (tie (oei, oe_end)= out_edges (bv, block); oei != oe_end; ++oei) 
00310         add_edge (lv, vertex_t (goffset + target (*oei, block)), en++, gtmp);     
00311     }
00312   } 
00313   
00314   // output of last block is always output of loop
00315   vertex_t goffset= (loops - 1) * (bv - isize);
00316   for (int i= 0; i < boutputs; i++)
00317     dependents.push_back (goffset + block.dependents[i]);
00318 
00319   gtmp.X= int (input.size()); // will be used in independent_vertices_to_front
00320   gtmp.dependents.swap (dependents); 
00321 
00322   independent_vertices_to_front (gtmp, input, loop);
00323 
00324   // remove all vertices that do not lie on a path from independent 
00325   // to dependent nodes
00326   loop.clear_graph (); 
00327   loop.next_edge_number= renumber_edges (loop);
00328 }
00329 
00330 // Initializes the random generator unless SAME_RANDOM_VALUES is defined
00331 random_init_t random_init_object;
00332 
00333 } // namespace angel
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines