Main Page   Class Hierarchy   Compound List   File List   Compound Members   File Members  

sampler.h

Go to the documentation of this file.
00001 /** @file sampler.h */
00002 /* 
00003  * Copyright (C) 2002 Laird Breyer
00004  *  
00005  * This program is free software; you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation; either version 2 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program; if not, write to the Free Software
00017  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00018  * 
00019  * Author:   Laird Breyer <laird@lbreyer.com>
00020  */
00021 
00022 #ifndef _SAMPLER_H_
00023 #define _SAMPLER_H_
00024 //#include "document.h"
00025 #include "webnode.h"
00026 #include "linkgraph.h"
00027 #include <gsl/gsl_rng.h>
00028 
00029 
00030 /// Abstract base class for all samplers.
00031 /**
00032  * A sampler populates the web link graph with occupation counts,
00033  * which are interpreted as page ranking schemes.
00034  * Supported samplers must have transition probabilities
00035  * of the form P(x,y) = eps * mu(y) + (1-eps) * Q(x,y);
00036  * a derived class supplies Q(x,y) by overriding QEvolveFrom()
00037  * and must also implement the pure virtual Name().
00038  * The destructor is virtual and empty: it releases no resources.
00039  */
00040 class WebSampler {
00041  public:
00042   /// Builds a sampler for graph g; the declaration allows it to throw.
00043   WebSampler(WebLinkGraph* g) throw (exception);
00044   virtual ~WebSampler() {} ///< virtual so deletion via WebSampler* is well defined
00045 
00046   void SimulateForward(long n);
00047   void ClearAllocForward();
00048   void SimulateAllocForward();
00049   void IncrementAllocForward(uint32 k, int32 num);
00050 
00051   void TaggedSimulateForward(long n, const uint32 *fromsetsize);
00052   /// Performs a random transition under Q; this base version returns x unchanged.
00053   virtual WebNodePtr QEvolveFrom(WebNodePtr x) 
00054     { return x; } 
00055   /// Returns an identifying string in the supplied buffer; derived classes must implement it.
00056   virtual char *Name(char *buf) = 0;
00057   /// Clears occupation counts and run sizes
00058   void ClearCounts();
00059   void ClearScratch();
00060   void SetRandomSeed(unsigned long int s) { gsl_rng_set(r,s); } ///< seeds the GSL generator r with s
00061   void PrintCounts(ostream& o);
00062   void PrintTagCounts(ostream& o);
00063   /// Number of samples produced by SimulateForward()
00064   uint32 LastRunSize() const
00065     { return last_run_size; }
00066   /// Number of candidates accepted by TaggedSimulateForward()
00067   uint32 LastTaggedRunSize() const
00068     { return last_tagged_run_size; }
00069 
00070  protected:
00071   WebLinkGraph* graph; // presumably the graph handed to the constructor -- confirm in sampler.cc
00072   uint32 number_of_nodes;
00073 
00074   gsl_rng* r; // for random numbers; seeded through SetRandomSeed()
00075   double eps; // presumably the eps of P(x,y) in the class comment -- confirm in sampler.cc
00076 
00077   int32 *allocation_list; // for SimulateForward()
00078   LeafNodePtr xleaf;
00079 
00080   double probabilities[TAG_NUMBER_OF_BITS]; // for TaggedSimulateForward()
00081   long allocated[TAG_NUMBER_OF_BITS];
00082   uint32 fromsetsize_remaining[TAG_NUMBER_OF_BITS];
00083 
00084   uint32 last_run_size; // reported by LastRunSize()
00085   uint32 last_tagged_run_size; // reported by LastTaggedRunSize()
00086 };
00087 
00088 
00089 /// WebSampler realising the standard PageRank chain.
00090 /// Overrides QEvolveFrom() and Name() inherited from WebSampler.
00091 class PageRankSampler : public WebSampler {
00092  public:
00093   PageRankSampler(WebLinkGraph* agraph); ///< builds the sampler for agraph
00094   void SetParameters(double epsilon); ///< epsilon: presumably the eps mixing weight of WebSampler -- confirm
00095   virtual WebNodePtr QEvolveFrom(WebNodePtr x); ///< random transition starting from x
00096   virtual char *Name(char *buf); ///< identifying string written to the supplied buffer
00097 
00098 };
00099 
00100 /// A scratch union used by DateBiasedPageRankSampler; db, mass and ss share the same storage.
00101 union DBScratchStruct {
00102   /// Wraps a generic ScratchStruct by copying it into the ss member.
00103   DBScratchStruct(ScratchStruct s) 
00104   { ss = s; }
00105   ~DBScratchStruct() {}
00106 
00107   struct {
00108     uint16 filled;
00109     int16 minvalue;
00110   } db; ///< view of the storage as a pair of 16-bit fields
00111 
00112   float mass; ///< view of the storage as a single float
00113     
00114   ScratchStruct ss; ///< view of the storage as the generic scratch value
00115 };
00116 
00117 /// WebSampler whose page ranking takes into account the
00118 /// relative age of the documents being linked to.
00119 class DateBiasedPageRankSampler : public WebSampler {
00120  public:
00121   DateBiasedPageRankSampler(WebLinkGraph* agraph); ///< builds the sampler for agraph
00122   void SetParameters(double epsilon, double lambda, uint16 daterange); ///< note: parameter daterange shares its name with the private member
00123   virtual WebNodePtr QEvolveFrom(WebNodePtr x); ///< random transition starting from x
00124   virtual char *Name(char *buf); ///< identifying string written to the supplied buffer
00125  private:
00126   double lam; // presumably set from lambda in SetParameters() -- confirm in sampler.cc
00127   float lamhat; // NOTE(review): relation to lam is not visible in this header
00128   uint16 daterange;
00129 };
00130 
00131 enum ktype { undef, hubs, auth }; ///< selector type taken by TruncatedKleinbergSampler::SetParameters()
00132 
00133 /// WebSampler giving an epsilon approximation of the
00134 /// Kleinberg hubs and authorities model.
00135 /**
00136  * Running a Markov chain with transition probabilities QQ^*, where
00137  * Q follows tolinks at random and Q^* follows fromlinks at random,
00138  * yields the Kleinberg hubs and authorities model. That chain is
00139  * not UE, so it does not fit the WebSampler framework directly;
00140  * choosing epsilon close to zero gives an approximation instead.
00141  *
00142  */
00143 class TruncatedKleinbergSampler : public WebSampler {
00144  public:
00145   TruncatedKleinbergSampler(WebLinkGraph* agraph); ///< builds the sampler for agraph
00146   void SetParameters(double epsilon, ktype what); ///< what: a ktype value (undef, hubs or auth)
00147   virtual WebNodePtr QEvolveFrom(WebNodePtr x); ///< random transition starting from x
00148   virtual char *Name(char *buf); ///< identifying string written to the supplied buffer
00149  private:
00150   ktype which; // presumably records the ktype given to SetParameters() -- confirm in sampler.cc
00151 };
00152 
00153 #endif

Generated on Wed May 29 11:37:15 2002 for MarkovPR by doxygen 1.2.15