00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #include "trie.h"
00026 #include <cstring>
00027 #include <cassert>
00028
00029 Trie :: Trie(unsigned long slen, long jels) {
00030
00031 slen_ = slen;
00032 bigs = new char[slen];
00033 memset(bigs, 0, slen);
00034 end_of_bigs = 0;
00035
00036 jumptable = new SimpleCharPtrHashTable(jels);
00037
00038
00039 stats_bigstring_insertions = 0;
00040 stats_jumptable_insertions = 0;
00041 stats_cumulative_string_size = 0;
00042 }
00043
00044 void Trie :: Statistics(ostream& o) {
00045 o << "Total number of URL strings inserted: " << stats_bigstring_insertions << endl;
00046 o << "Total original size of URLs inserted: " << stats_cumulative_string_size << endl;
00047 o << "Heap needed for bigstring (bytes): " << end_of_bigs << endl;
00048 o << "Heap reserved for bigstring (bytes): " << slen_ << endl;
00049
00050 o << "\nTotal number of insertions in jumptable: " << stats_jumptable_insertions << endl;
00051 o << "Heap needed for jumptable (bytes): " << (stats_jumptable_insertions * sizeof(SimpleHashPair<char*>)) << endl;
00052 o << "Heap reserved for jumptable (bytes): " << (jumptable->Size() * sizeof(SimpleHashPair<char*>)) << endl;
00053 }
00054
00055
00056
00057
00058
00059
00060
00061
00062 ptrdiff_t Trie :: FindURL(const char *url) {
00063 char *p = bigs;
00064 const char *q = url;
00065
00066
00067
00068 while( *q ) {
00069
00070 while( (*q != 0) && ( *p == *q ) ) {p++; q++; };
00071
00072
00073
00074 if( (*q == 0) && (*p == 0) ) {
00075
00076 return p - bigs;
00077
00078 } else if( *q == 0 ) {
00079
00080
00081 char *nextp = jumptable->Find(p - bigs);
00082
00083 while( nextp && (*nextp != 0) ) {
00084 p = nextp;
00085
00086 nextp = jumptable->Find(p - bigs);
00087
00088 }
00089
00090 if( nextp && (*nextp == 0) ) {
00091
00092 return p - bigs;
00093
00094 } else {
00095
00096 return -1;
00097
00098 }
00099 } else {
00100
00101 char *nextp = jumptable->Find(p - bigs);
00102
00103 if( nextp ) {
00104 p = nextp;
00105
00106 } else {
00107
00108 return -1;
00109
00110 }
00111 }
00112 }
00113
00114 return -1;
00115 }
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131 ptrdiff_t Trie :: InsertURL(const char *url) throw (overflow_error) {
00132 char *p = bigs;
00133 const char *q = url;
00134 char *end_of_inserted_string = NULL;
00135
00136 stats_cumulative_string_size += (strlen(url)+1);
00137
00138
00139
00140 if( end_of_bigs + strlen(url) > slen_ ) {
00141 Statistics(cout);
00142 cerr << "d'oh: out of string memory" << endl;
00143 throw overflow_error("");
00144 } else if ( !end_of_bigs ) {
00145
00146
00147 while( *q ) { bigs[end_of_bigs++] = *q++; }
00148 bigs[end_of_bigs] = 0;
00149
00150 end_of_inserted_string = bigs + end_of_bigs;
00151
00152 end_of_bigs++;
00153 stats_bigstring_insertions++;
00154
00155 } else {
00156
00157 while( *q ) {
00158
00159
00160 while( (*q != 0) && ( *p == *q ) ) {p++; q++; };
00161
00162
00163
00164
00165
00166 if( (*q == 0) && (*p == 0) ) {
00167
00168
00169
00170
00171 end_of_inserted_string = p;
00172
00173 } else if( *q == 0 ) {
00174
00175
00176
00177 char *nextp = jumptable->Find(p - bigs);
00178
00179 while( nextp && (*nextp != 0) ) {
00180 p = nextp;
00181
00182 nextp = jumptable->Find(p - bigs);
00183
00184
00185 }
00186
00187 if( nextp && (*nextp == 0) ) {
00188
00189
00190 p = nextp;
00191 end_of_inserted_string = nextp;
00192
00193 } else {
00194 assert( !nextp );
00195
00196
00197 jumptable->Insert(p - bigs, bigs + end_of_bigs);
00198
00199 stats_jumptable_insertions++;
00200 bigs[end_of_bigs] = 0;
00201
00202 end_of_inserted_string = bigs + end_of_bigs;
00203
00204 end_of_bigs++;
00205 stats_bigstring_insertions++;
00206 }
00207 } else {
00208
00209 char *nextp = jumptable->Find(p - bigs);
00210
00211
00212 if( nextp ) {
00213 p = nextp;
00214
00215 } else {
00216
00217
00218 jumptable->Insert(p - bigs, bigs + end_of_bigs);
00219
00220 stats_jumptable_insertions++;
00221 while( *q ) { bigs[end_of_bigs++] = *q++; }
00222 bigs[end_of_bigs] = 0;
00223
00224 end_of_inserted_string = bigs + end_of_bigs;
00225
00226 end_of_bigs++;
00227 stats_bigstring_insertions++;
00228
00229 }
00230 }
00231 }
00232
00233 }
00234
00235 assert(end_of_inserted_string);
00236 assert(*end_of_inserted_string == 0);
00237
00238 return end_of_inserted_string - bigs;
00239
00240 }
00241
00242
00243
00244 #ifdef UNIT_TEST
00245
// Announces statement x (printing its source text), prints an "output:"
// header, then executes x exactly once so that anything x prints appears
// below the header.
#define MAKE_TEST_STATEMENT(x)                  \
    do {                                        \
        cout << "testing: " #x << endl          \
             << "output: " << endl;             \
        (x);                                    \
    } while (false)
00252
// Compares expression x with the expected value y and prints
// "<argv[0]>: PASSED|FAILED <source text of x>", followed by a line showing
// the value of x. Requires `argv` to be in scope at the point of use.
//
// NOTE: x is evaluated twice (once for the comparison, once for the
// "output:" line). Expressions with side effects therefore run twice; the
// expected values used in main() below take this into account.
#define MAKE_TEST(x, y)                                                 \
    do {                                                                \
        const char *verdict_ = ((x) == (y)) ? "PASSED" : "FAILED";      \
        cout << argv[0] << ": " << verdict_ << " " #x << endl;          \
        cout << "output: " << (x) << endl;                              \
    } while (false)
00258
// Executes x once and checks that it throws an exception of type y. Prints
// "<argv[0]>: PASSED <source text of x>" when y was caught and
// "<argv[0]>: FAILED <source text of x>" when x completed normally.
// Exceptions of other types are not caught here. Requires `argv` to be in
// scope at the point of use.
#define MAKE_TEST_EX(x, y)                                              \
    do {                                                                \
        bool threw_ = false;                                            \
        try {                                                           \
            (x);                                                        \
        } catch (y) {                                                   \
            threw_ = true;                                              \
        }                                                               \
        cout << argv[0] << (threw_ ? ": PASSED" : ": FAILED")           \
             << " " #x << endl;                                         \
    } while (false)
00268
00269 int main(int argc, char** argv) {
00270 Trie *trie = new Trie(100, 5);
00271
00272 MAKE_TEST(trie->InsertURL("banana"),6);
00273 MAKE_TEST(trie->InsertURL("apple"),12);
00274 MAKE_TEST(trie->InsertURL("bananarama"),17);
00275
00276 MAKE_TEST(trie->FindURL("apple"),12);
00277 MAKE_TEST(trie->FindURL("coconut"),-1);
00278
00279 MAKE_TEST(trie->StatsCumulativeStringSize(), 48);
00280 MAKE_TEST(trie->StatsBigstringInsertions(), 3);
00281 MAKE_TEST(trie->StatsJumptableInsertions(), 2);
00282
00283 MAKE_TEST_EX(trie->InsertURL("abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"),overflow_error);
00284 }
00285 #endif