/** @file urlfilter.h */
/*
 * Copyright (C) 2002 Laird Breyer
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * Author: Laird Breyer <laird@lbreyer.com>
 */

#ifndef _URL_FILTER_H_
#define _URL_FILTER_H_
#include <string.h>
#include <stdexcept>
#include "content-type.h"

/// Bit mask selecting the high bit (0x80) of an 8-bit value.
/// NOTE(review): its use is not visible in this header -- confirm the
/// exact meaning against the URLFilter implementation.
#define SLASHBIT 0x80

/// String length limits for the fixed-size character buffers declared in
/// this header. Each buffer is declared with LEN+1 elements, i.e. one more
/// than the limit (presumably for the terminating NUL).
#define STRINGBUF_LEN0 511
#define STRINGBUF_LEN1 1023
#define STRINGBUF_LEN2 2047
#define STRINGBUF_LEN3 4095

/// Contains the parts of a URL as separate strings.
00036 struct URLComponents { 00037 char scheme[STRINGBUF_LEN1+1]; 00038 char netloc[STRINGBUF_LEN1+1]; 00039 char query[STRINGBUF_LEN1+1]; 00040 char params[STRINGBUF_LEN1+1]; 00041 char path[STRINGBUF_LEN1+1]; 00042 00043 void Clear() { scheme[0] = netloc[0] = query[0] = params[0] = path[0] = 0; } 00044 }; 00045 00046 /// Knows how to parse a URL string and related functions 00047 class URLFilter { 00048 public: 00049 URLFilter(bool rs); 00050 const char* DeindexURL(const char *anurl); 00051 const char* CompressURL(const char *anurl); 00052 void ParseURL(const char *anurl, char *schemebuf, 00053 char *netlocbuf, char *querybuf, 00054 char *paramsbuf, char *pathbuf); 00055 void NormalizeURLPath(char *apath); 00056 ContentType ClassifyURLPath(const char *path); 00057 const char * FormatURL(const char *anurl, int anurl_len, 00058 URLComponents *baseurl, ContentType *foundtype) throw (domain_error); 00059 00060 private: 00061 00062 char scratchbuf0[STRINGBUF_LEN0+1]; 00063 char scratchbuf1[STRINGBUF_LEN2+1]; 00064 char scratchbuf2[STRINGBUF_LEN2+1]; 00065 char scratchbuf3[STRINGBUF_LEN2+1]; 00066 char scratchbuf4[STRINGBUF_LEN3+1]; 00067 char scratchbuf5[STRINGBUF_LEN1+1]; 00068 char scratchbuf6[STRINGBUF_LEN1+1]; 00069 00070 char comp_scratchbuf[STRINGBUF_LEN2+1]; 00071 char parse_scratchbuf[STRINGBUF_LEN1+1]; 00072 char deindex_scratchbuf[STRINGBUF_LEN2+1]; 00073 00074 struct { 00075 bool remove_html_suffix; 00076 bool rearrange_components; 00077 } flags; 00078 }; 00079 #endif