Main Page   Class Hierarchy   Compound List   File List   Compound Members   File Members  

parsehandler-preparse.h

Go to the documentation of this file.
00001 //***************************************************************************
00002 // This source code is copyrighted 2002 by Google Inc.  All rights
00003 // reserved.  You are given a limited license to use this source code for
00004 // purposes of participating in the Google programming contest.  If you
00005 // choose to use or distribute the source code for any other purpose, you
00006 // must either (1) first obtain written approval from Google, or (2)
00007 // prominently display the foregoing copyright notice and the following
00008 // warranty and liability disclaimer on each copy used or distributed.
00009 // 
00010 // The source code and repository (the "Software") is provided "AS IS",
00011 // with no warranty, express or implied, including but not limited to the
00012 // implied warranties of merchantability and fitness for a particular
00013 // use.  In no event shall Google Inc. be liable for any damages, direct
00014 // or indirect, even if advised of the possibility of such damages.
00015 //***************************************************************************
00016 
00017 
00018 // Define parse-element codes for the preparse parsehandler. 
00019 
// Bit fields in the low six bits of a Term/Punctuation parse code:
//   0x20 - set when whitespace follows the element
//   0x1F - inline length; a value of 0 means the length follows the code
#define kWhitespaceBit 0x20     // OR into Term or Punct code if whitespace follows
#define MAX_ENCODED_LENGTH 0x1F // mask for (and largest value of) the length field

// Yields x with the whitespace-follows bit set.
#define SET_WHITESPACE_FOLLOWS(x) ((x) | kWhitespaceBit)
// Yields x with len OR'd into the length field. len is masked to the
// field width, so values above MAX_ENCODED_LENGTH are truncated. The
// argument is fully parenthesized so that expressions such as `a | b`
// cannot spill into the neighbouring bits.
#define SET_LENGTH(x, len) ((x) | ((len) & MAX_ENCODED_LENGTH))
// Leaves x unchanged: low-order bits of 0 already mean "length follows".
#define SET_LENGTH_FOLLOWS(x) (x)

// Nonzero (kWhitespaceBit) when the whitespace-follows bit is set in x.
#define GET_WHITESPACE_FOLLOWS(x) ((x) & kWhitespaceBit)
// Extracts the inline length field from x.
#define GET_LENGTH(x) ((x) & MAX_ENCODED_LENGTH)
// 1 when the length field of x is 0, i.e. the length follows; else 0.
#define GET_LENGTH_FOLLOWS(x) (GET_LENGTH(x) == 0)
00030 
00031 // Parse elements are represented by a 1 byte parse code followed by
00032 // any relevant arguments. The parse code byte is itself broken down
00033 // into bit fields that provide more information about the parse element.
00034 // We use bit twiddling operations to access the bits, but the following
00035 // definition in terms of structs of unions may make the representation
00036 // clearer. We don't use this definition in the code because of potential
00037 // portability issues.
00038 //
00039 //   struct parse_code {
00040 //     unsigned is_term_or_punct : 1;
00041 //     union {
00042 //       struct { // term or punctuation
00043 //         unsigned is_punct           : 1;
00044 //         unsigned whitespace_follows : 1;
00045 //         unsigned length             : 5; // if this equals LengthFollows
00046 //         static const int LengthFollows = 0; // then length is next varint32
00047 //       } t_p;
00048 //       enum { // other parse element
00049 //         Header = 1, ResponseCode, BaseURL, ..., MAX_ELEMENT
00050 //       } el;
00051 //     } u;
00052 //   };
00053  
// Base codes for the two text-bearing elements. The top bit (0x80) marks
// a term-or-punctuation code; bit 0x40 separates punctuation from terms.
#define kParseElt_Term            0x80        // char* term
#define kParseElt_Punctuation     0xC0        // char* text

// Nonzero when the top bit of x is set (x is a Term or Punctuation code).
#define IS_TERM_OR_PUNCT(x) ((x) & kParseElt_Term)
// The two tests below look only at bit 0x40, so they are meaningful
// only when IS_TERM_OR_PUNCT(x) != 0.
#define IS_TERM(x)  (!((x) & 0x40))
#define IS_PUNCT(x) (!IS_TERM(x))
00060 
// PCODE wraps every code that is neither a Term nor a Punctuation.
// The argument must be less than 0x80 so that the MSB stays 0.
// Caution: the code that reads preparsed repositories uses these values
// as array indices, so renumbering them is not a local change.
#define PCODE(x) (x)

// The trailing comment on each code names the argument(s) that follow it;
// codes without such a comment carry no argument.
#define kParseElt_Header             PCODE(0x01) // char* key, char* value
#define kParseElt_ResponseCode       PCODE(0x02) // int
#define kParseElt_BaseURL            PCODE(0x03) // char*
#define kParseElt_Anchor             PCODE(0x04) // char*
#define kParseElt_LocalName          PCODE(0x05) // char*
#define kParseElt_AnchorDone         PCODE(0x06)
#define kParseElt_ChangeFontColor    PCODE(0x07) // char*
#define kParseElt_ChangeFontColorEnd PCODE(0x08)
#define kParseElt_ChangeBGColor      PCODE(0x09) // char*
#define kParseElt_ChangeBGColorEnd   PCODE(0x0a)
#define kParseElt_Image              PCODE(0x0b) // char*
// 0x0c and 0x0d are unused: they were kParseElt_ImageHeight and
// kParseElt_ImageWidth (both char*), removed as redundant with Image.
#define kParseElt_Applet             PCODE(0x0e)
#define kParseElt_AppletDone         PCODE(0x0f)
#define kParseElt_IFrame             PCODE(0x10) // char*
#define kParseElt_IFrameDone         PCODE(0x11)
#define kParseElt_Frame              PCODE(0x12) // char*
#define kParseElt_Area               PCODE(0x13) // char*
#define kParseElt_Meta               PCODE(0x14) // char*
#define kParseElt_Frameset           PCODE(0x15) // char*
#define kParseElt_FramesetDone       PCODE(0x16)
#define kParseElt_Body               PCODE(0x17) // char*
#define kParseElt_BodyDone           PCODE(0x18)
#define kParseElt_ParagraphStart     PCODE(0x19) // char*
#define kParseElt_ParagraphEnd       PCODE(0x1a)
#define kParseElt_Break              PCODE(0x1b)
#define kParseElt_HorizontalRule     PCODE(0x1c)
#define kParseElt_ListItem           PCODE(0x1d)
#define kParseElt_UnorderedList      PCODE(0x1e)
#define kParseElt_OrderedList        PCODE(0x1f)
#define kParseElt_ListDone           PCODE(0x20)
#define kParseElt_Div                PCODE(0x21) // char*
#define kParseElt_DivDone            PCODE(0x22)
#define kParseElt_Span               PCODE(0x23) // char*
// NOTE(review): SpanDone is the only *Done code annotated with an
// argument -- confirm whether it really carries a char*.
#define kParseElt_SpanDone           PCODE(0x24) // char*
#define kParseElt_Table              PCODE(0x25)
#define kParseElt_TableDone          PCODE(0x26)
#define kParseElt_Caption            PCODE(0x27)
#define kParseElt_CaptionDone        PCODE(0x28)
#define kParseElt_TableHCell         PCODE(0x29) // char*
#define kParseElt_TableDCell         PCODE(0x2a) // char*
#define kParseElt_TableCellDone      PCODE(0x2b)
#define kParseElt_TableRow           PCODE(0x2c)
#define kParseElt_TableRowDone       PCODE(0x2d)
#define kParseElt_Form               PCODE(0x2e) // char*
#define kParseElt_FormDone           PCODE(0x2f)
#define kParseElt_Select             PCODE(0x30) // char*
#define kParseElt_SelectDone         PCODE(0x31)
#define kParseElt_Option             PCODE(0x32) // char*
#define kParseElt_OptionDone         PCODE(0x33)
#define kParseElt_TextArea           PCODE(0x34) // char*
#define kParseElt_TextAreaDone       PCODE(0x35)
#define kParseElt_Input              PCODE(0x36) // char*
#define kParseElt_Heading            PCODE(0x37) // int
#define kParseElt_HeadingDone        PCODE(0x38)
#define kParseElt_Noframes           PCODE(0x39)
#define kParseElt_NoframesDone       PCODE(0x3a)
#define kParseElt_Object             PCODE(0x3b) // char*
#define kParseElt_ObjectDone         PCODE(0x3c)
#define kParseElt_Param              PCODE(0x3d) // char*
#define kParseElt_Embed              PCODE(0x3e) // char*
#define kParseElt_Head               PCODE(0x3f) // char*
#define kParseElt_HeadDone           PCODE(0x40)
00132 
// SetFace and SetSize never become callbacks themselves. They supply
// argument values to the AddTerm and AddPunctuation callbacks, and are
// written to the output only when the value changes.
#define kParseElt_SetFace         PCODE(0x41)     // int
#define kParseElt_SetSize         PCODE(0x42)     // int

// Keep this equal to the largest kParseElt_* code defined above.
#define kParseElt_LASTCODE        kParseElt_SetSize

// Marker codes, numbered after kParseElt_LASTCODE.
// NOTE(review): presumably these bracket each document in the preparsed
// output -- confirm against the reader/writer code.
#define kBeginDocMarker           PCODE(0x43)
#define kEndDocMarker             PCODE(0x44)
// No code may be larger than 0x7f.

Generated on Wed May 29 11:37:15 2002 for MarkovPR by doxygen1.2.15