Lucene++ - a full-featured C++ search engine
API Documentation


StandardTokenizerImpl.h
Go to the documentation of this file.
1 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
6 
7 #ifndef STANDARDTOKENIZERIMPL_H
8 #define STANDARDTOKENIZERIMPL_H
9 
10 #include "LuceneObject.h"
11 
12 namespace Lucene {
13 
15 public:
19 
21 
23 
24 protected:
26  static const int32_t ZZ_BUFFERSIZE;
27 
29  static CharArray _ZZ_CMAP;
30  static const wchar_t ZZ_CMAP_PACKED[];
31  static const int32_t ZZ_CMAP_LENGTH;
32  static const int32_t ZZ_CMAP_PACKED_LENGTH;
33 
35  static void ZZ_CMAP_INIT();
36  static const wchar_t* ZZ_CMAP();
37 
39  static IntArray _ZZ_ACTION;
40  static const wchar_t ZZ_ACTION_PACKED_0[];
41  static const int32_t ZZ_ACTION_LENGTH;
42  static const int32_t ZZ_ACTION_PACKED_LENGTH;
43 
45  static void ZZ_ACTION_INIT();
46  static const int32_t* ZZ_ACTION();
47 
49  static IntArray _ZZ_ROWMAP;
50  static const wchar_t ZZ_ROWMAP_PACKED_0[];
51  static const int32_t ZZ_ROWMAP_LENGTH;
52  static const int32_t ZZ_ROWMAP_PACKED_LENGTH;
53 
55  static void ZZ_ROWMAP_INIT();
56  static const int32_t* ZZ_ROWMAP();
57 
59  static IntArray _ZZ_TRANS;
60  static const wchar_t ZZ_TRANS_PACKED_0[];
61  static const int32_t ZZ_TRANS_LENGTH;
62  static const int32_t ZZ_TRANS_PACKED_LENGTH;
63 
65  static void ZZ_TRANS_INIT();
66  static const int32_t* ZZ_TRANS();
67 
68  // error codes
69  static const int32_t ZZ_UNKNOWN_ERROR;
70  static const int32_t ZZ_NO_MATCH;
71  static const int32_t ZZ_PUSHBACK_2BIG;
72 
73  static const wchar_t* ZZ_ERROR_MSG[];
74 
76  static IntArray _ZZ_ATTRIBUTE;
77  static const wchar_t ZZ_ATTRIBUTE_PACKED_0[];
78  static const int32_t ZZ_ATTRIBUTE_LENGTH;
79  static const int32_t ZZ_ATTRIBUTE_PACKED_LENGTH;
80 
82  static void ZZ_ATTRIBUTE_INIT();
83  static const int32_t* ZZ_ATTRIBUTE();
84 
87 
89  int32_t zzState;
90 
92  int32_t zzLexicalState;
93 
95  CharArray zzBuffer;
96 
98  int32_t zzMarkedPos;
99 
101  int32_t zzPushbackPos;
102 
104  int32_t zzCurrentPos;
105 
107  int32_t zzStartRead;
108 
110  int32_t zzEndRead;
111 
113  int32_t yyline;
114 
116  int32_t _yychar;
117 
119  int32_t yycolumn;
120 
122  bool zzAtBOL;
123 
125  bool zzAtEOF;
126 
127 public:
129  static const int32_t YYEOF;
130 
132  static const int32_t YYINITIAL;
133 
134 public:
135  int32_t yychar();
136 
138  void reset(const ReaderPtr& r);
139 
141  void getText(const TokenPtr& t);
142 
144  void getText(const TermAttributePtr& t);
145 
147  void yyclose();
148 
155  void yyreset(const ReaderPtr& reader);
156 
158  int32_t yystate();
159 
162  void yybegin(int32_t newState);
163 
165  String yytext();
166 
172  wchar_t yycharat(int32_t pos);
173 
175  int32_t yylength();
176 
181  void yypushback(int32_t number);
182 
185  int32_t getNextToken();
186 
187 protected:
189  bool zzRefill();
190 
200  void zzScanError(int32_t errorCode);
201 };
202 
203 }
204 
205 #endif
#define LUCENE_CLASS(Name)
Definition: LuceneObject.h:24
Lucene::LuceneObject
Base class for all Lucene classes.
Definition: LuceneObject.h:31
Lucene::StandardTokenizerImpl
Definition: StandardTokenizerImpl.h:14
int32_t yycolumn
The number of characters from the last newline up to the start of the matched text.
Definition: StandardTokenizerImpl.h:119
int32_t zzEndRead
EndRead marks the last character in the buffer that has been read from input.
Definition: StandardTokenizerImpl.h:110
int32_t zzMarkedPos
The text position at the last accepting state.
Definition: StandardTokenizerImpl.h:98
int32_t zzCurrentPos
The current text position in the buffer.
Definition: StandardTokenizerImpl.h:104
void yyclose()
Closes the input stream.
static const int32_t ZZ_ACTION_LENGTH
Definition: StandardTokenizerImpl.h:41
int32_t zzLexicalState
The current lexical state.
Definition: StandardTokenizerImpl.h:92
static IntArray _ZZ_ROWMAP
Translates a state to a row index in the transition table.
Definition: StandardTokenizerImpl.h:49
static void ZZ_ATTRIBUTE_INIT()
ZZ_ATTRIBUTE[aState] contains the attributes of state aState.
static const int32_t ZZ_ACTION_PACKED_LENGTH
Definition: StandardTokenizerImpl.h:42
static const int32_t ZZ_NO_MATCH
Definition: StandardTokenizerImpl.h:70
static void ZZ_ROWMAP_INIT()
Translates a state to a row index in the transition table.
static const int32_t ZZ_ATTRIBUTE_PACKED_LENGTH
Definition: StandardTokenizerImpl.h:79
static const int32_t ZZ_TRANS_LENGTH
Definition: StandardTokenizerImpl.h:61
static IntArray _ZZ_ACTION
Translates DFA states to action switch labels.
Definition: StandardTokenizerImpl.h:39
static void ZZ_CMAP_INIT()
Translates characters to character classes.
void reset(const ReaderPtr &r)
Resets the Tokenizer to a new Reader.
static const wchar_t * ZZ_CMAP()
static const int32_t YYEOF
This character denotes the end of file.
Definition: StandardTokenizerImpl.h:129
static const int32_t ZZ_ROWMAP_LENGTH
Definition: StandardTokenizerImpl.h:51
ReaderPtr zzReader
The input device.
Definition: StandardTokenizerImpl.h:86
static const int32_t ZZ_BUFFERSIZE
Initial size of the lookahead buffer.
Definition: StandardTokenizerImpl.h:22
static IntArray _ZZ_ATTRIBUTE
ZZ_ATTRIBUTE[aState] contains the attributes of state aState.
Definition: StandardTokenizerImpl.h:76
static const int32_t ZZ_CMAP_PACKED_LENGTH
Definition: StandardTokenizerImpl.h:32
void zzScanError(int32_t errorCode)
Reports an error that occurred while scanning.
String yytext()
Returns the text matched by the current regular expression.
static CharArray _ZZ_CMAP
Translates characters to character classes.
Definition: StandardTokenizerImpl.h:29
void yypushback(int32_t number)
Pushes the specified number of characters back into the input stream.
static const int32_t * ZZ_ATTRIBUTE()
void yyreset(const ReaderPtr &reader)
Resets the scanner to read from a new input stream. Does not close the old reader.
static const int32_t ZZ_ATTRIBUTE_LENGTH
Definition: StandardTokenizerImpl.h:78
static const int32_t ZZ_CMAP_LENGTH
Definition: StandardTokenizerImpl.h:31
int32_t zzState
The current state of the DFA.
Definition: StandardTokenizerImpl.h:89
bool zzAtBOL
zzAtBOL == true if the scanner is currently at the beginning of a line
Definition: StandardTokenizerImpl.h:122
bool zzAtEOF
zzAtEOF == true if the scanner is at the EOF
Definition: StandardTokenizerImpl.h:125
static const int32_t * ZZ_TRANS()
int32_t yylength()
Returns the length of the matched text region.
wchar_t yycharat(int32_t pos)
Returns the character at position pos from the matched text.
void getText(const TokenPtr &t)
Fills Lucene token with the current token text.
int32_t getNextToken()
Resumes scanning until the next regular expression is matched, the end of input is encountered, or an I/O error occurs.
CharArray zzBuffer
This buffer contains the current text to be matched and is the source of the yytext() string.
Definition: StandardTokenizerImpl.h:95
int32_t yystate()
Returns the current lexical state.
static const int32_t ZZ_PUSHBACK_2BIG
Definition: StandardTokenizerImpl.h:71
int32_t zzPushbackPos
The text position at the last state to be included in yytext.
Definition: StandardTokenizerImpl.h:101
static void ZZ_ACTION_INIT()
Translates DFA states to action switch labels.
bool zzRefill()
Refills the input buffer.
int32_t _yychar
The number of characters up to the start of the matched text.
Definition: StandardTokenizerImpl.h:116
static IntArray _ZZ_TRANS
The transition table of the DFA.
Definition: StandardTokenizerImpl.h:59
static const int32_t ZZ_UNKNOWN_ERROR
Definition: StandardTokenizerImpl.h:69
int32_t zzStartRead
StartRead marks the beginning of the yytext() string in the buffer.
Definition: StandardTokenizerImpl.h:107
static const int32_t * ZZ_ACTION()
static const wchar_t ZZ_TRANS_PACKED_0[]
Definition: StandardTokenizerImpl.h:60
StandardTokenizerImpl(const ReaderPtr &in)
Creates a new scanner.
void getText(const TermAttributePtr &t)
Fills TermAttribute with the current token text.
static const wchar_t ZZ_ATTRIBUTE_PACKED_0[]
Definition: StandardTokenizerImpl.h:77
static const wchar_t * ZZ_ERROR_MSG[]
Definition: StandardTokenizerImpl.h:73
static const wchar_t ZZ_ROWMAP_PACKED_0[]
Definition: StandardTokenizerImpl.h:50
static const wchar_t ZZ_CMAP_PACKED[]
Definition: StandardTokenizerImpl.h:30
static const int32_t * ZZ_ROWMAP()
void yybegin(int32_t newState)
Enters a new lexical state.
static const int32_t YYINITIAL
Lexical states.
Definition: StandardTokenizerImpl.h:132
static void ZZ_TRANS_INIT()
The transition table of the DFA.
static const wchar_t ZZ_ACTION_PACKED_0[]
Definition: StandardTokenizerImpl.h:40
static const int32_t ZZ_TRANS_PACKED_LENGTH
Definition: StandardTokenizerImpl.h:62
int32_t yyline
Number of newlines encountered up to the start of the matched text.
Definition: StandardTokenizerImpl.h:113
static const int32_t ZZ_ROWMAP_PACKED_LENGTH
Definition: StandardTokenizerImpl.h:52
Lucene
Definition: AbstractAllTermDocs.h:12
boost::shared_ptr< TermAttribute > TermAttributePtr
Definition: LuceneTypes.h:58
boost::shared_ptr< Token > TokenPtr
Definition: LuceneTypes.h:59
boost::shared_ptr< Reader > ReaderPtr
Definition: LuceneTypes.h:547

clucene.sourceforge.net