Lucene++ - a full-featured C++ search engine
API Documentation


Tokenizer.h
Go to the documentation of this file.
1 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
6 
7 #ifndef TOKENIZER_H
8 #define TOKENIZER_H
9 
10 #include "TokenStream.h"
11 
12 namespace Lucene {
13 
/// A Tokenizer is a TokenStream whose input is a Reader.
///
/// NOTE(review): the member index that follows this listing also names a default
/// constructor Tokenizer() ("Construct a tokenizer with null input") and the data
/// members `ReaderPtr input` ("The text source for this Tokenizer", Tokenizer.h:43)
/// and `CharStreamPtr charStream` (Tokenizer.h:48). Their declarations do not appear
/// in this listing - presumably they were dropped when the page was extracted.
20 class LPPAPI Tokenizer : public TokenStream {
21 protected:
24 
    /// Construct a token stream processing the given input.
26  Tokenizer(const ReaderPtr& input);
27 
    /// Construct a tokenizer with null input using the given AttributeFactory.
29  Tokenizer(const AttributeFactoryPtr& factory);
30 
    /// Construct a token stream processing the given input using the given AttributeFactory.
32  Tokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input);
33 
    /// Construct a token stream processing the given input using the given AttributeSource.
    /// NOTE(review): this overload takes no reader, yet the member index describes it
    /// with the same sentence as the (source, input) overload - confirm what input it uses.
35  Tokenizer(const AttributeSourcePtr& source);
36 
    /// Construct a token stream processing the given input using the given AttributeSource.
38  Tokenizer(const AttributeSourcePtr& source, const ReaderPtr& input);
39 
40 public:
41  virtual ~Tokenizer();
42 
44 
45 protected:
49 
50 public:
    /// By default, closes the input Reader.
52  virtual void close();
53 
    /// Return the corrected offset for currentOff.
    /// NOTE(review): the member index says that when input is a CharStream subclass this
    /// calls a CharStream method, but the description is cut off at that point - confirm
    /// the exact delegation and the fallback against the implementation.
59  virtual int32_t correctOffset(int32_t currentOff);
60 
    // Re-export the base-class reset() so that declaring reset(const ReaderPtr&) below
    // does not hide the zero-argument TokenStream::reset() overload.
61  using TokenStream::reset;
62 
    /// Reset the tokenizer to a new reader.
65  virtual void reset(const ReaderPtr& input);
66 };
67 
68 }
69 
70 #endif
#define LUCENE_CLASS(Name)
Definition: LuceneObject.h:24
A TokenStream enumerates the sequence of tokens, either from Fields of a Document or from query text.
Definition: TokenStream.h:44
virtual void reset()
Resets this stream to the beginning. This is an optional operation, so subclasses may or may not implement this method.
A Tokenizer is a TokenStream whose input is a Reader.
Definition: Tokenizer.h:20
virtual ~Tokenizer()
CharStreamPtr charStream
Definition: Tokenizer.h:48
Tokenizer()
Construct a tokenizer with null input.
virtual int32_t correctOffset(int32_t currentOff)
Return the corrected offset. If input is a CharStream subclass this method calls CharStream#correctOffset, else returns currentOff.
virtual void close()
By default, closes the input Reader.
Tokenizer(const ReaderPtr &input)
Construct a token stream processing the given input.
Tokenizer(const AttributeSourcePtr &source)
Construct a token stream processing the given input using the given AttributeSource.
Tokenizer(const AttributeFactoryPtr &factory)
Construct a tokenizer with null input using the given AttributeFactory.
ReaderPtr input
The text source for this Tokenizer.
Definition: Tokenizer.h:43
virtual void reset(const ReaderPtr &input)
Reset the tokenizer to a new reader. Typically, an analyzer (in its reusableTokenStream method) will use this to re-use a previously created tokenizer.
Tokenizer(const AttributeSourcePtr &source, const ReaderPtr &input)
Construct a token stream processing the given input using the given AttributeSource.
Tokenizer(const AttributeFactoryPtr &factory, const ReaderPtr &input)
Construct a token stream processing the given input using the given AttributeFactory.
Definition: AbstractAllTermDocs.h:12
boost::shared_ptr< AttributeSource > AttributeSourcePtr
Definition: LuceneTypes.h:520
boost::shared_ptr< Reader > ReaderPtr
Definition: LuceneTypes.h:547
boost::shared_ptr< AttributeFactory > AttributeFactoryPtr
Definition: LuceneTypes.h:519
boost::shared_ptr< CharStream > CharStreamPtr
Definition: LuceneTypes.h:27

clucene.sourceforge.net