Main Page   Class Hierarchy   Alphabetical List   Compound List   Examples  
tokenizer.h
1/***************************************************************************
2 copyright : (C) 2002-2008 by Stefano Barbato
3 email : stefano@codesink.org
4
5 $Id: tokenizer.h,v 1.18 2008-10-07 11:44:38 tat Exp $
6 ***************************************************************************/
7#ifndef _MIMETIC_TOKENIZER_H_
8#define _MIMETIC_TOKENIZER_H_
9#include <iterator>
10#include <algorithm>
11#include <set>
12#include <string>
13#include <cstring>
14
15namespace mimetic
16{
17
/// Delimiter predicate (generic version).
///
/// Holds a set of delimiter values and reports whether a given value is one
/// of them. The delimiters are kept in a std::set, so value_type must be
/// usable as a std::set key.
///
/// Note: in this generic version setDelimList() ADDS to the current set,
/// whereas the IsDelim<char> specialization below REPLACES the set.
template<typename value_type>
struct IsDelim
{
    // Function-object typedefs. They used to come from the
    // std::unary_function base class, which was deprecated in C++11 and
    // removed in C++17; declaring them here keeps the same nested names.
    typedef value_type argument_type;
    typedef bool result_type;

    /// Returns true when val is currently registered as a delimiter.
    bool operator()(const value_type& val) const
    {
        return m_delims.count(val) != 0;
    }
    /// Adds every element of cont to the delimiter set.
    template<typename Container>
    void setDelimList(const Container& cont)
    {
        m_delims.insert(cont.begin(), cont.end());
    }
    /// Adds every element of the range [bit, eit) to the delimiter set.
    template<typename Iterator>
    void setDelimList(Iterator bit, Iterator eit)
    {
        m_delims.insert(bit, eit);
    }
    /// Registers a single delimiter.
    void addDelim(const value_type& value)
    {
        m_delims.insert(value);
    }
    /// Unregisters a single delimiter (no effect if it was not registered).
    void removeDelim(const value_type& value)
    {
        m_delims.erase(value);
    }
private:
    std::set<value_type> m_delims;
};

/// Delimiter predicate specialized for char.
///
/// Uses a 256-entry lookup table indexed by the character's unsigned value
/// instead of a std::set.
template<>
struct IsDelim<char>
{
    typedef char argument_type;
    typedef bool result_type;

    /// Starts with an empty delimiter set so that operator() is well
    /// defined even before setDelimList() is called.
    IsDelim()
    {
        std::memset(m_lookup, 0, sizeof(m_lookup));
    }
    /// Replaces the delimiter set with the characters of delims.
    void setDelimList(const std::string& delims)
    {
        setDelimList(delims.begin(), delims.end());
    }
    /// Replaces the delimiter set with the characters in [bit, eit).
    template<typename Iterator>
    void setDelimList(Iterator bit, Iterator eit)
    {
        std::memset(m_lookup, 0, sizeof(m_lookup));
        // Index through unsigned char: where plain char is signed, values
        // >= 0x80 are negative and would otherwise index before the table.
        for(; bit != eit; ++bit)
            m_lookup[static_cast<unsigned char>(*bit)] = 1;
    }
    /// Registers a single delimiter (same interface as the generic version).
    void addDelim(const char& value)
    {
        m_lookup[static_cast<unsigned char>(value)] = 1;
    }
    /// Unregisters a single delimiter (same interface as the generic version).
    void removeDelim(const char& value)
    {
        m_lookup[static_cast<unsigned char>(value)] = 0;
    }
    /// Returns true when val is currently registered as a delimiter.
    bool operator()(unsigned char val) const
    {
        return m_lookup[val] != 0;
    }
private:
    char m_lookup[256]; // non-zero entry == delimiter
};
72
73
74/// Iterator tokenizer template class
75template<class Iterator,typename value_type>
77{
78public:
79 ItTokenizer(Iterator bit, Iterator eit)
80 : m_bit(bit), m_eit(eit), m_tok_eit(bit)
81 {
82 }
83 void setSource(Iterator bit, Iterator eit)
84 {
85 m_bit = bit;
86 m_eit = eit;
87 m_tok_eit = bit;
88 }
89 template<typename DelimCont>
90 void setDelimList(const DelimCont& cont)
91 {
92 m_delimPred.setDelimList(cont);
93 }
94 template<typename It>
95 void setDelimList(It bit, It eit)
96 {
97 m_delimPred.setDelimList(bit, eit);
98 }
99 template<typename DestCont>
100 bool next(DestCont& dst)
101 {
102 dst.erase(dst.begin(), dst.end());
103 if(m_tok_eit == m_eit)
104 return false;
105 m_tok_eit = std::find_if(m_bit, m_eit, m_delimPred);
106 m_matched = 0; // end of input
107 if(m_tok_eit != m_eit)
108 m_matched = *m_tok_eit; // matched delimiter
109 std::copy(m_bit, m_tok_eit, std::back_inserter<DestCont>(dst));
110 m_bit = (m_tok_eit != m_eit && ++m_tok_eit != m_eit ? m_tok_eit :m_eit);
111 return true;
112 }
113 const value_type& matched() const
114 {
115 return m_matched;
116 }
117 void addDelim(const value_type& value)
118 {
119 m_delimPred.addDelim(value);
120 }
121 void removeDelim(const value_type& value)
122 {
123 m_delimPred.removeDelim(value);
124 }
125private:
126 Iterator m_bit, m_eit, m_tok_eit;
127 IsDelim<value_type> m_delimPred;
128 value_type m_matched;
129};
130
131
132/// char container tokenizer template class
133template<typename Container>
134struct ContTokenizer: public ItTokenizer<typename Container::const_iterator,typename Container::value_type>
135{
136 typedef typename Container::value_type value_type;
137 typedef typename Container::iterator iterator;
138 typedef typename Container::const_iterator const_iterator;
139 // i want to be fast here so i don't want to copy "cont"
140 // so "cont" MUST be in scope for all following calls
141 // to next(...).
142 ContTokenizer(const Container* cont)
143 : ItTokenizer<const_iterator, value_type>(cont->begin(), cont->end())
144 {
145 }
146 template<typename DelimCont>
147 ContTokenizer(const Container* cont, const DelimCont& delims)
148 : ItTokenizer<const_iterator,value_type>(cont->begin(), cont->end())
149 {
150 this->setDelimList(delims);
151 }
152 void setSource(const Container* cont)
153 {
154 ItTokenizer<const_iterator,value_type>::setSource(cont->begin(), cont->end());
155 }
156private:
158 ContTokenizer& operator=(const ContTokenizer&);
159};
160
161/// std::string tokenizer
163
164}
165
166#endif
167
Iterator tokenizer template class.
Definition: tokenizer.h:77
Definition: body.h:18
ContTokenizer< std::string > StringTokenizer
std::string tokenizer
Definition: tokenizer.h:162
char container tokenizer template class
Definition: tokenizer.h:135