Bitstream Interpretation Library (BIL)  0.1
StreamTokenizer.cpp
Go to the documentation of this file.
1 
6 #include <cstdlib>
9 
10 using namespace bil;
11 
// Character classes stored in the syntax table, one entry per byte value.
// AT_SEPARATOR: the character is returned on its own as a separator token.
12 const unsigned char AT_SEPARATOR = 0;
// AT_WORD: consecutive characters of this class are collected into one word token.
13 const unsigned char AT_WORD = 1;
// AT_WHITESPACE: the character is skipped while scanning.
14 const unsigned char AT_WHITESPACE = 2;
// AT_COMMENT: the character starts a comment that is skipped up to the next '\n'.
15 const unsigned char AT_COMMENT = 3;
16 
// Size in bytes of the read buffer and of each refill request (0x1000000 = 16 MiB).
17 const size_t BUFFER_SIZE = 0x1000000;
// Maximum number of characters kept for a word token (0x7fe = 2046); the word
// buffer is allocated one byte larger so a terminating zero always fits.
18 const size_t TOKEN_MAX_SIZE = 0x7fe;
19 
20 
21 StreamTokenizer::StreamTokenizer(std::istream& inputStream):
22  m_inputStream(&inputStream),
23  m_buffer(0),
24  m_bufferSize(0),
25  m_readIndex(0),
26  m_tokenType(TT_NONE),
27  m_separatorToken(0),
28  m_wordBuffer(0),
29  m_wordBufferSize(0)
30 {
31  m_buffer = new char[BUFFER_SIZE];
32  m_wordBuffer = new char[TOKEN_MAX_SIZE+1];
33  resetSyntax();
34 }
35 
36 
37 StreamTokenizer::~StreamTokenizer()
38 {
39  delete[] m_wordBuffer;
40  m_wordBuffer = 0;
41  delete[] m_buffer;
42  m_buffer = 0;
43 }
44 
45 
46 void StreamTokenizer::inputStream(std::istream& inputStream)
47 {
48  m_inputStream = &inputStream;
49  reset();
50 }
51 
52 
54 {
 // Forget all buffered input and the current token. The syntax table is not
 // modified here. With fill size and read index both at zero, the next
 // checkBuffer() call has to refill the buffer from the stream.
55  m_bufferSize = 0;
56  m_readIndex = 0;
 // No token has been read yet; same state as right after construction.
57  m_tokenType = TT_NONE;
58  m_separatorToken = 0;
59  m_wordBufferSize = 0;
60 }
61 
62 
64 {
 // Mark character c as a separator. The cast keeps the table index in
 // 0..255 even if c is a negative (signed) char value.
65  m_syntaxTable[static_cast<unsigned char>(c)] = AT_SEPARATOR;
66 }
67 
68 
69 void StreamTokenizer::separatorChars(char firstChar, char lastChar)
70 {
71  for (char c = firstChar; c <= lastChar; ++c)
72  m_syntaxTable[static_cast<unsigned char>(c)] = AT_SEPARATOR;
73 }
74 
75 
77 {
 // Mark character c as part of words. The cast keeps the table index in
 // 0..255 even if c is a negative (signed) char value.
78  m_syntaxTable[static_cast<unsigned char>(c)] = AT_WORD;
79 }
80 
81 
82 void StreamTokenizer::wordChars(char firstChar, char lastChar)
83 {
84  for (char c = firstChar; c <= lastChar; ++c)
85  m_syntaxTable[static_cast<unsigned char>(c)] = AT_WORD;
86 }
87 
88 
90 {
 // Mark character c as whitespace, i.e. skipped while scanning. The cast
 // keeps the table index in 0..255 even if c is a negative char value.
91  m_syntaxTable[static_cast<unsigned char>(c)] = AT_WHITESPACE;
92 }
93 
94 
96 {
 // Mark character c as a comment starter: scanning skips from it up to the
 // next '\n'. The cast keeps the table index in 0..255 for negative chars.
97  m_syntaxTable[static_cast<unsigned char>(c)] = AT_COMMENT;
98 }
99 
100 
102 {
103  for (unsigned c = 0; c < 0xff; ++c)
104  m_syntaxTable[c] = AT_SEPARATOR;
105 }
106 
107 
109 {
110  for (;;)
111  {
112  // read character from buffer at current read index
113  if (0 == checkBuffer()) return TT_EOF;
114  char c = m_buffer[m_readIndex];
115 
116  // classify read character
117  switch (m_syntaxTable[static_cast<unsigned char>(c)])
118  {
119  case AT_WHITESPACE:
120  // skip whitespace char
121  ++m_readIndex;
122  continue;
123 
124  case AT_WORD:
125  // accumulate following word chars into one word
126  m_tokenType = TT_WORD;
127  m_wordBufferSize = 0;
128  do
129  {
130  if (m_wordBufferSize < TOKEN_MAX_SIZE) m_wordBuffer[m_wordBufferSize++] = c;
131  ++m_readIndex;
132  if (0 == checkBuffer()) return TT_WORD;
133  c = m_buffer[m_readIndex];
134  }
135  while (AT_WORD == (m_syntaxTable[static_cast<unsigned char>(c)]));
136  return TT_WORD;
137 
138  case AT_SEPARATOR:
139  // separator char
140  m_tokenType = TT_SEPARATOR;
141  m_separatorToken = c;
142  ++m_readIndex;
143  return TT_SEPARATOR;
144 
145  case AT_COMMENT:
146  // skip everything until next new line char
147  do
148  {
149  ++m_readIndex;
150  if (0 == checkBuffer())
151  {
152  m_tokenType = TT_EOF;
153  return TT_EOF;
154  }
155  c = m_buffer[m_readIndex];
156  }
157  while ('\n' != c);
158  continue;
159  }
160  }
161 }
162 
163 
165 {
 // Type stored by the most recent scan; TT_NONE after construction or a
 // reset, before any token has been read.
166  return m_tokenType;
167 }
168 
169 
171 {
 // Character stored when the last separator token was read; 0 after
 // construction or a reset.
172  return m_separatorToken;
173 }
174 
175 
176 const char* StreamTokenizer::wordToken() const
177 {
178  m_wordBuffer[m_wordBufferSize] = 0;
179  return m_wordBuffer;
180 }
181 
182 
184 {
 // Number of characters currently stored in the word buffer, not counting
 // the terminating zero; the scanner stores at most TOKEN_MAX_SIZE.
185  return m_wordBufferSize;
186 }
187 
188 
189 bool StreamTokenizer::uintToken(unsigned& val) const
190 {
191  m_wordBuffer[m_wordBufferSize] = 0;
192  char* endPtr;
193  val = strtol(m_wordBuffer, &endPtr, 10);
194  return ((0 != endPtr) && (0 == *endPtr));
195 }
196 
197 
198 size_t StreamTokenizer::checkBuffer()
199 {
200  // check if read index is out of buffer
201  if (m_bufferSize > m_readIndex) return m_bufferSize;
202  // if yes, refill buffer
203  m_inputStream->read(m_buffer, BUFFER_SIZE);
204  m_readIndex = 0;
205  m_bufferSize = m_inputStream->gcount();
206  // check for errors
207  if (m_inputStream->bad()) throw IOException();
208  // return the fill state of buffer
209  return m_bufferSize;
210 }