1 package org.apache.turbine.util.parser;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import java.io.BufferedReader;
23 import java.io.IOException;
24 import java.io.InputStreamReader;
25 import java.io.Reader;
26 import java.io.StreamTokenizer;
27
28 import java.util.ArrayList;
29 import java.util.Collections;
30 import java.util.Iterator;
31 import java.util.List;
32 import java.util.NoSuchElementException;
33
34 import org.apache.commons.lang.exception.NestableRuntimeException;
35
36 /***
37 * DataStreamParser is used to parse a stream with a fixed format and
38 * generate ValueParser objects which can be used to extract the values
39 * in the desired type.
40 *
41 * <p>The class itself is abstract - a concrete subclass which implements
42 * the initTokenizer method such as CSVParser or TSVParser is required
43 * to use the functionality.
44 *
45 * <p>The class implements the java.util.Iterator interface for convenience.
46 * This allows simple use in a Velocity template for example:
47 *
48 * <pre>
49 * #foreach ($row in $datastream)
50 * Name: $row.Name
51 * Description: $row.Description
52 * #end
53 * </pre>
54 *
55 * @author <a href="mailto:sean@informage.net">Sean Legassick</a>
56 * @author <a href="mailto:martin@mvdb.net">Martin van den Bemt</a>
57 * @author <a href="mailto:hps@intermeta.de">Henning P. Schmiedehausen</a>
58 * @version $Id: DataStreamParser.java 534527 2007-05-02 16:10:59Z tv $
59 */
60 public abstract class DataStreamParser implements Iterator
61 {
62 /***
63 * The constant for empty fields
64 */
65 protected static final String EMPTYFIELDNAME = "UNKNOWNFIELD";
66
67 /***
68 * The list of column names.
69 */
70 private List columnNames = Collections.EMPTY_LIST;
71
72 /***
73 * The stream tokenizer for reading values from the input reader.
74 */
75 private StreamTokenizer tokenizer;
76
77 /***
78 * The parameter parser holding the values of columns for the current line.
79 */
80 private ValueParser lineValues;
81
82 /***
83 * Indicates whether or not the tokenizer has read anything yet.
84 */
85 private boolean neverRead = true;
86
87 /***
88 * The character encoding of the input
89 */
90 private String characterEncoding;
91
92 /***
93 * The fieldseperator, which can be almost any char
94 */
95 private char fieldSeparator;
96
97 /***
98 * Create a new DataStreamParser instance. Requires a Reader to read the
99 * comma-separated values from, a list of column names and a
100 * character encoding.
101 *
102 * @param in the input reader.
103 * @param columnNames a list of column names.
104 * @param characterEncoding the character encoding of the input.
105 */
106 public DataStreamParser(Reader in, List columnNames,
107 String characterEncoding)
108 {
109 setColumnNames(columnNames);
110
111 this.characterEncoding = characterEncoding;
112
113 if (this.characterEncoding == null)
114 {
115 if (in instanceof InputStreamReader)
116 {
117 this.characterEncoding = ((InputStreamReader) in).getEncoding();
118 }
119
120 if (this.characterEncoding == null)
121 {
122
123 this.characterEncoding = "US-ASCII";
124 }
125 }
126
127 tokenizer = new StreamTokenizer(new BufferedReader(in));
128 initTokenizer(tokenizer);
129 }
130
131 /***
132 * Initialize the StreamTokenizer instance used to read the lines
133 * from the input reader. This must be implemented in subclasses to
134 * set up other tokenizing properties.
135 *
136 * @param tokenizer the tokenizer to adjust
137 */
138 protected void initTokenizer(StreamTokenizer tokenizer)
139 {
140 tokenizer.resetSyntax();
141
142
143 tokenizer.wordChars(' ', Character.MAX_VALUE);
144
145
146 tokenizer.quoteChar('"');
147
148
149 tokenizer.eolIsSignificant(true);
150 }
151
152 /***
153 * This method must be called to setup the field seperator
154 * @param fieldSeparator the char which separates the fields
155 */
156 public void setFieldSeparator(char fieldSeparator)
157 {
158 this.fieldSeparator = fieldSeparator;
159
160 tokenizer.ordinaryChar(fieldSeparator);
161 }
162
163 /***
164 * Set the list of column names explicitly.
165 *
166 * @param columnNames A list of column names.
167 */
168 public void setColumnNames(List columnNames)
169 {
170 if (columnNames != null)
171 {
172 this.columnNames = columnNames;
173 }
174 }
175
176 /***
177 * get the list of column names.
178 *
179 */
180 public List getColumnNames()
181 {
182 return columnNames;
183 }
184
185 /***
186 * Read the list of column names from the input reader using the
187 * tokenizer. If fieldNames are empty, we use the current fieldNumber
188 * + the EMPTYFIELDNAME to make one up.
189 *
190 * @exception IOException an IOException occurred.
191 */
192 public void readColumnNames()
193 throws IOException
194 {
195 List columnNames = new ArrayList();
196 int fieldCounter = 0;
197
198 if (hasNextRow())
199 {
200 String colName = null;
201 boolean foundEol = false;
202
203 while(!foundEol)
204 {
205 tokenizer.nextToken();
206
207 if (tokenizer.ttype == '"'
208 || tokenizer.ttype == StreamTokenizer.TT_WORD)
209 {
210
211 colName = tokenizer.sval;
212 }
213 else
214 {
215
216 fieldCounter++;
217
218 if (colName == null)
219 {
220 colName = EMPTYFIELDNAME + fieldCounter;
221 }
222
223 columnNames.add(colName);
224 colName = null;
225 }
226
227
228 if (tokenizer.ttype == StreamTokenizer.TT_EOL)
229 {
230 foundEol = true;
231 }
232 else if (tokenizer.ttype == StreamTokenizer.TT_EOF)
233 {
234
235 tokenizer.pushBack();
236 foundEol = true;
237 }
238 }
239
240 setColumnNames(columnNames);
241 }
242 }
243
244 /***
245 * Determine whether a further row of values exists in the input.
246 *
247 * @return true if the input has more rows.
248 * @exception IOException an IOException occurred.
249 */
250 public boolean hasNextRow()
251 throws IOException
252 {
253
254
255 if (neverRead || tokenizer.ttype == StreamTokenizer.TT_EOL)
256 {
257 tokenizer.nextToken();
258 tokenizer.pushBack();
259 neverRead = false;
260 }
261 return tokenizer.ttype != StreamTokenizer.TT_EOF;
262 }
263
264 /***
265 * Returns a ValueParser object containing the next row of values.
266 *
267 * @return a ValueParser object.
268 * @exception IOException an IOException occurred.
269 * @exception NoSuchElementException there are no more rows in the input.
270 */
271 public ValueParser nextRow()
272 throws IOException, NoSuchElementException
273 {
274 if (!hasNextRow())
275 {
276 throw new NoSuchElementException();
277 }
278
279 if (lineValues == null)
280 {
281 lineValues = new BaseValueParser(characterEncoding);
282 }
283 else
284 {
285 lineValues.clear();
286 }
287
288 Iterator it = columnNames.iterator();
289
290 String currVal = "";
291 String colName = null;
292
293 boolean foundEol = false;
294 while (!foundEol || it.hasNext())
295 {
296 if (!foundEol)
297 {
298 tokenizer.nextToken();
299 }
300
301 if (colName == null && it.hasNext())
302 {
303 colName = String.valueOf(it.next());
304 }
305
306 if (tokenizer.ttype == '"'
307 || tokenizer.ttype == StreamTokenizer.TT_WORD)
308 {
309
310 currVal = tokenizer.sval;
311 }
312 else
313 {
314
315 lineValues.add(colName, currVal);
316 colName = null;
317 currVal = "";
318 }
319
320
321 if (tokenizer.ttype == StreamTokenizer.TT_EOL)
322 {
323 foundEol = true;
324 }
325 else if (tokenizer.ttype == StreamTokenizer.TT_EOF)
326 {
327
328 tokenizer.pushBack();
329 foundEol = true;
330 }
331 }
332
333 return lineValues;
334 }
335
336 /***
337 * Determine whether a further row of values exists in the input.
338 *
339 * @return true if the input has more rows.
340 */
341 public boolean hasNext()
342 {
343 boolean hasNext = false;
344
345 try
346 {
347 hasNext = hasNextRow();
348 }
349 catch (IOException e)
350 {
351 throw new NestableRuntimeException(e);
352 }
353
354 return hasNext;
355 }
356
357 /***
358 * Returns a ValueParser object containing the next row of values.
359 *
360 * @return a ValueParser object as an Object.
361 * @exception NoSuchElementException there are no more rows in the input
362 * or an IOException occurred.
363 */
364 public Object next()
365 throws NoSuchElementException
366 {
367 Object nextRow = null;
368
369 try
370 {
371 nextRow = nextRow();
372 }
373 catch (IOException e)
374 {
375 throw new NestableRuntimeException(e);
376 }
377
378 return nextRow;
379 }
380
381 /***
382 * The optional Iterator.remove method is not supported.
383 *
384 * @exception UnsupportedOperationException the operation is not supported.
385 */
386 public void remove()
387 throws UnsupportedOperationException
388 {
389 throw new UnsupportedOperationException();
390 }
391 }