1 package org.apache.fulcrum.parser;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 import java.io.BufferedReader;
25 import java.io.IOException;
26 import java.io.InputStreamReader;
27 import java.io.Reader;
28 import java.io.StreamTokenizer;
29 import java.util.ArrayList;
30 import java.util.Iterator;
31 import java.util.List;
32 import java.util.NoSuchElementException;
33
34 import org.apache.avalon.framework.logger.LogEnabled;
35 import org.apache.avalon.framework.logger.Logger;
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59 public abstract class DataStreamParser
60 implements Iterator<ValueParser>, LogEnabled
61 {
62
63
64
65 private List<String> columnNames;
66
67
68
69
70 private final StreamTokenizer tokenizer;
71
72
73
74
75 private ValueParser lineValues;
76
77
78
79
80 private boolean neverRead = true;
81
82
83
84
85 private String characterEncoding;
86
87
88
89
90 protected Logger log;
91
92
93
94
95
96
97
98
99
100
101 public DataStreamParser(Reader in, List<String> columnNames,
102 String characterEncoding)
103 {
104 this.columnNames = columnNames;
105 this.characterEncoding = characterEncoding;
106
107 if (this.characterEncoding == null)
108 {
109
110 this.characterEncoding = "US-ASCII";
111 try
112 {
113 this.characterEncoding = ((InputStreamReader)in).getEncoding();
114 }
115 catch (ClassCastException e)
116 {
117
118 }
119 }
120
121 tokenizer = new StreamTokenizer(new BufferedReader(in));
122 initTokenizer(tokenizer);
123 }
124
125
126
127
128
129
130
131
132 protected abstract void initTokenizer(StreamTokenizer tokenizer);
133
134
135
136
137
138
139 public void enableLogging(Logger logger)
140 {
141 this.log = logger.getChildLogger("DataStreamParser");
142 }
143
144
145
146
147
148
149 public void setColumnNames(List<String> columnNames)
150 {
151 this.columnNames = columnNames;
152 }
153
154
155
156
157
158
159
160 public void readColumnNames()
161 throws IOException
162 {
163 columnNames = new ArrayList<String>();
164
165 neverRead = false;
166 tokenizer.nextToken();
167 while (tokenizer.ttype == StreamTokenizer.TT_WORD
168 || tokenizer.ttype == '"')
169 {
170 columnNames.add(tokenizer.sval);
171 tokenizer.nextToken();
172 }
173 }
174
175
176
177
178
179
180
181 public boolean hasNextRow()
182 throws IOException
183 {
184
185
186 if (neverRead || tokenizer.ttype == StreamTokenizer.TT_EOL)
187 {
188 tokenizer.nextToken();
189 tokenizer.pushBack();
190 neverRead = false;
191 }
192 return tokenizer.ttype != StreamTokenizer.TT_EOF;
193 }
194
195
196
197
198
199
200
201
202 public ValueParser nextRow()
203 throws IOException, NoSuchElementException
204 {
205 if (!hasNextRow())
206 {
207 throw new NoSuchElementException();
208 }
209
210 if (lineValues == null)
211 {
212 lineValues = new BaseValueParser(characterEncoding);
213 }
214 else
215 {
216 lineValues.clear();
217 }
218
219 Iterator<String> it = columnNames.iterator();
220 tokenizer.nextToken();
221 while (tokenizer.ttype == StreamTokenizer.TT_WORD
222 || tokenizer.ttype == '"')
223 {
224
225
226 if (it.hasNext())
227 {
228 String colname = it.next().toString();
229 String colval = tokenizer.sval;
230 if (log.isDebugEnabled())
231 {
232 log.debug("DataStreamParser.nextRow(): " +
233 colname + '=' + colval);
234 }
235 lineValues.add(colname, colval);
236 }
237 tokenizer.nextToken();
238 }
239
240 return lineValues;
241 }
242
243
244
245
246
247
248 public boolean hasNext()
249 {
250 boolean hasNext = false;
251
252 try
253 {
254 hasNext = hasNextRow();
255 }
256 catch (IOException e)
257 {
258 log.error("IOException in CSVParser.hasNext", e);
259 }
260
261 return hasNext;
262 }
263
264
265
266
267
268
269
270
271 public ValueParser next()
272 throws NoSuchElementException
273 {
274 ValueParser nextRow = null;
275
276 try
277 {
278 nextRow = nextRow();
279 }
280 catch (IOException e)
281 {
282 log.error("IOException in CSVParser.next", e);
283 throw new NoSuchElementException();
284 }
285
286 return nextRow;
287 }
288
289
290
291
292
293
294 public void remove()
295 throws UnsupportedOperationException
296 {
297 throw new UnsupportedOperationException();
298 }
299 }