1 package org.apache.fulcrum.parser;
2
3
4 /*
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
20 * under the License.
21 */
22
23
24 import java.io.Reader;
25 import java.io.StreamTokenizer;
26 import java.util.List;
27
28 /**
29 * TSVParser is used to parse a stream with tab-separated values and
30 * generate ParameterParser objects which can be used to
31 * extract the values in the desired type.
32 *
33 * <p>The class extends the abstract class DataStreamParser and implements
34 * initTokenizer with suitable values for TSV files to provide this
35 * functionality.
36 *
37 * <p>The class (indirectly through DataStreamParser) implements the
38 * java.util.Iterator interface for convenience.
39 * This allows simple use in a Velocity template for example:
40 *
41 * <pre>
42 * #foreach ($row in $tsvfile)
43 * Name: $row.Name
44 * Description: $row.Description
45 * #end
46 * </pre>
47 *
48 * @author <a href="mailto:sean@informage.net">Sean Legassick</a>
49 * @version $Id$
50 */
51 public class TSVParser
52 extends DataStreamParser
53 {
54 /**
55 * Create a new TSVParser instance. Requires a Reader to read the
56 * tab-separated values from. The column headers must be set
57 * independently either explicitly, or by reading the first line
58 * of the TSV values.
59 *
60 * @param in the input reader.
61 */
62 public TSVParser(Reader in)
63 {
64 super(in, null, null);
65 }
66
67 /**
68 * Create a new TSVParser instance. Requires a Reader to read the
69 * tab-separated values from, and a list of column names.
70 *
71 * @param in the input reader.
72 * @param columnNames a list of column names.
73 */
74 public TSVParser(Reader in, List<String> columnNames)
75 {
76 super(in, columnNames, null);
77 }
78
79 /**
80 * Create a new TSVParser instance. Requires a Reader to read the
81 * tab-separated values from, a list of column names and a
82 * character encoding.
83 *
84 * @param in the input reader.
85 * @param columnNames a list of column names.
86 * @param characterEncoding the character encoding of the input.
87 */
88 public TSVParser(Reader in, List<String> columnNames, String characterEncoding)
89 {
90 super(in, columnNames, characterEncoding);
91 }
92
93 /**
94 * Initialize the StreamTokenizer instance used to read the lines
95 * from the input reader.
96 *
97 * @param tokenizer the stream tokenizer to be used
98 */
99 protected void initTokenizer(StreamTokenizer tokenizer)
100 {
101 // set all numeric characters as ordinary characters
102 // (switches off number parsing)
103 tokenizer.ordinaryChars('0', '9');
104 tokenizer.ordinaryChars('-', '-');
105 tokenizer.ordinaryChars('.', '.');
106
107 // set all printable characters to be treated as word chars
108 tokenizer.wordChars(' ', Integer.MAX_VALUE);
109
110 // and finally say that end of line is significant
111 tokenizer.eolIsSignificant(true);
112 }
113 }