1 package org.apache.fulcrum.parser;
2
3
4 /*
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
20 * under the License.
21 */
22
23
24 import java.io.Reader;
25 import java.io.StreamTokenizer;
26 import java.util.List;
27
28 /**
29 * CSVParser is used to parse a stream with comma-separated values and
30 * generate ParameterParser objects which can be used to
31 * extract the values in the desired type.
32 *
33 * <p>The class extends the abstract class DataStreamParser and implements
34 * initTokenizer with suitable values for CSV files to provide this
35 * functionality.
36 *
37 * <p>The class (indirectly through DataStreamParser) implements the
38 * java.util.Iterator interface for convenience.
39 * This allows simple use in a Velocity template for example:
40 *
41 * <pre>
42 * #foreach ($row in $csvfile)
43 * Name: $row.Name
44 * Description: $row.Description
45 * #end
46 * </pre>
47 *
48 * @author <a href="mailto:sean@informage.net">Sean Legassick</a>
49 * @version $Id$
50 */
51 public class CSVParser extends DataStreamParser
52 {
53 /**
54 * Create a new CSVParser instance. Requires a Reader to read the
55 * comma-separated values from. The column headers must be set
56 * independently either explicitly, or by reading the first line
57 * of the CSV values.
58 *
59 * @param in the input reader.
60 */
61 public CSVParser(Reader in)
62 {
63 super(in, null, null);
64 }
65
66 /**
67 * Create a new CSVParser instance. Requires a Reader to read the
68 * comma-separated values from, and a list of column names.
69 *
70 * @param in the input reader.
71 * @param columnNames a list of column names.
72 */
73 public CSVParser(Reader in, List<String> columnNames)
74 {
75 super(in, columnNames, null);
76 }
77
78 /**
79 * Create a new CSVParser instance. Requires a Reader to read the
80 * comma-separated values from, a list of column names and a
81 * character encoding.
82 *
83 * @param in the input reader.
84 * @param columnNames a list of column names.
85 * @param characterEncoding the character encoding of the input.
86 */
87 public CSVParser(Reader in, List<String> columnNames, String characterEncoding)
88 {
89 super(in, columnNames, characterEncoding);
90 }
91
92 /**
93 * Initialize the StreamTokenizer instance used to read the lines
94 * from the input reader.
95 */
96 protected void initTokenizer(StreamTokenizer tokenizer)
97 {
98 // set all numeric characters as ordinary characters
99 // (switches off number parsing)
100 tokenizer.ordinaryChars('0', '9');
101 tokenizer.ordinaryChars('-', '-');
102 tokenizer.ordinaryChars('.', '.');
103
104 // set all printable characters to be treated as word chars
105 tokenizer.wordChars(' ', Integer.MAX_VALUE);
106
107 // now set comma as the whitespace character
108 tokenizer.whitespaceChars(',', ',');
109
110 // and set the quote mark as the quoting character
111 tokenizer.quoteChar('"');
112
113 // and finally say that end of line is significant
114 tokenizer.eolIsSignificant(true);
115 }
116 }