View Javadoc
1   package org.apache.fulcrum.parser;
2   
3   
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
22  
23  
24  import java.io.Reader;
25  import java.io.StreamTokenizer;
26  import java.util.List;
27  
28  /**
29   * CSVParser is used to parse a stream with comma-separated values and
30   * generate ParameterParser objects which can be used to
31   * extract the values in the desired type.
32   *
33   * <p>The class extends the abstract class DataStreamParser and implements
34   * initTokenizer with suitable values for CSV files to provide this
35   * functionality.
36   *
37   * <p>The class (indirectly through DataStreamParser) implements the
38   * java.util.Iterator interface for convenience.
39   * This allows simple use in a Velocity template for example:
40   *
41   * <pre>
42   * #foreach ($row in $csvfile)
43   *   Name: $row.Name
44   *   Description: $row.Description
45   * #end
46   * </pre>
47   *
48   * @author <a href="mailto:sean@informage.net">Sean Legassick</a>
49   * @version $Id$
50   */
51  public class CSVParser extends DataStreamParser
52  {
53      /**
54       * Create a new CSVParser instance. Requires a Reader to read the
55       * comma-separated values from. The column headers must be set
56       * independently either explicitly, or by reading the first line
57       * of the CSV values.
58       *
59       * @param in the input reader.
60       */
61      public CSVParser(Reader in)
62      {
63          super(in, null, null);
64      }
65  
66      /**
67       * Create a new CSVParser instance. Requires a Reader to read the
68       * comma-separated values from, and a list of column names.
69       *
70       * @param in the input reader.
71       * @param columnNames a list of column names.
72       */
73      public CSVParser(Reader in, List<String> columnNames)
74      {
75          super(in, columnNames, null);
76      }
77  
78      /**
79       * Create a new CSVParser instance. Requires a Reader to read the
80       * comma-separated values from, a list of column names and a
81       * character encoding.
82       *
83       * @param in the input reader.
84       * @param columnNames a list of column names.
85       * @param characterEncoding the character encoding of the input.
86       */
87      public CSVParser(Reader in, List<String> columnNames, String characterEncoding)
88      {
89          super(in, columnNames, characterEncoding);
90      }
91  
92      /**
93       * Initialize the StreamTokenizer instance used to read the lines
94       * from the input reader.
95       */
96      protected void initTokenizer(StreamTokenizer tokenizer)
97      {
98          // set all numeric characters as ordinary characters
99          // (switches off number parsing)
100         tokenizer.ordinaryChars('0', '9');
101         tokenizer.ordinaryChars('-', '-');
102         tokenizer.ordinaryChars('.', '.');
103 
104         // set all printable characters to be treated as word chars
105         tokenizer.wordChars(' ', Integer.MAX_VALUE);
106 
107         // now set comma as the whitespace character
108         tokenizer.whitespaceChars(',', ',');
109 
110         // and  set the quote mark as the quoting character
111         tokenizer.quoteChar('"');
112 
113         // and finally say that end of line is significant
114         tokenizer.eolIsSignificant(true);
115     }
116 }