package org.apache.fulcrum.parser;


/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */


import java.io.Reader;
import java.io.StreamTokenizer;
import java.util.List;

/**
 * CSVParser is used to parse a stream with comma-separated values and
 * generate ParameterParser objects which can be used to
 * extract the values in the desired type.
 *
 * <p>The class extends the abstract class DataStreamParser and implements
 * initTokenizer with suitable values for CSV files to provide this
 * functionality.
 *
 * <p>The class (indirectly through DataStreamParser) implements the
 * java.util.Iterator interface for convenience.
 * This allows simple use in a Velocity template for example:
 *
 * <pre>
 * #foreach ($row in $csvfile)
 *   Name: $row.Name
 *   Description: $row.Description
 * #end
 * </pre>
 *
 * @author <a href="mailto:sean@informage.net">Sean Legassick</a>
 * @version $Id$
 */
public class CSVParser extends DataStreamParser
{
    /**
     * Create a new CSVParser instance. Requires a Reader to read the
     * comma-separated values from. The column headers must be set
     * independently either explicitly, or by reading the first line
     * of the CSV values.
     *
     * @param in the input reader.
     */
    public CSVParser(Reader in)
    {
        // No column names and no character encoding are supplied here.
        super(in, null, null);
    }

    /**
     * Create a new CSVParser instance. Requires a Reader to read the
     * comma-separated values from, and a list of column names.
     *
     * @param in the input reader.
     * @param columnNames a list of column names.
     */
    public CSVParser(Reader in, List<String> columnNames)
    {
        // No character encoding is supplied here.
        super(in, columnNames, null);
    }

    /**
     * Create a new CSVParser instance. Requires a Reader to read the
     * comma-separated values from, a list of column names and a
     * character encoding.
     *
     * @param in the input reader.
     * @param columnNames a list of column names.
     * @param characterEncoding the character encoding of the input.
     */
    public CSVParser(Reader in, List<String> columnNames, String characterEncoding)
    {
        super(in, columnNames, characterEncoding);
    }

    /**
     * Initialize the StreamTokenizer instance used to read the lines
     * from the input reader.
     *
     * <p>The configuration steps are order-dependent: the digits,
     * {@code '-'} and {@code '.'} are first reset to ordinary characters
     * (which switches off number parsing), then the range starting at the
     * space character is marked as word characters, and only afterwards
     * are the comma and the double quote given their special roles.
     *
     * <p>NOTE(review): because the comma is configured as a whitespace
     * character, a run of adjacent commas is presumably skipped as a
     * whole by the tokenizer, so empty fields may not be reported as
     * separate tokens - confirm against DataStreamParser.
     *
     * @param tokenizer the tokenizer to configure for CSV input.
     */
    @Override
    protected void initTokenizer(StreamTokenizer tokenizer)
    {
        // set all numeric characters as ordinary characters
        // (switches off number parsing)
        tokenizer.ordinaryChars('0', '9');
        tokenizer.ordinaryChar('-');
        tokenizer.ordinaryChar('.');

        // set all printable characters to be treated as word chars
        tokenizer.wordChars(' ', Integer.MAX_VALUE);

        // now set comma as the whitespace character
        tokenizer.whitespaceChars(',', ',');

        // and set the quote mark as the quoting character
        tokenizer.quoteChar('"');

        // and finally say that end of line is significant
        tokenizer.eolIsSignificant(true);
    }
}