001package org.apache.fulcrum.parser;
002
003
004/*
005 * Licensed to the Apache Software Foundation (ASF) under one
006 * or more contributor license agreements.  See the NOTICE file
007 * distributed with this work for additional information
008 * regarding copyright ownership.  The ASF licenses this file
009 * to you under the Apache License, Version 2.0 (the
010 * "License"); you may not use this file except in compliance
011 * with the License.  You may obtain a copy of the License at
012 *
013 *   http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing,
016 * software distributed under the License is distributed on an
017 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
018 * KIND, either express or implied.  See the License for the
019 * specific language governing permissions and limitations
020 * under the License.
021 */
022
023
024import java.io.Reader;
025import java.io.StreamTokenizer;
026import java.util.List;
027
028/**
029 * TSVParser is used to parse a stream with tab-separated values and
030 * generate ParameterParser objects which can be used to
031 * extract the values in the desired type.
032 *
033 * <p>The class extends the abstract class DataStreamParser and implements
034 * initTokenizer with suitable values for TSV files to provide this
035 * functionality.
036 *
037 * <p>The class (indirectly through DataStreamParser) implements the
038 * java.util.Iterator interface for convenience.
039 * This allows simple use in a Velocity template for example:
040 *
041 * <pre>
042 * #foreach ($row in $tsvfile)
043 *   Name: $row.Name
044 *   Description: $row.Description
045 * #end
046 * </pre>
047 *
048 * @author <a href="mailto:sean@informage.net">Sean Legassick</a>
049 * @version $Id$
050 */
051public class TSVParser
052    extends DataStreamParser
053{
054    /**
055     * Create a new TSVParser instance. Requires a Reader to read the
056     * tab-separated values from. The column headers must be set
057     * independently either explicitly, or by reading the first line
058     * of the TSV values.
059     *
060     * @param in the input reader.
061     */
062    public TSVParser(Reader in)
063    {
064        super(in, null, null);
065    }
066
067    /**
068     * Create a new TSVParser instance. Requires a Reader to read the
069     * tab-separated values from, and a list of column names.
070     *
071     * @param in the input reader.
072     * @param columnNames a list of column names.
073     */
074    public TSVParser(Reader in, List<String> columnNames)
075    {
076        super(in, columnNames, null);
077    }
078
079    /**
080     * Create a new TSVParser instance. Requires a Reader to read the
081     * tab-separated values from, a list of column names and a
082     * character encoding.
083     *
084     * @param in the input reader.
085     * @param columnNames a list of column names.
086     * @param characterEncoding the character encoding of the input.
087     */
088    public TSVParser(Reader in, List<String> columnNames, String characterEncoding)
089    {
090        super(in, columnNames, characterEncoding);
091    }
092
093    /**
094     * Initialize the StreamTokenizer instance used to read the lines
095     * from the input reader.
096     * 
097     * @param tokenizer the stream tokenizer to be used
098     */
099    protected void initTokenizer(StreamTokenizer tokenizer)
100    {
101        // set all numeric characters as ordinary characters
102        // (switches off number parsing)
103        tokenizer.ordinaryChars('0', '9');
104        tokenizer.ordinaryChars('-', '-');
105        tokenizer.ordinaryChars('.', '.');
106
107        // set all printable characters to be treated as word chars
108        tokenizer.wordChars(' ', Integer.MAX_VALUE);
109
110        // and finally say that end of line is significant
111        tokenizer.eolIsSignificant(true);
112    }
113}