package org.apache.fulcrum.parser;


/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */


import java.io.Reader;
import java.io.StreamTokenizer;
import java.util.List;

/**
 * TSVParser is used to parse a stream with tab-separated values and
 * generate ParameterParser objects which can be used to
 * extract the values in the desired type.
 *
 * <p>The class extends the abstract class DataStreamParser and implements
 * initTokenizer with suitable values for TSV files to provide this
 * functionality.
 *
 * <p>The class (indirectly through DataStreamParser) implements the
 * java.util.Iterator interface for convenience.
 * This allows simple use in a Velocity template for example:
 *
 * <pre>
 * #foreach ($row in $tsvfile)
 *   Name: $row.Name
 *   Description: $row.Description
 * #end
 * </pre>
 *
 * @author <a href="mailto:sean@informage.net">Sean Legassick</a>
 * @version $Id$
 */
public class TSVParser
    extends DataStreamParser
{
    /**
     * Create a new TSVParser instance. Requires a Reader to read the
     * tab-separated values from. The column headers must be set
     * independently either explicitly, or by reading the first line
     * of the TSV values.
     *
     * <p>Both the column names and the character encoding are handed
     * to the superclass as {@code null}.
     *
     * @param in the input reader.
     */
    public TSVParser(Reader in)
    {
        super(in, null, null);
    }

    /**
     * Create a new TSVParser instance. Requires a Reader to read the
     * tab-separated values from, and a list of column names.
     *
     * <p>The character encoding is handed to the superclass as
     * {@code null}.
     *
     * @param in the input reader.
     * @param columnNames a list of column names.
     */
    public TSVParser(Reader in, List<String> columnNames)
    {
        super(in, columnNames, null);
    }

    /**
     * Create a new TSVParser instance. Requires a Reader to read the
     * tab-separated values from, a list of column names and a
     * character encoding.
     *
     * @param in the input reader.
     * @param columnNames a list of column names.
     * @param characterEncoding the character encoding of the input.
     */
    public TSVParser(Reader in, List<String> columnNames, String characterEncoding)
    {
        super(in, columnNames, characterEncoding);
    }

    /**
     * Initialize the StreamTokenizer instance used to read the lines
     * from the input reader.
     *
     * <p>The calls are order-sensitive: number parsing is switched off
     * first, and only then is the range from space upwards declared as
     * word characters, so digits, '-' and '.' end up as plain word
     * characters rather than being parsed as numbers.
     *
     * <p>Characters below the space character (the tab among them) are
     * not reclassified here; they keep whatever classification the
     * supplied tokenizer already has. NOTE(review): for a freshly
     * constructed StreamTokenizer that is whitespace, which is
     * presumably what makes the tab act as the field separator -
     * confirm against DataStreamParser.
     *
     * @param tokenizer the stream tokenizer to be used
     */
    @Override
    protected void initTokenizer(StreamTokenizer tokenizer)
    {
        // set all numeric characters as ordinary characters
        // (switches off number parsing)
        tokenizer.ordinaryChars('0', '9');
        tokenizer.ordinaryChars('-', '-');
        tokenizer.ordinaryChars('.', '.');

        // set all printable characters (space included) to be treated
        // as word chars, so a field may contain embedded spaces
        tokenizer.wordChars(' ', Integer.MAX_VALUE);

        // and finally say that end of line is significant
        tokenizer.eolIsSignificant(true);
    }
}