View Javadoc

1   package org.apache.turbine.services.mimetype.util;
2   
3   /* ====================================================================
4    * The Apache Software License, Version 1.1
5    *
6    * Copyright (c) 2001-2003 The Apache Software Foundation.  All rights
7    * reserved.
8    *
9    * Redistribution and use in source and binary forms, with or without
10   * modification, are permitted provided that the following conditions
11   * are met:
12   *
13   * 1. Redistributions of source code must retain the above copyright
14   *    notice, this list of conditions and the following disclaimer.
15   *
16   * 2. Redistributions in binary form must reproduce the above copyright
17   *    notice, this list of conditions and the following disclaimer in
18   *    the documentation and/or other materials provided with the
19   *    distribution.
20   *
21   * 3. The end-user documentation included with the redistribution,
22   *    if any, must include the following acknowledgment:
23   *       "This product includes software developed by the
24   *        Apache Software Foundation (http://www.apache.org/)."
25   *    Alternately, this acknowledgment may appear in the software itself,
26   *    if and wherever such third-party acknowledgments normally appear.
27   *
28   * 4. The names "Apache" and "Apache Software Foundation" and
29   *    "Apache Turbine" must not be used to endorse or promote products
30   *    derived from this software without prior written permission. For
31   *    written permission, please contact apache@apache.org.
32   *
33   * 5. Products derived from this software may not be called "Apache",
34   *    "Apache Turbine", nor may "Apache" appear in their name, without
35   *    prior written permission of the Apache Software Foundation.
36   *
37   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
38   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
39   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
40   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
41   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
44   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
45   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
46   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
47   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48   * SUCH DAMAGE.
49   * ====================================================================
50   *
51   * This software consists of voluntary contributions made by many
52   * individuals on behalf of the Apache Software Foundation.  For more
53   * information on the Apache Software Foundation, please see
54   * <http://www.apache.org/>.
55   */
56  
57  import java.io.File;
58  import java.io.FileInputStream;
59  import java.io.IOException;
60  import java.io.InputStream;
61  
62  import java.util.HashMap;
63  import java.util.Hashtable;
64  import java.util.Locale;
65  import java.util.Map;
66  import java.util.Properties;
67  
/**
 * Maintains a prioritized set of mappers that translate locale keys
 * into charset names. Each mapper is a map from a locale key (such as
 * <code>"ja"</code>, <code>"zh_TW"</code> or <code>"_FI"</code>) to a
 * charset name.
 *
 * <p>Mappers are consulted in the following order, the first hit wins:
 * <ol>
 * <li>the per-instance lookup cache,</li>
 * <li>application specific mappings given to a constructor or set
 *     with {@link #setCharSet(String, String)},</li>
 * <li><code>charset.properties</code> in the directory named by the
 *     <code>user.home</code> system property,</li>
 * <li><code>lib/charset.properties</code> in the directory named by the
 *     <code>java.home</code> system property,</li>
 * <li>the class path resource <code>/META-INF/charset.properties</code>,</li>
 * <li>the static common mappings defined in this class.</li>
 * </ol>
 *
 * @deprecated Use the Fulcrum Mimetype component instead.
 * @author <a href="mailto:ilkka.priha@simsoft.fi">Ilkka Priha</a>
 * @version $Id: CharSetMap.java,v 1.5 2003/10/24 12:45:58 epugh Exp $
 */
public class CharSetMap
{
    /**
     * The default charset when nothing else is applicable.
     */
    public static final String DEFAULT_CHARSET = "ISO-8859-1";

    /**
     * The name for charset mapper resources.
     */
    public static final String CHARSET_RESOURCE = "charset.properties";

    /**
     * Priorities of available mappers. The value is the index into
     * the mapper array; lower indexes are searched first.
     */
    private static final int MAP_CACHE = 0;
    private static final int MAP_PROG = 1;
    private static final int MAP_HOME = 2;
    private static final int MAP_SYS = 3;
    private static final int MAP_JAR = 4;
    private static final int MAP_COM = 5;

    /**
     * A common charset mapper for languages. Shared by all instances
     * until an instance replaces its own reference with a modified copy
     * in {@link #setCommonCharSet(String, String)}.
     */
    private static HashMap commonMapper = new HashMap();

    static
    {
        commonMapper.put("ar", "ISO-8859-6");
        commonMapper.put("be", "ISO-8859-5");
        commonMapper.put("bg", "ISO-8859-5");
        commonMapper.put("ca", "ISO-8859-1");
        commonMapper.put("cs", "ISO-8859-2");
        commonMapper.put("da", "ISO-8859-1");
        commonMapper.put("de", "ISO-8859-1");
        commonMapper.put("el", "ISO-8859-7");
        commonMapper.put("en", "ISO-8859-1");
        commonMapper.put("es", "ISO-8859-1");
        commonMapper.put("et", "ISO-8859-1");
        commonMapper.put("fi", "ISO-8859-1");
        commonMapper.put("fr", "ISO-8859-1");
        commonMapper.put("hr", "ISO-8859-2");
        commonMapper.put("hu", "ISO-8859-2");
        commonMapper.put("is", "ISO-8859-1");
        commonMapper.put("it", "ISO-8859-1");
        commonMapper.put("iw", "ISO-8859-8");
        commonMapper.put("ja", "Shift_JIS");
        commonMapper.put("ko", "EUC-KR");
        commonMapper.put("lt", "ISO-8859-2");
        commonMapper.put("lv", "ISO-8859-2");
        commonMapper.put("mk", "ISO-8859-5");
        commonMapper.put("nl", "ISO-8859-1");
        commonMapper.put("no", "ISO-8859-1");
        commonMapper.put("pl", "ISO-8859-2");
        commonMapper.put("pt", "ISO-8859-1");
        commonMapper.put("ro", "ISO-8859-2");
        commonMapper.put("ru", "ISO-8859-5");
        commonMapper.put("sh", "ISO-8859-5");
        commonMapper.put("sk", "ISO-8859-2");
        commonMapper.put("sl", "ISO-8859-2");
        commonMapper.put("sq", "ISO-8859-2");
        commonMapper.put("sr", "ISO-8859-5");
        commonMapper.put("sv", "ISO-8859-1");
        commonMapper.put("tr", "ISO-8859-9");
        commonMapper.put("uk", "ISO-8859-5");
        commonMapper.put("zh", "GB2312");
        commonMapper.put("zh_TW", "Big5");
    }

    /**
     * An array of available charset mappers, indexed by the
     * <code>MAP_*</code> priorities. A slot is <code>null</code> when
     * the corresponding source was not available.
     */
    private Map mappers[] = new Map[6];

    /**
     * Loads mappings from a stream. The stream is read but not closed;
     * closing it remains the responsibility of the caller.
     *
     * @param input an input stream.
     * @return the mappings.
     * @throws IOException for an incorrect stream.
     */
    protected static Map loadStream(InputStream input)
            throws IOException
    {
        Properties props = new Properties();
        props.load(input);
        return new HashMap(props);
    }

    /**
     * Loads mappings from a file. The file stream opened here is always
     * closed before returning, whether or not loading succeeds.
     *
     * @param file a file.
     * @return the mappings.
     * @throws IOException for an incorrect file.
     */
    protected static Map loadFile(File file)
            throws IOException
    {
        InputStream input = new FileInputStream(file);
        try
        {
            return loadStream(input);
        }
        finally
        {
            closeQuietly(input);
        }
    }

    /**
     * Loads mappings from a file path.
     *
     * @param path a file path.
     * @return the mappings.
     * @throws IOException for an incorrect file.
     */
    protected static Map loadPath(String path)
            throws IOException
    {
        return loadFile(new File(path));
    }

    /**
     * Loads mappings from a class path resource. The resource stream
     * opened here is always closed before returning.
     *
     * @param name a resource name.
     * @return the mappings, or <code>null</code> if the resource does
     * not exist or cannot be read.
     */
    protected static Map loadResource(String name)
    {
        InputStream input = CharSetMap.class.getResourceAsStream(name);
        if (input == null)
        {
            return null;
        }

        try
        {
            return loadStream(input);
        }
        catch (IOException x)
        {
            // An unreadable resource is treated like a missing one.
            return null;
        }
        finally
        {
            closeQuietly(input);
        }
    }

    /**
     * Closes a stream that was only read from, ignoring a failure
     * to close it.
     *
     * @param input the stream to close.
     */
    private static void closeQuietly(InputStream input)
    {
        try
        {
            input.close();
        }
        catch (IOException ignored)
        {
            // The data has already been read (or reading has already
            // failed); a close failure must not mask that outcome.
        }
    }

    /**
     * Loads optional mappings from a file located below the directory
     * named by a system property. This is a best-effort lookup: a
     * missing property, a missing or unreadable file, or a denied
     * access all result in <code>null</code> instead of an exception.
     *
     * @param dirProperty the system property naming the directory.
     * @param relativePath the file path relative to that directory.
     * @return the mappings or <code>null</code> if not available.
     */
    private static Map loadOptional(String dirProperty,
                                    String relativePath)
    {
        try
        {
            String dir = System.getProperty(dirProperty);
            if (dir == null)
            {
                return null;
            }
            return loadPath(dir + File.separator + relativePath);
        }
        catch (Exception ignored)
        {
            // Deliberately ignored: these mapping files are optional,
            // so any failure simply leaves the mapper unset.
            return null;
        }
    }

    /**
     * Constructs a new charset map with default mappers.
     */
    public CharSetMap()
    {
        // Check whether the user directory contains mappings.
        mappers[MAP_HOME] = loadOptional("user.home", CHARSET_RESOURCE);

        // Check whether the system directory contains mappings.
        mappers[MAP_SYS] = loadOptional("java.home",
                "lib" + File.separator + CHARSET_RESOURCE);

        // Check whether the current class jar contains mappings.
        mappers[MAP_JAR] = loadResource("/META-INF/" + CHARSET_RESOURCE);

        // Set the common mapper to have the lowest priority.
        mappers[MAP_COM] = commonMapper;

        // Set the cache mapper to have the highest priority.
        mappers[MAP_CACHE] = new Hashtable();
    }

    /**
     * Constructs a charset map from properties.
     *
     * @param props charset mapping properties.
     */
    public CharSetMap(Properties props)
    {
        this();
        mappers[MAP_PROG] = new HashMap(props);
    }

    /**
     * Constructs a charset map read from a stream. The stream is not
     * closed by this constructor.
     *
     * @param input an input stream.
     * @throws IOException for an incorrect stream.
     */
    public CharSetMap(InputStream input)
            throws IOException
    {
        this();
        mappers[MAP_PROG] = loadStream(input);
    }

    /**
     * Constructs a charset map read from a property file.
     *
     * @param file a property file.
     * @throws IOException for an incorrect property file.
     */
    public CharSetMap(File file)
            throws IOException
    {
        this();
        mappers[MAP_PROG] = loadFile(file);
    }

    /**
     * Constructs a charset map read from a property file path.
     *
     * @param path a property file path.
     * @throws IOException for an incorrect property file.
     */
    public CharSetMap(String path)
            throws IOException
    {
        this();
        mappers[MAP_PROG] = loadPath(path);
    }

    /**
     * Sets a locale-charset mapping. The application specific mapper
     * is replaced by a modified copy rather than changed in place, and
     * the lookup cache is cleared so that stale results are dropped.
     *
     * @param key the key for the charset.
     * @param charset the corresponding charset.
     */
    public synchronized void setCharSet(String key,
                                        String charset)
    {
        HashMap mapper = (HashMap) mappers[MAP_PROG];
        mapper = mapper != null ?
                (HashMap) mapper.clone() : new HashMap();
        mapper.put(key, charset);
        mappers[MAP_PROG] = mapper;
        mappers[MAP_CACHE].clear();
    }

    /**
     * Gets the charset for a locale. First a locale specific charset
     * is searched for, then a country specific one and lastly a language
     * specific one. If none is found, the default charset is returned.
     *
     * @param locale the locale.
     * @return the charset.
     */
    public String getCharSet(Locale locale)
    {
        // Check the cache first.
        String key = locale.toString();
        if (key.length() == 0)
        {
            // The locale has neither a language nor a country, so
            // only its variant (if any) can identify it.
            key = "__" + locale.getVariant();
            if (key.length() == 2)
            {
                return DEFAULT_CHARSET;
            }
        }
        String charset = searchCharSet(key);
        if (charset.length() == 0)
        {
            // Not found, perform a full search and update the cache.
            String[] items = new String[3];
            items[2] = locale.getVariant();
            items[1] = locale.getCountry();
            items[0] = locale.getLanguage();
            charset = searchCharSet(items);
            if (charset.length() == 0)
            {
                charset = DEFAULT_CHARSET;
            }
            mappers[MAP_CACHE].put(key, charset);
        }
        return charset;
    }

    /**
     * Gets the charset for a locale with a variant. The search
     * is performed in the following order:
     * "lang"_"country"_"variant"="charset",
     * _"country"_"variant"="charset",
     * "lang"__"variant"="charset",
     * __"variant"="charset",
     * "lang"_"country"="charset",
     * _"country"="charset",
     * "lang"="charset".
     * If nothing of the above is found, the default charset is returned.
     * A <code>null</code> or empty variant delegates to
     * {@link #getCharSet(Locale)}.
     *
     * @param locale the locale.
     * @param variant a variant field.
     * @return the charset.
     */
    public String getCharSet(Locale locale,
                             String variant)
    {
        if ((variant == null) ||
                (variant.length() == 0))
        {
            return getCharSet(locale);
        }

        // Build the full key and check the cache first.
        String key = locale.toString();
        if (key.length() == 0)
        {
            key = "__" + locale.getVariant();
            if (key.length() > 2)
            {
                key += "_" + variant;
            }
            else
            {
                key += variant;
            }
        }
        else if (locale.getCountry().length() == 0)
        {
            // Keep the country position empty: "lang"__"variant".
            key += "__" + variant;
        }
        else
        {
            key += "_" + variant;
        }

        String charset = searchCharSet(key);
        if (charset.length() == 0)
        {
            // Not found, perform a full search and update the cache.
            String[] items = new String[4];
            items[3] = variant;
            items[2] = locale.getVariant();
            items[1] = locale.getCountry();
            items[0] = locale.getLanguage();
            charset = searchCharSet(items);
            if (charset.length() == 0)
            {
                charset = DEFAULT_CHARSET;
            }
            mappers[MAP_CACHE].put(key, charset);
        }
        return charset;
    }

    /**
     * Gets the charset for a specified key.
     *
     * @param key the key for the charset.
     * @return the found charset or the default one.
     */
    public String getCharSet(String key)
    {
        String charset = searchCharSet(key);
        return charset.length() > 0 ? charset : DEFAULT_CHARSET;
    }

    /**
     * Gets the charset for a specified key.
     *
     * @param key the key for the charset.
     * @param def the default charset if none is found.
     * @return the found charset or the given default.
     */
    public String getCharSet(String key,
                             String def)
    {
        String charset = searchCharSet(key);
        return charset.length() > 0 ? charset : def;
    }

    /**
     * Searches for a charset for a specified locale, starting from
     * the most specific combination of items (all of them) and dropping
     * the last item on each round.
     *
     * @param items an array of locale items.
     * @return the found charset or an empty string.
     */
    private String searchCharSet(String[] items)
    {
        String charset;
        StringBuffer sb = new StringBuffer();
        for (int i = items.length; i > 0; i--)
        {
            charset = searchCharSet(items, sb, i);
            if (charset.length() > 0)
            {
                return charset;
            }
            sb.setLength(0);
        }
        return "";
    }

    /**
     * Searches recursively for a charset for a specified locale.
     * The key is built right to left in <code>base</code>: the item at
     * index <code>count - 1</code> is prepended, then every shorter
     * prefix of items is tried in front of it before the buffer
     * content itself is looked up.
     *
     * @param items an array of locale items.
     * @param base a buffer of base items.
     * @param count the number of items to go through.
     * @return the found charset or an empty string.
     */
    private String searchCharSet(String[] items,
                                 StringBuffer base,
                                 int count)
    {
        if ((--count >= 0) &&
                (items[count] != null) &&
                (items[count].length() > 0))
        {
            String charset;
            base.insert(0, items[count]);
            int length = base.length();
            for (int i = count; i > 0; i--)
            {
                // Prepend a separator on the first pass and again
                // just before the item at index 0 is tried.
                if ((i == count) ||
                        (i <= 1))
                {
                    base.insert(0, '_');
                    length++;
                }
                charset = searchCharSet(items, base, i);
                if (charset.length() > 0)
                {
                    return charset;
                }
                // Remove whatever the recursive call prepended.
                base.delete(0, base.length() - length);
            }
            return searchCharSet(base.toString());
        }
        else
        {
            return "";
        }
    }

    /**
     * Searches for a charset for a specified key by going through the
     * mappers in priority order. A hit from any mapper other than the
     * cache is copied into the cache; a miss is cached as an empty
     * string.
     *
     * @param key the key for the charset.
     * @return the found charset or an empty string.
     */
    private String searchCharSet(String key)
    {
        if ((key != null) &&
                (key.length() > 0))
        {
            // Go through mappers.
            Map mapper;
            String charset;
            for (int i = 0; i < mappers.length; i++)
            {
                mapper = mappers[i];
                if (mapper != null)
                {
                    charset = (String) mapper.get(key);
                    if (charset != null)
                    {
                        // Update the cache.
                        if (i > MAP_CACHE)
                        {
                            mappers[MAP_CACHE].put(key, charset);
                        }
                        return charset;
                    }
                }
            }

            // Not found, add an empty string to the cache.
            mappers[MAP_CACHE].put(key, "");
        }
        return "";
    }

    /**
     * Sets a common locale-charset mapping. The common mapper of this
     * instance is replaced by a modified copy, so the static common
     * mappings are left untouched, and the lookup cache is cleared.
     *
     * @param key the key for the charset.
     * @param charset the corresponding charset.
     */
    protected synchronized void setCommonCharSet(String key,
                                                 String charset)
    {
        HashMap mapper = (HashMap) ((HashMap) mappers[MAP_COM]).clone();
        mapper.put(key, charset);
        mappers[MAP_COM] = mapper;
        mappers[MAP_CACHE].clear();
    }
}