View Javadoc

1   package org.apache.turbine.services.mimetype.util;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.File;
23  import java.io.FileInputStream;
24  import java.io.IOException;
25  import java.io.InputStream;
26  
27  import java.util.HashMap;
28  import java.util.Hashtable;
29  import java.util.Locale;
30  import java.util.Map;
31  import java.util.Properties;
32  
/**
 * This class maintains a set of mappers defining mappings
 * between locales and the corresponding charsets. The mappings
 * are defined as properties between locale and charset names.
 * The definitions can be listed in property files located in the user's
 * home directory, the Java home directory or the current class jar.
 * In addition, this class maintains static default mappings
 * and the constructors support application specific mappings.
 *
 * <p>Mappers are consulted in priority order: the lookup cache, the
 * programmatic mappings, the user home file, the Java home file, the
 * class path resource and finally the built-in common mappings.
 *
 * <p>The setters are synchronized and the cache is a {@link Hashtable};
 * the lookup methods themselves are not synchronized.
 *
 * @author <a href="mailto:ilkka.priha@simsoft.fi">Ilkka Priha</a>
 * @version $Id: CharSetMap.java 534527 2007-05-02 16:10:59Z tv $
 */
public class CharSetMap
{
    /**
     * The default charset when nothing else is applicable.
     */
    public static final String DEFAULT_CHARSET = "ISO-8859-1";

    /**
     * The name for charset mapper resources.
     */
    public static final String CHARSET_RESOURCE = "charset.properties";

    /**
     * Priorities of available mappers; a lower index is consulted first.
     */
    private static final int MAP_CACHE = 0;
    private static final int MAP_PROG = 1;
    private static final int MAP_HOME = 2;
    private static final int MAP_SYS = 3;
    private static final int MAP_JAR = 4;
    private static final int MAP_COM = 5;

    /**
     * The number of mapper slots (one per priority above).
     */
    private static final int MAP_COUNT = MAP_COM + 1;

    /**
     * A common charset mapper for languages. It is shared by all
     * instances and is never modified after class initialization;
     * {@link #setCommonCharSet(String, String)} works on a private copy.
     */
    private static final Map commonMapper = new HashMap();

    static
    {
        commonMapper.put("ar", "ISO-8859-6");
        commonMapper.put("be", "ISO-8859-5");
        commonMapper.put("bg", "ISO-8859-5");
        commonMapper.put("ca", "ISO-8859-1");
        commonMapper.put("cs", "ISO-8859-2");
        commonMapper.put("da", "ISO-8859-1");
        commonMapper.put("de", "ISO-8859-1");
        commonMapper.put("el", "ISO-8859-7");
        commonMapper.put("en", "ISO-8859-1");
        commonMapper.put("es", "ISO-8859-1");
        commonMapper.put("et", "ISO-8859-1");
        commonMapper.put("fi", "ISO-8859-1");
        commonMapper.put("fr", "ISO-8859-1");
        commonMapper.put("hr", "ISO-8859-2");
        commonMapper.put("hu", "ISO-8859-2");
        commonMapper.put("is", "ISO-8859-1");
        commonMapper.put("it", "ISO-8859-1");
        commonMapper.put("iw", "ISO-8859-8");
        commonMapper.put("ja", "Shift_JIS");
        commonMapper.put("ko", "EUC-KR");
        commonMapper.put("lt", "ISO-8859-2");
        commonMapper.put("lv", "ISO-8859-2");
        commonMapper.put("mk", "ISO-8859-5");
        commonMapper.put("nl", "ISO-8859-1");
        commonMapper.put("no", "ISO-8859-1");
        commonMapper.put("pl", "ISO-8859-2");
        commonMapper.put("pt", "ISO-8859-1");
        commonMapper.put("ro", "ISO-8859-2");
        commonMapper.put("ru", "ISO-8859-5");
        commonMapper.put("sh", "ISO-8859-5");
        commonMapper.put("sk", "ISO-8859-2");
        commonMapper.put("sl", "ISO-8859-2");
        commonMapper.put("sq", "ISO-8859-2");
        commonMapper.put("sr", "ISO-8859-5");
        commonMapper.put("sv", "ISO-8859-1");
        commonMapper.put("tr", "ISO-8859-9");
        commonMapper.put("uk", "ISO-8859-5");
        commonMapper.put("zh", "GB2312");
        commonMapper.put("zh_TW", "Big5");
    }

    /**
     * An array of available charset mappers, indexed by priority.
     * A slot is <code>null</code> when that source is not available.
     */
    private final Map[] mappers = new Map[MAP_COUNT];

    /**
     * Closes a stream that was only read from, ignoring any failure.
     *
     * @param input the stream to close, may be <code>null</code>.
     */
    private static void closeQuietly(InputStream input)
    {
        if (input != null)
        {
            try
            {
                input.close();
            }
            catch (IOException ignored)
            {
                // Nothing useful can be done if closing a read-only
                // stream fails; the data has already been consumed.
            }
        }
    }

    /**
     * Loads mappings from a stream. The stream is not closed by this
     * method; that remains the responsibility of the caller.
     *
     * @param input an input stream.
     * @return the mappings.
     * @throws IOException for an incorrect stream.
     */
    protected static Map loadStream(InputStream input)
            throws IOException
    {
        Properties props = new Properties();
        props.load(input);
        return new HashMap(props);
    }

    /**
     * Loads mappings from a file. The file is closed before returning,
     * whether or not loading succeeded.
     *
     * @param file a file.
     * @return the mappings.
     * @throws IOException for an incorrect file.
     */
    protected static Map loadFile(File file)
            throws IOException
    {
        InputStream input = new FileInputStream(file);
        try
        {
            return loadStream(input);
        }
        finally
        {
            // Properties.load leaves the stream open, so release it here.
            closeQuietly(input);
        }
    }

    /**
     * Loads mappings from a file path.
     *
     * @param path a file path.
     * @return the mappings.
     * @throws IOException for an incorrect file.
     */
    protected static Map loadPath(String path)
            throws IOException
    {
        return loadFile(new File(path));
    }

    /**
     * Loads mappings from a resource looked up relative to this class.
     * The resource stream is closed before returning.
     *
     * @param name a resource name.
     * @return the mappings, or <code>null</code> if the resource does
     *         not exist or cannot be read.
     */
    protected static Map loadResource(String name)
    {
        InputStream input = CharSetMap.class.getResourceAsStream(name);
        if (input == null)
        {
            return null;
        }

        try
        {
            return loadStream(input);
        }
        catch (IOException x)
        {
            // An unreadable resource is treated like a missing one.
            return null;
        }
        finally
        {
            closeQuietly(input);
        }
    }

    /**
     * Constructs a new charset map with default mappers.
     * Missing or unreadable mapping files are silently skipped.
     */
    public CharSetMap()
    {
        String path;
        try
        {
            // Check whether the user directory contains mappings.
            path = System.getProperty("user.home");
            if (path != null)
            {
                path = path + File.separator + CHARSET_RESOURCE;
                mappers[MAP_HOME] = loadPath(path);
            }
        }
        catch (Exception ignored)
        {
            // Best effort: the file is optional and property access
            // may be denied, so this mapper is simply left unset.
        }

        try
        {
            // Check whether the system directory contains mappings.
            path = System.getProperty("java.home") +
                    File.separator + "lib" + File.separator + CHARSET_RESOURCE;
            mappers[MAP_SYS] = loadPath(path);
        }
        catch (Exception ignored)
        {
            // Best effort: the file is optional and property access
            // may be denied, so this mapper is simply left unset.
        }

        // Check whether the current class jar contains mappings.
        mappers[MAP_JAR] = loadResource("/META-INF/" + CHARSET_RESOURCE);

        // Set the common mapper to have the lowest priority.
        mappers[MAP_COM] = commonMapper;

        // Set the cache mapper to have the highest priority.
        mappers[MAP_CACHE] = new Hashtable();
    }

    /**
     * Constructs a charset map from properties.
     *
     * @param props charset mapping properties.
     */
    public CharSetMap(Properties props)
    {
        this();
        mappers[MAP_PROG] = new HashMap(props);
    }

    /**
     * Constructs a charset map read from a stream.
     * The stream is not closed by this constructor.
     *
     * @param input an input stream.
     * @throws IOException for an incorrect stream.
     */
    public CharSetMap(InputStream input)
            throws IOException
    {
        this();
        mappers[MAP_PROG] = loadStream(input);
    }

    /**
     * Constructs a charset map read from a property file.
     *
     * @param file a property file.
     * @throws IOException for an incorrect property file.
     */
    public CharSetMap(File file)
            throws IOException
    {
        this();
        mappers[MAP_PROG] = loadFile(file);
    }

    /**
     * Constructs a charset map read from a property file path.
     *
     * @param path a property file path.
     * @throws IOException for an incorrect property file.
     */
    public CharSetMap(String path)
            throws IOException
    {
        this();
        mappers[MAP_PROG] = loadPath(path);
    }

    /**
     * Sets a locale-charset mapping in the programmatic mapper.
     * The mapper is replaced by an updated copy and the lookup
     * cache is cleared.
     *
     * @param key the key for the charset.
     * @param charset the corresponding charset.
     */
    public synchronized void setCharSet(String key,
                                        String charset)
    {
        Map current = mappers[MAP_PROG];
        // Copy-on-write: never modify the map that is currently installed.
        Map updated = (current != null) ? new HashMap(current) : new HashMap();
        updated.put(key, charset);
        mappers[MAP_PROG] = updated;
        mappers[MAP_CACHE].clear();
    }

    /**
     * Gets the charset for a locale. First a locale specific charset
     * is searched for, then a country specific one and lastly a language
     * specific one. If none is found, the default charset is returned.
     *
     * @param locale the locale.
     * @return the charset.
     */
    public String getCharSet(Locale locale)
    {
        // Check the cache first.
        String key = locale.toString();
        if (key.length() == 0)
        {
            // The locale has neither language nor country; fall back
            // to a key built from its variant alone.
            key = "__" + locale.getVariant();
            if (key.length() == 2)
            {
                return DEFAULT_CHARSET;
            }
        }
        String charset = searchCharSet(key);
        if (charset.length() == 0)
        {
            // Not found, perform a full search and update the cache.
            String[] items = new String[3];
            items[2] = locale.getVariant();
            items[1] = locale.getCountry();
            items[0] = locale.getLanguage();
            charset = searchCharSet(items);
            if (charset.length() == 0)
            {
                charset = DEFAULT_CHARSET;
            }
            mappers[MAP_CACHE].put(key, charset);
        }
        return charset;
    }

    /**
     * Gets the charset for a locale with a variant. The search
     * is performed in the following order:
     * "lang"_"country"_"variant"="charset",
     * _"country"_"variant"="charset",
     * "lang"__"variant"="charset",
     * __"variant"="charset",
     * "lang"_"country"="charset",
     * _"country"="charset",
     * "lang"="charset".
     * If nothing of the above is found, the default charset is returned.
     * A <code>null</code> or empty variant behaves like
     * {@link #getCharSet(Locale)}.
     *
     * @param locale the locale.
     * @param variant a variant field.
     * @return the charset.
     */
    public String getCharSet(Locale locale,
                             String variant)
    {
        if ((variant == null) ||
                (variant.length() == 0))
        {
            return getCharSet(locale);
        }

        // Build the cache key and check the cache first.
        String key = locale.toString();
        if (key.length() == 0)
        {
            key = "__" + locale.getVariant();
            if (key.length() > 2)
            {
                key += "_" + variant;
            }
            else
            {
                key += variant;
            }
        }
        else if (locale.getCountry().length() == 0)
        {
            key += "__" + variant;
        }
        else
        {
            key += "_" + variant;
        }

        String charset = searchCharSet(key);
        if (charset.length() == 0)
        {
            // Not found, perform a full search and update the cache.
            String[] items = new String[4];
            items[3] = variant;
            items[2] = locale.getVariant();
            items[1] = locale.getCountry();
            items[0] = locale.getLanguage();
            charset = searchCharSet(items);
            if (charset.length() == 0)
            {
                charset = DEFAULT_CHARSET;
            }
            mappers[MAP_CACHE].put(key, charset);
        }
        return charset;
    }

    /**
     * Gets the charset for a specified key.
     *
     * @param key the key for the charset.
     * @return the found charset or the default one.
     */
    public String getCharSet(String key)
    {
        String charset = searchCharSet(key);
        return charset.length() > 0 ? charset : DEFAULT_CHARSET;
    }

    /**
     * Gets the charset for a specified key.
     *
     * @param key the key for the charset.
     * @param def the default charset if none is found.
     * @return the found charset or the given default.
     */
    public String getCharSet(String key,
                             String def)
    {
        String charset = searchCharSet(key);
        return charset.length() > 0 ? charset : def;
    }

    /**
     * Searches for a charset for a specified locale, trying the most
     * specific item combinations first.
     *
     * @param items an array of locale items.
     * @return the found charset or an empty string.
     */
    private String searchCharSet(String[] items)
    {
        String charset;
        StringBuffer sb = new StringBuffer();
        for (int i = items.length; i > 0; i--)
        {
            charset = searchCharSet(items, sb, i);
            if (charset.length() > 0)
            {
                return charset;
            }
            // Start the next, less specific round from an empty key.
            sb.setLength(0);
        }
        return "";
    }

    /**
     * Searches recursively for a charset for a specified locale.
     * The key is assembled right to left in <code>base</code>.
     *
     * @param items an array of locale items.
     * @param base a buffer of base items.
     * @param count the number of items to go through.
     * @return the found charset or an empty string.
     */
    private String searchCharSet(String[] items,
                                 StringBuffer base,
                                 int count)
    {
        if ((--count >= 0) &&
                (items[count] != null) &&
                (items[count].length() > 0))
        {
            String charset;
            base.insert(0, items[count]);
            // Length of the part of the key owned by this level.
            int length = base.length();
            for (int i = count; i > 0; i--)
            {
                if ((i == count) ||
                        (i <= 1))
                {
                    base.insert(0, '_');
                    length++;
                }
                charset = searchCharSet(items, base, i);
                if (charset.length() > 0)
                {
                    return charset;
                }
                // Drop whatever the failed deeper level prepended.
                base.delete(0, base.length() - length);
            }
            return searchCharSet(base.toString());
        }
        else
        {
            return "";
        }
    }

    /**
     * Searches for a charset for a specified key. The result, including
     * a miss (stored as an empty string), is recorded in the cache.
     *
     * @param key the key for the charset.
     * @return the found charset or an empty string.
     */
    private String searchCharSet(String key)
    {
        if ((key != null) &&
                (key.length() > 0))
        {
            // Go through mappers.
            Map mapper;
            String charset;
            for (int i = 0; i < mappers.length; i++)
            {
                mapper = mappers[i];
                if (mapper != null)
                {
                    charset = (String) mapper.get(key);
                    if (charset != null)
                    {
                        // Update the cache.
                        if (i > MAP_CACHE)
                        {
                            mappers[MAP_CACHE].put(key, charset);
                        }
                        return charset;
                    }
                }
            }

            // Not found, add an empty string to the cache.
            mappers[MAP_CACHE].put(key, "");
        }
        return "";
    }

    /**
     * Sets a common locale-charset mapping for this instance.
     * The common mapper is replaced by an updated copy, so the mappings
     * shared with other instances are not modified, and the lookup
     * cache is cleared.
     *
     * @param key the key for the charset.
     * @param charset the corresponding charset.
     */
    protected synchronized void setCommonCharSet(String key,
                                                 String charset)
    {
        // Copy-on-write: the installed map may be the shared static one.
        Map updated = new HashMap(mappers[MAP_COM]);
        updated.put(key, charset);
        mappers[MAP_COM] = updated;
        mappers[MAP_CACHE].clear();
    }
}