1 package org.apache.turbine.services.mimetype.util;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import java.io.File;
23 import java.io.FileInputStream;
24 import java.io.IOException;
25 import java.io.InputStream;
26
27 import java.util.HashMap;
28 import java.util.Hashtable;
29 import java.util.Locale;
30 import java.util.Map;
31 import java.util.Properties;
32
/**
 * This class maintains a set of mappers defining mappings
 * between locales and the corresponding charsets. The mappings
 * are defined as properties between locale and charset names.
 * The definitions can be listed in property files located in user's
 * home directory, Java home directory or the current class jar.
 * In addition, this class maintains static default mappings
 * and constructors support application specific mappings.
 *
 * <p>Mappers are consulted in priority order: lookup cache,
 * application specific mappings, user home, Java home, class jar
 * and finally the built-in common mappings. The mutators
 * ({@link #setCharSet(String, String)} and
 * {@link #setCommonCharSet(String, String)}) are synchronized and
 * replace the affected mapper with a modified copy; the lookup
 * methods are not synchronized.
 *
 * @author <a href="mailto:ilkka.priha@simsoft.fi">Ilkka Priha</a>
 * @version $Id: CharSetMap.java 534527 2007-05-02 16:10:59Z tv $
 */
public class CharSetMap
{
    /**
     * The default charset when nothing else is applicable.
     */
    public static final String DEFAULT_CHARSET = "ISO-8859-1";

    /**
     * The name for charset mapper resources.
     */
    public static final String CHARSET_RESOURCE = "charset.properties";

    /**
     * Priorities of available mappers. A lower index is consulted first.
     */
    private static final int MAP_CACHE = 0;
    private static final int MAP_PROG = 1;
    private static final int MAP_HOME = 2;
    private static final int MAP_SYS = 3;
    private static final int MAP_JAR = 4;
    private static final int MAP_COM = 5;

    /**
     * The number of mapper slots.
     */
    private static final int MAP_COUNT = 6;

    /**
     * A common charset mapper for languages. It is shared by all
     * instances and never modified after class initialization;
     * {@link #setCommonCharSet(String, String)} works on a copy.
     */
    private static final Map commonMapper = new HashMap();

    static
    {
        commonMapper.put("ar", "ISO-8859-6");
        commonMapper.put("be", "ISO-8859-5");
        commonMapper.put("bg", "ISO-8859-5");
        commonMapper.put("ca", "ISO-8859-1");
        commonMapper.put("cs", "ISO-8859-2");
        commonMapper.put("da", "ISO-8859-1");
        commonMapper.put("de", "ISO-8859-1");
        commonMapper.put("el", "ISO-8859-7");
        commonMapper.put("en", "ISO-8859-1");
        commonMapper.put("es", "ISO-8859-1");
        commonMapper.put("et", "ISO-8859-1");
        commonMapper.put("fi", "ISO-8859-1");
        commonMapper.put("fr", "ISO-8859-1");
        // "he" is the current ISO 639 code for Hebrew and "iw" the legacy
        // one; which of the two Locale.getLanguage() reports depends on
        // the Java version, so both are mapped.
        commonMapper.put("he", "ISO-8859-8");
        commonMapper.put("hr", "ISO-8859-2");
        commonMapper.put("hu", "ISO-8859-2");
        commonMapper.put("is", "ISO-8859-1");
        commonMapper.put("it", "ISO-8859-1");
        commonMapper.put("iw", "ISO-8859-8");
        commonMapper.put("ja", "Shift_JIS");
        commonMapper.put("ko", "EUC-KR");
        commonMapper.put("lt", "ISO-8859-2");
        commonMapper.put("lv", "ISO-8859-2");
        commonMapper.put("mk", "ISO-8859-5");
        commonMapper.put("nl", "ISO-8859-1");
        commonMapper.put("no", "ISO-8859-1");
        commonMapper.put("pl", "ISO-8859-2");
        commonMapper.put("pt", "ISO-8859-1");
        commonMapper.put("ro", "ISO-8859-2");
        commonMapper.put("ru", "ISO-8859-5");
        commonMapper.put("sh", "ISO-8859-5");
        commonMapper.put("sk", "ISO-8859-2");
        commonMapper.put("sl", "ISO-8859-2");
        commonMapper.put("sq", "ISO-8859-2");
        commonMapper.put("sr", "ISO-8859-5");
        commonMapper.put("sv", "ISO-8859-1");
        commonMapper.put("tr", "ISO-8859-9");
        commonMapper.put("uk", "ISO-8859-5");
        commonMapper.put("zh", "GB2312");
        commonMapper.put("zh_TW", "Big5");
    }

    /**
     * An array of available charset mappers indexed by priority.
     * Slots without a mapper are null.
     */
    private final Map[] mappers = new Map[MAP_COUNT];

    /**
     * A cache of charsets resolved for locale keys. It is kept apart
     * from the exact key cache in {@code mappers[MAP_CACHE]} because
     * the values stored here may be fallbacks (a less specific mapping
     * or the default charset) that must not be reported as a mapping
     * of the key itself by the key based lookups.
     */
    private final Map localeCache = new Hashtable();

    /**
     * Loads mappings from a stream. The stream is not closed
     * by this method.
     *
     * @param input an input stream.
     * @return the mappings.
     * @throws IOException for an incorrect stream.
     */
    protected static Map loadStream(InputStream input)
        throws IOException
    {
        Properties props = new Properties();
        props.load(input);
        return new HashMap(props);
    }

    /**
     * Loads mappings from a file. The file is closed before
     * this method returns.
     *
     * @param file a file.
     * @return the mappings.
     * @throws IOException for an incorrect file.
     */
    protected static Map loadFile(File file)
        throws IOException
    {
        InputStream input = new FileInputStream(file);
        try
        {
            return loadStream(input);
        }
        finally
        {
            closeQuietly(input);
        }
    }

    /**
     * Loads mappings from a file path.
     *
     * @param path a file path.
     * @return the mappings.
     * @throws IOException for an incorrect file.
     */
    protected static Map loadPath(String path)
        throws IOException
    {
        return loadFile(new File(path));
    }

    /**
     * Loads mappings from a class path resource. The resource
     * stream is closed before this method returns.
     *
     * @param name a resource name.
     * @return the mappings or null if the resource does not exist
     * or cannot be read.
     */
    protected static Map loadResource(String name)
    {
        InputStream input = CharSetMap.class.getResourceAsStream(name);
        if (input == null)
        {
            return null;
        }

        try
        {
            return loadStream(input);
        }
        catch (IOException ignored)
        {
            // Best effort: an unreadable resource counts as a missing one.
            return null;
        }
        finally
        {
            closeQuietly(input);
        }
    }

    /**
     * Closes an input stream ignoring any failure, so that a close
     * error cannot hide the result or the exception of the read.
     *
     * @param input the stream to close.
     */
    private static void closeQuietly(InputStream input)
    {
        try
        {
            input.close();
        }
        catch (IOException ignored)
        {
            // Nothing useful can be done about a failed close of a
            // stream that was only read from.
        }
    }

    /**
     * Constructs a new charset map with default mappers.
     * The optional property files are loaded on a best effort
     * basis: a missing or unreadable file leaves its slot empty.
     */
    public CharSetMap()
    {
        try
        {
            // Check whether the user directory contains mappings.
            String home = System.getProperty("user.home");
            if (home != null)
            {
                mappers[MAP_HOME] =
                    loadPath(home + File.separator + CHARSET_RESOURCE);
            }
        }
        catch (Exception ignored)
        {
            // Deliberately ignored: the file is optional.
        }

        try
        {
            // Check whether the system directory contains mappings.
            String path = System.getProperty("java.home") +
                File.separator + "lib" + File.separator + CHARSET_RESOURCE;
            mappers[MAP_SYS] = loadPath(path);
        }
        catch (Exception ignored)
        {
            // Deliberately ignored: the file is optional.
        }

        // Check whether the current class jar contains mappings.
        mappers[MAP_JAR] = loadResource("/META-INF/" + CHARSET_RESOURCE);

        // Set the common mapper to have the lowest priority.
        mappers[MAP_COM] = commonMapper;

        // Set the cache mapper to have the highest priority.
        mappers[MAP_CACHE] = new Hashtable();
    }

    /**
     * Constructs a charset map from properties.
     *
     * @param props charset mapping properties.
     */
    public CharSetMap(Properties props)
    {
        this();
        mappers[MAP_PROG] = new HashMap(props);
    }

    /**
     * Constructs a charset map read from a stream. The stream
     * is not closed by this constructor.
     *
     * @param input an input stream.
     * @throws IOException for an incorrect stream.
     */
    public CharSetMap(InputStream input)
        throws IOException
    {
        this();
        mappers[MAP_PROG] = loadStream(input);
    }

    /**
     * Constructs a charset map read from a property file.
     *
     * @param file a property file.
     * @throws IOException for an incorrect property file.
     */
    public CharSetMap(File file)
        throws IOException
    {
        this();
        mappers[MAP_PROG] = loadFile(file);
    }

    /**
     * Constructs a charset map read from a property file path.
     *
     * @param path a property file path.
     * @throws IOException for an incorrect property file.
     */
    public CharSetMap(String path)
        throws IOException
    {
        this();
        mappers[MAP_PROG] = loadPath(path);
    }

    /**
     * Sets a locale-charset mapping. The application specific mapper
     * is replaced with a modified copy and the caches are cleared.
     *
     * @param key the key for the charset.
     * @param charset the corresponding charset.
     */
    public synchronized void setCharSet(String key,
                                        String charset)
    {
        Map current = mappers[MAP_PROG];
        Map mapper = current != null ? new HashMap(current) : new HashMap();
        mapper.put(key, charset);
        mappers[MAP_PROG] = mapper;
        clearCaches();
    }

    /**
     * Gets the charset for a locale. First a locale specific charset
     * is searched for, then a country specific one and lastly a language
     * specific one. If none is found, the default charset is returned.
     *
     * @param locale the locale, may be null.
     * @return the charset, the default one for a null or empty locale.
     */
    public String getCharSet(Locale locale)
    {
        if (locale == null)
        {
            return DEFAULT_CHARSET;
        }

        // Build the key; a locale without language and country has
        // an empty string form and is keyed by its variant only.
        String key = locale.toString();
        if (key.length() == 0)
        {
            key = "__" + locale.getVariant();
            if (key.length() == 2)
            {
                // Nothing at all to search with.
                return DEFAULT_CHARSET;
            }
        }

        String[] items = new String[3];
        items[2] = locale.getVariant();
        items[1] = locale.getCountry();
        items[0] = locale.getLanguage();
        return resolveCharSet(key, items);
    }

    /**
     * Gets the charset for a locale with a variant. The search
     * is performed in the following order:
     * "lang"_"country"_"variant"="charset",
     * _"country"_"variant"="charset",
     * "lang"__"variant"="charset",
     * __"variant"="charset",
     * "lang"_"country"="charset",
     * _"country"="charset",
     * "lang"="charset".
     * If nothing of the above is found, the default charset is returned.
     *
     * @param locale the locale, may be null.
     * @param variant a variant field, may be null or empty in which
     * case the result is the same as for {@link #getCharSet(Locale)}.
     * @return the charset.
     */
    public String getCharSet(Locale locale,
                             String variant)
    {
        if ((variant == null) ||
            (variant.length() == 0))
        {
            return getCharSet(locale);
        }
        if (locale == null)
        {
            return DEFAULT_CHARSET;
        }

        // Append the extra variant to the locale key, keeping a slot
        // for every missing locale field.
        String key = locale.toString();
        if (key.length() == 0)
        {
            key = "__" + locale.getVariant();
            if (key.length() > 2)
            {
                key += "_" + variant;
            }
            else
            {
                key += variant;
            }
        }
        else if (locale.getCountry().length() == 0)
        {
            key += "__" + variant;
        }
        else
        {
            key += "_" + variant;
        }

        String[] items = new String[4];
        items[3] = variant;
        items[2] = locale.getVariant();
        items[1] = locale.getCountry();
        items[0] = locale.getLanguage();
        return resolveCharSet(key, items);
    }

    /**
     * Gets the charset for a specified key.
     *
     * @param key the key for the charset.
     * @return the found charset or the default one.
     */
    public String getCharSet(String key)
    {
        String charset = searchCharSet(key);
        return charset.length() > 0 ? charset : DEFAULT_CHARSET;
    }

    /**
     * Gets the charset for a specified key.
     *
     * @param key the key for the charset.
     * @param def the default charset if none is found.
     * @return the found charset or the given default.
     */
    public String getCharSet(String key,
                             String def)
    {
        String charset = searchCharSet(key);
        return charset.length() > 0 ? charset : def;
    }

    /**
     * Resolves the charset for a locale key: the exact key is tried
     * first, then the combinations of the locale items and finally
     * the default charset. The outcome is remembered in the locale
     * cache only, so a fallback never shows up as a mapping of the
     * exact key.
     *
     * @param key the full locale key.
     * @param items the locale items, the most significant first.
     * @return the resolved charset, never empty.
     */
    private String resolveCharSet(String key,
                                  String[] items)
    {
        String charset = (String) localeCache.get(key);
        if (charset == null)
        {
            charset = searchCharSet(key);
            if (charset.length() == 0)
            {
                charset = searchCharSet(items);
                if (charset.length() == 0)
                {
                    charset = DEFAULT_CHARSET;
                }
            }
            localeCache.put(key, charset);
        }
        return charset;
    }

    /**
     * Clears the exact key cache and the locale cache. Called
     * whenever a mapper is replaced.
     */
    private void clearCaches()
    {
        mappers[MAP_CACHE].clear();
        localeCache.clear();
    }

    /**
     * Searches for a charset for a specified locale, starting
     * from the last item and dropping one trailing item per round.
     *
     * @param items an array of locale items.
     * @return the found charset or an empty string.
     */
    private String searchCharSet(String[] items)
    {
        String charset;
        StringBuffer sb = new StringBuffer();
        for (int i = items.length; i > 0; i--)
        {
            charset = searchCharSet(items, sb, i);
            if (charset.length() > 0)
            {
                return charset;
            }
            // Start the next, shorter round from an empty key.
            sb.setLength(0);
        }
        return "";
    }

    /**
     * Searches recursively for a charset for a specified locale.
     * The item at index {@code count - 1} is prepended to the base
     * and the keys formed with the preceding items are tried before
     * the base itself.
     *
     * @param items an array of locale items.
     * @param base a buffer of base items.
     * @param count the number of items to go through.
     * @return the found charset or an empty string.
     */
    private String searchCharSet(String[] items,
                                 StringBuffer base,
                                 int count)
    {
        if ((--count >= 0) &&
            (items[count] != null) &&
            (items[count].length() > 0))
        {
            String charset;
            base.insert(0, items[count]);
            int length = base.length();
            for (int i = count; i > 0; i--)
            {
                // A separator is prepended before the first sub-search
                // and before the one that adds the language item only.
                if ((i == count) ||
                    (i <= 1))
                {
                    base.insert(0, '_');
                    length++;
                }
                charset = searchCharSet(items, base, i);
                if (charset.length() > 0)
                {
                    return charset;
                }
                // Drop whatever the sub-search prepended to the base.
                base.delete(0, base.length() - length);
            }
            return searchCharSet(base.toString());
        }
        else
        {
            return "";
        }
    }

    /**
     * Searches for a charset for a specified key. Both hits and
     * misses are stored in the exact key cache; a miss is stored
     * as an empty string.
     *
     * @param key the key for the charset.
     * @return the found charset or an empty string.
     */
    private String searchCharSet(String key)
    {
        if ((key != null) &&
            (key.length() > 0))
        {
            // Go through mappers in priority order.
            Map mapper;
            String charset;
            for (int i = 0; i < mappers.length; i++)
            {
                mapper = mappers[i];
                if (mapper != null)
                {
                    charset = (String) mapper.get(key);
                    if (charset != null)
                    {
                        // Update the cache unless the hit came from it.
                        if (i > MAP_CACHE)
                        {
                            mappers[MAP_CACHE].put(key, charset);
                        }
                        return charset;
                    }
                }
            }

            // Not found, add an empty string to the cache.
            mappers[MAP_CACHE].put(key, "");
        }
        return "";
    }

    /**
     * Sets a common locale-charset mapping. The common mapper of
     * this instance is replaced with a modified copy, so the shared
     * static defaults stay untouched, and the caches are cleared.
     *
     * @param key the key for the charset.
     * @param charset the corresponding charset.
     */
    protected synchronized void setCommonCharSet(String key,
                                                 String charset)
    {
        Map mapper = new HashMap(mappers[MAP_COM]);
        mapper.put(key, charset);
        mappers[MAP_COM] = mapper;
        clearCaches();
    }
}