View Javadoc

1   /*
2    * $Header: /home/jerenkrantz/tmp/commons/commons-convert/cvs/home/cvs/jakarta-commons//httpclient/src/java/org/apache/commons/httpclient/URI.java,v 1.47 2004/05/13 04:03:25 mbecke Exp $
3    * $Revision: 179784 $
4    * $Date: 2005-06-03 09:18:34 -0400 (Fri, 03 Jun 2005) $
5    *
6    * ====================================================================
7    *
8    *  Copyright 2002-2004 The Apache Software Foundation
9    *
10   *  Licensed under the Apache License, Version 2.0 (the "License");
11   *  you may not use this file except in compliance with the License.
12   *  You may obtain a copy of the License at
13   *
14   *      http://www.apache.org/licenses/LICENSE-2.0
15   *
16   *  Unless required by applicable law or agreed to in writing, software
17   *  distributed under the License is distributed on an "AS IS" BASIS,
18   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19   *  See the License for the specific language governing permissions and
20   *  limitations under the License.
21   * ====================================================================
22   *
23   * This software consists of voluntary contributions made by many
24   * individuals on behalf of the Apache Software Foundation.  For more
25   * information on the Apache Software Foundation, please see
26   * <http://www.apache.org/>.
27   *
28   */
29  
30  package org.apache.commons.httpclient;
31  
32  import java.io.IOException;
33  import java.io.ObjectInputStream;
34  import java.io.ObjectOutputStream;
35  import java.io.Serializable;
36  import java.util.Locale;
37  import java.util.BitSet;
38  import java.util.Hashtable;
39  
40  import org.apache.commons.codec.DecoderException;
41  import org.apache.commons.codec.net.URLCodec;
42  import org.apache.commons.httpclient.util.EncodingUtil;
43  
44  /***
45   * The interface for the URI(Uniform Resource Identifiers) version of RFC 2396.
46   * This class has the purpose of supporting the parsing of a URI reference to
47   * extend any specific protocols, the character encoding of the protocol to 
48   * be transported and the charset of the document.
49   * <p>
50   * A URI is always in an "escaped" form, since escaping or unescaping a
51   * completed URI might change its semantics.  
52   * <p>
53   * Implementers should be careful not to escape or unescape the same string
54   * more than once, since unescaping an already unescaped string might lead to
55   * misinterpreting a percent data character as another escaped character,
56   * or vice versa in the case of escaping an already escaped string.
57   * <p>
58   * In order to avoid these problems, data types are used as follows:
59   * <p><blockquote><pre>
60   *   URI character sequence: char
61   *   octet sequence: byte
62   *   original character sequence: String
63   * </pre></blockquote><p>
64   *
65   * So, a URI is a sequence of characters as an array of a char type, which
66   * is not always represented as a sequence of octets as an array of byte.
67   * <p>
68   * 
69   * URI Syntactic Components
70   * <p><blockquote><pre>
71   * - In general, written as follows:
72   *   Absolute URI = &lt;scheme&gt;:&lt;scheme-specific-part&gt;
73   *   Generic URI = &lt;scheme&gt;://&lt;authority&gt;&lt;path&gt;?&lt;query&gt;
74   *
75   * - Syntax
76   *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
77   *   hier_part     = ( net_path | abs_path ) [ "?" query ]
78   *   net_path      = "//" authority [ abs_path ]
79   *   abs_path      = "/"  path_segments
80   * </pre></blockquote><p>
81   *
82   * The following examples illustrate URI that are in common use.
83   * <pre>
84   * ftp://ftp.is.co.za/rfc/rfc1808.txt
85   *    -- ftp scheme for File Transfer Protocol services
86   * gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles
87   *    -- gopher scheme for Gopher and Gopher+ Protocol services
88   * http://www.math.uio.no/faq/compression-faq/part1.html
89   *    -- http scheme for Hypertext Transfer Protocol services
90   * mailto:mduerst@ifi.unizh.ch
91   *    -- mailto scheme for electronic mail addresses
92   * news:comp.infosystems.www.servers.unix
93   *    -- news scheme for USENET news groups and articles
94   * telnet://melvyl.ucop.edu/
95   *    -- telnet scheme for interactive services via the TELNET Protocol
96   * </pre>
97   * Please, notice that there are many modifications from URL(RFC 1738) and
98   * relative URL(RFC 1808).
99   * <p>
100  * <b>The expressions for a URI</b>
101  * <p><pre>
102  * For escaped URI forms
103  *  - URI(char[]) // constructor
104  *  - char[] getRawXxx() // method
105  *  - String getEscapedXxx() // method
106  *  - String toString() // method
107  * <p>
108  * For unescaped URI forms
109  *  - URI(String) // constructor
110  *  - String getXXX() // method
111  * </pre><p>
112  *
113  * @author <a href="mailto:jericho@apache.org">Sung-Gu</a>
114  * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
115  * @version $Revision: 179784 $ $Date: 2002/03/14 15:14:01 
116  */
117 public class URI implements Cloneable, Comparable, Serializable {
118 
119 
120     // ----------------------------------------------------------- Constructors
121 
122     /*** Create an instance for internal use. */
123     protected URI() {
            // Intentionally empty: every field keeps the default from its declaration.
124     }
125 
126     /***
127      * Construct a URI from a string with the given charset. The input string can 
128      * be either in escaped or unescaped form. 
129      *
130      * @param s URI character sequence
131      * @param escaped <tt>true</tt> if URI character sequence is in escaped form. 
132      *                <tt>false</tt> otherwise. 
133      * @param charset the charset string to do escape encoding, if required
134      * 
135      * @throws URIException If the URI cannot be created.
136      * @throws NullPointerException if input string is <code>null</code>
137      * 
138      * @see #getProtocolCharset
139      * 
140      * @since 3.0
141      */
142     public URI(String s, boolean escaped, String charset)
143         throws URIException, NullPointerException {
            // The charset is recorded before parsing; presumably parseUriReference
            // consults it when it has to escape the input — TODO confirm.
144         protocolCharset = charset;
145         parseUriReference(s, escaped);
146     }
147 
148     /***
149      * Construct a URI from a string with the given charset. The input string can 
150      * be either in escaped or unescaped form. 
151      *
152      * @param s URI character sequence
153      * @param escaped <tt>true</tt> if URI character sequence is in escaped form. 
154      *                <tt>false</tt> otherwise. 
155      * 
156      * @throws URIException If the URI cannot be created.
157      * @throws NullPointerException if input string is <code>null</code>
158      * 
159      * @see #getProtocolCharset
160      * 
161      * @since 3.0
162      */
163     public URI(String s, boolean escaped)
164         throws URIException, NullPointerException {
            // This constructor does not assign protocolCharset itself.
165         parseUriReference(s, escaped);
166     }
167 
168     /***
169      * Construct a URI as an escaped form of a character array with the given
170      * charset.
171      *
172      * @param escaped the URI character sequence
173      * @param charset the charset string to do escape encoding
174      * @throws URIException If the URI cannot be created.
175      * @throws NullPointerException if <code>escaped</code> is <code>null</code>
176      * @see #getProtocolCharset
177      * 
178      * @deprecated Use {@link #URI(String, boolean, String)}
179      */
180     public URI(char[] escaped, String charset) 
181         throws URIException, NullPointerException {
182         protocolCharset = charset;
            // new String(escaped) raises the documented NullPointerException for a
            // null array; "true" marks the input as already escaped.
183         parseUriReference(new String(escaped), true);
184     }
185 
186 
187     /***
188      * Construct a URI as an escaped form of a character array.
189      * A URI can be placed within double-quotes or angle brackets like 
190      * "http://test.com/" and &lt;http://test.com/&gt;
191      * 
192      * @param escaped the URI character sequence
193      * @throws URIException If the URI cannot be created.
194      * @throws NullPointerException if <code>escaped</code> is <code>null</code>
195      * @see #getDefaultProtocolCharset
196      * 
197      * @deprecated Use {@link #URI(String, boolean)}
198      */
199     public URI(char[] escaped) 
200         throws URIException, NullPointerException {
            // new String(escaped) raises the documented NullPointerException for a
            // null array; "true" marks the input as already escaped.
201         parseUriReference(new String(escaped), true);
202     }
203 
204 
205     /***
206      * Construct a URI from the given string with the given charset.
207      *
208      * @param original the string to be represented to URI character sequence
209      * It is one of absoluteURI and relativeURI.
210      * @param charset the charset string to do escape encoding
211      * @throws URIException If the URI cannot be created.
212      * @see #getProtocolCharset
213      * 
214      * @deprecated Use {@link #URI(String, boolean, String)}
215      */
216     public URI(String original, String charset) throws URIException {
217         protocolCharset = charset;
            // "false": the input is handed over as a not-yet-escaped string.
218         parseUriReference(original, false);
219     }
220 
221 
222     /***
223      * Construct a URI from the given string.
224      * <p><blockquote><pre>
225      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
226      * </pre></blockquote><p>
227      * A URI can be placed within double-quotes or angle brackets like 
228      * "http://test.com/" and &lt;http://test.com/&gt;
229      *
230      * @param original the string to be represented to URI character sequence
231      * It is one of absoluteURI and relativeURI.
232      * @throws URIException If the URI cannot be created.
233      * @see #getDefaultProtocolCharset
234      * 
235      * @deprecated Use {@link #URI(String, boolean)}
236      */
237     public URI(String original) throws URIException {
            // "false": the input is handed over as a not-yet-escaped string.
238         parseUriReference(original, false);
239     }
240 
241 
242     /***
243      * Construct a general URI from the given components.
244      * <p><blockquote><pre>
245      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
246      *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
247      *   opaque_part   = uric_no_slash *uric
248      * </pre></blockquote><p>
249      * It's for absolute URI = &lt;scheme&gt;:&lt;scheme-specific-part&gt;#
250      * &lt;fragment&gt;.
251      *
252      * @param scheme the scheme string
253      * @param schemeSpecificPart scheme_specific_part
254      * @param fragment the fragment string
255      * @throws URIException If the URI cannot be created.
256      * @see #getDefaultProtocolCharset
257      */
258     public URI(String scheme, String schemeSpecificPart, String fragment)
259         throws URIException {
260 
261         // validate and construct the URI character sequence
262         if (scheme == null) {
263            throw new URIException(URIException.PARSING, "scheme required");
264         }
            // Lower-case with a fixed locale: under e.g. a Turkish default locale
            // "FILE" would otherwise map to a dotless-i form and fail validation.
265         char[] s = scheme.toLowerCase(Locale.ENGLISH).toCharArray();
266         if (validate(s, URI.scheme)) {
267             _scheme = s; // is_absoluteURI
268         } else {
269             throw new URIException(URIException.PARSING, "incorrect scheme");
270         }
271         _opaque = encode(schemeSpecificPart, allowed_opaque_part,
272                 getProtocolCharset());
273         // Set flag
274         _is_opaque_part = true;
            // The fragment is optional ([ "#" fragment ]): a null fragment leaves
            // _fragment unset instead of raising a NullPointerException.
275         _fragment = (fragment == null) ? null : fragment.toCharArray();
276 
277         setURI();
278     }
279 
280 
281     /***
282      * Construct a general URI from the given components.
283      * <p><blockquote><pre>
284      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
285      *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
286      *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
287      *   hier_part     = ( net_path | abs_path ) [ "?" query ]
288      * </pre></blockquote><p>
289      * It's for absolute URI = &lt;scheme&gt;:&lt;path&gt;?&lt;query&gt;#&lt;
290      * fragment&gt; and relative URI = &lt;path&gt;?&lt;query&gt;#&lt;fragment
291      * &gt;.
292      *
293      * @param scheme the scheme string
294      * @param authority the authority string
295      * @param path the path string
296      * @param query the query string
297      * @param fragment the fragment string
298      * @throws URIException If the new URI cannot be created.
299      * @see #getDefaultProtocolCharset
300      */
301     public URI(String scheme, String authority, String path, String query,
302                String fragment) throws URIException {
303 
304         // validate and contruct the URI character sequence
305         StringBuffer buff = new StringBuffer();
306         if (scheme != null) {
307             buff.append(scheme);
308             buff.append(':');
309         }
310         if (authority != null) {
311             buff.append("//");
312             buff.append(authority);
313         }
314         if (path != null) {  // accept empty path
315             if ((scheme != null || authority != null)
316                     && !path.startsWith("/")) {
317                 throw new URIException(URIException.PARSING,
318                         "abs_path requested");
319             }
320             buff.append(path);
321         }
322         if (query != null) {
323             buff.append('?');
324             buff.append(query);
325         }
326         if (fragment != null) {
327             buff.append('#');
328             buff.append(fragment);
329         }
330         parseUriReference(buff.toString(), false);
331     }
332 
333 
334     /***
335      * Construct a general URI from the given components.
336      *
337      * @param scheme the scheme string
338      * @param userinfo the userinfo string
339      * @param host the host string
340      * @param port the port number
341      * @throws URIException If the new URI cannot be created.
342      * @see #getDefaultProtocolCharset
343      */
344     public URI(String scheme, String userinfo, String host, int port)
345         throws URIException {
346 
            // Delegates to the seven-argument form with no path, query or fragment.
347         this(scheme, userinfo, host, port, null, null, null);
348     }
349 
350 
351     /***
352      * Construct a general URI from the given components.
353      *
354      * @param scheme the scheme string
355      * @param userinfo the userinfo string
356      * @param host the host string
357      * @param port the port number
358      * @param path the path string
359      * @throws URIException If the new URI cannot be created.
360      * @see #getDefaultProtocolCharset
361      */
362     public URI(String scheme, String userinfo, String host, int port,
363             String path) throws URIException {
364 
            // Delegates to the seven-argument form with no query or fragment.
365         this(scheme, userinfo, host, port, path, null, null);
366     }
367 
368 
369     /***
370      * Construct a general URI from the given components.
371      *
372      * @param scheme the scheme string
373      * @param userinfo the userinfo string
374      * @param host the host string
375      * @param port the port number
376      * @param path the path string
377      * @param query the query string
378      * @throws URIException If the new URI cannot be created.
379      * @see #getDefaultProtocolCharset
380      */
381     public URI(String scheme, String userinfo, String host, int port,
382             String path, String query) throws URIException {
383 
            // Delegates to the seven-argument form with no fragment.
384         this(scheme, userinfo, host, port, path, query, null);
385     }
386 
387 
388     /***
389      * Construct a general URI from the given components.
390      *
391      * @param scheme the scheme string
392      * @param userinfo the userinfo string
393      * @param host the host string
394      * @param port the port number
395      * @param path the path string
396      * @param query the query string
397      * @param fragment the fragment string
398      * @throws URIException If the new URI cannot be created.
399      * @see #getDefaultProtocolCharset
400      */
401     public URI(String scheme, String userinfo, String host, int port,
402             String path, String query, String fragment) throws URIException {
403         // authority = [ userinfo "@" ] host [ ":" port ]; null when host is null
404         this(scheme, (host != null)
405             ? ((userinfo == null) ? "" : userinfo + '@') + host
406                 + ((port == -1) ? "" : ":" + port) : null, path, query, fragment);
407     }
408 
409 
410     /***
411      * Construct a general URI from the given components.
412      *
413      * @param scheme the scheme string
414      * @param host the host string
415      * @param path the path string
416      * @param fragment the fragment string
417      * @throws URIException If the new URI cannot be created.
418      * @see #getDefaultProtocolCharset
419      */
420     public URI(String scheme, String host, String path, String fragment)
421         throws URIException {
422 
            // Delegates to the five-argument form: host is passed in the authority
            // position and the query is null.
423         this(scheme, host, path, null, fragment);
424     }
425 
426 
427     /***
428      * Construct a general URI with the given relative URI string.
429      *
430      * @param base the base URI
431      * @param relative the relative URI string
432      * @throws URIException If the new URI cannot be created.
433      * 
434      * @deprecated Use {@link #URI(URI, String, boolean)}
435      */
436     public URI(URI base, String relative) throws URIException {
            // The relative string is parsed through URI(String), i.e. as unescaped
            // text, and then resolved against base.
437         this(base, new URI(relative));
438     }
439 
440 
441     /***
442      * Construct a general URI with the given relative URI string.
443      *
444      * @param base the base URI
445      * @param relative the relative URI string
446      * @param escaped <tt>true</tt> if URI character sequence is in escaped form. 
447      *                <tt>false</tt> otherwise.
448      *  
449      * @throws URIException If the new URI cannot be created.
450      * 
451      * @since 3.0
452      */
453     public URI(URI base, String relative, boolean escaped) throws URIException {
            // The relative string is parsed first (honouring the escaped flag) and
            // then resolved against base.
454         this(base, new URI(relative, escaped));
455     }
456 
457 
458     /***
459      * Construct a general URI with the given relative URI.
460      * <p><blockquote><pre>
461      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
462      *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
463      * </pre></blockquote><p>
464      * Resolving Relative References to Absolute Form.
465      *
466      * <strong>Examples of Resolving Relative URI References</strong>
467      *
468      * Within an object with a well-defined base URI of
469      * <p><blockquote><pre>
470      *   http://a/b/c/d;p?q
471      * </pre></blockquote><p>
472      * the relative URI would be resolved as follows:
473      *
474      * Normal Examples
475      *
476      * <p><blockquote><pre>
477      *   g:h           =  g:h
478      *   g             =  http://a/b/c/g
479      *   ./g           =  http://a/b/c/g
480      *   g/            =  http://a/b/c/g/
481      *   /g            =  http://a/g
482      *   //g           =  http://g
483      *   ?y            =  http://a/b/c/?y
484      *   g?y           =  http://a/b/c/g?y
485      *   #s            =  (current document)#s
486      *   g#s           =  http://a/b/c/g#s
487      *   g?y#s         =  http://a/b/c/g?y#s
488      *   ;x            =  http://a/b/c/;x
489      *   g;x           =  http://a/b/c/g;x
490      *   g;x?y#s       =  http://a/b/c/g;x?y#s
491      *   .             =  http://a/b/c/
492      *   ./            =  http://a/b/c/
493      *   ..            =  http://a/b/
494      *   ../           =  http://a/b/
495      *   ../g          =  http://a/b/g
496      *   ../..         =  http://a/
497      *   ../../        =  http://a/ 
498      *   ../../g       =  http://a/g
499      * </pre></blockquote><p>
500      *
501      * Some URI schemes do not allow a hierarchical syntax matching the
502      * &lt;hier_part&gt; syntax, and thus cannot use relative references.
503      *
504      * @param base the base URI
505      * @param relative the relative URI
506      * @throws URIException If the new URI cannot be created.
507      */
508     public URI(URI base, URI relative) throws URIException {
509 
            // Resolution needs an absolute base: a base without a scheme is rejected.
510         if (base._scheme == null) {
511             throw new URIException(URIException.PARSING, "base URI required");
512         }
            // NOTE(review): this condition is always true here, because a null base
            // scheme was rejected just above.
513         if (base._scheme != null) {
514             this._scheme = base._scheme;
515             this._authority = base._authority;
516         }
            // Opaque case: no hierarchical resolution is attempted. The opaque part
            // and fragment are copied from the relative URI and the result is built
            // immediately.
            // NOTE(review): _opaque is taken from relative even when only the base is
            // opaque — confirm that this is intended.
517         if (base._is_opaque_part || relative._is_opaque_part) {
518             this._scheme = base._scheme;
519             this._is_opaque_part = base._is_opaque_part 
520                 || relative._is_opaque_part;
521             this._opaque = relative._opaque;
522             this._fragment = relative._fragment;
523             this.setURI();
524             return;
525         }
526         if (relative._scheme != null) {
                // The relative reference has its own scheme: scheme, authority
                // details and path are all taken from it.
527             this._scheme = relative._scheme;
528             this._is_net_path = relative._is_net_path;
529             this._authority = relative._authority;
530             if (relative._is_server) {
531                 this._is_server = relative._is_server;
532                 this._userinfo = relative._userinfo;
533                 this._host = relative._host;
534                 this._port = relative._port;
535             } else if (relative._is_reg_name) {
536                 this._is_reg_name = relative._is_reg_name;
537             }
538             this._is_abs_path = relative._is_abs_path;
539             this._is_rel_path = relative._is_rel_path;
540             this._path = relative._path;
541         } else if (base._authority != null && relative._scheme == null) {
                // No scheme on the relative reference: inherit the base authority.
                // NOTE(review): "relative._scheme == null" is implied by the else.
542             this._is_net_path = base._is_net_path;
543             this._authority = base._authority;
544             if (base._is_server) {
545                 this._is_server = base._is_server;
546                 this._userinfo = base._userinfo;
547                 this._host = base._host;
548                 this._port = base._port;
549             } else if (base._is_reg_name) {
550                 this._is_reg_name = base._is_reg_name;
551             }
552         }
            // An authority on the relative reference overrides whatever authority
            // and path were selected above.
553         if (relative._authority != null) {
554             this._is_net_path = relative._is_net_path;
555             this._authority = relative._authority;
556             if (relative._is_server) {
557                 this._is_server = relative._is_server;
558                 this._userinfo = relative._userinfo;
559                 this._host = relative._host;
560                 this._port = relative._port;
561             } else if (relative._is_reg_name) {
562                 this._is_reg_name = relative._is_reg_name;
563             }
564             this._is_abs_path = relative._is_abs_path;
565             this._is_rel_path = relative._is_rel_path;
566             this._path = relative._path;
567         }
568         // resolve the path and query if necessary
569         if (relative._scheme == null && relative._authority == null) {
570             if ((relative._path == null || relative._path.length == 0)
571                 && relative._query == null) {
572                 // handle a reference to the current document, see RFC 2396 
573                 // section 5.2 step 2
574                 this._path = base._path;
575                 this._query = base._query;
576             } else {
577                 this._path = resolvePath(base._path, relative._path);
578             }
579         }
580         // the base query is not inherited here; only a query on the relative URI is used
581         if (relative._query != null) {
582             this._query = relative._query;
583         }
584         // the base fragment is never inherited; only a fragment on the relative URI is used
585         if (relative._fragment != null) {
586             this._fragment = relative._fragment;
587         }
588         this.setURI();
589         // reparse the newly built URI, this will ensure that all flags are set correctly.
590         // TODO there must be a better way to do this
591         parseUriReference(new String(_uri), true);
592     }
593 
594     // --------------------------------------------------- Instance Variables
595 
596     /*** Version ID for serialization */
597     static final long serialVersionUID = 604752400577948726L;
598 
599 
600     /***
601      * Cached hash code for this URI; starts out as 0.
602      */
603     protected int hash = 0;
604 
605 
606     /***
607      * This Uniform Resource Identifier (URI); null until it has been built.
608      * The URI is always in an "escaped" form, since escaping or unescaping
609      * a completed URI might change its semantics.  
610      */
611     protected char[] _uri = null;
612 
613 
614     /***
615      * The charset of the protocol used by this URI instance. Initially null;
         * the constructors that take a charset argument assign it before parsing.
616      */
617     protected String protocolCharset = null;
618 
619 
620     /***
621      * The default charset of the protocol.  RFC 2277, 2396
         * Not final: this is a mutable static shared by every URI instance.
622      */
623     protected static String defaultProtocolCharset = "UTF-8";
624 
625 
626     /***
627      * The default charset of the document.  RFC 2277, 2396
628      * The charset mapped from the default locale is used when one is found;
         * otherwise the platform's "file.encoding" property is the fallback.
629      */
630     protected static String defaultDocumentCharset = null;
631     protected static String defaultDocumentCharsetByLocale = null;
632     protected static String defaultDocumentCharsetByPlatform = null;
633     // Static initializer for defaultDocumentCharset
634     static {
635         Locale locale = Locale.getDefault();
636         // in order to support backward compatibility
637         if (locale != null) {
638             defaultDocumentCharsetByLocale =
639                 LocaleToCharsetMap.getCharset(locale);
640             // set the default document charset
641             defaultDocumentCharset = defaultDocumentCharsetByLocale;
642         }
643         // in order to support platform encoding
644         try {
645             defaultDocumentCharsetByPlatform = System.getProperty("file.encoding");
646         } catch (SecurityException ignore) {
                // Deliberately ignored: without permission to read the property the
                // platform charset simply stays null.
647         }
648         if (defaultDocumentCharset == null) {
649             // fall back to the platform charset (which may itself be null)
650             defaultDocumentCharset = defaultDocumentCharsetByPlatform;
651         }
652     }
653 
654 
655     /***
656      * The scheme component; null when the URI has no scheme.
657      */
658     protected char[] _scheme = null;
659 
660 
661     /***
662      * The opaque part of a non-hierarchical URI; null when unset.
663      */
664     protected char[] _opaque = null;
665 
666 
667     /***
668      * The authority component; null when absent.
669      */
670     protected char[] _authority = null;
671 
672 
673     /***
674      * The userinfo of a server-based authority; null when absent.
675      */
676     protected char[] _userinfo = null;
677 
678 
679     /***
680      * The host of a server-based authority; null when absent.
681      */
682     protected char[] _host = null;
683 
684 
685     /***
686      * The port; -1 when no port is set.
687      */
688     protected int _port = -1;
689 
690 
691     /***
692      * The path component; null when unset.
693      */
694     protected char[] _path = null;
695 
696 
697     /***
698      * The query component; null when absent.
699      */
700     protected char[] _query = null;
701 
702 
703     /***
704      * The fragment; null when absent.
705      */
706     protected char[] _fragment = null;
707 
708 
709     /***
710      * The root path, a single "/". Not final and the array is mutable.
711      */
712     protected static char[] rootPath = { '/' };
713 
714     // ---------------------- Generous characters for each component validation
715 
716     /***
717      * The percent "%" character always has the reserved purpose of being the
718      * escape indicator, it must be escaped as "%25" in order to be used as
719      * data within a URI.
720      */
721     protected static final BitSet percent = new BitSet(256);
722     // Static initializer for percent: the set holds exactly one character, '%'
723     static {
724         percent.set('%');
725     }
726 
727 
728     /***
729      * BitSet for digit.
730      * <p><blockquote><pre>
731      * digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
732      *            "8" | "9"
733      * </pre></blockquote><p>
734      */
735     protected static final BitSet digit = new BitSet(256);
736     // Populate digit with the ten ASCII decimal digits
737     static {
738         for (char c = '0'; c <= '9'; c++) {
739             digit.set(c);
740         }
741     }
742 
743 
744     /***
745      * BitSet for alpha.
746      * <p><blockquote><pre>
747      * alpha         = lowalpha | upalpha
748      * </pre></blockquote><p>
749      */
750     protected static final BitSet alpha = new BitSet(256);
751     // Populate alpha with both cases of the 26 ASCII letters
752     static {
753         final int letters = 'z' - 'a' + 1;
754         for (int offset = 0; offset < letters; offset++) {
755             // lowalpha and upalpha at the same alphabet position
756             alpha.set('a' + offset);
757             alpha.set('A' + offset);
758         }
759     }
760 
761 
762     /***
763      * BitSet for alphanum (join of alpha &amp; digit).
764      * <p><blockquote><pre>
765      *  alphanum      = alpha | digit
766      * </pre></blockquote><p>
767      */
768     protected static final BitSet alphanum = new BitSet(256);
769     // alphanum is the union of digit and alpha
770     static {
771         alphanum.or(digit);
772         alphanum.or(alpha);
773     }
774 
775 
776     /***
777      * BitSet for hex.
778      * <p><blockquote><pre>
779      * hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
780      *                         "a" | "b" | "c" | "d" | "e" | "f"
781      * </pre></blockquote><p>
782      */
783     protected static final BitSet hex = new BitSet(256);
784     // Populate hex with a-f in both cases plus the decimal digits
785     static {
786         final int letters = 'f' - 'a' + 1;
787         for (int offset = 0; offset < letters; offset++) {
788             hex.set('a' + offset);
789             hex.set('A' + offset);
790         }
791         // the decimal digits are hex digits too
792         hex.or(digit);
793     }
794 
795 
796     /***
797      * BitSet for escaped.
798      * <p><blockquote><pre>
799      * escaped       = "%" hex hex
800      * </pre></blockquote><p>
801      */
802     protected static final BitSet escaped = new BitSet(256);
803     // Union of hex and percent: the characters that can occur in "%" hex hex
804     static {
805         escaped.or(hex);
806         escaped.or(percent);
807     }
808 
809 
810     /***
811      * BitSet for mark.
812      * <p><blockquote><pre>
813      * mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
814      *                 "(" | ")"
815      * </pre></blockquote><p>
816      */
817     protected static final BitSet mark = new BitSet(256);
818     // Static initializer for mark
819     static {
820         mark.set('-');
821         mark.set('_');
822         mark.set('.');
823         mark.set('!');
824         mark.set('~');
825         mark.set('*');
826         mark.set('\'');
827         mark.set('(');
828         mark.set(')');
829     }
830 
831 
832     /***
833      * Data characters that are allowed in a URI but do not have a reserved
834      * purpose are called unreserved.
835      * <p><blockquote><pre>
836      * unreserved    = alphanum | mark
837      * </pre></blockquote><p>
838      */
839     protected static final BitSet unreserved = new BitSet(256);
840     // unreserved is the union of mark and alphanum
841     static {
842         unreserved.or(mark);
843         unreserved.or(alphanum);
844     }
845 
846 
847     /***
848      * BitSet for reserved.
849      * <p><blockquote><pre>
850      * reserved      = ";" | "/" | "?" | ":" | "@" | "&amp;" | "=" | "+" |
851      *                 "$" | ","
852      * </pre></blockquote><p>
853      */
854     protected static final BitSet reserved = new BitSet(256);
855     // Static initializer for reserved
856     static {
857         reserved.set(';');
858         reserved.set('/');
859         reserved.set('?');
860         reserved.set(':');
861         reserved.set('@');
862         reserved.set('&');
863         reserved.set('=');
864         reserved.set('+');
865         reserved.set('$');
866         reserved.set(',');
867     }
868 
869 
870     /***
871      * BitSet for uric.
872      * <p><blockquote><pre>
873      * uric          = reserved | unreserved | escaped
874      * </pre></blockquote><p>
875      */
876     protected static final BitSet uric = new BitSet(256);
877     // uric is the union of escaped, unreserved and reserved
878     static {
879         uric.or(escaped);
880         uric.or(unreserved);
881         uric.or(reserved);
882     }
883 
884 
885     /***
886      * BitSet for fragment (alias for uric).
887      * <p><blockquote><pre>
888      * fragment      = *uric
889      * </pre></blockquote><p>
890      */
891     protected static final BitSet fragment = uric;
892 
893 
894     /***
895      * BitSet for query (alias for uric).
896      * <p><blockquote><pre>
897      * query         = *uric
898      * </pre></blockquote><p>
899      */
900     protected static final BitSet query = uric;
901 
902 
903     /***
904      * BitSet for pchar.
905      * <p><blockquote><pre>
906      * pchar         = unreserved | escaped |
907      *                 ":" | "@" | "&amp;" | "=" | "+" | "$" | ","
908      * </pre></blockquote><p>
909      */
910     protected static final BitSet pchar = new BitSet(256);
911     // Static initializer for pchar
912     static {
913         pchar.or(unreserved);
914         pchar.or(escaped);
915         pchar.set(':');
916         pchar.set('@');
917         pchar.set('&');
918         pchar.set('=');
919         pchar.set('+');
920         pchar.set('$');
921         pchar.set(',');
922     }
923 
924 
925     /***
926      * BitSet for param (alias for pchar).
927      * <p><blockquote><pre>
928      * param         = *pchar
929      * </pre></blockquote><p>
930      */
931     protected static final BitSet param = pchar;
932 
933 
934     /***
935      * BitSet for segment.
936      * <p><blockquote><pre>
937      * segment       = *pchar *( ";" param )
938      * </pre></blockquote><p>
939      */
940     protected static final BitSet segment = new BitSet(256);
941     // Static initializer for segment
942     static {
943         segment.or(pchar);
944         segment.set(';');
945         segment.or(param);
946     }
947 
948 
949     /***
950      * BitSet for path segments.
951      * <p><blockquote><pre>
952      * path_segments = segment *( "/" segment )
953      * </pre></blockquote><p>
954      */
955     protected static final BitSet path_segments = new BitSet(256);
956     // Static initializer for path_segments
957     static {
958         path_segments.set('/');
959         path_segments.or(segment);
960     }
961 
962 
963     /***
964      * URI absolute path.
965      * <p><blockquote><pre>
966      * abs_path      = "/"  path_segments
967      * </pre></blockquote><p>
968      */
969     protected static final BitSet abs_path = new BitSet(256);
970     // Static initializer for abs_path
971     static {
972         abs_path.set('/');
973         abs_path.or(path_segments);
974     }
975 
976 
977     /***
978      * URI bitset for encoding typical non-slash characters.
979      * <p><blockquote><pre>
980      * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
981      *                 "&amp;" | "=" | "+" | "$" | ","
982      * </pre></blockquote><p>
983      */
984     protected static final BitSet uric_no_slash = new BitSet(256);
985     // Static initializer for uric_no_slash
986     static {
987         uric_no_slash.or(unreserved);
988         uric_no_slash.or(escaped);
989         uric_no_slash.set(';');
990         uric_no_slash.set('?');
991         uric_no_slash.set(';');
992         uric_no_slash.set('@');
993         uric_no_slash.set('&');
994         uric_no_slash.set('=');
995         uric_no_slash.set('+');
996         uric_no_slash.set('$');
997         uric_no_slash.set(',');
998     }
999     
1000 
1001     /***
1002      * URI bitset that combines uric_no_slash and uric.
1003      * <p><blockquote><pre>
1004      * opaque_part   = uric_no_slash *uric
1005      * </pre></blockquote><p>
1006      */
1007     protected static final BitSet opaque_part = new BitSet(256);
1008     // Static initializer for opaque_part
1009     static {
1010         // it's generous. because first character must not include a slash
1011         opaque_part.or(uric_no_slash);
1012         opaque_part.or(uric);
1013     }
1014     
1015 
1016     /***
1017      * URI bitset that combines absolute path and opaque part.
1018      * <p><blockquote><pre>
1019      * path          = [ abs_path | opaque_part ]
1020      * </pre></blockquote><p>
1021      */
1022     protected static final BitSet path = new BitSet(256);
1023     // Static initializer for path
1024     static {
1025         path.or(abs_path);
1026         path.or(opaque_part);
1027     }
1028 
1029 
1030     /***
1031      * Port, a logical alias for digit.
1032      */
1033     protected static final BitSet port = digit;
1034 
1035 
1036     /***
1037      * Bitset that combines digit and dot fo IPv$address.
1038      * <p><blockquote><pre>
1039      * IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
1040      * </pre></blockquote><p>
1041      */
1042     protected static final BitSet IPv4address = new BitSet(256);
1043     // Static initializer for IPv4address
1044     static {
1045         IPv4address.or(digit);
1046         IPv4address.set('.');
1047     }
1048 
1049 
1050     /***
1051      * RFC 2373.
1052      * <p><blockquote><pre>
1053      * IPv6address = hexpart [ ":" IPv4address ]
1054      * </pre></blockquote><p>
1055      */
1056     protected static final BitSet IPv6address = new BitSet(256);
1057     // Static initializer for IPv6address reference
1058     static {
1059         IPv6address.or(hex); // hexpart
1060         IPv6address.set(':');
1061         IPv6address.or(IPv4address);
1062     }
1063 
1064 
1065     /***
1066      * RFC 2732, 2373.
1067      * <p><blockquote><pre>
1068      * IPv6reference   = "[" IPv6address "]"
1069      * </pre></blockquote><p>
1070      */
1071     protected static final BitSet IPv6reference = new BitSet(256);
1072     // Static initializer for IPv6reference
1073     static {
1074         IPv6reference.set('[');
1075         IPv6reference.or(IPv6address);
1076         IPv6reference.set(']');
1077     }
1078 
1079 
1080     /***
1081      * BitSet for toplabel.
1082      * <p><blockquote><pre>
1083      * toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
1084      * </pre></blockquote><p>
1085      */
1086     protected static final BitSet toplabel = new BitSet(256);
1087     // Static initializer for toplabel
1088     static {
1089         toplabel.or(alphanum);
1090         toplabel.set('-');
1091     }
1092 
1093 
1094     /***
1095      * BitSet for domainlabel.
1096      * <p><blockquote><pre>
1097      * domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
1098      * </pre></blockquote><p>
1099      */
1100     protected static final BitSet domainlabel = toplabel;
1101 
1102 
1103     /***
1104      * BitSet for hostname.
1105      * <p><blockquote><pre>
1106      * hostname      = *( domainlabel "." ) toplabel [ "." ]
1107      * </pre></blockquote><p>
1108      */
1109     protected static final BitSet hostname = new BitSet(256);
1110     // Static initializer for hostname
1111     static {
1112         hostname.or(toplabel);
1113         // hostname.or(domainlabel);
1114         hostname.set('.');
1115     }
1116 
1117 
1118     /***
1119      * BitSet for host.
1120      * <p><blockquote><pre>
1121      * host          = hostname | IPv4address | IPv6reference
1122      * </pre></blockquote><p>
1123      */
1124     protected static final BitSet host = new BitSet(256);
1125     // Static initializer for host
1126     static {
1127         host.or(hostname);
1128         // host.or(IPv4address);
1129         host.or(IPv6reference); // IPv4address
1130     }
1131 
1132 
1133     /***
1134      * BitSet for hostport.
1135      * <p><blockquote><pre>
1136      * hostport      = host [ ":" port ]
1137      * </pre></blockquote><p>
1138      */
1139     protected static final BitSet hostport = new BitSet(256);
1140     // Static initializer for hostport
1141     static {
1142         hostport.or(host);
1143         hostport.set(':');
1144         hostport.or(port);
1145     }
1146 
1147 
1148     /***
1149      * Bitset for userinfo.
1150      * <p><blockquote><pre>
1151      * userinfo      = *( unreserved | escaped |
1152      *                    ";" | ":" | "&amp;" | "=" | "+" | "$" | "," )
1153      * </pre></blockquote><p>
1154      */
1155     protected static final BitSet userinfo = new BitSet(256);
1156     // Static initializer for userinfo
1157     static {
1158         userinfo.or(unreserved);
1159         userinfo.or(escaped);
1160         userinfo.set(';');
1161         userinfo.set(':');
1162         userinfo.set('&');
1163         userinfo.set('=');
1164         userinfo.set('+');
1165         userinfo.set('$');
1166         userinfo.set(',');
1167     }
1168 
1169 
1170     /***
1171      * BitSet for within the userinfo component like user and password.
1172      */
1173     public static final BitSet within_userinfo = new BitSet(256);
1174     // Static initializer for within_userinfo
1175     static {
1176         within_userinfo.or(userinfo);
1177         within_userinfo.clear(';'); // reserved within authority
1178         within_userinfo.clear(':');
1179         within_userinfo.clear('@');
1180         within_userinfo.clear('?');
1181         within_userinfo.clear('/');
1182     }
1183 
1184 
1185     /***
1186      * Bitset for server.
1187      * <p><blockquote><pre>
1188      * server        = [ [ userinfo "@" ] hostport ]
1189      * </pre></blockquote><p>
1190      */
1191     protected static final BitSet server = new BitSet(256);
1192     // Static initializer for server
1193     static {
1194         server.or(userinfo);
1195         server.set('@');
1196         server.or(hostport);
1197     }
1198 
1199 
1200     /***
1201      * BitSet for reg_name.
1202      * <p><blockquote><pre>
1203      * reg_name      = 1*( unreserved | escaped | "$" | "," |
1204      *                     ";" | ":" | "@" | "&amp;" | "=" | "+" )
1205      * </pre></blockquote><p>
1206      */
1207     protected static final BitSet reg_name = new BitSet(256);
1208     // Static initializer for reg_name
1209     static {
1210         reg_name.or(unreserved);
1211         reg_name.or(escaped);
1212         reg_name.set('$');
1213         reg_name.set(',');
1214         reg_name.set(';');
1215         reg_name.set(':');
1216         reg_name.set('@');
1217         reg_name.set('&');
1218         reg_name.set('=');
1219         reg_name.set('+');
1220     }
1221 
1222 
1223     /***
1224      * BitSet for authority.
1225      * <p><blockquote><pre>
1226      * authority     = server | reg_name
1227      * </pre></blockquote><p>
1228      */
1229     protected static final BitSet authority = new BitSet(256);
1230     // Static initializer for authority
1231     static {
1232         authority.or(server);
1233         authority.or(reg_name);
1234     }
1235 
1236 
1237     /***
1238      * BitSet for scheme.
1239      * <p><blockquote><pre>
1240      * scheme        = alpha *( alpha | digit | "+" | "-" | "." )
1241      * </pre></blockquote><p>
1242      */
1243     protected static final BitSet scheme = new BitSet(256);
1244     // Static initializer for scheme
1245     static {
1246         scheme.or(alpha);
1247         scheme.or(digit);
1248         scheme.set('+');
1249         scheme.set('-');
1250         scheme.set('.');
1251     }
1252 
1253 
1254     /***
1255      * BitSet for rel_segment.
1256      * <p><blockquote><pre>
1257      * rel_segment   = 1*( unreserved | escaped |
1258      *                     ";" | "@" | "&amp;" | "=" | "+" | "$" | "," )
1259      * </pre></blockquote><p>
1260      */
1261     protected static final BitSet rel_segment = new BitSet(256);
1262     // Static initializer for rel_segment
1263     static {
1264         rel_segment.or(unreserved);
1265         rel_segment.or(escaped);
1266         rel_segment.set(';');
1267         rel_segment.set('@');
1268         rel_segment.set('&');
1269         rel_segment.set('=');
1270         rel_segment.set('+');
1271         rel_segment.set('$');
1272         rel_segment.set(',');
1273     }
1274 
1275 
1276     /***
1277      * BitSet for rel_path.
1278      * <p><blockquote><pre>
1279      * rel_path      = rel_segment [ abs_path ]
1280      * </pre></blockquote><p>
1281      */
1282     protected static final BitSet rel_path = new BitSet(256);
1283     // Static initializer for rel_path
1284     static {
1285         rel_path.or(rel_segment);
1286         rel_path.or(abs_path);
1287     }
1288 
1289 
1290     /***
1291      * BitSet for net_path.
1292      * <p><blockquote><pre>
1293      * net_path      = "//" authority [ abs_path ]
1294      * </pre></blockquote><p>
1295      */
1296     protected static final BitSet net_path = new BitSet(256);
1297     // Static initializer for net_path
1298     static {
1299         net_path.set('/');
1300         net_path.or(authority);
1301         net_path.or(abs_path);
1302     }
1303     
1304 
1305     /***
1306      * BitSet for hier_part.
1307      * <p><blockquote><pre>
1308      * hier_part     = ( net_path | abs_path ) [ "?" query ]
1309      * </pre></blockquote><p>
1310      */
1311     protected static final BitSet hier_part = new BitSet(256);
1312     // Static initializer for hier_part
1313     static {
1314         hier_part.or(net_path);
1315         hier_part.or(abs_path);
1316         // hier_part.set('?'); aleady included
1317         hier_part.or(query);
1318     }
1319 
1320 
1321     /***
1322      * BitSet for relativeURI.
1323      * <p><blockquote><pre>
1324      * relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
1325      * </pre></blockquote><p>
1326      */
1327     protected static final BitSet relativeURI = new BitSet(256);
1328     // Static initializer for relativeURI
1329     static {
1330         relativeURI.or(net_path);
1331         relativeURI.or(abs_path);
1332         relativeURI.or(rel_path);
1333         // relativeURI.set('?'); aleady included
1334         relativeURI.or(query);
1335     }
1336 
1337 
1338     /***
1339      * BitSet for absoluteURI.
1340      * <p><blockquote><pre>
1341      * absoluteURI   = scheme ":" ( hier_part | opaque_part )
1342      * </pre></blockquote><p>
1343      */
1344     protected static final BitSet absoluteURI = new BitSet(256);
1345     // Static initializer for absoluteURI
1346     static {
1347         absoluteURI.or(scheme);
1348         absoluteURI.set(':');
1349         absoluteURI.or(hier_part);
1350         absoluteURI.or(opaque_part);
1351     }
1352 
1353 
1354     /***
1355      * BitSet for URI-reference.
1356      * <p><blockquote><pre>
1357      * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1358      * </pre></blockquote><p>
1359      */
1360     protected static final BitSet URI_reference = new BitSet(256);
1361     // Static initializer for URI_reference
1362     static {
1363         URI_reference.or(absoluteURI);
1364         URI_reference.or(relativeURI);
1365         URI_reference.set('#');
1366         URI_reference.or(fragment);
1367     }
1368 
1369     // ---------------------------- Characters disallowed within the URI syntax
1370     // Excluded US-ASCII Characters are like control, space, delims and unwise
1371 
1372     /***
1373      * BitSet for control.
1374      */
1375     public static final BitSet control = new BitSet(256);
1376     // Static initializer for control
1377     static {
1378         for (int i = 0; i <= 0x1F; i++) {
1379             control.set(i);
1380         }
1381         control.set(0x7F);
1382     }
1383 
1384     /***
1385      * BitSet for space.
1386      */
1387     public static final BitSet space = new BitSet(256);
1388     // Static initializer for space
1389     static {
1390         space.set(0x20);
1391     }
1392 
1393 
1394     /***
1395      * BitSet for delims.
1396      */
1397     public static final BitSet delims = new BitSet(256);
1398     // Static initializer for delims
1399     static {
1400         delims.set('<');
1401         delims.set('>');
1402         delims.set('#');
1403         delims.set('%');
1404         delims.set('"');
1405     }
1406 
1407 
1408     /***
1409      * BitSet for unwise.
1410      */
1411     public static final BitSet unwise = new BitSet(256);
1412     // Static initializer for unwise
1413     static {
1414         unwise.set('{');
1415         unwise.set('}');
1416         unwise.set('|');
1417         unwise.set('//');
1418         unwise.set('^');
1419         unwise.set('[');
1420         unwise.set(']');
1421         unwise.set('`');
1422     }
1423 
1424 
1425     /***
1426      * Disallowed rel_path before escaping.
1427      */
1428     public static final BitSet disallowed_rel_path = new BitSet(256);
1429     // Static initializer for disallowed_rel_path
1430     static {
1431         disallowed_rel_path.or(uric);
1432         disallowed_rel_path.andNot(rel_path);
1433     }
1434 
1435 
1436     /***
1437      * Disallowed opaque_part before escaping.
1438      */
1439     public static final BitSet disallowed_opaque_part = new BitSet(256);
1440     // Static initializer for disallowed_opaque_part
1441     static {
1442         disallowed_opaque_part.or(uric);
1443         disallowed_opaque_part.andNot(opaque_part);
1444     }
1445 
1446     // ----------------------- Characters allowed within and for each component
1447 
1448     /***
1449      * Those characters that are allowed for the authority component.
1450      */
1451     public static final BitSet allowed_authority = new BitSet(256);
1452     // Static initializer for allowed_authority
1453     static {
1454         allowed_authority.or(authority);
1455         allowed_authority.clear('%');
1456     }
1457 
1458 
1459     /***
1460      * Those characters that are allowed for the opaque_part.
1461      */
1462     public static final BitSet allowed_opaque_part = new BitSet(256);
1463     // Static initializer for allowed_opaque_part 
1464     static {
1465         allowed_opaque_part.or(opaque_part);
1466         allowed_opaque_part.clear('%');
1467     }
1468 
1469 
1470     /***
1471      * Those characters that are allowed for the reg_name.
1472      */
1473     public static final BitSet allowed_reg_name = new BitSet(256);
1474     // Static initializer for allowed_reg_name 
1475     static {
1476         allowed_reg_name.or(reg_name);
1477         // allowed_reg_name.andNot(percent);
1478         allowed_reg_name.clear('%');
1479     }
1480 
1481 
1482     /***
1483      * Those characters that are allowed for the userinfo component.
1484      */
1485     public static final BitSet allowed_userinfo = new BitSet(256);
1486     // Static initializer for allowed_userinfo
1487     static {
1488         allowed_userinfo.or(userinfo);
1489         // allowed_userinfo.andNot(percent);
1490         allowed_userinfo.clear('%');
1491     }
1492 
1493 
1494     /***
1495      * Those characters that are allowed for within the userinfo component.
1496      */
1497     public static final BitSet allowed_within_userinfo = new BitSet(256);
1498     // Static initializer for allowed_within_userinfo
1499     static {
1500         allowed_within_userinfo.or(within_userinfo);
1501         allowed_within_userinfo.clear('%');
1502     }
1503 
1504 
1505     /***
1506      * Those characters that are allowed for the IPv6reference component.
1507      * The characters '[', ']' in IPv6reference should be excluded.
1508      */
1509     public static final BitSet allowed_IPv6reference = new BitSet(256);
1510     // Static initializer for allowed_IPv6reference
1511     static {
1512         allowed_IPv6reference.or(IPv6reference);
1513         // allowed_IPv6reference.andNot(unwise);
1514         allowed_IPv6reference.clear('[');
1515         allowed_IPv6reference.clear(']');
1516     }
1517 
1518 
1519     /***
1520      * Those characters that are allowed for the host component.
1521      * The characters '[', ']' in IPv6reference should be excluded.
1522      */
1523     public static final BitSet allowed_host = new BitSet(256);
1524     // Static initializer for allowed_host
1525     static {
1526         allowed_host.or(hostname);
1527         allowed_host.or(allowed_IPv6reference);
1528     }
1529 
1530 
1531     /***
1532      * Those characters that are allowed for the authority component.
1533      */
1534     public static final BitSet allowed_within_authority = new BitSet(256);
1535     // Static initializer for allowed_within_authority
1536     static {
1537         allowed_within_authority.or(server);
1538         allowed_within_authority.or(reg_name);
1539         allowed_within_authority.clear(';');
1540         allowed_within_authority.clear(':');
1541         allowed_within_authority.clear('@');
1542         allowed_within_authority.clear('?');
1543         allowed_within_authority.clear('/');
1544     }
1545 
1546 
1547     /***
1548      * Those characters that are allowed for the abs_path.
1549      */
1550     public static final BitSet allowed_abs_path = new BitSet(256);
1551     // Static initializer for allowed_abs_path
1552     static {
1553         allowed_abs_path.or(abs_path);
1554         // allowed_abs_path.set('/');  // aleady included
1555         allowed_abs_path.andNot(percent);
1556     }
1557 
1558 
1559     /***
1560      * Those characters that are allowed for the rel_path.
1561      */
1562     public static final BitSet allowed_rel_path = new BitSet(256);
1563     // Static initializer for allowed_rel_path
1564     static {
1565         allowed_rel_path.or(rel_path);
1566         allowed_rel_path.clear('%');
1567     }
1568 
1569 
1570     /***
1571      * Those characters that are allowed within the path.
1572      */
1573     public static final BitSet allowed_within_path = new BitSet(256);
1574     // Static initializer for allowed_within_path
1575     static {
1576         allowed_within_path.or(abs_path);
1577         allowed_within_path.clear('/');
1578         allowed_within_path.clear(';');
1579         allowed_within_path.clear('=');
1580         allowed_within_path.clear('?');
1581     }
1582 
1583 
1584     /***
1585      * Those characters that are allowed for the query component.
1586      */
1587     public static final BitSet allowed_query = new BitSet(256);
1588     // Static initializer for allowed_query
1589     static {
1590         allowed_query.or(uric);
1591         allowed_query.clear('%');
1592     }
1593 
1594 
1595     /***
1596      * Those characters that are allowed within the query component.
1597      */
1598     public static final BitSet allowed_within_query = new BitSet(256);
1599     // Static initializer for allowed_within_query
1600     static {
1601         allowed_within_query.or(allowed_query);
1602         allowed_within_query.andNot(reserved); // excluded 'reserved'
1603     }
1604 
1605 
1606     /***
1607      * Those characters that are allowed for the fragment component.
1608      */
1609     public static final BitSet allowed_fragment = new BitSet(256);
1610     // Static initializer for allowed_fragment
1611     static {
1612         allowed_fragment.or(uric);
1613         allowed_fragment.clear('%');
1614     }
1615 
1616     // ------------------------------------------- Flags for this URI-reference
1617 
1618     // TODO: Figure out what all these variables are for and provide javadoc
1619 
1620     // URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1621     // absoluteURI   = scheme ":" ( hier_part | opaque_part )
1622     protected boolean _is_hier_part;
1623     protected boolean _is_opaque_part;
1624     // relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ] 
1625     // hier_part     = ( net_path | abs_path ) [ "?" query ]
1626     protected boolean _is_net_path;
1627     protected boolean _is_abs_path;
1628     protected boolean _is_rel_path;
1629     // net_path      = "//" authority [ abs_path ] 
1630     // authority     = server | reg_name
1631     protected boolean _is_reg_name;
1632     protected boolean _is_server;  // = _has_server
1633     // server        = [ [ userinfo "@" ] hostport ]
1634     // host          = hostname | IPv4address | IPv6reference
1635     protected boolean _is_hostname;
1636     protected boolean _is_IPv4address;
1637     protected boolean _is_IPv6reference;
1638 
1639     // ------------------------------------------ Character and escape encoding
1640     
1641     /***
1642      * Encodes URI string.
1643      *
1644      * This is a two mapping, one from original characters to octets, and
1645      * subsequently a second from octets to URI characters:
1646      * <p><blockquote><pre>
1647      *   original character sequence->octet sequence->URI character sequence
1648      * </pre></blockquote><p>
1649      *
1650      * An escaped octet is encoded as a character triplet, consisting of the
1651      * percent character "%" followed by the two hexadecimal digits
1652      * representing the octet code. For example, "%20" is the escaped
1653      * encoding for the US-ASCII space character.
1654      * <p>
1655      * Conversion from the local filesystem character set to UTF-8 will
1656      * normally involve a two step process. First convert the local character
1657      * set to the UCS; then convert the UCS to UTF-8.
1658      * The first step in the process can be performed by maintaining a mapping
1659      * table that includes the local character set code and the corresponding
1660      * UCS code.
1661      * The next step is to convert the UCS character code to the UTF-8 encoding.
1662      * <p>
1663      * Mapping between vendor codepages can be done in a very similar manner
1664      * as described above.
1665      * <p>
1666      * The only time escape encodings can allowedly be made is when a URI is
1667      * being created from its component parts.  The escape and validate methods
1668      * are internally performed within this method.
1669      *
1670      * @param original the original character sequence
1671      * @param allowed those characters that are allowed within a component
1672      * @param charset the protocol charset
1673      * @return URI character sequence
1674      * @throws URIException null component or unsupported character encoding
1675      */
1676         
1677     protected static char[] encode(String original, BitSet allowed,
1678             String charset) throws URIException {
1679         if (original == null) {
1680             throw new IllegalArgumentException("Original string may not be null");
1681         }
1682         if (allowed == null) {
1683             throw new IllegalArgumentException("Allowed bitset may not be null");
1684         }
1685         byte[] rawdata = URLCodec.encodeUrl(allowed, EncodingUtil.getBytes(original, charset));
1686         return EncodingUtil.getAsciiString(rawdata).toCharArray();
1687     }
1688 
1689     /***
1690      * Decodes URI encoded string.
1691      *
1692      * This is a two mapping, one from URI characters to octets, and
1693      * subsequently a second from octets to original characters:
1694      * <p><blockquote><pre>
1695      *   URI character sequence->octet sequence->original character sequence
1696      * </pre></blockquote><p>
1697      *
1698      * A URI must be separated into its components before the escaped
1699      * characters within those components can be allowedly decoded.
1700      * <p>
1701      * Notice that there is a chance that URI characters that are non UTF-8
1702      * may be parsed as valid UTF-8.  A recent non-scientific analysis found
1703      * that EUC encoded Japanese words had a 2.7% false reading; SJIS had a
1704      * 0.0005% false reading; other encoding such as ASCII or KOI-8 have a 0%
1705      * false reading.
1706      * <p>
1707      * The percent "%" character always has the reserved purpose of being
1708      * the escape indicator, it must be escaped as "%25" in order to be used
1709      * as data within a URI.
1710      * <p>
1711      * The unescape method is internally performed within this method.
1712      *
1713      * @param component the URI character sequence
1714      * @param charset the protocol charset
1715      * @return original character sequence
1716      * @throws URIException incomplete trailing escape pattern or unsupported
1717      * character encoding
1718      */
1719     protected static String decode(char[] component, String charset) 
1720         throws URIException {
1721         if (component == null) {
1722             throw new IllegalArgumentException("Component array of chars may not be null");
1723         }
1724         return decode(new String(component), charset);
1725     }
1726 
1727     /***
1728      * Decodes URI encoded string.
1729      *
1730      * This is a two mapping, one from URI characters to octets, and
1731      * subsequently a second from octets to original characters:
1732      * <p><blockquote><pre>
1733      *   URI character sequence->octet sequence->original character sequence
1734      * </pre></blockquote><p>
1735      *
1736      * A URI must be separated into its components before the escaped
1737      * characters within those components can be allowedly decoded.
1738      * <p>
1739      * Notice that there is a chance that URI characters that are non UTF-8
1740      * may be parsed as valid UTF-8.  A recent non-scientific analysis found
1741      * that EUC encoded Japanese words had a 2.7% false reading; SJIS had a
1742      * 0.0005% false reading; other encoding such as ASCII or KOI-8 have a 0%
1743      * false reading.
1744      * <p>
1745      * The percent "%" character always has the reserved purpose of being
1746      * the escape indicator, it must be escaped as "%25" in order to be used
1747      * as data within a URI.
1748      * <p>
1749      * The unescape method is internally performed within this method.
1750      *
1751      * @param component the URI character sequence
1752      * @param charset the protocol charset
1753      * @return original character sequence
1754      * @throws URIException incomplete trailing escape pattern or unsupported
1755      * character encoding
1756      * 
1757      * @since 3.0
1758      */
1759     protected static String decode(String component, String charset) 
1760         throws URIException {
1761         if (component == null) {
1762             throw new IllegalArgumentException("Component array of chars may not be null");
1763         }
1764         byte[] rawdata = null;
1765         try { 
1766             rawdata = URLCodec.decodeUrl(EncodingUtil.getAsciiBytes(component));
1767         } catch (DecoderException e) {
1768             throw new URIException(e.getMessage());
1769         }
1770         return EncodingUtil.getString(rawdata, charset);
1771     }
1772     /***
1773      * Pre-validate the unescaped URI string within a specific component.
1774      *
1775      * @param component the component string within the component
1776      * @param disallowed those characters disallowed within the component
1777      * @return if true, it doesn't have the disallowed characters
1778      * if false, the component is undefined or an incorrect one
1779      */
1780     protected boolean prevalidate(String component, BitSet disallowed) {
1781         // prevalidate the given component by disallowed characters
1782         if (component == null) {
1783             return false; // undefined
1784         }
1785         char[] target = component.toCharArray();
1786         for (int i = 0; i < target.length; i++) {
1787             if (disallowed.get(target[i])) {
1788                 return false;
1789             }
1790         }
1791         return true;
1792     }
1793 
1794 
1795     /***
1796      * Validate the URI characters within a specific component.
1797      * The component must be performed after escape encoding. Or it doesn't
1798      * include escaped characters.
1799      *
1800      * @param component the characters sequence within the component
1801      * @param generous those characters that are allowed within a component
1802      * @return if true, it's the correct URI character sequence
1803      */
1804     protected boolean validate(char[] component, BitSet generous) {
1805         // validate each component by generous characters
1806         return validate(component, 0, -1, generous);
1807     }
1808 
1809 
1810     /***
1811      * Validate the URI characters within a specific component.
1812      * The component must be performed after escape encoding. Or it doesn't
1813      * include escaped characters.
1814      * <p>
1815      * It's not that much strict, generous.  The strict validation might be 
1816      * performed before being called this method.
1817      *
1818      * @param component the characters sequence within the component
1819      * @param soffset the starting offset of the given component
1820      * @param eoffset the ending offset of the given component
1821      * if -1, it means the length of the component
1822      * @param generous those characters that are allowed within a component
1823      * @return if true, it's the correct URI character sequence
1824      */
1825     protected boolean validate(char[] component, int soffset, int eoffset,
1826             BitSet generous) {
1827         // validate each component by generous characters
1828         if (eoffset == -1) {
1829             eoffset = component.length - 1;
1830         }
1831         for (int i = soffset; i <= eoffset; i++) {
1832             if (!generous.get(component[i])) { 
1833                 return false;
1834             }
1835         }
1836         return true;
1837     }
1838 
1839 
1840     /***
1841      * In order to avoid any possibility of conflict with non-ASCII characters,
1842      * Parse a URI reference as a <code>String</code> with the character
1843      * encoding of the local system or the document.
1844      * <p>
1845      * The following line is the regular expression for breaking-down a URI
1846      * reference into its components.
1847      * <p><blockquote><pre>
1848      *   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1849      *    12            3  4          5       6  7        8 9
1850      * </pre></blockquote><p>
1851      * For example, matching the above expression to
1852      *   http://jakarta.apache.org/ietf/uri/#Related
1853      * results in the following subexpression matches:
1854      * <p><blockquote><pre>
1855      *               $1 = http:
1856      *  scheme    =  $2 = http
1857      *               $3 = //jakarta.apache.org
1858      *  authority =  $4 = jakarta.apache.org
1859      *  path      =  $5 = /ietf/uri/
1860      *               $6 = <undefined>
1861      *  query     =  $7 = <undefined>
1862      *               $8 = #Related
1863      *  fragment  =  $9 = Related
1864      * </pre></blockquote><p>
1865      *
1866      * @param original the original character sequence
1867      * @param escaped <code>true</code> if <code>original</code> is escaped
1868      * @throws URIException If an error occurs.
1869      */
1870     protected void parseUriReference(String original, boolean escaped)
1871         throws URIException {
1872 
1873         // validate and contruct the URI character sequence
1874         if (original == null) {
1875             throw new URIException("URI-Reference required");
1876         }
1877 
1878         /* @
1879          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1880          */
1881         String tmp = original.trim();
1882         
1883         /*
1884          * The length of the string sequence of characters.
1885          * It may not be equal to the length of the byte array.
1886          */
1887         int length = tmp.length();
1888 
1889         /*
1890          * Remove the delimiters like angle brackets around an URI.
1891          */
1892         if (length > 0) {
1893             char[] firstDelimiter = { tmp.charAt(0) };
1894             if (validate(firstDelimiter, delims)) {
1895                 if (length >= 2) {
1896                     char[] lastDelimiter = { tmp.charAt(length - 1) };
1897                     if (validate(lastDelimiter, delims)) {
1898                         tmp = tmp.substring(1, length - 1);
1899                         length = length - 2;
1900                     }
1901                 }
1902             }
1903         }
1904 
1905         /*
1906          * The starting index
1907          */
1908         int from = 0;
1909 
1910         /*
1911          * The test flag whether the URI is started from the path component.
1912          */
1913         boolean isStartedFromPath = false;
1914         int atColon = tmp.indexOf(':');
1915         int atSlash = tmp.indexOf('/');
1916         if (atColon <= 0 || (atSlash >= 0 && atSlash < atColon)) {
1917             isStartedFromPath = true;
1918         }
1919 
1920         /*
1921          * <p><blockquote><pre>
1922          *     @@@@@@@@
1923          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1924          * </pre></blockquote><p>
1925          */
1926         int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from);
1927         if (at == -1) { 
1928             at = 0;
1929         }
1930 
1931         /*
1932          * Parse the scheme.
1933          * <p><blockquote><pre>
1934          *  scheme    =  $2 = http
1935          *              @
1936          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1937          * </pre></blockquote><p>
1938          */
1939         if (at > 0 && at < length && tmp.charAt(at) == ':') {
1940             char[] target = tmp.substring(0, at).toLowerCase().toCharArray();
1941             if (validate(target, scheme)) {
1942                 _scheme = target;
1943             } else {
1944                 throw new URIException("incorrect scheme");
1945             }
1946             from = ++at;
1947         }
1948 
1949         /*
1950          * Parse the authority component.
1951          * <p><blockquote><pre>
1952          *  authority =  $4 = jakarta.apache.org
1953          *                  @@
1954          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1955          * </pre></blockquote><p>
1956          */
1957         // Reset flags
1958         _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false;
1959         if (0 <= at && at < length && tmp.charAt(at) == '/') {
1960             // Set flag
1961             _is_hier_part = true;
1962             if (at + 2 < length && tmp.charAt(at + 1) == '/') {
1963                 // the temporary index to start the search from
1964                 int next = indexFirstOf(tmp, "/?#", at + 2);
1965                 if (next == -1) {
1966                     next = (tmp.substring(at + 2).length() == 0) ? at + 2 
1967                         : tmp.length();
1968                 }
1969                 parseAuthority(tmp.substring(at + 2, next), escaped);
1970                 from = at = next;
1971                 // Set flag
1972                 _is_net_path = true;
1973             }
1974             if (from == at) {
1975                 // Set flag
1976                 _is_abs_path = true;
1977             }
1978         }
1979 
1980         /*
1981          * Parse the path component.
1982          * <p><blockquote><pre>
1983          *  path      =  $5 = /ietf/uri/
1984          *                                @@@@@@
1985          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1986          * </pre></blockquote><p>
1987          */
1988         if (from < length) {
1989             // rel_path = rel_segment [ abs_path ]
1990             int next = indexFirstOf(tmp, "?#", from);
1991             if (next == -1) {
1992                 next = tmp.length();
1993             }
1994             if (!_is_abs_path) {
1995                 if (!escaped 
1996                     && prevalidate(tmp.substring(from, next), disallowed_rel_path) 
1997                     || escaped 
1998                     && validate(tmp.substring(from, next).toCharArray(), rel_path)) {
1999                     // Set flag
2000                     _is_rel_path = true;
2001                 } else if (!escaped 
2002                     && prevalidate(tmp.substring(from, next), disallowed_opaque_part) 
2003                     || escaped 
2004                     && validate(tmp.substring(from, next).toCharArray(), opaque_part)) {
2005                     // Set flag
2006                     _is_opaque_part = true;
2007                 } else {
2008                     // the path component may be empty
2009                     _path = null;
2010                 }
2011             }
2012             if (escaped) {
2013                 setRawPath(tmp.substring(from, next).toCharArray());
2014             } else {
2015                 setPath(tmp.substring(from, next));
2016             }
2017             at = next;
2018         }
2019 
2020         // set the charset to do escape encoding
2021         String charset = getProtocolCharset();
2022 
2023         /*
2024          * Parse the query component.
2025          * <p><blockquote><pre>
2026          *  query     =  $7 = <undefined>
2027          *                                        @@@@@@@@@
2028          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
2029          * </pre></blockquote><p>
2030          */
2031         if (0 <= at && at + 1 < length && tmp.charAt(at) == '?') {
2032             int next = tmp.indexOf('#', at + 1);
2033             if (next == -1) {
2034                 next = tmp.length();
2035             }
2036             _query = (escaped) ? tmp.substring(at + 1, next).toCharArray() 
2037                 : encode(tmp.substring(at + 1, next), allowed_query, charset);
2038             at = next;
2039         }
2040 
2041         /*
2042          * Parse the fragment component.
2043          * <p><blockquote><pre>
2044          *  fragment  =  $9 = Related
2045          *                                                   @@@@@@@@
2046          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
2047          * </pre></blockquote><p>
2048          */
2049         if (0 <= at && at + 1 <= length && tmp.charAt(at) == '#') {
2050             if (at + 1 == length) { // empty fragment
2051                 _fragment = "".toCharArray();
2052             } else {
2053                 _fragment = (escaped) ? tmp.substring(at + 1).toCharArray() 
2054                     : encode(tmp.substring(at + 1), allowed_fragment, charset);
2055             }
2056         }
2057 
2058         // set this URI.
2059         setURI();
2060     }
2061 
2062 
2063     /***
2064      * Get the index of the earliest occurrence in the string of any one of
2065      * the given delimiter characters.
2066      *
2067      * @param s the string to be indexed
2068      * @param delims the delimiters used to index
2069      * @return the earlier index if there are delimiters
2070      */
2071     protected int indexFirstOf(String s, String delims) {
2072         return indexFirstOf(s, delims, -1);
2073     }
2074 
2075 
2076     /***
2077      * Get the index of the earliest occurrence in the string of any one of
2078      * the given delimiter characters.
2079      *
2080      * @param s the string to be indexed
2081      * @param delims the delimiters used to index
2082      * @param offset the from index
2083      * @return the earlier index if there are delimiters
2084      */
2085     protected int indexFirstOf(String s, String delims, int offset) {
2086         if (s == null || s.length() == 0) {
2087             return -1;
2088         }
2089         if (delims == null || delims.length() == 0) {
2090             return -1;
2091         }
2092         // check boundaries
2093         if (offset < 0) {
2094             offset = 0;
2095         } else if (offset > s.length()) {
2096             return -1;
2097         }
2098         // s is never null
2099         int min = s.length();
2100         char[] delim = delims.toCharArray();
2101         for (int i = 0; i < delim.length; i++) {
2102             int at = s.indexOf(delim[i], offset);
2103             if (at >= 0 && at < min) {
2104                 min = at;
2105             }
2106         }
2107         return (min == s.length()) ? -1 : min;
2108     }
2109 
2110 
2111     /***
2112      * Get the index of the first occurrence of the given delimiter in the
2113      * given character array.
2114      *
2115      * @param s the character array to be indexed
2116      * @param delim the delimiter used to index
2117      * @return the earlier index if there is a delimiter
2118      */
2119     protected int indexFirstOf(char[] s, char delim) {
2120         return indexFirstOf(s, delim, 0);
2121     }
2122 
2123 
2124     /***
2125      * Get the index of the first occurrence of the given delimiter in the
2126      * given character array.
2127      *
2128      * @param s the character array to be indexed
2129      * @param delim the delimiter used to index
2130      * @param offset The offset.
2131      * @return the earlier index if there is a delimiter
2132      */
2133     protected int indexFirstOf(char[] s, char delim, int offset) {
2134         if (s == null || s.length == 0) {
2135             return -1;
2136         }
2137         // check boundaries
2138         if (offset < 0) {
2139             offset = 0;
2140         } else if (offset > s.length) {
2141             return -1;
2142         }
2143         for (int i = offset; i < s.length; i++) {
2144             if (s[i] == delim) {
2145                 return i;
2146             }
2147         }
2148         return -1;
2149     }
2150 
2151 
2152     /***
2153      * Parse the authority component.
2154      *
2155      * @param original the original character sequence of authority component
2156      * @param escaped <code>true</code> if <code>original</code> is escaped
2157      * @throws URIException If an error occurs.
2158      */
2159     protected void parseAuthority(String original, boolean escaped)
2160         throws URIException {
2161 
2162         // Reset flags
2163         _is_reg_name = _is_server =
2164         _is_hostname = _is_IPv4address = _is_IPv6reference = false;
2165 
2166         // set the charset to do escape encoding
2167         String charset = getProtocolCharset();
2168 
2169         boolean hasPort = true;
2170         int from = 0;
2171         int next = original.indexOf('@');
2172         if (next != -1) { // neither -1 and 0
2173             // each protocol extented from URI supports the specific userinfo
2174             _userinfo = (escaped) ? original.substring(0, next).toCharArray() 
2175                 : encode(original.substring(0, next), allowed_userinfo,
2176                         charset);
2177             from = next + 1;
2178         }
2179         next = original.indexOf('[', from);
2180         if (next >= from) {
2181             next = original.indexOf(']', from);
2182             if (next == -1) {
2183                 throw new URIException(URIException.PARSING, "IPv6reference");
2184             } else {
2185                 next++;
2186             }
2187             // In IPv6reference, '[', ']' should be excluded
2188             _host = (escaped) ? original.substring(from, next).toCharArray() 
2189                 : encode(original.substring(from, next), allowed_IPv6reference,
2190                         charset);
2191             // Set flag
2192             _is_IPv6reference = true;
2193         } else { // only for !_is_IPv6reference
2194             next = original.indexOf(':', from);
2195             if (next == -1) {
2196                 next = original.length();
2197                 hasPort = false;
2198             }
2199             // REMINDME: it doesn't need the pre-validation
2200             _host = original.substring(from, next).toCharArray();
2201             if (validate(_host, IPv4address)) {
2202                 // Set flag
2203                 _is_IPv4address = true;
2204             } else if (validate(_host, hostname)) {
2205                 // Set flag
2206                 _is_hostname = true;
2207             } else {
2208                 // Set flag
2209                 _is_reg_name = true;
2210             }
2211         }
2212         if (_is_reg_name) {
2213             // Reset flags for a server-based naming authority
2214             _is_server = _is_hostname = _is_IPv4address =
2215             _is_IPv6reference = false;
2216             // set a registry-based naming authority
2217             _authority = (escaped) ? original.toString().toCharArray() 
2218                 : encode(original.toString(), allowed_reg_name, charset);
2219         } else {
2220             if (original.length() - 1 > next && hasPort 
2221                 && original.charAt(next) == ':') { // not empty
2222                 from = next + 1;
2223                 try {
2224                     _port = Integer.parseInt(original.substring(from));
2225                 } catch (NumberFormatException error) {
2226                     throw new URIException(URIException.PARSING,
2227                             "invalid port number");
2228                 }
2229             }
2230             // set a server-based naming authority
2231             StringBuffer buf = new StringBuffer();
2232             if (_userinfo != null) { // has_userinfo
2233                 buf.append(_userinfo);
2234                 buf.append('@');
2235             }
2236             if (_host != null) {
2237                 buf.append(_host);
2238                 if (_port != -1) {
2239                     buf.append(':');
2240                     buf.append(_port);
2241                 }
2242             }
2243             _authority = buf.toString().toCharArray();
2244             // Set flag
2245             _is_server = true;
2246         }
2247     }
2248 
2249 
2250     /***
2251      * Once it's parsed successfully, set this URI.
2252      *
2253      * @see #getRawURI
2254      */
2255     protected void setURI() {
2256         // set _uri
2257         StringBuffer buf = new StringBuffer();
2258         // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
2259         if (_scheme != null) {
2260             buf.append(_scheme);
2261             buf.append(':');
2262         }
2263         if (_is_net_path) {
2264             buf.append("//");
2265             if (_authority != null) { // has_authority
2266                 if (_userinfo != null) { // by default, remove userinfo part
2267                     if (_host != null) {
2268                         buf.append(_host);
2269                         if (_port != -1) {
2270                             buf.append(':');
2271                             buf.append(_port);
2272                         }
2273                     }
2274                 } else {
2275                     buf.append(_authority);
2276                 }
2277             }
2278         }
2279         if (_opaque != null && _is_opaque_part) {
2280             buf.append(_opaque);
2281         } else if (_path != null) {
2282             // _is_hier_part or _is_relativeURI
2283             if (_path.length != 0) {
2284                 buf.append(_path);
2285             }
2286         }
2287         if (_query != null) { // has_query
2288             buf.append('?');
2289             buf.append(_query);
2290         }
2291         // ignore the fragment identifier
2292         _uri = buf.toString().toCharArray();
2293         hash = 0;
2294     }
2295 
2296     // ----------------------------------------------------------- Test methods
2297   
2298 
2299     /***
2300      * Tell whether or not this URI is absolute.
2301      *
2302      * @return true iif this URI is absoluteURI
2303      */
2304     public boolean isAbsoluteURI() {
2305         return (_scheme != null);
2306     }
2307   
2308 
2309     /***
2310      * Tell whether or not this URI is relative.
2311      *
2312      * @return true iif this URI is relativeURI
2313      */
2314     public boolean isRelativeURI() {
2315         return (_scheme == null);
2316     }
2317 
2318 
2319     /***
2320      * Tell whether or not the absoluteURI of this URI is hier_part.
2321      *
2322      * @return true iif the absoluteURI is hier_part
2323      */
2324     public boolean isHierPart() {
2325         return _is_hier_part;
2326     }
2327 
2328 
2329     /***
2330      * Tell whether or not the absoluteURI of this URI is opaque_part.
2331      *
2332      * @return true iif the absoluteURI is opaque_part
2333      */
2334     public boolean isOpaquePart() {
2335         return _is_opaque_part;
2336     }
2337 
2338 
2339     /***
2340      * Tell whether or not the relativeURI or heir_part of this URI is net_path.
2341      * It's the same function as the has_authority() method.
2342      *
2343      * @return true iif the relativeURI or heir_part is net_path
2344      * @see #hasAuthority
2345      */
2346     public boolean isNetPath() {
2347         return _is_net_path || (_authority != null);
2348     }
2349 
2350 
2351     /***
2352      * Tell whether or not the relativeURI or hier_part of this URI is abs_path.
2353      *
2354      * @return true iif the relativeURI or hier_part is abs_path
2355      */
2356     public boolean isAbsPath() {
2357         return _is_abs_path;
2358     }
2359 
2360 
2361     /***
2362      * Tell whether or not the relativeURI of this URI is rel_path.
2363      *
2364      * @return true iif the relativeURI is rel_path
2365      */
2366     public boolean isRelPath() {
2367         return _is_rel_path;
2368     }
2369 
2370 
2371     /***
2372      * Tell whether or not this URI has authority.
2373      * It's the same function as the is_net_path() method.
2374      *
2375      * @return true iif this URI has authority
2376      * @see #isNetPath
2377      */
2378     public boolean hasAuthority() {
2379         return (_authority != null) || _is_net_path;
2380     }
2381 
2382     /***
2383      * Tell whether or not the authority component of this URI is reg_name.
2384      *
2385      * @return true iif the authority component is reg_name
2386      */
2387     public boolean isRegName() {
2388         return _is_reg_name;
2389     }
2390   
2391 
2392     /***
2393      * Tell whether or not the authority component of this URI is server.
2394      *
2395      * @return true iif the authority component is server
2396      */
2397     public boolean isServer() {
2398         return _is_server;
2399     }
2400   
2401 
2402     /***
2403      * Tell whether or not this URI has userinfo.
2404      *
2405      * @return true iif this URI has userinfo
2406      */
2407     public boolean hasUserinfo() {
2408         return (_userinfo != null);
2409     }
2410   
2411 
2412     /***
2413      * Tell whether or not the host part of this URI is hostname.
2414      *
2415      * @return true iif the host part is hostname
2416      */
2417     public boolean isHostname() {
2418         return _is_hostname;
2419     }
2420 
2421 
2422     /***
2423      * Tell whether or not the host part of this URI is IPv4address.
2424      *
2425      * @return true iif the host part is IPv4address
2426      */
2427     public boolean isIPv4address() {
2428         return _is_IPv4address;
2429     }
2430 
2431 
2432     /***
2433      * Tell whether or not the host part of this URI is IPv6reference.
2434      *
2435      * @return true iif the host part is IPv6reference
2436      */
2437     public boolean isIPv6reference() {
2438         return _is_IPv6reference;
2439     }
2440 
2441 
2442     /***
2443      * Tell whether or not this URI has query.
2444      *
2445      * @return true iif this URI has query
2446      */
2447     public boolean hasQuery() {
2448         return (_query != null);
2449     }
2450    
2451 
2452     /***
2453      * Tell whether or not this URI has fragment.
2454      *
2455      * @return true iif this URI has fragment
2456      */
2457     public boolean hasFragment() {
2458         return (_fragment != null);
2459     }
2460    
2461    
2462     // ---------------------------------------------------------------- Charset
2463 
2464 
2465     /***
2466      * Set the default charset of the protocol.
2467      * <p>
2468      * The character set used to store files SHALL remain a local decision and
2469      * MAY depend on the capability of local operating systems. Prior to the
2470      * exchange of URIs they SHOULD be converted into a ISO/IEC 10646 format
2471      * and UTF-8 encoded. This approach, while allowing international exchange
2472      * of URIs, will still allow backward compatibility with older systems
2473      * because the code set positions for ASCII characters are identical to the
2474      * one byte sequence in UTF-8.
2475      * <p>
2476      * An individual URI scheme may require a single charset, define a default
2477      * charset, or provide a way to indicate the charset used.
2478      *
2479      * <p>
2480      * The setter always succeeds in changing the default charset and then
2481      * always throws a <code>DefaultCharsetChanged</code> exception.
2482      *
2483      * So API programmer must follow the following way:
2484      * <code><pre>
2485      *  import org.apache.util.URI$DefaultCharsetChanged;
2486      *      .
2487      *      .
2488      *      .
2489      *  try {
2490      *      URI.setDefaultProtocolCharset("UTF-8");
2491      *  } catch (DefaultCharsetChanged cc) {
2492      *      // CASE 1: the exception could be ignored, when it is set by user
2493      *      if (cc.getReasonCode() == DefaultCharsetChanged.PROTOCOL_CHARSET) {
2494      *      // CASE 2: let user know the default protocol charset changed
2495      *      } else {
2496      *      // CASE 2: let user know the default document charset changed
2497      *      }
2498      *  }
2499      *  </pre></code>
2500      *
2501      * The API programmer is responsible to set the correct charset.
2502      * And each application should remember its own charset to support.
2503      *
2504      * @param charset the default charset for each protocol
2505      * @throws DefaultCharsetChanged default charset changed
2506      */
2507     public static void setDefaultProtocolCharset(String charset) 
2508         throws DefaultCharsetChanged {
2509             
2510         defaultProtocolCharset = charset;
2511         throw new DefaultCharsetChanged(DefaultCharsetChanged.PROTOCOL_CHARSET,
2512                 "the default protocol charset changed");
2513     }
2514 
2515 
2516     /***
2517      * Get the default charset of the protocol.
2518      * <p>
2519      * An individual URI scheme may require a single charset, define a default
2520      * charset, or provide a way to indicate the charset used.
2521      * <p>
2522      * To work globally either requires support of a number of character sets
2523      * and to be able to convert between them, or the use of a single preferred
2524      * character set.
2525      * For support of global compatibility it is STRONGLY RECOMMENDED that
2526      * clients and servers use UTF-8 encoding when exchanging URIs.
2527      *
2528      * @return the default charset string
2529      */
2530     public static String getDefaultProtocolCharset() {
2531         return defaultProtocolCharset;
2532     }
2533 
2534 
2535     /***
2536      * Get the protocol charset used by this current URI instance.
2537      * It was set by the constructor for this instance. If it was not set by
2538      * constructor, it will return the default protocol charset.
2539      *
2540      * @return the protocol charset string
2541      * @see #getDefaultProtocolCharset
2542      */
2543     public String getProtocolCharset() {
2544         return (protocolCharset != null) 
2545             ? protocolCharset 
2546             : defaultProtocolCharset;
2547     }
2548 
2549 
2550     /***
2551      * Set the default charset of the document.
2552      * <p>
2553      * Notice that it will be possible to contain mixed characters (e.g.
2554      * ftp://host/KoreanNamespace/ChineseResource). To handle the Bi-directional
2555      * display of these character sets, the protocol charset could be simply
2556      * used again. Because it's not yet implemented that the insertion of BIDI
2557      * control characters at different points during composition is extracted.
2558      * <p>
2559      *
2560      * The setter always succeeds in changing the default charset and then
2561      * always throws a <code>DefaultCharsetChanged</code> exception.
2562      *
2563      * So API programmer must follow the following way:
2564      * <code><pre>
2565      *  import org.apache.util.URI$DefaultCharsetChanged;
2566      *      .
2567      *      .
2568      *      .
2569      *  try {
2570      *      URI.setDefaultDocumentCharset("EUC-KR");
2571      *  } catch (DefaultCharsetChanged cc) {
2572      *      // CASE 1: the exception could be ignored, when it is set by user
2573      *      if (cc.getReasonCode() == DefaultCharsetChanged.DOCUMENT_CHARSET) {
2574      *      // CASE 2: let user know the default document charset changed
2575      *      } else {
2576      *      // CASE 2: let user know the default protocol charset changed
2577      *      }
2578      *  }
2579      *  </pre></code>
2580      *
2581      * The API programmer is responsible to set the correct charset.
2582      * And each application should remember its own charset to support.
2583      *
2584      * @param charset the default charset for the document
2585      * @throws DefaultCharsetChanged default charset changed
2586      */
2587     public static void setDefaultDocumentCharset(String charset) 
2588         throws DefaultCharsetChanged {
2589             
2590         defaultDocumentCharset = charset;
2591         throw new DefaultCharsetChanged(DefaultCharsetChanged.DOCUMENT_CHARSET,
2592                 "the default document charset changed");
2593     }
2594 
2595 
2596     /***
2597      * Get the recommended default charset of the document.
2598      *
2599      * @return the default charset string
2600      */
2601     public static String getDefaultDocumentCharset() {
2602         return defaultDocumentCharset;
2603     }
2604 
2605 
2606     /***
2607      * Get the default charset of the document by locale.
2608      *
2609      * @return the default charset string by locale
2610      */
2611     public static String getDefaultDocumentCharsetByLocale() {
2612         return defaultDocumentCharsetByLocale;
2613     }
2614 
2615 
2616     /***
2617      * Get the default charset of the document by platform.
2618      *
2619      * @return the default charset string by platform
2620      */
2621     public static String getDefaultDocumentCharsetByPlatform() {
2622         return defaultDocumentCharsetByPlatform;
2623     }
2624 
2625     // ------------------------------------------------------------- The scheme
2626 
2627     /***
2628      * Get the scheme.
2629      *
2630      * @return the scheme
2631      */
2632     public char[] getRawScheme() {
2633         return _scheme;
2634     }
2635 
2636 
2637     /***
2638      * Get the scheme.
2639      *
2640      * @return the scheme
2641      * null if undefined scheme
2642      */
2643     public String getScheme() {
2644         return (_scheme == null) ? null : new String(_scheme);
2645     }
2646 
2647     // ---------------------------------------------------------- The authority
2648 
2649     /***
2650      * Set the authority.  It can be one type of server, hostport, hostname,
2651      * IPv4address, IPv6reference and reg_name.
2652      * <p><blockquote><pre>
2653      *   authority     = server | reg_name
2654      * </pre></blockquote><p>
2655      *
2656      * @param escapedAuthority the raw escaped authority
2657      * @throws URIException If {@link 
2658      * #parseAuthority(java.lang.String,boolean)} fails
2659      * @throws NullPointerException null authority
2660      */
2661     public void setRawAuthority(char[] escapedAuthority) 
2662         throws URIException, NullPointerException {
2663             
2664         parseAuthority(new String(escapedAuthority), true);
2665         setURI();
2666     }
2667 
2668 
2669     /***
2670      * Set the authority.  It can be one type of server, hostport, hostname,
2671      * IPv4address, IPv6reference and reg_name.
2672      * Note that there is no setAuthority method by the escape encoding reason.
2673      *
2674      * @param escapedAuthority the escaped authority string
2675      * @throws URIException If {@link 
2676      * #parseAuthority(java.lang.String,boolean)} fails
2677      */
2678     public void setEscapedAuthority(String escapedAuthority)
2679         throws URIException {
2680 
2681         parseAuthority(escapedAuthority, true);
2682         setURI();
2683     }
2684 
2685 
2686     /***
2687      * Get the raw-escaped authority.
2688      *
2689      * @return the raw-escaped authority
2690      */
2691     public char[] getRawAuthority() {
2692         return _authority;
2693     }
2694 
2695 
2696     /***
2697      * Get the escaped authority.
2698      *
2699      * @return the escaped authority
2700      */
2701     public String getEscapedAuthority() {
2702         return (_authority == null) ? null : new String(_authority);
2703     }
2704 
2705 
2706     /***
2707      * Get the authority.
2708      *
2709      * @return the authority
2710      * @throws URIException If {@link #decode} fails
2711      */
2712     public String getAuthority() throws URIException {
2713         return (_authority == null) ? null : decode(_authority,
2714                 getProtocolCharset());
2715     }
2716 
2717     // ----------------------------------------------------------- The userinfo
2718 
2719     /***
2720      * Get the raw-escaped userinfo.
2721      *
2722      * @return the raw-escaped userinfo
2723      * @see #getAuthority
2724      */
2725     public char[] getRawUserinfo() {
2726         return _userinfo;
2727     }
2728 
2729 
2730     /***
2731      * Get the escaped userinfo.
2732      *
2733      * @return the escaped userinfo
2734      * @see #getAuthority
2735      */
2736     public String getEscapedUserinfo() {
2737         return (_userinfo == null) ? null : new String(_userinfo);
2738     }
2739 
2740 
2741     /***
2742      * Get the userinfo.
2743      *
2744      * @return the userinfo
2745      * @throws URIException If {@link #decode} fails
2746      * @see #getAuthority
2747      */
2748     public String getUserinfo() throws URIException {
2749         return (_userinfo == null) ? null : decode(_userinfo,
2750                 getProtocolCharset());
2751     }
2752 
2753     // --------------------------------------------------------------- The host
2754 
2755     /***
2756      * Get the host.
2757      * <p><blockquote><pre>
2758      *   host          = hostname | IPv4address | IPv6reference
2759      * </pre></blockquote><p>
2760      *
2761      * @return the host
2762      * @see #getAuthority
2763      */
2764     public char[] getRawHost() {
2765         return _host;
2766     }
2767 
2768 
2769     /***
2770      * Get the host.
2771      * <p><blockquote><pre>
2772      *   host          = hostname | IPv4address | IPv6reference
2773      * </pre></blockquote><p>
2774      *
2775      * @return the host
2776      * @throws URIException If {@link #decode} fails
2777      * @see #getAuthority
2778      */
2779     public String getHost() throws URIException {
2780         if (_host != null) {
2781             return decode(_host, getProtocolCharset());
2782         } else {
2783             return null;
2784         }
2785     }
2786 
2787     // --------------------------------------------------------------- The port
2788 
2789     /***
2790      * Get the port.  In order to get the specfic default port, the specific
2791      * protocol-supported class extended from the URI class should be used.
2792      * It has the server-based naming authority.
2793      *
2794      * @return the port
2795      * if -1, it has the default port for the scheme or the server-based
2796      * naming authority is not supported in the specific URI.
2797      */
2798     public int getPort() {
2799         return _port;
2800     }
2801 
2802     // --------------------------------------------------------------- The path
2803 
2804     /***
2805      * Set the raw-escaped path.
2806      *
2807      * @param escapedPath the path character sequence
2808      * @throws URIException encoding error or not proper for initial instance
2809      * @see #encode
2810      */
2811     public void setRawPath(char[] escapedPath) throws URIException {
2812         if (escapedPath == null || escapedPath.length == 0) {
2813             _path = _opaque = escapedPath;
2814             setURI();
2815             return;
2816         }
2817         // remove the fragment identifier
2818         escapedPath = removeFragmentIdentifier(escapedPath);
2819         if (_is_net_path || _is_abs_path) {
2820             if (escapedPath[0] != '/') {
2821                 throw new URIException(URIException.PARSING,
2822                         "not absolute path");
2823             }
2824             if (!validate(escapedPath, abs_path)) {
2825                 throw new URIException(URIException.ESCAPING,
2826                         "escaped absolute path not valid");
2827             }
2828             _path = escapedPath;
2829         } else if (_is_rel_path) {
2830             int at = indexFirstOf(escapedPath, '/');
2831             if (at == 0) {
2832                 throw new URIException(URIException.PARSING, "incorrect path");
2833             }
2834             if (at > 0 && !validate(escapedPath, 0, at - 1, rel_segment) 
2835                 && !validate(escapedPath, at, -1, abs_path) 
2836                 || at < 0 && !validate(escapedPath, 0, -1, rel_segment)) {
2837             
2838                 throw new URIException(URIException.ESCAPING,
2839                         "escaped relative path not valid");
2840             }
2841             _path = escapedPath;
2842         } else if (_is_opaque_part) {
2843             if (!uric_no_slash.get(escapedPath[0]) 
2844                 && !validate(escapedPath, 1, -1, uric)) {
2845                 throw new URIException(URIException.ESCAPING,
2846                     "escaped opaque part not valid");
2847             }
2848             _opaque = escapedPath;
2849         } else {
2850             throw new URIException(URIException.PARSING, "incorrect path");
2851         }
2852         setURI();
2853     }
2854 
2855 
2856     /***
2857      * Set the escaped path.
2858      *
2859      * @param escapedPath the escaped path string
2860      * @throws URIException encoding error or not proper for initial instance
2861      * @see #encode
2862      */
2863     public void setEscapedPath(String escapedPath) throws URIException {
2864         if (escapedPath == null) {
2865             _path = _opaque = null;
2866             setURI();
2867             return;
2868         }
2869         setRawPath(escapedPath.toCharArray());
2870     }
2871 
2872 
2873     /***
2874      * Set the path.
2875      *
2876      * @param path the path string
2877      * @throws URIException set incorrectly or fragment only
2878      * @see #encode
2879      */
2880     public void setPath(String path) throws URIException {
2881 
2882         if (path == null || path.length() == 0) {
2883             _path = _opaque = (path == null) ? null : path.toCharArray();
2884             setURI();
2885             return;
2886         }
2887         // set the charset to do escape encoding
2888         String charset = getProtocolCharset();
2889 
2890         if (_is_net_path || _is_abs_path) {
2891             _path = encode(path, allowed_abs_path, charset);
2892         } else if (_is_rel_path) {
2893             StringBuffer buff = new StringBuffer(path.length());
2894             int at = path.indexOf('/');
2895             if (at == 0) { // never 0
2896                 throw new URIException(URIException.PARSING,
2897                         "incorrect relative path");
2898             }
2899             if (at > 0) {
2900                 buff.append(encode(path.substring(0, at), allowed_rel_path,
2901                             charset));
2902                 buff.append(encode(path.substring(at), allowed_abs_path,
2903                             charset));
2904             } else {
2905                 buff.append(encode(path, allowed_rel_path, charset));
2906             }
2907             _path = buff.toString().toCharArray();
2908         } else if (_is_opaque_part) {
2909             StringBuffer buf = new StringBuffer();
2910             buf.insert(0, encode(path.substring(0, 1), uric_no_slash, charset));
2911             buf.insert(1, encode(path.substring(1), uric, charset));
2912             _opaque = buf.toString().toCharArray();
2913         } else {
2914             throw new URIException(URIException.PARSING, "incorrect path");
2915         }
2916         setURI();
2917     }
2918 
2919 
2920     /***
2921      * Resolve the base and relative path.
2922      *
2923      * @param basePath a character array of the basePath
2924      * @param relPath a character array of the relPath
2925      * @return the resolved path
2926      * @throws URIException no more higher path level to be resolved
2927      */
2928     protected char[] resolvePath(char[] basePath, char[] relPath)
2929         throws URIException {
2930 
2931         // REMINDME: paths are never null
2932         String base = (basePath == null) ? "" : new String(basePath);
2933         int at = base.lastIndexOf('/');
2934         if (at != -1) {
2935             basePath = base.substring(0, at + 1).toCharArray();
2936         }
2937         // _path could be empty
2938         if (relPath == null || relPath.length == 0) {
2939             return normalize(basePath);
2940         } else if (relPath[0] == '/') {
2941             return normalize(relPath);
2942         } else {
2943             StringBuffer buff = new StringBuffer(base.length() 
2944                 + relPath.length);
2945             buff.append((at != -1) ? base.substring(0, at + 1) : "/");
2946             buff.append(relPath);
2947             return normalize(buff.toString().toCharArray());
2948         }
2949     }
2950 
2951 
2952     /***
2953      * Get the raw-escaped current hierarchy level in the given path.
2954      * If the last namespace is a collection, the slash mark ('/') should be
2955      * ended with at the last character of the path string.
2956      *
2957      * @param path the path
2958      * @return the current hierarchy level
2959      * @throws URIException no hierarchy level
2960      */
2961     protected char[] getRawCurrentHierPath(char[] path) throws URIException {
2962 
2963         if (_is_opaque_part) {
2964             throw new URIException(URIException.PARSING, "no hierarchy level");
2965         }
2966         if (path == null) {
2967             throw new URIException(URIException.PARSING, "empty path");
2968         }
2969         String buff = new String(path);
2970         int first = buff.indexOf('/');
2971         int last = buff.lastIndexOf('/');
2972         if (last == 0) {
2973             return rootPath;
2974         } else if (first != last && last != -1) {
2975             return buff.substring(0, last).toCharArray();
2976         }
2977         // FIXME: it could be a document on the server side
2978         return path;
2979     }
2980 
2981 
2982     /***
2983      * Get the raw-escaped current hierarchy level.
2984      *
2985      * @return the raw-escaped current hierarchy level
2986      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
2987      */
2988     public char[] getRawCurrentHierPath() throws URIException {
2989         return (_path == null) ? null : getRawCurrentHierPath(_path);
2990     }
2991  
2992 
2993     /***
2994      * Get the escaped current hierarchy level.
2995      *
2996      * @return the escaped current hierarchy level
2997      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
2998      */
2999     public String getEscapedCurrentHierPath() throws URIException {
3000         char[] path = getRawCurrentHierPath();
3001         return (path == null) ? null : new String(path);
3002     }
3003  
3004 
3005     /***
3006      * Get the current hierarchy level.
3007      *
3008      * @return the current hierarchy level
3009      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3010      * @see #decode
3011      */
3012     public String getCurrentHierPath() throws URIException {
3013         char[] path = getRawCurrentHierPath();
3014         return (path == null) ? null : decode(path, getProtocolCharset());
3015     }
3016 
3017 
3018     /***
3019      * Get the level above the this hierarchy level.
3020      *
3021      * @return the raw above hierarchy level
3022      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3023      */
3024     public char[] getRawAboveHierPath() throws URIException {
3025         char[] path = getRawCurrentHierPath();
3026         return (path == null) ? null : getRawCurrentHierPath(path);
3027     }
3028 
3029 
3030     /***
3031      * Get the level above the this hierarchy level.
3032      *
3033      * @return the raw above hierarchy level
3034      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3035      */
3036     public String getEscapedAboveHierPath() throws URIException {
3037         char[] path = getRawAboveHierPath();
3038         return (path == null) ? null : new String(path);
3039     }
3040 
3041 
3042     /***
3043      * Get the level above the this hierarchy level.
3044      *
3045      * @return the above hierarchy level
3046      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3047      * @see #decode
3048      */
3049     public String getAboveHierPath() throws URIException {
3050         char[] path = getRawAboveHierPath();
3051         return (path == null) ? null : decode(path, getProtocolCharset());
3052     }
3053 
3054 
3055     /***
3056      * Get the raw-escaped path.
3057      * <p><blockquote><pre>
3058      *   path          = [ abs_path | opaque_part ]
3059      * </pre></blockquote><p>
3060      *
3061      * @return the raw-escaped path
3062      */
3063     public char[] getRawPath() {
3064         return _is_opaque_part ? _opaque : _path;
3065     }
3066 
3067 
3068     /***
3069      * Get the escaped path.
3070      * <p><blockquote><pre>
3071      *   path          = [ abs_path | opaque_part ]
3072      *   abs_path      = "/"  path_segments 
3073      *   opaque_part   = uric_no_slash *uric
3074      * </pre></blockquote><p>
3075      *
3076      * @return the escaped path string
3077      */
3078     public String getEscapedPath() {
3079         char[] path = getRawPath();
3080         return (path == null) ? null : new String(path);
3081     }
3082 
3083 
3084     /***
3085      * Get the path.
3086      * <p><blockquote><pre>
3087      *   path          = [ abs_path | opaque_part ]
3088      * </pre></blockquote><p>
3089      * @return the path string
3090      * @throws URIException If {@link #decode} fails.
3091      * @see #decode
3092      */
3093     public String getPath() throws URIException { 
3094         char[] path =  getRawPath();
3095         return (path == null) ? null : decode(path, getProtocolCharset());
3096     }
3097 
3098 
3099     /***
3100      * Get the raw-escaped basename of the path.
3101      *
3102      * @return the raw-escaped basename
3103      */
3104     public char[] getRawName() {
3105         if (_path == null) { 
3106             return null;
3107         }
3108 
3109         int at = 0;
3110         for (int i = _path.length - 1; i >= 0; i--) {
3111             if (_path[i] == '/') {
3112                 at = i + 1;
3113                 break;
3114             }
3115         }
3116         int len = _path.length - at;
3117         char[] basename =  new char[len];
3118         System.arraycopy(_path, at, basename, 0, len);
3119         return basename;
3120     }
3121 
3122 
3123     /***
3124      * Get the escaped basename of the path.
3125      *
3126      * @return the escaped basename string
3127      */
3128     public String getEscapedName() {
3129         char[] basename = getRawName();
3130         return (basename == null) ? null : new String(basename);
3131     }
3132 
3133 
3134     /***
3135      * Get the basename of the path.
3136      *
3137      * @return the basename string
3138      * @throws URIException incomplete trailing escape pattern or unsupported
3139      * character encoding
3140      * @see #decode
3141      */
3142     public String getName() throws URIException {
3143         char[] basename = getRawName();
3144         return (basename == null) ? null : decode(getRawName(),
3145                 getProtocolCharset());
3146     }
3147 
3148     // ----------------------------------------------------- The path and query 
3149 
3150     /***
3151      * Get the raw-escaped path and query.
3152      *
3153      * @return the raw-escaped path and query
3154      */
3155     public char[] getRawPathQuery() {
3156 
3157         if (_path == null && _query == null) {
3158             return null;
3159         }
3160         StringBuffer buff = new StringBuffer();
3161         if (_path != null) {
3162             buff.append(_path);
3163         }
3164         if (_query != null) {
3165             buff.append('?');
3166             buff.append(_query);
3167         }
3168         return buff.toString().toCharArray();
3169     }
3170 
3171 
3172     /***
3173      * Get the escaped query.
3174      *
3175      * @return the escaped path and query string
3176      */
3177     public String getEscapedPathQuery() {
3178         char[] rawPathQuery = getRawPathQuery();
3179         return (rawPathQuery == null) ? null : new String(rawPathQuery);
3180     }
3181 
3182 
3183     /***
3184      * Get the path and query.
3185      *
3186      * @return the path and query string.
3187      * @throws URIException incomplete trailing escape pattern or unsupported
3188      * character encoding
3189      * @see #decode
3190      */
3191     public String getPathQuery() throws URIException {
3192         char[] rawPathQuery = getRawPathQuery();
3193         return (rawPathQuery == null) ? null : decode(rawPathQuery,
3194                 getProtocolCharset());
3195     }
3196 
3197     // -------------------------------------------------------------- The query 
3198 
3199     /***
3200      * Set the raw-escaped query.
3201      *
3202      * @param escapedQuery the raw-escaped query
3203      * @throws URIException escaped query not valid
3204      */
3205     public void setRawQuery(char[] escapedQuery) throws URIException {
3206         if (escapedQuery == null || escapedQuery.length == 0) {
3207             _query = escapedQuery;
3208             setURI();
3209             return;
3210         }
3211         // remove the fragment identifier
3212         escapedQuery = removeFragmentIdentifier(escapedQuery);
3213         if (!validate(escapedQuery, query)) {
3214             throw new URIException(URIException.ESCAPING,
3215                     "escaped query not valid");
3216         }
3217         _query = escapedQuery;
3218         setURI();
3219     }
3220 
3221 
3222     /***
3223      * Set the escaped query string.
3224      *
3225      * @param escapedQuery the escaped query string
3226      * @throws URIException escaped query not valid
3227      */
3228     public void setEscapedQuery(String escapedQuery) throws URIException {
3229         if (escapedQuery == null) {
3230             _query = null;
3231             setURI();
3232             return;
3233         }
3234         setRawQuery(escapedQuery.toCharArray());
3235     }
3236 
3237 
3238     /***
3239      * Set the query.
3240      * <p>
3241      * When a query string is not misunderstood the reserved special characters
3242      * ("&amp;", "=", "+", ",", and "$") within a query component, it is
3243      * recommended to use in encoding the whole query with this method.
3244      * <p>
3245      * The additional APIs for the special purpose using by the reserved
3246      * special characters used in each protocol are implemented in each protocol
3247      * classes inherited from <code>URI</code>.  So refer to the same-named APIs
3248      * implemented in each specific protocol instance.
3249      *
3250      * @param query the query string.
3251      * @throws URIException incomplete trailing escape pattern or unsupported
3252      * character encoding
3253      * @see #encode
3254      */
3255     public void setQuery(String query) throws URIException {
3256         if (query == null || query.length() == 0) {
3257             _query = (query == null) ? null : query.toCharArray();
3258             setURI();
3259             return;
3260         }
3261         setRawQuery(encode(query, allowed_query, getProtocolCharset()));
3262     }
3263 
3264 
3265     /***
3266      * Get the raw-escaped query.
3267      *
3268      * @return the raw-escaped query
3269      */
3270     public char[] getRawQuery() {
3271         return _query;
3272     }
3273 
3274 
3275     /***
3276      * Get the escaped query.
3277      *
3278      * @return the escaped query string
3279      */
3280     public String getEscapedQuery() {
3281         return (_query == null) ? null : new String(_query);
3282     }
3283 
3284 
3285     /***
3286      * Get the query.
3287      *
3288      * @return the query string.
3289      * @throws URIException incomplete trailing escape pattern or unsupported
3290      * character encoding
3291      * @see #decode
3292      */
3293     public String getQuery() throws URIException {
3294         return (_query == null) ? null : decode(_query, getProtocolCharset());
3295     }
3296 
3297     // ----------------------------------------------------------- The fragment 
3298 
3299     /***
3300      * Set the raw-escaped fragment.
3301      *
3302      * @param escapedFragment the raw-escaped fragment
3303      * @throws URIException escaped fragment not valid
3304      */
3305     public void setRawFragment(char[] escapedFragment) throws URIException {
3306         if (escapedFragment == null || escapedFragment.length == 0) {
3307             _fragment = escapedFragment;
3308             hash = 0;
3309             return;
3310         }
3311         if (!validate(escapedFragment, fragment)) {
3312             throw new URIException(URIException.ESCAPING,
3313                     "escaped fragment not valid");
3314         }
3315         _fragment = escapedFragment;
3316         hash = 0;
3317     }
3318 
3319 
3320     /***
3321      * Set the escaped fragment string.
3322      *
3323      * @param escapedFragment the escaped fragment string
3324      * @throws URIException escaped fragment not valid
3325      */
3326     public void setEscapedFragment(String escapedFragment) throws URIException {
3327         if (escapedFragment == null) {
3328             _fragment = null;
3329             hash = 0;
3330             return;
3331         }
3332         setRawFragment(escapedFragment.toCharArray());
3333     }
3334 
3335 
3336     /***
3337      * Set the fragment.
3338      *
3339      * @param fragment the fragment string.
3340      * @throws URIException If an error occurs.
3341      */
3342     public void setFragment(String fragment) throws URIException {
3343         if (fragment == null || fragment.length() == 0) {
3344             _fragment = (fragment == null) ? null : fragment.toCharArray();
3345             hash = 0;
3346             return;
3347         }
3348         _fragment = encode(fragment, allowed_fragment, getProtocolCharset());
3349         hash = 0;
3350     }
3351 
3352 
3353     /***
3354      * Get the raw-escaped fragment.
3355      * <p>
3356      * The optional fragment identifier is not part of a URI, but is often used
3357      * in conjunction with a URI.
3358      * <p>
3359      * The format and interpretation of fragment identifiers is dependent on
3360      * the media type [RFC2046] of the retrieval result.
3361      * <p>
3362      * A fragment identifier is only meaningful when a URI reference is
3363      * intended for retrieval and the result of that retrieval is a document
3364      * for which the identified fragment is consistently defined.
3365      *
3366      * @return the raw-escaped fragment
3367      */
3368     public char[] getRawFragment() {
3369         return _fragment;
3370     }
3371 
3372 
3373     /***
3374      * Get the escaped fragment.
3375      *
3376      * @return the escaped fragment string
3377      */
3378     public String getEscapedFragment() {
3379         return (_fragment == null) ? null : new String(_fragment);
3380     }
3381 
3382 
3383     /***
3384      * Get the fragment.
3385      *
3386      * @return the fragment string
3387      * @throws URIException incomplete trailing escape pattern or unsupported
3388      * character encoding
3389      * @see #decode
3390      */
3391     public String getFragment() throws URIException {
3392         return (_fragment == null) ? null : decode(_fragment,
3393                 getProtocolCharset());
3394     }
3395 
3396     // ------------------------------------------------------------- Utilities 
3397 
3398     /***
3399      * Remove the fragment identifier of the given component.
3400      *
3401      * @param component the component that a fragment may be included
3402      * @return the component that the fragment identifier is removed
3403      */
3404     protected char[] removeFragmentIdentifier(char[] component) {
3405         if (component == null) { 
3406             return null;
3407         }
3408         int lastIndex = new String(component).indexOf('#');
3409         if (lastIndex != -1) {
3410             component = new String(component).substring(0,
3411                     lastIndex).toCharArray();
3412         }
3413         return component;
3414     }
3415 
3416 
3417     /***
3418      * Normalize the given hier path part.
3419      * 
3420      * <p>Algorithm taken from URI reference parser at 
3421      * http://www.apache.org/~fielding/uri/rev-2002/issues.html.
3422      *
3423      * @param path the path to normalize
3424      * @return the normalized path
3425      * @throws URIException no more higher path level to be normalized
3426      */
3427     protected char[] normalize(char[] path) throws URIException {
3428 
3429         if (path == null) { 
3430             return null;
3431         }
3432 
3433         String normalized = new String(path);
3434 
3435         // If the buffer begins with "./" or "../", the "." or ".." is removed.
3436         if (normalized.startsWith("./")) {
3437             normalized = normalized.substring(1);
3438         } else if (normalized.startsWith("../")) {
3439             normalized = normalized.substring(2);
3440         } else if (normalized.startsWith("..")) {
3441             normalized = normalized.substring(2);
3442         }
3443 
3444         // All occurrences of "/./" in the buffer are replaced with "/"
3445         int index = -1;
3446         while ((index = normalized.indexOf("/./")) != -1) {
3447             normalized = normalized.substring(0, index) + normalized.substring(index + 2);
3448         }
3449 
3450         // If the buffer ends with "/.", the "." is removed.
3451         if (normalized.endsWith("/.")) {
3452             normalized = normalized.substring(0, normalized.length() - 1);
3453         }
3454 
3455         int startIndex = 0;
3456 
3457         // All occurrences of "/<segment>/../" in the buffer, where ".."
3458         // and <segment> are complete path segments, are iteratively replaced
3459         // with "/" in order from left to right until no matching pattern remains.
3460         // If the buffer ends with "/<segment>/..", that is also replaced
3461         // with "/".  Note that <segment> may be empty.
3462         while ((index = normalized.indexOf("/../", startIndex)) != -1) {
3463             int slashIndex = normalized.lastIndexOf('/', index - 1);
3464             if (slashIndex >= 0) {
3465                 normalized = normalized.substring(0, slashIndex) + normalized.substring(index + 3);
3466             } else {
3467                 startIndex = index + 3;   
3468             }
3469         }
3470         if (normalized.endsWith("/..")) {
3471             int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4);
3472             if (slashIndex >= 0) {
3473                 normalized = normalized.substring(0, slashIndex + 1);
3474             }
3475         }
3476 
3477         // All prefixes of "<segment>/../" in the buffer, where ".."
3478         // and <segment> are complete path segments, are iteratively replaced
3479         // with "/" in order from left to right until no matching pattern remains.
3480         // If the buffer ends with "<segment>/..", that is also replaced
3481         // with "/".  Note that <segment> may be empty.
3482         while ((index = normalized.indexOf("/../")) != -1) {
3483             int slashIndex = normalized.lastIndexOf('/', index - 1);
3484             if (slashIndex >= 0) {
3485                 break;
3486             } else {
3487                 normalized = normalized.substring(index + 3);
3488             }
3489         }
3490         if (normalized.endsWith("/..")) {
3491             int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4);
3492             if (slashIndex < 0) {
3493                 normalized = "/";
3494             }
3495         }
3496 
3497         return normalized.toCharArray();
3498     }
3499 
3500 
3501     /***
3502      * Normalizes the path part of this URI.  Normalization is only meant to be performed on 
3503      * URIs with an absolute path.  Calling this method on a relative path URI will have no
3504      * effect.
3505      *
3506      * @throws URIException no more higher path level to be normalized
3507      * 
3508      * @see #isAbsPath()
3509      */
3510     public void normalize() throws URIException {
3511         if (isAbsPath()) {
3512             _path = normalize(_path);
3513             setURI();
3514         }
3515     }
3516 
3517 
3518     /***
3519      * Test if the first array is equal to the second array.
3520      *
3521      * @param first the first character array
3522      * @param second the second character array
3523      * @return true if they're equal
3524      */
3525     protected boolean equals(char[] first, char[] second) {
3526 
3527         if (first == null && second == null) {
3528             return true;
3529         }
3530         if (first == null || second == null) {
3531             return false;
3532         }
3533         if (first.length != second.length) {
3534             return false;
3535         }
3536         for (int i = 0; i < first.length; i++) {
3537             if (first[i] != second[i]) {
3538                 return false;
3539             }
3540         }
3541         return true;
3542     }
3543 
3544 
3545     /***
3546      * Test an object if this URI is equal to another.
3547      *
3548      * @param obj an object to compare
3549      * @return true if two URI objects are equal
3550      */
3551     public boolean equals(Object obj) {
3552 
3553         // normalize and test each components
3554         if (obj == this) {
3555             return true;
3556         }
3557         if (!(obj instanceof URI)) {
3558             return false;
3559         }
3560         URI another = (URI) obj;
3561         // scheme
3562         if (!equals(_scheme, another._scheme)) {
3563             return false;
3564         }
3565         // is_opaque_part or is_hier_part?  and opaque
3566         if (!equals(_opaque, another._opaque)) {
3567             return false;
3568         }
3569         // is_hier_part
3570         // has_authority
3571         if (!equals(_authority, another._authority)) {
3572             return false;
3573         }
3574         // path
3575         if (!equals(_path, another._path)) {
3576             return false;
3577         }
3578         // has_query
3579         if (!equals(_query, another._query)) {
3580             return false;
3581         }
3582         // has_fragment?  should be careful of the only fragment case.
3583         if (!equals(_fragment, another._fragment)) {
3584             return false;
3585         }
3586         return true;
3587     }
3588 
3589     // ---------------------------------------------------------- Serialization
3590 
3591     /***
3592      * Write the content of this URI.
3593      *
3594      * @param oos the object-output stream
3595      * @throws IOException If an IO problem occurs.
3596      */
3597     protected void writeObject(ObjectOutputStream oos)
3598         throws IOException {
3599 
3600         oos.defaultWriteObject();
3601     }
3602 
3603 
3604     /***
3605      * Read a URI.
3606      *
3607      * @param ois the object-input stream
3608      * @throws ClassNotFoundException If one of the classes specified in the
3609      * input stream cannot be found.
3610      * @throws IOException If an IO problem occurs.
3611      */
3612     protected void readObject(ObjectInputStream ois)
3613         throws ClassNotFoundException, IOException {
3614 
3615         ois.defaultReadObject();
3616     }
3617 
3618     // -------------------------------------------------------------- Hash code
3619 
3620     /***
3621      * Return a hash code for this URI.
3622      *
3623      * @return a has code value for this URI
3624      */
3625     public int hashCode() {
3626         if (hash == 0) {
3627             char[] c = _uri;
3628             if (c != null) {
3629                 for (int i = 0, len = c.length; i < len; i++) {
3630                     hash = 31 * hash + c[i];
3631                 }
3632             }
3633             c = _fragment;
3634             if (c != null) {
3635                 for (int i = 0, len = c.length; i < len; i++) {
3636                     hash = 31 * hash + c[i];
3637                 }
3638             }
3639         }
3640         return hash;
3641     }
3642 
3643     // ------------------------------------------------------------- Comparison 
3644 
3645     /***
3646      * Compare this URI to another object. 
3647      *
3648      * @param obj the object to be compared.
3649      * @return 0, if it's same,
3650      * -1, if failed, first being compared with in the authority component
3651      * @throws ClassCastException not URI argument
3652      */
3653     public int compareTo(Object obj) throws ClassCastException {
3654 
3655         URI another = (URI) obj;
3656         if (!equals(_authority, another.getRawAuthority())) { 
3657             return -1;
3658         }
3659         return toString().compareTo(another.toString());
3660     }
3661 
3662     // ------------------------------------------------------------------ Clone
3663 
3664     /***
3665      * Create and return a copy of this object, the URI-reference containing
3666      * the userinfo component.  Notice that the whole URI-reference including
3667      * the userinfo component counld not be gotten as a <code>String</code>.
3668      * <p>
3669      * To copy the identical <code>URI</code> object including the userinfo
3670      * component, it should be used.
3671      *
3672      * @return a clone of this instance
3673      */
3674     public synchronized Object clone() {
3675 
3676         URI instance = new URI();
3677 
3678         instance._uri = _uri;
3679         instance._scheme = _scheme;
3680         instance._opaque = _opaque;
3681         instance._authority = _authority;
3682         instance._userinfo = _userinfo;
3683         instance._host = _host;
3684         instance._port = _port;
3685         instance._path = _path;
3686         instance._query = _query;
3687         instance._fragment = _fragment;
3688         // the charset to do escape encoding for this instance
3689         instance.protocolCharset = protocolCharset;
3690         // flags
3691         instance._is_hier_part = _is_hier_part;
3692         instance._is_opaque_part = _is_opaque_part;
3693         instance._is_net_path = _is_net_path;
3694         instance._is_abs_path = _is_abs_path;
3695         instance._is_rel_path = _is_rel_path;
3696         instance._is_reg_name = _is_reg_name;
3697         instance._is_server = _is_server;
3698         instance._is_hostname = _is_hostname;
3699         instance._is_IPv4address = _is_IPv4address;
3700         instance._is_IPv6reference = _is_IPv6reference;
3701 
3702         return instance;
3703     }
3704 
3705     // ------------------------------------------------------------ Get the URI
3706 
3707     /***
3708      * It can be gotten the URI character sequence. It's raw-escaped.
3709      * For the purpose of the protocol to be transported, it will be useful.
3710      * <p>
3711      * It is clearly unwise to use a URL that contains a password which is
3712      * intended to be secret. In particular, the use of a password within
3713      * the 'userinfo' component of a URL is strongly disrecommended except
3714      * in those rare cases where the 'password' parameter is intended to be
3715      * public.
3716      * <p>
3717      * When you want to get each part of the userinfo, you need to use the
3718      * specific methods in the specific URL. It depends on the specific URL.
3719      *
3720      * @return the URI character sequence
3721      */
3722     public char[] getRawURI() {
3723         return _uri;
3724     }
3725 
3726 
3727     /***
3728      * It can be gotten the URI character sequence. It's escaped.
3729      * For the purpose of the protocol to be transported, it will be useful.
3730      *
3731      * @return the escaped URI string
3732      */
3733     public String getEscapedURI() {
3734         return (_uri == null) ? null : new String(_uri);
3735     }
3736     
3737 
3738     /***
3739      * It can be gotten the URI character sequence.
3740      *
3741      * @return the original URI string
3742      * @throws URIException incomplete trailing escape pattern or unsupported
3743      * character encoding
3744      * @see #decode
3745      */
3746     public String getURI() throws URIException {
3747         return (_uri == null) ? null : decode(_uri, getProtocolCharset());
3748     }
3749 
3750 
3751     /***
3752      * Get the URI reference character sequence.
3753      *
3754      * @return the URI reference character sequence
3755      */
3756     public char[] getRawURIReference() {
3757         if (_fragment == null) { 
3758             return _uri;
3759         }
3760         if (_uri == null) { 
3761             return _fragment;
3762         }
3763         // if _uri != null &&  _fragment != null
3764         String uriReference = new String(_uri) + "#" + new String(_fragment);
3765         return uriReference.toCharArray();
3766     }
3767 
3768 
3769     /***
3770      * Get the escaped URI reference string.
3771      *
3772      * @return the escaped URI reference string
3773      */
3774     public String getEscapedURIReference() {
3775         char[] uriReference = getRawURIReference();
3776         return (uriReference == null) ? null : new String(uriReference);
3777     }
3778 
3779 
3780     /***
3781      * Get the original URI reference string.
3782      *
3783      * @return the original URI reference string
3784      * @throws URIException If {@link #decode} fails.
3785      */
3786     public String getURIReference() throws URIException {
3787         char[] uriReference = getRawURIReference();
3788         return (uriReference == null) ? null : decode(uriReference,
3789                 getProtocolCharset());
3790     }
3791 
3792 
3793     /***
3794      * Get the escaped URI string.
3795      * <p>
3796      * On the document, the URI-reference form is only used without the userinfo
3797      * component like http://jakarta.apache.org/ by the security reason.
3798      * But the URI-reference form with the userinfo component could be parsed.
3799      * <p>
3800      * In other words, this URI and any its subclasses must not expose the
3801      * URI-reference expression with the userinfo component like
3802      * http://user:password@hostport/restricted_zone.<br>
3803      * It means that the API client programmer should extract each user and
3804      * password to access manually.  Probably it will be supported in the each
3805      * subclass, however, not a whole URI-reference expression.
3806      *
3807      * @return the escaped URI string
3808      * @see #clone()
3809      */
3810     public String toString() {
3811         return getEscapedURI();
3812     }
3813 
3814 
3815     // ------------------------------------------------------------ Inner class
3816 
3817     /*** 
3818      * The charset-changed normal operation to represent to be required to
3819      * alert to user the fact the default charset is changed.
3820      */
3821     public static class DefaultCharsetChanged extends RuntimeException {
3822 
3823         // ------------------------------------------------------- constructors
3824 
3825         /***
3826          * The constructor with a reason string and its code arguments.
3827          *
3828          * @param reasonCode the reason code
3829          * @param reason the reason
3830          */
3831         public DefaultCharsetChanged(int reasonCode, String reason) {
3832             super(reason);
3833             this.reason = reason;
3834             this.reasonCode = reasonCode;
3835         }
3836 
3837         // ---------------------------------------------------------- constants
3838 
3839         /*** No specified reason code. */
3840         public static final int UNKNOWN = 0;
3841 
3842         /*** Protocol charset changed. */
3843         public static final int PROTOCOL_CHARSET = 1;
3844 
3845         /*** Document charset changed. */
3846         public static final int DOCUMENT_CHARSET = 2;
3847 
3848         // ------------------------------------------------- instance variables
3849 
3850         /*** The reason code. */
3851         private int reasonCode;
3852 
3853         /*** The reason message. */
3854         private String reason;
3855 
3856         // ------------------------------------------------------------ methods
3857 
3858         /***
3859          * Get the reason code.
3860          *
3861          * @return the reason code
3862          */
3863         public int getReasonCode() {
3864             return reasonCode;
3865         }
3866 
3867         /***
3868          * Get the reason message.
3869          *
3870          * @return the reason message
3871          */
3872         public String getReason() {
3873             return reason;
3874         }
3875 
3876     }
3877 
3878 
3879     /*** 
3880      * A mapping to determine the (somewhat arbitrarily) preferred charset for a
3881      * given locale.  Supports all locales recognized in JDK 1.1.
3882      * <p>
3883      * The distribution of this class is Servlets.com.    It was originally
3884      * written by Jason Hunter [jhunter at acm.org] and used by with permission.
3885      */
3886     public static class LocaleToCharsetMap {
3887 
3888         /*** A mapping of language code to charset */
3889         private static final Hashtable LOCALE_TO_CHARSET_MAP;
3890         static {
3891             LOCALE_TO_CHARSET_MAP = new Hashtable();
3892             LOCALE_TO_CHARSET_MAP.put("ar", "ISO-8859-6");
3893             LOCALE_TO_CHARSET_MAP.put("be", "ISO-8859-5");
3894             LOCALE_TO_CHARSET_MAP.put("bg", "ISO-8859-5");
3895             LOCALE_TO_CHARSET_MAP.put("ca", "ISO-8859-1");
3896             LOCALE_TO_CHARSET_MAP.put("cs", "ISO-8859-2");
3897             LOCALE_TO_CHARSET_MAP.put("da", "ISO-8859-1");
3898             LOCALE_TO_CHARSET_MAP.put("de", "ISO-8859-1");
3899             LOCALE_TO_CHARSET_MAP.put("el", "ISO-8859-7");
3900             LOCALE_TO_CHARSET_MAP.put("en", "ISO-8859-1");
3901             LOCALE_TO_CHARSET_MAP.put("es", "ISO-8859-1");
3902             LOCALE_TO_CHARSET_MAP.put("et", "ISO-8859-1");
3903             LOCALE_TO_CHARSET_MAP.put("fi", "ISO-8859-1");
3904             LOCALE_TO_CHARSET_MAP.put("fr", "ISO-8859-1");
3905             LOCALE_TO_CHARSET_MAP.put("hr", "ISO-8859-2");
3906             LOCALE_TO_CHARSET_MAP.put("hu", "ISO-8859-2");
3907             LOCALE_TO_CHARSET_MAP.put("is", "ISO-8859-1");
3908             LOCALE_TO_CHARSET_MAP.put("it", "ISO-8859-1");
3909             LOCALE_TO_CHARSET_MAP.put("iw", "ISO-8859-8");
3910             LOCALE_TO_CHARSET_MAP.put("ja", "Shift_JIS");
3911             LOCALE_TO_CHARSET_MAP.put("ko", "EUC-KR");
3912             LOCALE_TO_CHARSET_MAP.put("lt", "ISO-8859-2");
3913             LOCALE_TO_CHARSET_MAP.put("lv", "ISO-8859-2");
3914             LOCALE_TO_CHARSET_MAP.put("mk", "ISO-8859-5");
3915             LOCALE_TO_CHARSET_MAP.put("nl", "ISO-8859-1");
3916             LOCALE_TO_CHARSET_MAP.put("no", "ISO-8859-1");
3917             LOCALE_TO_CHARSET_MAP.put("pl", "ISO-8859-2");
3918             LOCALE_TO_CHARSET_MAP.put("pt", "ISO-8859-1");
3919             LOCALE_TO_CHARSET_MAP.put("ro", "ISO-8859-2");
3920             LOCALE_TO_CHARSET_MAP.put("ru", "ISO-8859-5");
3921             LOCALE_TO_CHARSET_MAP.put("sh", "ISO-8859-5");
3922             LOCALE_TO_CHARSET_MAP.put("sk", "ISO-8859-2");
3923             LOCALE_TO_CHARSET_MAP.put("sl", "ISO-8859-2");
3924             LOCALE_TO_CHARSET_MAP.put("sq", "ISO-8859-2");
3925             LOCALE_TO_CHARSET_MAP.put("sr", "ISO-8859-5");
3926             LOCALE_TO_CHARSET_MAP.put("sv", "ISO-8859-1");
3927             LOCALE_TO_CHARSET_MAP.put("tr", "ISO-8859-9");
3928             LOCALE_TO_CHARSET_MAP.put("uk", "ISO-8859-5");
3929             LOCALE_TO_CHARSET_MAP.put("zh", "GB2312");
3930             LOCALE_TO_CHARSET_MAP.put("zh_TW", "Big5");
3931         }
3932        
3933         /***
3934          * Get the preferred charset for the given locale.
3935          *
3936          * @param locale the locale
3937          * @return the preferred charset or null if the locale is not
3938          * recognized.
3939          */
3940         public static String getCharset(Locale locale) {
3941             // try for an full name match (may include country)
3942             String charset =
3943                 (String) LOCALE_TO_CHARSET_MAP.get(locale.toString());
3944             if (charset != null) { 
3945                 return charset;
3946             }
3947            
3948             // if a full name didn't match, try just the language
3949             charset = (String) LOCALE_TO_CHARSET_MAP.get(locale.getLanguage());
3950             return charset;  // may be null
3951         }
3952 
3953     }
3954 
3955 }
3956