001/* URLStreamHandler.java -- Abstract superclass for all protocol handlers
002   Copyright (C) 1998, 1999, 2002, 2003, 2004 Free Software Foundation, Inc.
003
004This file is part of GNU Classpath.
005
006GNU Classpath is free software; you can redistribute it and/or modify
007it under the terms of the GNU General Public License as published by
008the Free Software Foundation; either version 2, or (at your option)
009any later version.
010
011GNU Classpath is distributed in the hope that it will be useful, but
012WITHOUT ANY WARRANTY; without even the implied warranty of
013MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014General Public License for more details.
015
016You should have received a copy of the GNU General Public License
017along with GNU Classpath; see the file COPYING.  If not, write to the
018Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
01902110-1301 USA.
020
021Linking this library statically or dynamically with other modules is
022making a combined work based on this library.  Thus, the terms and
023conditions of the GNU General Public License cover the whole
024combination.
025
026As a special exception, the copyright holders of this library give you
027permission to link this library with independent modules to produce an
028executable, regardless of the license terms of these independent
029modules, and to copy and distribute the resulting executable under
030terms of your choice, provided that you also meet, for each linked
031independent module, the terms and conditions of the license of that
032module.  An independent module is a module which is not derived from
033or based on this library.  If you modify this library, you may extend
034this exception to your version of the library, but you are not
035obligated to do so.  If you do not wish to do so, delete this
036exception statement from your version. */
037
038package java.net;
039
040import gnu.java.lang.CPStringBuilder;
041
042import java.io.File;
043import java.io.IOException;
044
045
046/*
047 * Written using on-line Java Platform 1.2 API Specification, as well
048 * as "The Java Class Libraries", 2nd edition (Addison-Wesley, 1998).
049 * Status:  Believed complete and correct.
050 */
051
052/**
053 * This class is the superclass of all URL protocol handlers.  The URL
054 * class loads the appropriate protocol handler to establish a connection
055 * to a (possibly) remote service (eg, "http", "ftp") and to do protocol
056 * specific parsing of URL's.  Refer to the URL class documentation for
057 * details on how that class locates and loads protocol handlers.
058 * <p>
059 * A protocol handler implementation should override the openConnection()
060 * method, and optionally override the parseURL() and toExternalForm()
061 * methods if necessary. (The default implementations will parse/write all
062 * URL's in the same form as http URL's).  A protocol  specific subclass
063 * of URLConnection will most likely need to be created as well.
064 * <p>
065 * Note that the instance methods in this class are called as if they
066 * were static methods.  That is, a URL object to act on is passed with
067 * every call rather than the caller assuming the URL is stored in an
068 * instance variable of the "this" object.
069 * <p>
070 * The methods in this class are protected and accessible only to subclasses.
 * URLStreamHandler objects are intended for use by the URL class only,
072 * not by other classes (unless those classes are implementing protocols).
073 *
074 * @author Aaron M. Renn (arenn@urbanophile.com)
075 * @author Warren Levy (warrenl@cygnus.com)
076 *
077 * @see URL
078 */
079public abstract class URLStreamHandler
080{
081  /**
082   * Creates a URLStreamHander
083   */
084  public URLStreamHandler()
085  {
086  }
087
088  /**
089   * Returns a URLConnection for the passed in URL.  Note that this should
090   * not actually create the connection to the (possibly) remote host, but
091   * rather simply return a URLConnection object.  The connect() method of
092   * URL connection is used to establish the actual connection, possibly
093   * after the caller sets up various connection options.
094   *
095   * @param url The URL to get a connection object for
096   *
097   * @return A URLConnection object for the given URL
098   *
099   * @exception IOException If an error occurs
100   */
101  protected abstract URLConnection openConnection(URL url)
102    throws IOException;
103
104  /**
105   * This method parses the string passed in as a URL and set's the
106   * instance data fields in the URL object passed in to the various values
107   * parsed out of the string.  The start parameter is the position to start
108   * scanning the string.  This is usually the position after the ":" which
109   * terminates the protocol name.  The end parameter is the position to
110   * stop scanning.  This will be either the end of the String, or the
111   * position of the "#" character, which separates the "file" portion of
112   * the URL from the "anchor" portion.
113   * <p>
114   * This method assumes URL's are formatted like http protocol URL's, so
115   * subclasses that implement protocols with URL's the follow a different
116   * syntax should override this method.  The lone exception is that if
117   * the protocol name set in the URL is "file", this method will accept
118   * an empty hostname (i.e., "file:///"), which is legal for that protocol
119   *
120   * @param url The URL object in which to store the results
121   * @param spec The String-ized URL to parse
122   * @param start The position in the string to start scanning from
123   * @param end The position in the string to stop scanning
124   */
125  protected void parseURL(URL url, String spec, int start, int end)
126  {
127    String host = url.getHost();
128    int port = url.getPort();
129    String file = url.getFile();
130    String ref = url.getRef();
131    String userInfo = url.getUserInfo();
132    String authority = url.getAuthority();
133    String query = null;
134    
135    // On Windows we need to change \ to / for file URLs
136    char separator = File.separatorChar;
137    if (url.getProtocol().equals("file") && separator != '/')
138      {
139        file = file.replace(separator, '/');
140        spec = spec.replace(separator, '/');
141      }
142
143    if (spec.regionMatches(start, "//", 0, 2))
144      {
145        String genuineHost;
146        int hostEnd;
147        int colon;
148        int at_host;
149
150        start += 2;
151        int slash = spec.indexOf('/', start);
152        if (slash >= 0)
153          hostEnd = slash;
154        else
155          hostEnd = end;
156
157        authority = host = spec.substring(start, hostEnd);
158
159        // We first need a genuine host name (with userinfo).
160        // So we check for '@': if it's present check the port in the
161        // section after '@' in the other case check it in the full string.
162        // P.S.: We don't care having '@' at the beginning of the string.
163        if ((at_host = host.indexOf('@')) >= 0)
164          {
165            genuineHost = host.substring(at_host);
166            userInfo = host.substring(0, at_host);
167          }
168        else
169          genuineHost = host;
170
171        // Look for optional port number.  It is valid for the non-port
172        // part of the host name to be null (e.g. a URL "http://:80").
173        // TBD: JDK 1.2 in this case sets host to null rather than "";
174        // this is undocumented and likely an unintended side effect in 1.2
175        // so we'll be simple here and stick with "". Note that
176        // "http://" or "http:///" produce a "" host in JDK 1.2.
177        if ((colon = genuineHost.indexOf(':')) >= 0)
178          {
179            try
180              {
181                port = Integer.parseInt(genuineHost.substring(colon + 1));
182              }
183            catch (NumberFormatException e)
184              {
185                // Ignore invalid port values; port is already set to u's
186                // port.
187              }
188
189            // Now we must cut the port number in the original string.
190            if (at_host >= 0)
191              host = host.substring(0, at_host + colon);
192            else
193              host = host.substring(0, colon);
194          }
195        file = null;
196        start = hostEnd;
197      }
198    else if (host == null)
199      host = "";
200
201    if (file == null || file.length() == 0
202        || (start < end && spec.charAt(start) == '/'))
203      {
204        // No file context available; just spec for file.
205        // Or this is an absolute path name; ignore any file context.
206        file = spec.substring(start, end);
207        ref = null;
208      }
209    else if (start < end)
210      {
211        // Context is available, but only override it if there is a new file.
212        int lastSlash = file.lastIndexOf('/');
213        if (lastSlash < 0)
214          file = spec.substring(start, end);
215        else
216          file = (file.substring(0, lastSlash)
217                  + '/' + spec.substring(start, end));
218
219        // For URLs constructed relative to a context, we
220        // need to canonicalise the file path.
221        file = canonicalizeFilename(file);
222
223        ref = null;
224      }
225
226    if (ref == null)
227      {
228        // Normally there should be no '#' in the file part,
229        // but we are nice.
230        int hash = file.indexOf('#');
231        if (hash != -1)
232          {
233            ref = file.substring(hash + 1, file.length());
234            file = file.substring(0, hash);
235          }
236      }
237
238    // We care about the query tag only if there is no reference at all.
239    if (ref == null)
240      {
241          int queryTag = file.indexOf('?');
242          if (queryTag != -1)
243            {
244              query = file.substring(queryTag + 1);
245              file = file.substring(0, queryTag);
246            }
247      }
248
249    // XXX - Classpath used to call PlatformHelper.toCanonicalForm() on
250    // the file part. It seems like overhead, but supposedly there is some
251    // benefit in windows based systems (it also lowercased the string).
252    setURL(url, url.getProtocol(), host, port, authority, userInfo, file, query, ref);
253  }
254
255  /*
256   * Canonicalize a filename.
257   */
258  private static String canonicalizeFilename(String file)
259  {
260    // XXX - GNU Classpath has an implementation that might be more appropriate
261    // for Windows based systems (gnu.java.io.PlatformHelper.toCanonicalForm)
262    int index;
263
264    // Replace "/./" with "/".  This probably isn't very efficient in
265    // the general case, but it's probably not bad most of the time.
266    while ((index = file.indexOf("/./")) >= 0)
267      file = file.substring(0, index) + file.substring(index + 2);
268
269    // Process "/../" correctly.  This probably isn't very efficient in
270    // the general case, but it's probably not bad most of the time.
271    while ((index = file.indexOf("/../")) >= 0)
272      {
273        // Strip of the previous directory - if it exists.
274        int previous = file.lastIndexOf('/', index - 1);
275        if (previous >= 0)
276          file = file.substring(0, previous) + file.substring(index + 3);
277        else
278          break;
279      }
280    return file;
281  }
282
283  /**
284   * Compares two URLs, excluding the fragment component
285   *
286   * @param url1 The first url
287   * @param url2 The second url to compare with the first
288   *
289   * @return True if both URLs point to the same file, false otherwise.
290   *
291   * @specnote Now protected
292   */
293  protected boolean sameFile(URL url1, URL url2)
294  {
295    if (url1 == url2)
296      return true;
297
298    // This comparison is very conservative.  It assumes that any
299    // field can be null.
300    if (url1 == null || url2 == null)
301      return false;
302    int p1 = url1.getPort();
303    if (p1 == -1)
304      p1 = url1.ph.getDefaultPort();
305    int p2 = url2.getPort();
306    if (p2 == -1)
307      p2 = url2.ph.getDefaultPort();
308    if (p1 != p2)
309      return false;
310    String s1;
311    String s2;
312    s1 = url1.getProtocol();
313    s2 = url2.getProtocol();
314    if (s1 != s2 && (s1 == null || ! s1.equals(s2)))
315      return false;
316    s1 = url1.getHost();
317    s2 = url2.getHost();
318    if (s1 != s2 && (s1 == null || ! s1.equals(s2)))
319      return false;
320    s1 = canonicalizeFilename(url1.getFile());
321    s2 = canonicalizeFilename(url2.getFile());
322    if (s1 != s2 && (s1 == null || ! s1.equals(s2)))
323      return false;
324    return true;
325  }
326
327  /**
328   * This methods sets the instance variables representing the various fields
329   * of the URL to the values passed in.
330   *
331   * @param u The URL to modify
332   * @param protocol The protocol to set
333   * @param host The host name to et
334   * @param port The port number to set
335   * @param file The filename to set
336   * @param ref The reference
337   *
338   * @exception SecurityException If the protocol handler of the URL is
339   * different from this one
340   *
341   * @deprecated 1.2 Please use
342   * #setURL(URL,String,String,int,String,String,String,String);
343   */
344  protected void setURL(URL u, String protocol, String host, int port,
345                        String file, String ref)
346  {
347    u.set(protocol, host, port, file, ref);
348  }
349
350  /**
351   * Sets the fields of the URL argument to the indicated values
352   *
353   * @param u The URL to modify
354   * @param protocol The protocol to set
355   * @param host The host name to set
356   * @param port The port number to set
357   * @param authority The authority to set
358   * @param userInfo The user information to set
359   * @param path The path/filename to set
360   * @param query The query part to set
361   * @param ref The reference
362   *
363   * @exception SecurityException If the protocol handler of the URL is
364   * different from this one
365   */
366  protected void setURL(URL u, String protocol, String host, int port,
367                        String authority, String userInfo, String path,
368                        String query, String ref)
369  {
370    u.set(protocol, host, port, authority, userInfo, path, query, ref);
371  }
372
373  /**
374   * This is the default method for computing whether two URLs are
375   * equivalent.  This method assumes that neither URL is null.
376   *
377   * @param url1 An URL object
378   * @param url2 Another URL object
379   *
380   * @return True if both given URLs are equal, false otherwise.
381   */
382  protected boolean equals(URL url1, URL url2)
383  {
384    // This comparison is very conservative.  It assumes that any
385    // field can be null.
386    int port1 = url1.getPort();
387    if (port1 == -1)
388      port1 = url1.getDefaultPort();
389    int port2 = url2.getPort();
390    if (port2 == -1)
391      port2 = url2.getDefaultPort();
392    // Note that we don't bother checking the 'authority'; it is
393    // redundant.
394    return (port1 == port2
395           && ((url1.getProtocol() == null && url2.getProtocol() == null)
396           || (url1.getProtocol() != null
397           && url1.getProtocol().equals(url2.getProtocol())))
398           && ((url1.getUserInfo() == null && url2.getUserInfo() == null)
399           || (url1.getUserInfo() != null
400           && url1.getUserInfo().equals(url2.getUserInfo())))
401           && ((url1.getHost() == null && url2.getHost() == null)
402           || (url1.getHost() != null && url1.getHost().equals(url2.getHost())))
403           && ((url1.getPath() == null && url2.getPath() == null)
404           || (url1.getPath() != null && url1.getPath().equals(url2.getPath())))
405           && ((url1.getQuery() == null && url2.getQuery() == null)
406           || (url1.getQuery() != null
407           && url1.getQuery().equals(url2.getQuery())))
408           && ((url1.getRef() == null && url2.getRef() == null)
409           || (url1.getRef() != null && url1.getRef().equals(url2.getRef()))));
410  }
411
412  /**
413   * Compares the host components of two URLs.
414   *
415   * @param url1 The first URL.
416   * @param url2 The second URL.
417   *
418   * @return True if both URLs contain the same host.
419   */
420  protected boolean hostsEqual(URL url1, URL url2)
421  {
422    InetAddress addr1 = getHostAddress(url1);
423    InetAddress addr2 = getHostAddress(url2);
424
425    if (addr1 != null && addr2 != null)
426      return addr1.equals(addr2);
427
428    String host1 = url1.getHost();
429    String host2 = url2.getHost();
430
431    if (host1 != null && host2 != null)
432      return host1.equalsIgnoreCase(host2);
433
434    return host1 == null && host2 == null;
435  }
436
437  /**
438   * Get the IP address of our host. An empty host field or a DNS failure will
439   * result in a null return.
440   *
441   * @param url The URL to return the host address for.
442   *
443   * @return The address of the hostname in url.
444   */
445  protected InetAddress getHostAddress(URL url)
446  {
447    String hostname = url.getHost();
448
449    if (hostname.equals(""))
450      return null;
451
452    try
453      {
454        return InetAddress.getByName(hostname);
455      }
456    catch (UnknownHostException e)
457      {
458        return null;
459      }
460  }
461
462  /**
463   * Returns the default port for a URL parsed by this handler. This method is
464   * meant to be overidden by handlers with default port numbers.
465   *
466   * @return The default port number.
467   */
468  protected int getDefaultPort()
469  {
470    return -1;
471  }
472
473  /**
474   * Provides the default hash calculation. May be overidden by handlers for
475   * other protocols that have different requirements for hashCode calculation.
476   *
477   * @param url The URL to calc the hashcode for.
478   *
479   * @return The hashcode for the given URL.
480   */
481  protected int hashCode(URL url)
482  {
483    return url.getProtocol().hashCode()
484           + ((url.getHost() == null) ? 0 : url.getHost().hashCode())
485           + url.getFile().hashCode() + url.getPort();
486  }
487
488  /**
489   * This method converts a URL object into a String.  This method creates
490   * Strings in the mold of http URL's, so protocol handlers which use URL's
491   * that have a different syntax should override this method
492   *
493   * @param url The URL object to convert
494   *
495   * @return A string representation of the url
496   */
497  protected String toExternalForm(URL url)
498  {
499    String protocol;
500    String file;
501    String ref;
502    String authority;
503
504    protocol = url.getProtocol();
505    authority = url.getAuthority();
506    if (authority == null)
507      authority = "";
508    
509    file = url.getFile();
510    ref = url.getRef();
511
512    // Guess a reasonable size for the string buffer so we have to resize
513    // at most once.
514    int size = protocol.length() + authority.length() + file.length() + 24;
515    CPStringBuilder sb = new CPStringBuilder(size);
516
517    if (protocol.length() > 0)
518      {
519        sb.append(protocol);
520        sb.append(":");
521      }
522    
523    // If we have superfluous leading slashes (that means, at least 2)
524    // we always add the authority component ("//" + host) to
525    // avoid ambiguity. Otherwise we would generate an URL like
526    // proto://home/foo
527    // where we meant: 
528    // host: <empty> - file: //home/foo
529    // but URL spec says it is:
530    // host: home - file: /foo
531    if (authority.length() != 0 || file.startsWith("//") )
532      sb.append("//").append(authority).append(file);
533    else
534      sb.append(file);
535
536    if (ref != null)
537      sb.append('#').append(ref);
538
539    return sb.toString();
540  }
541}