001/* Pattern.java -- Compiled regular expression ready to be applied.
002   Copyright (C) 2002, 2004, 2005, 2007 Free Software Foundation, Inc.
003
004This file is part of GNU Classpath.
005
006GNU Classpath is free software; you can redistribute it and/or modify
007it under the terms of the GNU General Public License as published by
008the Free Software Foundation; either version 2, or (at your option)
009any later version.
010
011GNU Classpath is distributed in the hope that it will be useful, but
012WITHOUT ANY WARRANTY; without even the implied warranty of
013MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014General Public License for more details.
015
016You should have received a copy of the GNU General Public License
017along with GNU Classpath; see the file COPYING.  If not, write to the
018Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
01902110-1301 USA.
020
021Linking this library statically or dynamically with other modules is
022making a combined work based on this library.  Thus, the terms and
023conditions of the GNU General Public License cover the whole
024combination.
025
026As a special exception, the copyright holders of this library give you
027permission to link this library with independent modules to produce an
028executable, regardless of the license terms of these independent
029modules, and to copy and distribute the resulting executable under
030terms of your choice, provided that you also meet, for each linked
031independent module, the terms and conditions of the license of that
032module.  An independent module is a module which is not derived from
033or based on this library.  If you modify this library, you may extend
034this exception to your version of the library, but you are not
035obligated to do so.  If you do not wish to do so, delete this
036exception statement from your version. */
037
038package java.util.regex;
039
040import gnu.java.util.regex.RE;
041import gnu.java.util.regex.REException;
042import gnu.java.util.regex.RESyntax;
043
044import java.io.Serializable;
045import java.util.ArrayList;
046
047
048/**
049 * Compiled regular expression ready to be applied. 
050 *
051 * @since 1.4
052 */
053public final class Pattern implements Serializable
054{
055  private static final long serialVersionUID = 5073258162644648461L;
056  
057  public static final int CANON_EQ = 128;
058  public static final int CASE_INSENSITIVE = 2;
059  public static final int COMMENTS = 4;
060  public static final int DOTALL = 32;
061  public static final int MULTILINE = 8;
062  public static final int UNICODE_CASE = 64;
063  public static final int UNIX_LINES = 1;
064  
065  private final String regex;
066  private final int flags;
067
068  private final RE re;
069
070  private Pattern (String regex, int flags)
071    throws PatternSyntaxException
072  {
073    this.regex = regex;
074    this.flags = flags;
075
076    RESyntax syntax = RESyntax.RE_SYNTAX_JAVA_1_4;
077    int gnuFlags = 0;
078    gnuFlags |= RE.REG_ICASE_USASCII;
079    if ((flags & CASE_INSENSITIVE) != 0)
080      gnuFlags |= RE.REG_ICASE;
081    if ((flags & MULTILINE) != 0)
082      {
083        gnuFlags |= RE.REG_MULTILINE;
084        syntax = new RESyntax(syntax);
085        syntax.setLineSeparator(null);
086      }
087    if ((flags & DOTALL) != 0)
088      gnuFlags |= RE.REG_DOT_NEWLINE;
089    if ((flags & UNICODE_CASE) != 0)
090      gnuFlags &= ~RE.REG_ICASE_USASCII;
091    // not yet supported:
092    // if ((flags & CANON_EQ) != 0) gnuFlags =
093
094    if ((flags & UNIX_LINES) != 0)
095      {
096        // Use a syntax set with \n for linefeeds?
097        syntax = new RESyntax(syntax);
098        syntax.setLineSeparator("\n");
099      }
100
101    if ((flags & COMMENTS) != 0)
102      {
103        gnuFlags |= RE.REG_X_COMMENTS;
104      }
105
106    try
107      {
108        this.re = new RE(regex, gnuFlags, syntax);
109      }
110    catch (REException e)
111      {
112        PatternSyntaxException pse;
113        pse = new PatternSyntaxException(e.getMessage(),
114                                         regex, e.getPosition());
115        pse.initCause(e);
116        throw pse;
117      }
118  }
119 
120  // package private accessor method
121  RE getRE()
122  {
123    return re;
124  }
125
126  /**
127   * @param regex The regular expression
128   *
129   * @exception PatternSyntaxException If the expression's syntax is invalid
130   */
131  public static Pattern compile (String regex)
132    throws PatternSyntaxException
133  {
134    return compile(regex, 0);
135  }
136  
137  /**
138   * @param regex The regular expression
139   * @param flags The match flags, a bit mask
140   *
141   * @exception PatternSyntaxException If the expression's syntax is invalid
142   * @exception IllegalArgumentException If bit values other than those
143   * corresponding to the defined match flags are set in flags
144   */
145  public static Pattern compile (String regex, int flags)
146    throws PatternSyntaxException
147  {
148    // FIXME: check which flags are really accepted
149    if ((flags & ~0xEF) != 0)
150      throw new IllegalArgumentException ();
151    
152    return new Pattern (regex, flags); 
153  }
154  
155  public int flags ()
156  {
157    return this.flags;
158  }
159  
160  /**
161   * @param regex The regular expression
162   * @param input The character sequence to be matched
163   *
164   * @exception PatternSyntaxException If the expression's syntax is invalid
165   */
166  public static boolean matches (String regex, CharSequence input) 
167  {
168    return compile(regex).matcher(input).matches();
169  }
170  
171  /**
172   * @param input The character sequence to be matched
173   */
174  public Matcher matcher (CharSequence input)
175  {
176    return new Matcher(this, input);
177  }
178  
179  /**
180   * @param input The character sequence to be matched
181   */
182  public String[] split (CharSequence input)
183  {
184    return split(input, 0);
185  }
186  
187  /**
188   * @param input The character sequence to be matched
189   * @param limit The result threshold
190   */
191  public String[] split (CharSequence input, int limit)
192  {
193    Matcher matcher = new Matcher(this, input);
194    ArrayList<String> list = new ArrayList<String>();
195    int empties = 0;
196    int count = 0;
197    int start = 0;
198    int end;
199    boolean matched = matcher.find();
200
201    while (matched && (limit <= 0 || count < limit - 1))
202      {
203        ++count;
204        end = matcher.start();
205        if (start == end)
206          empties++;
207        else
208          {
209            while (empties > 0)
210              {
211                list.add("");
212                empties--;
213              }
214
215            String text = input.subSequence(start, end).toString();
216            list.add(text);
217          }
218        start = matcher.end();
219        matched = matcher.find();
220      }
221
222    // We matched nothing.
223    if (!matched && count == 0)
224      return new String[] { input.toString() };
225    
226    // Is the last token empty?
227    boolean emptyLast = (start == input.length());
228
229    // Can/Must we add empties or an extra last token at the end?
230    if (list.size() < limit || limit < 0 || (limit == 0 && !emptyLast))
231      {
232        if (limit > list.size())
233          {
234            int max = limit - list.size();
235            empties = (empties > max) ? max : empties;
236          }
237        while (empties > 0)
238          {
239            list.add("");
240            empties--;
241          }
242      }
243
244    // last token at end
245    if (limit != 0 || (limit == 0 && !emptyLast))
246      {
247        String t = input.subSequence(start, input.length()).toString();
248        if ("".equals(t) && limit == 0)
249          { /* Don't add. */ }
250        else
251          list.add(t);
252      }
253
254    return list.toArray(new String[list.size()]);
255  }
256  
257  public String pattern ()
258  {
259    return regex;
260  }
261
262  /**
263   * Return the regular expression used to construct this object.
264   * @specnote Prior to JDK 1.5 this method had a different behavior
265   * @since 1.5
266   */
267  public String toString()
268  {
269    return regex;
270  }
271}