001/* Bidi.java -- Bidirectional Algorithm implementation
002   Copyright (C) 2005, 2006  Free Software Foundation, Inc.
003
004This file is part of GNU Classpath.
005
006GNU Classpath is free software; you can redistribute it and/or modify
007it under the terms of the GNU General Public License as published by
008the Free Software Foundation; either version 2, or (at your option)
009any later version.
010 
011GNU Classpath is distributed in the hope that it will be useful, but
012WITHOUT ANY WARRANTY; without even the implied warranty of
013MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014General Public License for more details.
015
016You should have received a copy of the GNU General Public License
017along with GNU Classpath; see the file COPYING.  If not, write to the
018Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
01902110-1301 USA.
020
021Linking this library statically or dynamically with other modules is
022making a combined work based on this library.  Thus, the terms and
023conditions of the GNU General Public License cover the whole
024combination.
025
026As a special exception, the copyright holders of this library give you
027permission to link this library with independent modules to produce an
028executable, regardless of the license terms of these independent
029modules, and to copy and distribute the resulting executable under
030terms of your choice, provided that you also meet, for each linked
031independent module, the terms and conditions of the license of that
032module.  An independent module is a module which is not derived from
033or based on this library.  If you modify this library, you may extend
034this exception to your version of the library, but you are not
035obligated to do so.  If you do not wish to do so, delete this
036exception statement from your version. */
037
038
039package java.text;
040
041import java.awt.font.NumericShaper;
042import java.awt.font.TextAttribute;
043import java.util.ArrayList;
044
045
046/**
047 * Bidirectional Algorithm implementation.
048 *
049 * The full algorithm is
050 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard
051 * Annex #9: The Bidirectional Algorithm</a>.
052 *
053 * @since 1.4
054 */
055public final class Bidi
056{
057  /**
058   * This indicates that a strongly directional character in the text should
059   * set the initial direction, but if no such character is found, then the
060   * initial direction will be left-to-right.
061   */
062  public static final int DIRECTION_DEFAULT_LEFT_TO_RIGHT = -2;
063
064  /**
065   * This indicates that a strongly directional character in the text should
066   * set the initial direction, but if no such character is found, then the
067   * initial direction will be right-to-left.
068   */
069  public static final int DIRECTION_DEFAULT_RIGHT_TO_LEFT = -1;
070
071  /**
072   * This indicates that the initial direction should be left-to-right.
073   */
074  public static final int DIRECTION_LEFT_TO_RIGHT = 0;
075
076  /**
077   * This indicates that the initial direction should be right-to-left.
078   */
079  public static final int DIRECTION_RIGHT_TO_LEFT = 1;
080
081  // Flags used when computing the result.
082  private static final int LTOR = 1 << DIRECTION_LEFT_TO_RIGHT;
083  private static final int RTOL = 1 << DIRECTION_RIGHT_TO_LEFT;
084
085  // The text we are examining, and the starting offset.
086  // If we had a better way to handle createLineBidi, we wouldn't
087  // need this at all -- which for the String case would be an
088  // efficiency win.
089  private char[] text;
090  private int textOffset;
091  // The embeddings corresponding to the text, and the starting offset.
092  private byte[] embeddings;
093  private int embeddingOffset;
094  // The length of the text (and embeddings) to use.
095  private int length;
096  // The flags.
097  private int flags;
098
099  // All instance fields following this point are initialized
100  // during analysis.  Fields before this must be set by the constructor.
101
102  // The initial embedding level.
103  private int baseEmbedding;
104  // The type of each character in the text.
105  private byte[] types;
106  // The levels we compute.
107  private byte[] levels;
108
109  // A list of indices where a formatting code was found.  These
110  // are indicies into the original text -- not into the text after
111  // the codes have been removed.
112  private ArrayList formatterIndices;
113
114  // Indices of the starts of runs in the text. 
115  private int[] runs;
116
117  // A convenience field where we keep track of what kinds of runs
118  // we've seen.
119  private int resultFlags;
120
121  /**
122   * Create a new Bidi object given an attributed character iterator.
123   * This constructor will examine various attributes of the text:
124   * <ul>
125   * <li> {@link TextAttribute#RUN_DIRECTION} is used to determine the
126   * paragraph's base embedding level.  This constructor will recognize
127   * either {@link TextAttribute#RUN_DIRECTION_LTR} or
128   * {@link TextAttribute#RUN_DIRECTION_RTL}.  If neither is given,
129   * {@link #DIRECTION_DEFAULT_LEFT_TO_RIGHT} is assumed.
130   * </li>
131   * 
132   * <li> If {@link TextAttribute#NUMERIC_SHAPING} is seen, then numeric
133   * shaping will be done before the Bidi algorithm is run.
134   * </li>
135   * 
136   * <li> If {@link TextAttribute#BIDI_EMBEDDING} is seen on a given
137   * character, then the value of this attribute will be used as an
138   * embedding level override.
139   * </li>
140   * </ul>
141   * @param iter the attributed character iterator to use
142   */
143  public Bidi(AttributedCharacterIterator iter)
144  {
145    // If set, this attribute should be set on all characters.
146    // We don't check this (should we?) but we do assume that we
147    // can simply examine the first character.
148    Object val = iter.getAttribute(TextAttribute.RUN_DIRECTION);
149    if (val == TextAttribute.RUN_DIRECTION_LTR)
150      this.flags = DIRECTION_LEFT_TO_RIGHT;
151    else if (val == TextAttribute.RUN_DIRECTION_RTL)
152      this.flags = DIRECTION_RIGHT_TO_LEFT;
153    else
154      this.flags = DIRECTION_DEFAULT_LEFT_TO_RIGHT;
155
156    // Likewise this attribute should be specified on the whole text.
157    // We read it here and then, if it is set, we apply the numeric shaper
158    // to the text before processing it.
159    NumericShaper shaper = null;
160    val = iter.getAttribute(TextAttribute.NUMERIC_SHAPING);
161    if (val instanceof NumericShaper)
162      shaper = (NumericShaper) val;
163
164    char[] text = new char[iter.getEndIndex() - iter.getBeginIndex()];
165    this.embeddings = new byte[this.text.length];
166    this.embeddingOffset = 0;
167    this.length = text.length;
168    for (int i = 0; i < this.text.length; ++i)
169      {
170        this.text[i] = iter.current();
171
172        val = iter.getAttribute(TextAttribute.BIDI_EMBEDDING);
173        if (val instanceof Integer)
174          {
175            int ival = ((Integer) val).intValue();
176            byte bval;
177            if (ival < -62 || ival > 62)
178              bval = 0;
179            else
180              bval = (byte) ival;
181            this.embeddings[i] = bval;
182          }
183      }
184
185    // Invoke the numeric shaper, if specified.
186    if (shaper != null)
187      shaper.shape(this.text, 0, this.length);
188
189    runBidi();
190  }
191
192  /**
193   * Create a new Bidi object with the indicated text and, possibly, explicit
194   * embedding settings.
195   * 
196   * If the embeddings array is null, it is ignored.  Otherwise it is taken to
197   * be explicit embedding settings corresponding to the text.  Positive values
198   * from 1 to 61 are embedding levels, and negative values from -1 to -61 are
199   * embedding overrides.  (FIXME: not at all clear what this really means.)
200   * 
201   * @param text the text to use
202   * @param offset the offset of the first character of the text
203   * @param embeddings the explicit embeddings, or null if there are none
204   * @param embedOffset the offset of the first embedding value to use
205   * @param length the length of both the text and the embeddings
206   * @param flags a flag indicating the base embedding direction
207   */
208  public Bidi(char[] text, int offset, byte[] embeddings, int embedOffset,
209              int length, int flags)
210  {
211    if (flags != DIRECTION_DEFAULT_LEFT_TO_RIGHT
212        && flags != DIRECTION_DEFAULT_RIGHT_TO_LEFT
213        && flags != DIRECTION_LEFT_TO_RIGHT
214        && flags != DIRECTION_RIGHT_TO_LEFT)
215      throw new IllegalArgumentException("unrecognized 'flags' argument: "
216                                         + flags);
217    this.text = text;
218    this.textOffset = offset;
219    this.embeddings = embeddings;
220    this.embeddingOffset = embedOffset;
221    this.length = length;
222    this.flags = flags;
223
224    runBidi();
225  }
226
227  /**
228   * Create a new Bidi object using the contents of the given String
229   * as the text.
230   * @param text the text to use
231   * @param flags a flag indicating the base embedding direction
232   */
233  public Bidi(String text, int flags)
234  {
235    if (flags != DIRECTION_DEFAULT_LEFT_TO_RIGHT
236        && flags != DIRECTION_DEFAULT_RIGHT_TO_LEFT
237        && flags != DIRECTION_LEFT_TO_RIGHT
238        && flags != DIRECTION_RIGHT_TO_LEFT)
239      throw new IllegalArgumentException("unrecognized 'flags' argument: "
240                                         + flags);
241
242    // This is inefficient, but it isn't clear whether it matters.
243    // If it does we can change our implementation a bit to allow either
244    // a String or a char[].
245    this.text = text.toCharArray();
246    this.textOffset = 0;
247    this.embeddings = null;
248    this.embeddingOffset = 0;
249    this.length = text.length();
250    this.flags = flags;
251
252    runBidi();
253  }
254
255  /**
256   * Implementation function which computes the initial type of
257   * each character in the input.
258   */
259  private void computeTypes()
260  {
261    types = new byte[length];
262    for (int i = 0; i < length; ++i)
263      types[i] = Character.getDirectionality(text[textOffset + i]);
264  }
265
266  /**
267   * An internal function which implements rules P2 and P3.
268   * This computes the base embedding level.
269   * @return the paragraph's base embedding level
270   */
271  private int computeParagraphEmbeddingLevel()
272  {
273    // First check to see if the user supplied a directionality override.
274    if (flags == DIRECTION_LEFT_TO_RIGHT
275        || flags == DIRECTION_RIGHT_TO_LEFT)
276      return flags;
277
278    // This implements rules P2 and P3.
279    // (Note that we don't need P1, as the user supplies
280    // a paragraph.)
281    for (int i = 0; i < length; ++i)
282      {
283        int dir = types[i];
284        if (dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT)
285          return DIRECTION_LEFT_TO_RIGHT;
286        if (dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT
287            || dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT)
288          return DIRECTION_RIGHT_TO_LEFT;
289      }
290    return (flags == DIRECTION_DEFAULT_LEFT_TO_RIGHT
291            ? DIRECTION_LEFT_TO_RIGHT
292            : DIRECTION_RIGHT_TO_LEFT);
293  }
294
295  /**
296   * An internal function which implements rules X1 through X9.
297   * This computes the initial levels for the text, handling
298   * explicit overrides and embeddings.
299   */
300  private void computeExplicitLevels()
301  {
302    levels = new byte[length];
303    byte currentEmbedding = (byte) baseEmbedding;
304    // The directional override is a Character directionality
305    // constant.  -1 means there is no override.
306    byte directionalOverride = -1;
307    // The stack of pushed embeddings, and the stack pointer.
308    // Note that because the direction is inherent in the depth,
309    // and because we have a bit left over in a byte, we can encode
310    // the override, if any, directly in this value on the stack.
311    final int MAX_DEPTH = 62;
312    byte[] embeddingStack = new byte[MAX_DEPTH];
313    int sp = 0;
314
315    for (int i = 0; i < length; ++i)
316      {
317        // If we see an explicit embedding, we use that, even if
318        // the current character is itself a directional override.
319        if (embeddings != null && embeddings[embeddingOffset + i] != 0)
320          {
321            // It isn't at all clear what we're supposed to do here.
322            // What does a negative value really mean?
323            // Should we push on the embedding stack here?
324            currentEmbedding = embeddings[embeddingOffset + i]; 
325            if (currentEmbedding < 0)
326              {
327                currentEmbedding = (byte) -currentEmbedding;
328                directionalOverride
329                  = (((currentEmbedding % 2) == 0)
330                      ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
331                      : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
332              }
333            else
334              directionalOverride = -1;
335            continue;
336          }
337        // No explicit embedding.
338        boolean isLtoR = false;
339        boolean isSpecial = true;
340        switch (types[i])
341          {
342            case Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING:
343            case Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE:
344              isLtoR = true;
345              // Fall through.
346            case Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING:
347            case Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE:
348              {
349                byte newEmbedding;
350                if (isLtoR)
351                  {
352                    // Least greater even.
353                    newEmbedding = (byte) ((currentEmbedding & ~1) + 2);
354                  }
355                else
356                  {
357                    // Least greater odd.
358                    newEmbedding = (byte) ((currentEmbedding + 1) | 1);
359                  }
360                // FIXME: we don't properly handle invalid pushes.
361                if (newEmbedding < MAX_DEPTH)
362                  {
363                    // The new level is valid.  Push the old value.
364                    // See above for a comment on the encoding here.
365                    if (directionalOverride != -1)
366                      currentEmbedding |= Byte.MIN_VALUE;
367                    embeddingStack[sp++] = currentEmbedding;
368                    currentEmbedding = newEmbedding;
369                    if (types[i] == Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE)
370                      directionalOverride = Character.DIRECTIONALITY_LEFT_TO_RIGHT;
371                    else if (types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE)
372                      directionalOverride = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
373                    else
374                      directionalOverride = -1;
375                  }
376                }
377              break;
378            case Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT:
379              {
380                // FIXME: we don't properly handle a pop with a corresponding
381                // invalid push.
382                if (sp == 0)
383                  {
384                    // We saw a pop without a push.  Just ignore it.
385                    break;
386                  }
387                byte newEmbedding = embeddingStack[--sp];
388                currentEmbedding = (byte) (newEmbedding & 0x7f);
389                if (newEmbedding < 0)
390                  directionalOverride
391                  = (((newEmbedding & 1) == 0)
392                      ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
393                      : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
394                else
395                  directionalOverride = -1;
396              }
397              break;
398            default:
399              isSpecial = false;
400              break;
401          }
402        levels[i] = currentEmbedding;
403        if (isSpecial)
404          {
405            // Mark this character for removal.
406            if (formatterIndices == null)
407              formatterIndices = new ArrayList();
408            formatterIndices.add(Integer.valueOf(i));
409          }
410        else if (directionalOverride != -1)
411          types[i] = directionalOverride;
412      }
413
414    // Remove the formatting codes and update both the arrays
415    // and 'length'.  It would be more efficient not to remove
416    // these codes, but it is also more complicated.  Also, the
417    // Unicode algorithm reference does not properly describe
418    // how this is to be done -- from what I can tell, their suggestions
419    // in this area will not yield the correct results.
420    if (formatterIndices == null)
421      return;
422    int output = 0, input = 0;
423    final int size = formatterIndices.size();
424    for (int i = 0; i <= size; ++i)
425      {
426        int nextFmt;
427        if (i == size)
428          nextFmt = length;
429        else
430          nextFmt = ((Integer) formatterIndices.get(i)).intValue();
431        // Non-formatter codes are from 'input' to 'nextFmt'.
432        int len = nextFmt - input;
433        System.arraycopy(levels, input, levels, output, len);
434        System.arraycopy(types, input, types, output, len);
435        output += len;
436        input = nextFmt + 1;
437      }
438    length -= formatterIndices.size();
439  }
440
441  /**
442   * An internal function to compute the boundaries of runs
443   * in the text.  It isn't strictly necessary to do this, but
444   * it lets us write some following passes in a less complicated
445   * way.  Also it lets us efficiently implement some of the public
446   * methods.  A run is simply a sequence of characters at the
447   * same level.
448   */
449  private void computeRuns()
450  {
451    int runCount = 0;
452    int currentEmbedding = baseEmbedding;
453    for (int i = 0; i < length; ++i)
454      {
455        if (levels[i] != currentEmbedding)
456          {
457            currentEmbedding = levels[i];
458            ++runCount;
459          }
460      }
461
462    // This may be called multiple times.  If so, and if
463    // the number of runs has not changed, then don't bother
464    // allocating a new array.
465    if (runs == null || runs.length != runCount + 1)
466      runs = new int[runCount + 1];
467    int where = 0;
468    int lastRunStart = 0;
469    currentEmbedding = baseEmbedding;
470    for (int i = 0; i < length; ++i)
471      {
472        if (levels[i] != currentEmbedding)
473          {
474            runs[where++] = lastRunStart;
475            lastRunStart = i;
476            currentEmbedding = levels[i];
477          }
478      }
479    runs[where++] = lastRunStart;
480  }
481
482  /**
483   * An internal method to resolve weak types.  This implements
484   * rules W1 through W7.
485   */
486  private void resolveWeakTypes()
487  {
488    final int runCount = getRunCount();
489    
490    int previousLevel = baseEmbedding;
491    for (int run = 0; run < runCount; ++run)
492      {
493        int start = getRunStart(run);
494        int end = getRunLimit(run);
495        int level = getRunLevel(run);
496
497        // These are the names used in the Bidi algorithm.
498        byte sor = (((Math.max(previousLevel, level) % 2) == 0)
499                      ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
500                      : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
501        int nextLevel;
502        if (run == runCount - 1)
503          nextLevel = baseEmbedding;
504        else
505          nextLevel = getRunLevel(run + 1);
506        byte eor = (((Math.max(level, nextLevel) % 2) == 0)
507                      ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
508                      : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
509
510        byte prevType = sor;
511        byte prevStrongType = sor;
512        for (int i = start; i < end; ++i)
513          {
514            final byte nextType = (i == end - 1) ? eor : types[i + 1];
515
516            // Rule W1: change NSM to the prevailing direction.
517            if (types[i] == Character.DIRECTIONALITY_NONSPACING_MARK)
518              types[i] = prevType;
519            else
520              prevType = types[i];
521
522            // Rule W2: change EN to AN in some cases.
523            if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
524              {
525                if (prevStrongType == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
526                  types[i] = Character.DIRECTIONALITY_ARABIC_NUMBER;
527              }
528            else if (types[i] == Character.DIRECTIONALITY_LEFT_TO_RIGHT
529                     || types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT
530                     || types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
531              prevStrongType = types[i];
532
533            // Rule W3: change AL to R.
534            if (types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
535              types[i] = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
536
537            // Rule W4: handle separators between two numbers.
538            if (prevType == Character.DIRECTIONALITY_EUROPEAN_NUMBER
539                && nextType == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
540              {
541                if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
542                    || types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR)
543                  types[i] = nextType;
544              }
545            else if (prevType == Character.DIRECTIONALITY_ARABIC_NUMBER
546                     && nextType == Character.DIRECTIONALITY_ARABIC_NUMBER
547                     && types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR)
548              types[i] = nextType;
549
550            // Rule W5: change a sequence of european terminators to
551            // european numbers, if they are adjacent to european numbers.
552            // We also include BN characters in this.
553            if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
554                || types[i] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL)
555              {
556                if (prevType == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
557                  types[i] = prevType;
558                else
559                  {
560                    // Look ahead to see if there is an EN terminating this
561                    // sequence of ETs.
562                    int j = i + 1;
563                    while (j < end
564                           && (types[j] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
565                               || types[j] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL))
566                      ++j;
567                    if (j < end
568                        && types[j] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
569                      {
570                        // Change them all to EN now.
571                        for (int k = i; k < j; ++k)
572                          types[k] = Character.DIRECTIONALITY_EUROPEAN_NUMBER;
573                      }
574                  }
575              }
576
577            // Rule W6: separators and terminators change to ON.
578            // Again we include BN.
579            if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
580                || types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
581                || types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
582                || types[i] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL)
583              types[i] = Character.DIRECTIONALITY_OTHER_NEUTRALS;
584
585            // Rule W7: change european number types.
586            if (prevStrongType == Character.DIRECTIONALITY_LEFT_TO_RIGHT
587                && types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
588              types[i] = prevStrongType;
589          }
590
591        previousLevel = level;
592      }
593  }
594
595  /**
596   * An internal method to resolve neutral types.  This implements
597   * rules N1 and N2.
598   */
599  private void resolveNeutralTypes()
600  {
601    // This implements rules N1 and N2.
602    final int runCount = getRunCount();
603    
604    int previousLevel = baseEmbedding;
605    for (int run = 0; run < runCount; ++run)
606      {
607        int start = getRunStart(run);
608        int end = getRunLimit(run);
609        int level = getRunLevel(run);
610
611        byte embeddingDirection
612          = (((level % 2) == 0) ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
613                                : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
614        // These are the names used in the Bidi algorithm.
615        byte sor = (((Math.max(previousLevel, level) % 2) == 0)
616                      ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
617                      : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
618        int nextLevel;
619        if (run == runCount - 1)
620          nextLevel = baseEmbedding;
621        else
622          nextLevel = getRunLevel(run + 1);
623        byte eor = (((Math.max(level, nextLevel) % 2) == 0)
624                      ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
625                      : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
626
627        byte prevStrong = sor;
628        int neutralStart = -1;
629        for (int i = start; i <= end; ++i)
630          {
631            byte newStrong = -1;
632            byte thisType = i == end ? eor : types[i];
633            switch (thisType)
634              {
635              case Character.DIRECTIONALITY_LEFT_TO_RIGHT:
636                newStrong = Character.DIRECTIONALITY_LEFT_TO_RIGHT;
637                break;
638              case Character.DIRECTIONALITY_RIGHT_TO_LEFT:
639              case Character.DIRECTIONALITY_ARABIC_NUMBER:
640              case Character.DIRECTIONALITY_EUROPEAN_NUMBER:
641                newStrong = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
642                break;
643              case Character.DIRECTIONALITY_BOUNDARY_NEUTRAL:
644              case Character.DIRECTIONALITY_OTHER_NEUTRALS:
645              case Character.DIRECTIONALITY_SEGMENT_SEPARATOR:
646              case Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR:
647              case Character.DIRECTIONALITY_WHITESPACE:
648                if (neutralStart == -1)
649                  neutralStart = i;
650                break;
651              }
652            // If we see a strong character, update all the neutrals.
653            if (newStrong != -1)
654              {
655                if (neutralStart != -1)
656                  {
657                    byte override = (prevStrong == newStrong
658                                     ? prevStrong
659                                     : embeddingDirection);
660                    for (int j = neutralStart; j < i; ++j)
661                      types[j] = override;
662                  }
663                prevStrong = newStrong;
664                neutralStart = -1;
665              }
666          }
667
668        previousLevel = level;
669      }
670  }
671
672  /**
673   * An internal method to resolve implicit levels.
674   * This implements rules I1 and I2.
675   */
676  private void resolveImplicitLevels()
677  {
678    // This implements rules I1 and I2.
679    for (int i = 0; i < length; ++i)
680      {
681        if ((levels[i] & 1) == 0)
682          {
683            if (types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT)
684              ++levels[i];
685            else if (types[i] == Character.DIRECTIONALITY_ARABIC_NUMBER
686                     || types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
687              levels[i] += 2;
688          }
689        else
690          {
691            if (types[i] == Character.DIRECTIONALITY_LEFT_TO_RIGHT
692                || types[i] == Character.DIRECTIONALITY_ARABIC_NUMBER
693                || types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
694              ++levels[i];
695          }
696
697        // Update the result flags.
698        resultFlags |= 1 << (levels[i] & 1);
699      }
700    // One final update of the result flags, using the base level.
701    resultFlags |= 1 << baseEmbedding;
702  }
703
704  /**
705   * This reinserts the formatting codes that we removed early on.
706   * Actually it does not insert formatting codes per se, but rather
707   * simply inserts new levels at the appropriate locations in the
708   * 'levels' array.
709   */
710  private void reinsertFormattingCodes()
711  {
712    if (formatterIndices == null)
713      return;
714    int input = length;
715    int output = levels.length;
716    // Process from the end as we are copying the array over itself here.
717    for (int index = formatterIndices.size() - 1; index >= 0; --index)
718      {
719        int nextFmt = ((Integer) formatterIndices.get(index)).intValue();
720
721        // nextFmt points to a location in the original array.  So,
722        // nextFmt+1 is the target of our copying.  output is the location
723        // to which we last copied, thus we can derive the length of the
724        // copy from it.
725        int len = output - nextFmt - 1;
726        output = nextFmt;
727        input -= len;
728        // Note that we no longer need 'types' at this point, so we
729        // only edit 'levels'.
730        if (nextFmt + 1 < levels.length)
731          System.arraycopy(levels, input, levels, nextFmt + 1, len);
732
733        // Now set the level at the reinsertion point.
734        int rightLevel;
735        if (output == levels.length - 1)
736          rightLevel = baseEmbedding;
737        else
738          rightLevel = levels[output + 1];
739        int leftLevel;
740        if (input == 0)
741          leftLevel = baseEmbedding;
742        else
743          leftLevel = levels[input];
744        levels[output] = (byte) Math.max(leftLevel, rightLevel);
745      }
746    length = levels.length;
747  }
748
749  /**
750   * This is the main internal entry point.  After a constructor
751   * has initialized the appropriate local state, it will call
752   * this method to do all the work.
753   */
754  private void runBidi()
755  {
756    computeTypes();
757    baseEmbedding = computeParagraphEmbeddingLevel();
758    computeExplicitLevels();
759    computeRuns();
760    resolveWeakTypes();
761    resolveNeutralTypes();
762    resolveImplicitLevels();
763    // We're done with the types.  Let the GC clean up.
764    types = null;
765    reinsertFormattingCodes();
766    // After resolving the implicit levels, the number
767    // of runs may have changed.
768    computeRuns();
769  }
770
771  /**
772   * Return true if the paragraph base embedding is left-to-right,
773   * false otherwise.
774   */
775  public boolean baseIsLeftToRight()
776  {
777    return baseEmbedding == DIRECTION_LEFT_TO_RIGHT;
778  }
779
780  /**
781   * Create a new Bidi object for a single line of text, taken
782   * from the text used when creating the current Bidi object.
783   * @param start the index of the first character of the line
784   * @param end the index of the final character of the line
785   * @return a new Bidi object for the indicated line of text
786   */
787  public Bidi createLineBidi(int start, int end)
788  {
789    // This isn't the most efficient implementation possible.
790    // This probably does not matter, so we choose simplicity instead.
791    int level = getLevelAt(start);
792    int flag = (((level % 2) == 0)
793                ? DIRECTION_LEFT_TO_RIGHT
794                : DIRECTION_RIGHT_TO_LEFT);
795    return new Bidi(text, textOffset + start,
796                    embeddings, embeddingOffset + start,
797                    end - start, flag);
798  }
799
800  /**
801   * Return the base embedding level of the paragraph.
802   */
803  public int getBaseLevel()
804  {
805    return baseEmbedding;
806  }
807
808  /**
809   * Return the length of the paragraph, in characters.
810   */
811  public int getLength()
812  {
813    return length;
814  }
815
816  /**
817   * Return the level at the indicated character.  If the
818   * supplied index is less than zero or greater than the length
819   * of the text, then the paragraph's base embedding level will
820   * be returned.
821   * @param offset the character to examine
822   * @return the level of that character
823   */
824  public int getLevelAt(int offset)
825  {
826    if (offset < 0 || offset >= length)
827      return getBaseLevel();
828    return levels[offset];
829  }
830
831  /**
832   * Return the number of runs in the result.  A run is
833   * a sequence of characters at the same embedding level.
834   */
835  public int getRunCount()
836  {
837    return runs.length;
838  }
839
840  /**
841   * Return the level of the indicated run.
842   * @param which the run to examine
843   * @return the level of that run
844   */
845  public int getRunLevel(int which)
846  {
847    return levels[runs[which]];
848  }
849
850  /**
851   * Return the index of the character just following the end
852   * of the indicated run.
853   * @param which the run to examine
854   * @return the index of the character after the final character
855   * of the run
856   */
857  public int getRunLimit(int which)
858  {
859    if (which == runs.length - 1)
860      return length;
861    return runs[which + 1];
862  }
863
864  /**
865   * Return the index of the first character in the indicated run.
866   * @param which the run to examine
867   * @return the index of the first character of the run
868   */
869  public int getRunStart(int which)
870  {
871    return runs[which];
872  }
873
874  /**
875   * Return true if the text is entirely left-to-right, and the
876   * base embedding is also left-to-right.
877   */
878  public boolean isLeftToRight()
879  {
880    return resultFlags == LTOR;
881  }
882
883  /**
884   * Return true if the text consists of mixed left-to-right and
885   * right-to-left runs, or if the text consists of one kind of run
886   * which differs from the base embedding direction.
887   */
888  public boolean isMixed()
889  {
890    return resultFlags == (LTOR | RTOL);
891  }
892
893  /**
894   * Return true if the text is entirely right-to-left, and the
895   * base embedding is also right-to-left.
896   */
897  public boolean isRightToLeft()
898  {
899    return resultFlags == RTOL;
900  }
901
902  /**
903   * Return a String describing the internal state of this object.
904   * This is only useful for debugging.
905   */
906  public String toString()
907  {
908    return "Bidi Bidi Bidi I like you, Buck!";
909  }
910
911  /**
912   * Reorder objects according to the levels passed in.  This implements
913   * reordering as defined by the Unicode bidirectional layout specification.
914   * The levels are integers from 0 to 62; even numbers represent left-to-right
915   * runs, and odd numbers represent right-to-left runs.
916   *
917   * @param levels the levels associated with each object
918   * @param levelOffset the index of the first level to use
919   * @param objs the objects to reorder according to the levels
920   * @param objOffset the index of the first object to use
921   * @param count the number of objects (and levels) to manipulate
922   */
923  public static void reorderVisually(byte[] levels, int levelOffset,
924                                     Object[] objs, int objOffset, int count)
925  {
926    // We need a copy of the 'levels' array, as we are going to modify it.
927    // This is unfortunate but difficult to avoid.
928    byte[] levelCopy = new byte[count];
929    // Do this explicitly so we can also find the maximum depth at the
930    // same time.
931    int max = 0;
932    int lowestOdd = 63;
933    for (int i = 0; i < count; ++i)
934      {
935        levelCopy[i] = levels[levelOffset + i];
936        max = Math.max(levelCopy[i], max);
937        if (levelCopy[i] % 2 != 0)
938          lowestOdd = Math.min(lowestOdd, levelCopy[i]);
939      }
940
941    // Reverse the runs starting with the deepest.
942    for (int depth = max; depth >= lowestOdd; --depth)
943      {
944        int start = 0;
945        while (start < count)
946          {
947            // Find the start of a run >= DEPTH.
948            while (start < count && levelCopy[start] < depth)
949              ++start;
950            if (start == count)
951              break;
952            // Find the end of the run.
953            int end = start + 1;
954            while (end < count && levelCopy[end] >= depth)
955              ++end;
956
957            // Reverse this run.
958            for (int i = 0; i < (end - start) / 2; ++i)
959              {
960                byte tmpb = levelCopy[end - i - 1];
961                levelCopy[end - i - 1] = levelCopy[start + i];
962                levelCopy[start + i] = tmpb;
963                Object tmpo = objs[objOffset + end - i - 1];
964                objs[objOffset + end - i - 1] = objs[objOffset + start + i];
965                objs[objOffset + start + i] = tmpo;
966              }
967
968            // Handle the next run.
969            start = end + 1;
970          }
971      }
972  }
973
974  /**
975   * Returns false if all characters in the text between start and end
976   * are all left-to-right text. This implementation is just calls
977   * <code>Character.getDirectionality(char)</code> on all characters
978   * and makes sure all characters are either explicitly left-to-right
979   * or neutral in directionality (character types L, EN, ES, ET, AN,
980   * CS, S and WS).
981   */
982  public static boolean requiresBidi(char[] text, int start, int end)
983  {
984    for (int i = start; i < end; i++)
985      {
986        byte dir = Character.getDirectionality(text[i]);
987        if (dir != Character.DIRECTIONALITY_LEFT_TO_RIGHT
988            && dir != Character.DIRECTIONALITY_EUROPEAN_NUMBER
989            && dir != Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
990            && dir != Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
991            && dir != Character.DIRECTIONALITY_ARABIC_NUMBER
992            && dir != Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
993            && dir != Character.DIRECTIONALITY_SEGMENT_SEPARATOR
994            && dir != Character.DIRECTIONALITY_WHITESPACE
995            && dir != Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR)
996          return true;
997      }
998
999    return false;
1000  }
1001}