• Skip to content
  • Skip to link menu
KDE 4.3 API Reference
  • KDE API Reference
  • kdelibs
  • Sitemap
  • Contact Us
 

KDECore

ktranslit.cpp

Go to the documentation of this file.
00001 /*  This file is part of the KDE libraries    Copyright (C) 2007 Chusslove Illich <caslav.ilic@gmx.net>
00002 
00003     This library is free software; you can redistribute it and/or
00004     modify it under the terms of the GNU Library General Public
00005     License as published by the Free Software Foundation; either
00006     version 2 of the License, or (at your option) any later version.
00007 
00008     This library is distributed in the hope that it will be useful,
00009     but WITHOUT ANY WARRANTY; without even the implied warranty of
00010     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00011     Library General Public License for more details.
00012 
00013     You should have received a copy of the GNU Library General Public License
00014     along with this library; see the file COPYING.LIB.  If not, write to
00015     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00016     Boston, MA 02110-1301, USA.
00017 */
00018 
00019 #include <ktranslit_p.h>
00020 #include <kdebug.h>
00021 
00022 #include <config.h>
00023 
00024 #include <QHash>
00025 
00026 // -----------------------------------------------------------------------------
00027 // Base class.
00028 
// Private data holder for the KTranslit base class.
// It carries no members at present.
class KTranslitPrivate
{
};
00032 
00033 KTranslit::KTranslit ()
00034 : d(NULL)
00035 {
00036 }
00037 
00038 KTranslit::~KTranslit ()
00039 {
00040     delete d;
00041 }
00042 
00043 KTranslit *KTranslit::create (const QString &lang)
00044 {
00045     if (lang == QString::fromAscii("sr")) {
00046         return new KTranslitSerbian();
00047     }
00048     else {
00049         return NULL;
00050     }
00051 }
00052 
00053 QStringList KTranslit::fallbackList (const QString &lang)
00054 {
00055     QStringList fallbacks;
00056 
00057     if (lang.startsWith(QString::fromAscii("sr@"))) {
00058         fallbacks += QString::fromAscii("sr");
00059     }
00060 
00061     return fallbacks;
00062 }
00063 
00064 void splitLangScript (const QString &lang, QString &ln, QString &scr)
00065 {
00066     ln = lang;
00067     scr.clear();
00068     int pos = lang.indexOf('@');
00069     if (pos >= 0) {
00070         ln = lang.left(pos);
00071         scr = lang.mid(pos + 1);
00072     }
00073 }
00074 
00075 QString KTranslit::higherPriorityScript (const QString &lang,
00076                                          const KLocale *locale)
00077 {
00078     if (locale == NULL) {
00079         return QString();
00080     }
00081 
00082     // Split into pure language and script part.
00083     QString ln, scr;
00084     splitLangScript(lang, ln, scr);
00085 
00086     // Search through higher priority languages.
00087     QString finalScrHi;
00088     if (lang != KLocale::defaultLanguage()) {
00089         foreach (const QString &langHi, locale->languageList()) {
00090             // Don't search lower priority languages.
00091             if (langHi == lang)
00092                 break;
00093 
00094             // Split current spec into pure language and script parts.
00095             QString lnHi, scrHi;
00096             splitLangScript(langHi, lnHi, scrHi);
00097 
00098             // Return current script if languages match.
00099             if (lnHi == ln) {
00100                 finalScrHi = scrHi;
00101                 break;
00102             }
00103         }
00104     }
00105     return finalScrHi;
00106 }
00107 
00108 QString KTranslit::transliterate (const QString &str,
00109                                   const QString &script) const
00110 {
00111     Q_UNUSED(script);
00112     return str;
00113 }
00114 
00115 QString KTranslit::resolveInserts (const QString &str_, int nins, int ind,
00116                                    const QString &head) const
00117 {
00118     int hlen = head.length();
00119 
00120     QString str = str_;
00121     QString rstr;
00122     while (1) {
00123         int p = str.indexOf(head);
00124         if (p < 0) {
00125             break;
00126         }
00127 
00128         // Append segment before optional insert to resulting text.
00129         rstr.append(str.left(p));
00130 
00131         // Must have at least 2 characters after the head.
00132         if (str.length() < p + hlen + 2) {
00133             kDebug(173) << QString("Malformed optional inserts list in {%1}, "
00134                                    "starting here: {%2}").arg(str_, str);
00135             return str_;
00136         }
00137 
00138         // Read the separating character and trim original string.
00139         QChar sep = str[p + hlen];
00140         str.remove(0, p + hlen + 1);
00141 
00142         // Parse requested number of inserts,
00143         // choose the one with matching index for resulting text.
00144         for (int i = 0; i < nins; ++i) {
00145             // Ending separator for this insert.
00146             int p = str.indexOf(sep);
00147 
00148             // Must have exactly the requested number of inserts.
00149             if (p < 0) {
00150                 kDebug(173) << QString("Not enough inserts listed in {%1}, "
00151                                        "starting here: {%2}").arg(str_, str);
00152                 return str_;
00153             }
00154 
00155             // If index is matching requested, append to resulting text.
00156             if (i == ind) {
00157                 rstr.append(str.left(p));
00158             }
00159 
00160             // Trim original string.
00161             str.remove(0, p + 1);
00162         }
00163     }
00164     // Append the final segment to resulting text.
00165     rstr.append(str);
00166 
00167     return rstr;
00168 }
00169 
00170 // If the insert is just starting at position i, return the position of the
00171 // first character after the insert (or string length if none).
00172 // If the insert is not starting, return i itself.
00173 static int skipInsert (const QString &str, int i, int ninserts,
00174                        const QString &head)
00175 {
00176     int hlen = head.length();
00177 
00178     if (str.mid(i, hlen) == head) {
00179         int slen = str.length();
00180         int ia = i + hlen;
00181         if (ia >= slen) return slen;
00182         QChar sep = str[ia];
00183         for (int k = 0; k < ninserts; ++k) {
00184             ia = str.indexOf(sep, ia + 1);
00185             if (ia < 0) return slen;
00186         }
00187         return ia + 1;
00188     }
00189     else {
00190         return i;
00191     }
00192 }
00193 
00194 // -----------------------------------------------------------------------------
00195 // Serbian.
00196 
00197 class KTranslitSerbianPrivate
00198 {
00199     public:
00200     QHash<QString, bool> latinNames;
00201     QHash<QString, bool> ijekavianNames;
00202     QHash<QChar, QString> dictC2L;
00203     QHash<QString, QString> dictI2E;
00204     int maxReflexLen;
00205     QChar reflexMark;
00206 };
00207 
00208 KTranslitSerbian::KTranslitSerbian ()
00209 : d(new KTranslitSerbianPrivate())
00210 {
00211     #define SR_NAME_ENTRY(hash, name) do { \
00212         hash[QString::fromAscii(name)] = true; \
00213     } while (0)
00214     SR_NAME_ENTRY(d->latinNames, "latin");
00215     SR_NAME_ENTRY(d->latinNames, "Latn");
00216     SR_NAME_ENTRY(d->latinNames, "ijelatin");
00217     SR_NAME_ENTRY(d->latinNames, "jekavianlatin");
00218     SR_NAME_ENTRY(d->latinNames, "ijekavianlatin");
00219     SR_NAME_ENTRY(d->latinNames, "yekavianlatin");
00220     SR_NAME_ENTRY(d->latinNames, "iyekavianlatin");
00221     SR_NAME_ENTRY(d->ijekavianNames, "ije");
00222     SR_NAME_ENTRY(d->ijekavianNames, "ijelatin");
00223     SR_NAME_ENTRY(d->ijekavianNames, "jekavian");
00224     SR_NAME_ENTRY(d->ijekavianNames, "jekavianlatin");
00225     SR_NAME_ENTRY(d->ijekavianNames, "ijekavian");
00226     SR_NAME_ENTRY(d->ijekavianNames, "ijekavianlatin");
00227     SR_NAME_ENTRY(d->ijekavianNames, "yekavian");
00228     SR_NAME_ENTRY(d->ijekavianNames, "yekavianlatin");
00229     SR_NAME_ENTRY(d->ijekavianNames, "iyekavian");
00230     SR_NAME_ENTRY(d->ijekavianNames, "iyekavianlatin");
00231 
00232     #define SR_DICTC2L_ENTRY(a, b) do { \
00233         d->dictC2L[QString::fromUtf8(a)[0]] = QString::fromUtf8(b); \
00234     } while (0)
00235     SR_DICTC2L_ENTRY("а", "a");
00236     SR_DICTC2L_ENTRY("б", "b");
00237     SR_DICTC2L_ENTRY("в", "v");
00238     SR_DICTC2L_ENTRY("г", "g");
00239     SR_DICTC2L_ENTRY("д", "d");
00240     SR_DICTC2L_ENTRY("ђ", "đ");
00241     SR_DICTC2L_ENTRY("е", "e");
00242     SR_DICTC2L_ENTRY("ж", "ž");
00243     SR_DICTC2L_ENTRY("з", "z");
00244     SR_DICTC2L_ENTRY("и", "i");
00245     SR_DICTC2L_ENTRY("ј", "j");
00246     SR_DICTC2L_ENTRY("к", "k");
00247     SR_DICTC2L_ENTRY("л", "l");
00248     SR_DICTC2L_ENTRY("љ", "lj");
00249     SR_DICTC2L_ENTRY("м", "m");
00250     SR_DICTC2L_ENTRY("н", "n");
00251     SR_DICTC2L_ENTRY("њ", "nj");
00252     SR_DICTC2L_ENTRY("о", "o");
00253     SR_DICTC2L_ENTRY("п", "p");
00254     SR_DICTC2L_ENTRY("р", "r");
00255     SR_DICTC2L_ENTRY("с", "s");
00256     SR_DICTC2L_ENTRY("т", "t");
00257     SR_DICTC2L_ENTRY("ћ", "ć");
00258     SR_DICTC2L_ENTRY("у", "u");
00259     SR_DICTC2L_ENTRY("ф", "f");
00260     SR_DICTC2L_ENTRY("х", "h");
00261     SR_DICTC2L_ENTRY("ц", "c");
00262     SR_DICTC2L_ENTRY("ч", "č");
00263     SR_DICTC2L_ENTRY("џ", "dž");
00264     SR_DICTC2L_ENTRY("ш", "š");
00265     SR_DICTC2L_ENTRY("А", "A");
00266     SR_DICTC2L_ENTRY("Б", "B");
00267     SR_DICTC2L_ENTRY("В", "V");
00268     SR_DICTC2L_ENTRY("Г", "G");
00269     SR_DICTC2L_ENTRY("Д", "D");
00270     SR_DICTC2L_ENTRY("Ђ", "Đ");
00271     SR_DICTC2L_ENTRY("Е", "E");
00272     SR_DICTC2L_ENTRY("Ж", "Ž");
00273     SR_DICTC2L_ENTRY("З", "Z");
00274     SR_DICTC2L_ENTRY("И", "I");
00275     SR_DICTC2L_ENTRY("Ј", "J");
00276     SR_DICTC2L_ENTRY("К", "K");
00277     SR_DICTC2L_ENTRY("Л", "L");
00278     SR_DICTC2L_ENTRY("Љ", "Lj");
00279     SR_DICTC2L_ENTRY("М", "M");
00280     SR_DICTC2L_ENTRY("Н", "N");
00281     SR_DICTC2L_ENTRY("Њ", "Nj");
00282     SR_DICTC2L_ENTRY("О", "O");
00283     SR_DICTC2L_ENTRY("П", "P");
00284     SR_DICTC2L_ENTRY("Р", "R");
00285     SR_DICTC2L_ENTRY("С", "S");
00286     SR_DICTC2L_ENTRY("Т", "T");
00287     SR_DICTC2L_ENTRY("Ћ", "Ć");
00288     SR_DICTC2L_ENTRY("У", "U");
00289     SR_DICTC2L_ENTRY("Ф", "F");
00290     SR_DICTC2L_ENTRY("Х", "H");
00291     SR_DICTC2L_ENTRY("Ц", "C");
00292     SR_DICTC2L_ENTRY("Ч", "Č");
00293     SR_DICTC2L_ENTRY("Џ", "Dž");
00294     SR_DICTC2L_ENTRY("Ш", "Š");
00295     // ...and some accented letters existing as NFC:
00296     SR_DICTC2L_ENTRY("ѐ", "è");
00297     SR_DICTC2L_ENTRY("ѝ", "ì");
00298     SR_DICTC2L_ENTRY("ӣ", "ī");
00299     SR_DICTC2L_ENTRY("ӯ", "ū");
00300     SR_DICTC2L_ENTRY("Ѐ", "È");
00301     SR_DICTC2L_ENTRY("Ѝ", "Ì");
00302     SR_DICTC2L_ENTRY("Ӣ", "Ī");
00303     SR_DICTC2L_ENTRY("Ӯ", "Ū");
00304 
00305     d->reflexMark = QString::fromUtf8("›")[0];
00306     #define SR_DICTI2E_ENTRY(a, b) do { \
00307         d->dictI2E[QString::fromUtf8(a)] = QString::fromUtf8(b); \
00308     } while (0)
00309     // basic, Cyrillic
00310     SR_DICTI2E_ENTRY("ије", "е");
00311     SR_DICTI2E_ENTRY("Ије", "Е");
00312     SR_DICTI2E_ENTRY("ИЈЕ", "Е");
00313     SR_DICTI2E_ENTRY("иј", "е");
00314     SR_DICTI2E_ENTRY("Иј", "Е");
00315     SR_DICTI2E_ENTRY("ИЈ", "Е");
00316     SR_DICTI2E_ENTRY("је", "е");
00317     SR_DICTI2E_ENTRY("Је", "Е");
00318     SR_DICTI2E_ENTRY("ЈЕ", "Е");
00319     SR_DICTI2E_ENTRY("ље", "ле");
00320     SR_DICTI2E_ENTRY("Ље", "Ле");
00321     SR_DICTI2E_ENTRY("ЉЕ", "ЛЕ");
00322     SR_DICTI2E_ENTRY("ње", "не");
00323     SR_DICTI2E_ENTRY("Ње", "Не");
00324     SR_DICTI2E_ENTRY("ЊЕ", "НЕ");
00325     SR_DICTI2E_ENTRY("ио", "ео");
00326     SR_DICTI2E_ENTRY("Ио", "Ео");
00327     SR_DICTI2E_ENTRY("ИО", "ЕО");
00328     SR_DICTI2E_ENTRY("иљ", "ел");
00329     SR_DICTI2E_ENTRY("Иљ", "Ел");
00330     SR_DICTI2E_ENTRY("ИЉ", "ЕЛ");
00331     // basic, Latin
00332     SR_DICTI2E_ENTRY("ije", "e");
00333     SR_DICTI2E_ENTRY("Ije", "E");
00334     SR_DICTI2E_ENTRY("IJE", "E");
00335     SR_DICTI2E_ENTRY("ij", "e");
00336     SR_DICTI2E_ENTRY("Ij", "E");
00337     SR_DICTI2E_ENTRY("IJ", "E");
00338     SR_DICTI2E_ENTRY("je", "e");
00339     SR_DICTI2E_ENTRY("Je", "E");
00340     SR_DICTI2E_ENTRY("JE", "E");
00341     SR_DICTI2E_ENTRY("lje", "le");
00342     SR_DICTI2E_ENTRY("Lje", "Le");
00343     SR_DICTI2E_ENTRY("LJE", "LE");
00344     SR_DICTI2E_ENTRY("nje", "ne");
00345     SR_DICTI2E_ENTRY("Nje", "Ne");
00346     SR_DICTI2E_ENTRY("NJE", "NE");
00347     SR_DICTI2E_ENTRY("io", "eo");
00348     SR_DICTI2E_ENTRY("Io", "Eo");
00349     SR_DICTI2E_ENTRY("IO", "EO");
00350     SR_DICTI2E_ENTRY("ilj", "el");
00351     SR_DICTI2E_ENTRY("Ilj", "El");
00352     SR_DICTI2E_ENTRY("ILJ", "EL");
00353     // special cases, Cyrillic
00354     SR_DICTI2E_ENTRY("лије", "ли");
00355     SR_DICTI2E_ENTRY("Лије", "Ли");
00356     SR_DICTI2E_ENTRY("ЛИЈЕ", "ЛИ");
00357     SR_DICTI2E_ENTRY("лијен", "лењ");
00358     SR_DICTI2E_ENTRY("Лијен", "Лењ");
00359     SR_DICTI2E_ENTRY("ЛИЈЕН", "ЛЕЊ");
00360     SR_DICTI2E_ENTRY("мија", "меја");
00361     SR_DICTI2E_ENTRY("Мија", "Меја");
00362     SR_DICTI2E_ENTRY("МИЈА", "МЕЈА");
00363     SR_DICTI2E_ENTRY("мије", "мејe");
00364     SR_DICTI2E_ENTRY("Мије", "Мејe");
00365     SR_DICTI2E_ENTRY("МИЈЕ", "МЕЈE");
00366     SR_DICTI2E_ENTRY("није", "ни");
00367     SR_DICTI2E_ENTRY("Није", "Ни");
00368     SR_DICTI2E_ENTRY("НИЈЕ", "НИ");
00369     // special cases, Latin
00370     SR_DICTI2E_ENTRY("lije", "li");
00371     SR_DICTI2E_ENTRY("Lije", "Li");
00372     SR_DICTI2E_ENTRY("LIJE", "LI");
00373     SR_DICTI2E_ENTRY("lijen", "lenj");
00374     SR_DICTI2E_ENTRY("Lijen", "Lenj");
00375     SR_DICTI2E_ENTRY("LIJEN", "LENJ");
00376     SR_DICTI2E_ENTRY("mija", "meja");
00377     SR_DICTI2E_ENTRY("Mija", "Meja");
00378     SR_DICTI2E_ENTRY("MIJA", "MEJA");
00379     SR_DICTI2E_ENTRY("mije", "meje");
00380     SR_DICTI2E_ENTRY("Mije", "Meje");
00381     SR_DICTI2E_ENTRY("MIJE", "MEJE");
00382     SR_DICTI2E_ENTRY("nije", "ni");
00383     SR_DICTI2E_ENTRY("Nije", "Ni");
00384     SR_DICTI2E_ENTRY("NIJE", "NI");
00385 
00386     d->maxReflexLen = 0;
00387     foreach (const QString &reflex, d->dictI2E.keys()) {
00388         if (d->maxReflexLen < reflex.length()) {
00389             d->maxReflexLen = reflex.length();
00390         }
00391     }
00392 }
00393 
00394 KTranslitSerbian::~KTranslitSerbian ()
00395 {
00396     delete d;
00397 }
00398 
00399 QString KTranslitSerbian::transliterate (const QString &str_,
00400                                          const QString &script) const
00401 {
00402     static QString insHead("~@");
00403     static QString insHeadIje("~#");
00404 
00405     QString str = str_;
00406 
00407     // Resolve Ekavian/Ijekavian.
00408     if (d->ijekavianNames.contains(script)) {
00409         // Just remove reflex marks.
00410         str.remove(d->reflexMark);
00411         str = resolveInserts(str, 2, 1, insHeadIje);
00412     } else {
00413         QString nstr;
00414         int p = 0;
00415         while (true) {
00416             int pp = p;
00417             p = str.indexOf(d->reflexMark, p);
00418             if (p < 0) {
00419                 nstr.append(str.mid(pp));
00420                 break;
00421             }
00422             nstr.append(str.mid(pp, p - pp));
00423             p += 1;
00424 
00425             // Try to resolve yat-reflex.
00426             QString reflex;
00427             QString ekvform;
00428             for (int rl = d->maxReflexLen; rl > 0; --rl) {
00429                 reflex = str.mid(p, rl);
00430                 ekvform = d->dictI2E[reflex];
00431                 if (!ekvform.isEmpty()) {
00432                     break;
00433                 }
00434             }
00435 
00436             if (!ekvform.isEmpty()) {
00437                 nstr.append(ekvform);
00438                 p += reflex.length();
00439             } else {
00440                 QString dreflex = str.mid(p - 1, d->maxReflexLen + 1);
00441                 kDebug(173) << QString("Unknown yat-reflex {%1} "
00442                                        "in {%2}").arg(dreflex, str);
00443                 nstr.append(str.mid(p - 1, 1));
00444             }
00445         }
00446         str = resolveInserts(nstr, 2, 0, insHeadIje);
00447     }
00448 
00449     // Resolve Cyrillic/Latin.
00450     if (d->latinNames.contains(script)) {
00451         // NOTE: This loop has been somewhat optimized for speed.
00452         int slen = str.length();
00453         bool anyInserts = str.indexOf(insHead) >= 0;
00454         QString nstr;
00455         nstr.reserve(slen + 5);
00456         for (int i = 0; i < slen; ++i) {
00457             // Skip alternative inserts altogether, so that they can be used
00458             // as a mean to exclude from transliteration.
00459             if (anyInserts) {
00460                 int to = skipInsert(str, i, 2, insHead);
00461                 if (to > i) {
00462                     nstr.append(str.mid(i, to - i));
00463                     if (to >= slen) break;
00464                     i = to;
00465                 }
00466             }
00467             // Transliterate current character.
00468             QChar c = str[i];
00469             QString r = d->dictC2L[c];
00470             if (!r.isEmpty()) {
00471                 if (   r.length() > 1 && c.isUpper()
00472                     && (   (i + 1 < slen && str[i + 1].isUpper())
00473                         || (i > 0 && str[i - 1].isUpper()))) {
00474                     nstr.append(r.toUpper());
00475                 } else {
00476                     nstr.append(r);
00477                 }
00478             } else {
00479                 nstr.append(c);
00480             }
00481         }
00482         str = resolveInserts(nstr, 2, 1, insHead);
00483     } else {
00484         str = resolveInserts(str, 2, 0, insHead);
00485     }
00486 
00487     return str;
00488 }

KDECore

Skip menu "KDECore"
  • Main Page
  • Modules
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

kdelibs

Skip menu "kdelibs"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • Kate
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Generated for kdelibs by doxygen 1.6.1
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal