• Skip to content
  • Skip to link menu
KDE 4.3 API Reference
  • KDE API Reference
  • KDE-PIM Libraries
  • Sitemap
  • Contact Us
 

kpimutils

linklocator.cpp

Go to the documentation of this file.
00001 /*
00002   Copyright (c) 2002 Dave Corrie <kde@davecorrie.com>
00003 
00004   This library is free software; you can redistribute it and/or
00005   modify it under the terms of the GNU Library General Public
00006   License as published by the Free Software Foundation; either
00007   version 2 of the License, or (at your option) any later version.
00008 
00009   This library is distributed in the hope that it will be useful,
00010   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012   Library General Public License for more details.
00013 
00014   You should have received a copy of the GNU Library General Public License
00015   along with this library; see the file COPYING.LIB.  If not, write to
00016   the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00017   Boston, MA 02110-1301, USA.
00018 */
00030 #include "linklocator.h"
00031 
00032 #include <kglobal.h>
00033 #include <kstandarddirs.h>
00034 #include <kcodecs.h>
00035 #include <kdebug.h>
00036 #include <kdeversion.h>
00037 #if KDE_IS_VERSION( 4, 0, 95 )
00038 #include <kemoticons.h>
00039 #endif
00040 
00041 #include <QtCore/QCoreApplication>
00042 #include <QtCore/QFile>
00043 #include <QtCore/QRegExp>
00044 #include <QtGui/QTextDocument>
00045 
00046 #include <limits.h>
00047 
00048 using namespace KPIMUtils;
00049 
00054 //@cond PRIVATE
00055 class KPIMUtils::LinkLocator::Private
00056 {
00057   public:
00058     int mMaxUrlLen;
00059     int mMaxAddressLen;
00060 };
00061 //@endcond
00062 
00063 #if KDE_IS_VERSION( 4, 0, 95 )
00064 // Use a static for this as calls to the KEmoticons constructor are expensive.
00065 K_GLOBAL_STATIC( KEmoticons, sEmoticons )
00066 #endif
00067 
00068 LinkLocator::LinkLocator( const QString &text, int pos )
00069   : mText( text ), mPos( pos ), d( new KPIMUtils::LinkLocator::Private )
00070 {
00071   d->mMaxUrlLen = 4096;
00072   d->mMaxAddressLen = 255;
00073 
00074   // If you change either of the above values for maxUrlLen or
00075   // maxAddressLen, then please also update the documentation for
00076   // setMaxUrlLen()/setMaxAddressLen() in the header file AND the
00077   // default values used for the maxUrlLen/maxAddressLen parameters
00078   // of convertToHtml().
00079 }
00080 
00081 LinkLocator::~LinkLocator()
00082 {
00083   delete d;
00084 }
00085 
00086 void LinkLocator::setMaxUrlLen( int length )
00087 {
00088   d->mMaxUrlLen = length;
00089 }
00090 
00091 int LinkLocator::maxUrlLen() const
00092 {
00093   return d->mMaxUrlLen;
00094 }
00095 
00096 void LinkLocator::setMaxAddressLen( int length )
00097 {
00098   d->mMaxAddressLen = length;
00099 }
00100 
00101 int LinkLocator::maxAddressLen() const
00102 {
00103   return d->mMaxAddressLen;
00104 }
00105 
00106 QString LinkLocator::getUrl()
00107 {
00108   QString url;
00109   if ( atUrl() ) {
00110     // NOTE: see http://tools.ietf.org/html/rfc3986#appendix-A and especially appendix-C
00111     // Appendix-C mainly says, that when extracting URLs from plain text, line breaks shall
00112     // be allowed and should be ignored when the URI is extracted.
00113 
00114     // This implementation follows this recommendation and
00115     // allows the URL to be enclosed within different kind of brackets/quotes
00116     // If an URL is enclosed, whitespace characters are allowed and removed, otherwise
00117     // the URL ends with the first whitespace
00118     // Also, if the URL is enclosed in brackets, the URL itself is not allowed
00119     // to contain the closing bracket, as this would be detected as the end of the URL
00120 
00121     QChar beforeUrl, afterUrl;
00122 
00123     // detect if the url has been surrounded by brackets or quotes
00124     if ( mPos > 0 ) {
00125       beforeUrl = mText[mPos - 1];
00126 
00127       if ( beforeUrl == '(' )
00128         afterUrl = ')';
00129       else if ( beforeUrl == '[' )
00130         afterUrl = ']';
00131       else if ( beforeUrl == '<' )
00132         afterUrl = '>';
00133       else if ( beforeUrl == '>' )    // for e.g. <link>http://.....</link>
00134         afterUrl = '<';
00135       else if ( beforeUrl == '"' )
00136         afterUrl = '"';
00137     }
00138 
00139     url.reserve( maxUrlLen() );  // avoid allocs
00140     int start = mPos;
00141     while ( ( mPos < (int)mText.length() ) &&
00142             ( mText[mPos].isPrint() || mText[mPos].isSpace() ) &&
00143             ( ( afterUrl.isNull() && !mText[mPos].isSpace() ) ||
00144               ( !afterUrl.isNull() && mText[mPos] != afterUrl ) )
00145           ) {
00146       if ( !mText[mPos].isSpace() ) {   // skip whitespace
00147         url.append( mText[mPos] );
00148         if ( url.length() > maxUrlLen() )
00149           break;
00150       }
00151 
00152       mPos++;
00153     }
00154 
00155     if ( isEmptyUrl(url) || ( url.length() > maxUrlLen() ) ) {
00156       mPos = start;
00157       url = "";
00158     } else {
00159       --mPos;
00160     }
00161   }
00162   return url;
00163 }
00164 
00165 // keep this in sync with KMMainWin::slotUrlClicked()
00166 bool LinkLocator::atUrl() const
00167 {
00168   // the following characters are allowed in a dot-atom (RFC 2822):
00169   // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~
00170   const QString allowedSpecialChars = QString( ".!#$%&'*+-/=?^_`{|}~" );
00171 
00172   // the character directly before the URL must not be a letter, a number or
00173   // any other character allowed in a dot-atom (RFC 2822).
00174   if ( ( mPos > 0 ) &&
00175        ( mText[mPos-1].isLetterOrNumber() ||
00176          ( allowedSpecialChars.indexOf( mText[mPos-1] ) != -1 ) ) ) {
00177     return false;
00178   }
00179 
00180   QChar ch = mText[mPos];
00181   return
00182     ( ch == 'h' && ( mText.mid( mPos, 7 ) == "http://" ||
00183                      mText.mid( mPos, 8 ) == "https://" ) ) ||
00184     ( ch == 'v' && mText.mid( mPos, 6 ) == "vnc://" ) ||
00185     ( ch == 'f' && ( mText.mid( mPos, 7 ) == "fish://" ||
00186                      mText.mid( mPos, 6 ) == "ftp://" ||
00187                      mText.mid( mPos, 7 ) == "ftps://" ) ) ||
00188     ( ch == 's' && ( mText.mid( mPos, 7 ) == "sftp://" ||
00189                      mText.mid( mPos, 6 ) == "smb://" ) ) ||
00190     ( ch == 'm' && mText.mid( mPos, 7 ) == "mailto:" ) ||
00191     ( ch == 'w' && mText.mid( mPos, 4 ) == "www." ) ||
00192     ( ch == 'f' && ( mText.mid( mPos, 4 ) == "ftp." ||
00193                      mText.mid( mPos, 7 ) == "file://" ) ) ||
00194     ( ch == 'n' && mText.mid( mPos, 5 ) == "news:" );
00195 }
00196 
00197 bool LinkLocator::isEmptyUrl( const QString &url ) const
00198 {
00199   return url.isEmpty() ||
00200     url == "http://" ||
00201     url == "https://" ||
00202     url == "fish://" ||
00203     url == "ftp://" ||
00204     url == "ftps://" ||
00205     url == "sftp://" ||
00206     url == "smb://" ||
00207     url == "vnc://" ||
00208     url == "mailto" ||
00209     url == "www" ||
00210     url == "ftp" ||
00211     url == "news" ||
00212     url == "news://";
00213 }
00214 
00215 QString LinkLocator::getEmailAddress()
00216 {
00217   QString address;
00218 
00219   if ( mText[mPos] == '@' ) {
00220     // the following characters are allowed in a dot-atom (RFC 2822):
00221     // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~
00222     const QString allowedSpecialChars = QString( ".!#$%&'*+-/=?^_`{|}~" );
00223 
00224     // determine the local part of the email address
00225     int start = mPos - 1;
00226     while ( start >= 0 && mText[start].unicode() < 128 &&
00227             ( mText[start].isLetterOrNumber() ||
00228               mText[start] == '@' || // allow @ to find invalid email addresses
00229               allowedSpecialChars.indexOf( mText[start] ) != -1 ) ) {
00230       if ( mText[start] == '@' ) {
00231         return QString(); // local part contains '@' -> no email address
00232       }
00233       --start;
00234     }
00235     ++start;
00236     // we assume that an email address starts with a letter or a digit
00237     while ( ( start < mPos ) && !mText[start].isLetterOrNumber() ) {
00238       ++start;
00239     }
00240     if ( start == mPos ) {
00241       return QString(); // local part is empty -> no email address
00242     }
00243 
00244     // determine the domain part of the email address
00245     int dotPos = INT_MAX;
00246     int end = mPos + 1;
00247     while ( end < (int)mText.length() &&
00248             ( mText[end].isLetterOrNumber() ||
00249               mText[end] == '@' || // allow @ to find invalid email addresses
00250               mText[end] == '.' ||
00251               mText[end] == '-' ) ) {
00252       if ( mText[end] == '@' ) {
00253         return QString(); // domain part contains '@' -> no email address
00254       }
00255       if ( mText[end] == '.' ) {
00256         dotPos = qMin( dotPos, end ); // remember index of first dot in domain
00257       }
00258       ++end;
00259     }
00260     // we assume that an email address ends with a letter or a digit
00261     while ( ( end > mPos ) && !mText[end - 1].isLetterOrNumber() ) {
00262       --end;
00263     }
00264     if ( end == mPos ) {
00265       return QString(); // domain part is empty -> no email address
00266     }
00267     if ( dotPos >= end ) {
00268       return QString(); // domain part doesn't contain a dot
00269     }
00270 
00271     if ( end - start > maxAddressLen() ) {
00272       return QString(); // too long -> most likely no email address
00273     }
00274     address = mText.mid( start, end - start );
00275 
00276     mPos = end - 1;
00277   }
00278   return address;
00279 }
00280 
00281 QString LinkLocator::convertToHtml( const QString &plainText, int flags,
00282                                     int maxUrlLen, int maxAddressLen )
00283 {
00284   LinkLocator locator( plainText );
00285   locator.setMaxUrlLen( maxUrlLen );
00286   locator.setMaxAddressLen( maxAddressLen );
00287 
00288   QString str;
00289   QString result( (QChar*)0, (int)locator.mText.length() * 2 );
00290   QChar ch;
00291   int x;
00292   bool startOfLine = true;
00293   QString emoticon;
00294 
00295   for ( locator.mPos = 0, x = 0; locator.mPos < (int)locator.mText.length();
00296         locator.mPos++, x++ ) {
00297     ch = locator.mText[locator.mPos];
00298     if ( flags & PreserveSpaces ) {
00299       if ( ch == ' ' ) {
00300         if ( locator.mPos + 1 < locator.mText.length() ) {
00301           if ( locator.mText[locator.mPos + 1] != ' ' ) {
00302 
00303             // A single space, make it breaking if not at the start or end of the line
00304             const bool endOfLine = locator.mText[locator.mPos + 1] == '\n';
00305             if ( !startOfLine && !endOfLine )
00306               result += ' ';
00307             else
00308               result += "&nbsp;";
00309           }
00310           else {
00311 
00312             // Whitespace of more than one space, make it all non-breaking
00313             while( locator.mPos < locator.mText.length() && locator.mText[locator.mPos] == ' ' ) {
00314               result += "&nbsp;";
00315               locator.mPos++;
00316               x++;
00317             }
00318 
00319             // We incremented once to often, undo that
00320             locator.mPos--;
00321             x--;
00322           }
00323         }
00324         else {
00325           // Last space in the text, it is non-breaking
00326           result += "&nbsp;";
00327         }
00328 
00329         if ( startOfLine ) {
00330           startOfLine = false;
00331         }
00332         continue;
00333       } else if ( ch == '\t' ) {
00334         do
00335         {
00336           result += "&nbsp;";
00337           x++;
00338         }
00339         while ( ( x & 7 ) != 0 );
00340         x--;
00341         startOfLine = false;
00342         continue;
00343       }
00344     }
00345     if ( ch == '\n' ) {
00346       result += "<br />\n"; // Keep the \n, so apps can figure out the quoting levels correctly.
00347       startOfLine = true;
00348       x = -1;
00349       continue;
00350     }
00351 
00352     startOfLine = false;
00353     if ( ch == '&' ) {
00354       result += "&amp;";
00355     } else if ( ch == '"' ) {
00356       result += "&quot;";
00357     } else if ( ch == '<' ) {
00358       result += "&lt;";
00359     } else if ( ch == '>' ) {
00360       result += "&gt;";
00361     } else {
00362       const int start = locator.mPos;
00363       if ( !( flags & IgnoreUrls ) ) {
00364         str = locator.getUrl();
00365         if ( !str.isEmpty() ) {
00366           QString hyperlink;
00367           if ( str.left( 4 ) == "www." ) {
00368             hyperlink = "http://" + str;
00369           } else if ( str.left( 4 ) == "ftp." ) {
00370             hyperlink = "ftp://" + str;
00371           } else {
00372             hyperlink = str;
00373           }
00374 
00375           str = str.replace( '&', "&amp;" );
00376           result += "<a href=\"" + hyperlink + "\">" + Qt::escape( str ) + "</a>";
00377           x += locator.mPos - start;
00378           continue;
00379         }
00380         str = locator.getEmailAddress();
00381         if ( !str.isEmpty() ) {
00382           // len is the length of the local part
00383           int len = str.indexOf( '@' );
00384           QString localPart = str.left( len );
00385 
00386           // remove the local part from the result (as '&'s have been expanded to
00387           // &amp; we have to take care of the 4 additional characters per '&')
00388           result.truncate( result.length() -
00389                            len - ( localPart.count( '&' ) * 4 ) );
00390           x -= len;
00391 
00392           result += "<a href=\"mailto:" + str + "\">" + str + "</a>";
00393           x += str.length() - 1;
00394           continue;
00395         }
00396       }
00397       if ( flags & HighlightText ) {
00398         str = locator.highlightedText();
00399         if ( !str.isEmpty() ) {
00400           result += str;
00401           x += locator.mPos - start;
00402           continue;
00403         }
00404       }
00405       result += ch;
00406     }
00407   }
00408 
00409 #if KDE_IS_VERSION( 4, 0, 95 )
00410   if ( flags & ReplaceSmileys ) {
00411     QStringList exclude;
00412     exclude << "(c)" << "(C)" << "&gt;:-(" << "&gt;:(" << "(B)" << "(b)" << "(P)" << "(p)";
00413     exclude << "(O)" << "(o)" << "(D)" << "(d)" << "(E)" << "(e)" << "(K)" << "(k)";
00414     exclude << "(I)" << "(i)" << "(L)" << "(l)" << "(8)" << "(T)" << "(t)" << "(G)";
00415     exclude << "(g)" << "(F)" << "(f)" << "(H)";
00416     exclude << "8)" << "(N)" << "(n)" << "(Y)" << "(y)" << "(U)" << "(u)" << "(W)" << "(w)";
00417     static QString cachedEmoticonsThemeName;
00418     if ( cachedEmoticonsThemeName.isEmpty() ) {
00419       cachedEmoticonsThemeName = KEmoticons::currentThemeName();
00420     }
00421     result =
00422       sEmoticons->theme( cachedEmoticonsThemeName ).parseEmoticons(
00423         result, KEmoticonsTheme::StrictParse | KEmoticonsTheme::SkipHTML, exclude );
00424   }
00425 #endif
00426 
00427   return result;
00428 }
00429 
00430 QString LinkLocator::pngToDataUrl( const QString &iconPath )
00431 {
00432   if ( iconPath.isEmpty() ) {
00433     return QString();
00434   }
00435 
00436   QFile pngFile( iconPath );
00437   if ( !pngFile.open( QIODevice::ReadOnly | QIODevice::Unbuffered ) ) {
00438     return QString();
00439   }
00440 
00441   QByteArray ba = pngFile.readAll();
00442   pngFile.close();
00443   return QString::fromLatin1( "data:image/png;base64,%1" ).arg( ba.toBase64().constData() );
00444 }
00445 
00446 QString LinkLocator::highlightedText()
00447 {
00448   // formating symbols must be prepended with a whitespace
00449   if ( ( mPos > 0 ) && !mText[mPos-1].isSpace() ) {
00450     return QString();
00451   }
00452 
00453   const QChar ch = mText[mPos];
00454   if ( ch != '/' && ch != '*' && ch != '_' ) {
00455     return QString();
00456   }
00457 
00458   QRegExp re =
00459     QRegExp( QString( "\\%1([0-9A-Za-z]+)\\%2" ).arg( ch ).arg( ch ) );
00460   if ( re.indexIn( mText, mPos ) == mPos ) {
00461     int length = re.matchedLength();
00462     // there must be a whitespace after the closing formating symbol
00463     if ( mPos + length < mText.length() && !mText[mPos + length].isSpace() ) {
00464       return QString();
00465     }
00466     mPos += length - 1;
00467     switch ( ch.toLatin1() ) {
00468     case '*':
00469       return "<b>" + re.cap( 1 ) + "</b>";
00470     case '_':
00471       return "<u>" + re.cap( 1 ) + "</u>";
00472     case '/':
00473       return "<i>" + re.cap( 1 ) + "</i>";
00474     }
00475   }
00476   return QString();
00477 }

kpimutils

Skip menu "kpimutils"
  • Main Page
  • Modules
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Class Members

KDE-PIM Libraries

Skip menu "KDE-PIM Libraries"
  • akonadi
  • kabc
  • kblog
  • kcal
  • kholidays
  • kimap
  • kioslave
  •   imap4
  •   mbox
  • kldap
  • kmime
  • kpimidentities
  • kpimtextedit
  •   richtextbuilders
  • kpimutils
  • kresources
  • ktnef
  • kxmlrpcclient
  • mailtransport
  • microblog
  • qgpgme
  • syndication
  •   atom
  •   rdf
  •   rss2
Generated for KDE-PIM Libraries by doxygen 1.6.1
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal