• Skip to content
  • Skip to link menu
KDE 4.3 API Reference
  • KDE API Reference
  • KDE-PIM Libraries
  • Sitemap
  • Contact Us
 

KMIME Library

kmime_util.cpp

00001 /*
00002   kmime_util.cpp
00003 
00004   KMime, the KDE internet mail/usenet news message library.
00005   Copyright (c) 2001 the KMime authors.
00006   See file AUTHORS for details
00007 
00008   This library is free software; you can redistribute it and/or
00009   modify it under the terms of the GNU Library General Public
00010   License as published by the Free Software Foundation; either
00011   version 2 of the License, or (at your option) any later version.
00012 
00013   This library is distributed in the hope that it will be useful,
00014   but WITHOUT ANY WARRANTY; without even the implied warranty of
00015   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00016   Library General Public License for more details.
00017 
00018   You should have received a copy of the GNU Library General Public License
00019   along with this library; see the file COPYING.LIB.  If not, write to
00020   the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00021   Boston, MA 02110-1301, USA.
00022 */
00023 
00024 #include "kmime_util.h"
00025 #include "kmime_util_p.h"
00026 #include "kmime_header_parsing.h"
00027 
00028 #include <config-kmime.h>
00029 #include <kdefakes.h> // for strcasestr
00030 #include <kglobal.h>
00031 #include <klocale.h>
00032 #include <kcharsets.h>
00033 #include <kcodecs.h>
00034 #include <kdebug.h>
00035 
00036 #include <QtCore/QList>
00037 #include <QtCore/QString>
00038 #include <QtCore/QTextCodec>
00039 
00040 #include <ctype.h>
00041 #include <time.h>
00042 #include <stdlib.h>
00043 #include <unistd.h>
00044 
00045 using namespace KMime;
00046 
00047 namespace KMime {
00048 
00049 QList<QByteArray> c_harsetCache;
00050 QList<QByteArray> l_anguageCache;
00051 
00052 QByteArray cachedCharset( const QByteArray &name )
00053 {
00054   foreach ( const QByteArray& charset, c_harsetCache ) {
00055     if ( qstricmp( name.data(), charset.data() ) == 0 ) {
00056       return charset;
00057     }
00058   }
00059 
00060   c_harsetCache.append( name.toUpper() );
00061   //kDebug(5320) << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count();
00062   return c_harsetCache.last();
00063 }
00064 
00065 QByteArray cachedLanguage( const QByteArray &name )
00066 {
00067   foreach ( const QByteArray& language, l_anguageCache ) {
00068     if ( qstricmp( name.data(), language.data() ) == 0 ) {
00069       return language;
00070     }
00071   }
00072 
00073   l_anguageCache.append( name.toUpper() );
00074   //kDebug(5320) << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count();
00075   return l_anguageCache.last();
00076 }
00077 
00078 bool isUsAscii( const QString &s )
00079 {
00080   uint sLength = s.length();
00081   for ( uint i=0; i<sLength; i++ ) {
00082     if ( s.at( i ).toLatin1() <= 0 ) { // c==0: non-latin1, c<0: non-us-ascii
00083       return false;
00084     }
00085   }
00086   return true;
00087 }
00088 
00089 // "(),.:;<>@[\]
00090 const uchar specialsMap[16] = {
00091   0x00, 0x00, 0x00, 0x00, // CTLs
00092   0x20, 0xCA, 0x00, 0x3A, // SPACE ... '?'
00093   0x80, 0x00, 0x00, 0x1C, // '@' ... '_'
00094   0x00, 0x00, 0x00, 0x00  // '`' ... DEL
00095 };
00096 
00097 // "(),:;<>@[\]/=?
00098 const uchar tSpecialsMap[16] = {
00099   0x00, 0x00, 0x00, 0x00, // CTLs
00100   0x20, 0xC9, 0x00, 0x3F, // SPACE ... '?'
00101   0x80, 0x00, 0x00, 0x1C, // '@' ... '_'
00102   0x00, 0x00, 0x00, 0x00  // '`' ... DEL
00103 };
00104 
00105 // all except specials, CTLs, SPACE.
00106 const uchar aTextMap[16] = {
00107   0x00, 0x00, 0x00, 0x00,
00108   0x5F, 0x35, 0xFF, 0xC5,
00109   0x7F, 0xFF, 0xFF, 0xE3,
00110   0xFF, 0xFF, 0xFF, 0xFE
00111 };
00112 
00113 // all except tspecials, CTLs, SPACE.
00114 const uchar tTextMap[16] = {
00115   0x00, 0x00, 0x00, 0x00,
00116   0x5F, 0x36, 0xFF, 0xC0,
00117   0x7F, 0xFF, 0xFF, 0xE3,
00118   0xFF, 0xFF, 0xFF, 0xFE
00119 };
00120 
00121 // none except a-zA-Z0-9!*+-/
00122 const uchar eTextMap[16] = {
00123   0x00, 0x00, 0x00, 0x00,
00124   0x40, 0x35, 0xFF, 0xC0,
00125   0x7F, 0xFF, 0xFF, 0xE0,
00126   0x7F, 0xFF, 0xFF, 0xE0
00127 };
00128 
00129 QString decodeRFC2047String( const QByteArray &src, QByteArray &usedCS,
00130                              const QByteArray &defaultCS, bool forceCS )
00131 {
00132   QByteArray result;
00133   QByteArray spaceBuffer;
00134   const char *scursor = src.constData();
00135   const char *send = scursor + src.length();
00136   bool onlySpacesSinceLastWord = false;
00137 
00138   while ( scursor != send ) {
00139      // space
00140     if ( isspace( *scursor ) && onlySpacesSinceLastWord ) {
00141       spaceBuffer += *scursor++;
00142       continue;
00143     }
00144 
00145     // possible start of an encoded word
00146     if ( *scursor == '=' ) {
00147       QByteArray language;
00148       QString decoded;
00149       ++scursor;
00150       const char *start = scursor;
00151       if ( HeaderParsing::parseEncodedWord( scursor, send, decoded, language, usedCS, defaultCS, forceCS ) ) {
00152         result += decoded.toUtf8();
00153         onlySpacesSinceLastWord = true;
00154         spaceBuffer.clear();
00155       } else {
00156         if ( onlySpacesSinceLastWord ) {
00157           result += spaceBuffer;
00158           onlySpacesSinceLastWord = false;
00159         }
00160         result += '=';
00161         scursor = start; // reset cursor after parsing failure
00162       }
00163       continue;
00164     } else {
00165       // unencoded data
00166       if ( onlySpacesSinceLastWord ) {
00167         result += spaceBuffer;
00168         onlySpacesSinceLastWord = false;
00169       }
00170       result += *scursor;
00171       ++scursor;
00172     }
00173   }
00174 
00175   return QString::fromUtf8(result);
00176 }
00177 
00178 QString decodeRFC2047String( const QByteArray &src )
00179 {
00180   QByteArray usedCS;
00181   return decodeRFC2047String( src, usedCS, "utf-8", false );
00182 }
00183 
00184 QByteArray encodeRFC2047String( const QString &src, const QByteArray &charset,
00185                                 bool addressHeader, bool allow8BitHeaders )
00186 {
00187   QByteArray encoded8Bit, result, usedCS;
00188   int start=0, end=0;
00189   bool nonAscii=false, ok=true, useQEncoding=false;
00190   QTextCodec *codec=0;
00191 
00192   usedCS = charset;
00193   codec = KGlobal::charsets()->codecForName( usedCS, ok );
00194 
00195   if ( !ok ) {
00196     //no codec available => try local8Bit and hope the best ;-)
00197     usedCS = KGlobal::locale()->encoding();
00198     codec = KGlobal::charsets()->codecForName( usedCS, ok );
00199   }
00200 
00201   if ( usedCS.contains( "8859-" ) ) { // use "B"-Encoding for non iso-8859-x charsets
00202     useQEncoding = true;
00203   }
00204 
00205   encoded8Bit = codec->fromUnicode( src );
00206 
00207   if ( allow8BitHeaders ) {
00208     return encoded8Bit;
00209   }
00210 
00211   uint encoded8BitLength = encoded8Bit.length();
00212   for ( unsigned int i=0; i<encoded8BitLength; i++ ) {
00213     if ( encoded8Bit[i] == ' ' ) { // encoding starts at word boundaries
00214       start = i + 1;
00215     }
00216 
00217     // encode escape character, for japanese encodings...
00218     if ( ( (signed char)encoded8Bit[i] < 0 ) || ( encoded8Bit[i] == '\033' ) ||
00219          ( addressHeader && ( strchr( "\"()<>@,.;:\\[]=", encoded8Bit[i] ) != 0 ) ) ) {
00220       end = start;   // non us-ascii char found, now we determine where to stop encoding
00221       nonAscii = true;
00222       break;
00223     }
00224   }
00225 
00226   if ( nonAscii ) {
00227     while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) {
00228       // we encode complete words
00229       end++;
00230     }
00231 
00232     for ( int x=end; x<encoded8Bit.length(); x++ ) {
00233       if ( ( (signed char)encoded8Bit[x]<0) || ( encoded8Bit[x] == '\033' ) ||
00234            ( addressHeader && ( strchr("\"()<>@,.;:\\[]=",encoded8Bit[x]) != 0 ) ) ) {
00235         end = encoded8Bit.length();     // we found another non-ascii word
00236 
00237         while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) {
00238           // we encode complete words
00239           end++;
00240         }
00241       }
00242     }
00243 
00244     result = encoded8Bit.left( start ) + "=?" + usedCS;
00245 
00246     if ( useQEncoding ) {
00247       result += "?Q?";
00248 
00249       char c, hexcode;// "Q"-encoding implementation described in RFC 2047
00250       for ( int i=start; i<end; i++ ) {
00251         c = encoded8Bit[i];
00252         if ( c == ' ' ) { // make the result readable with not MIME-capable readers
00253           result += '_';
00254         } else {
00255           if ( ( ( c >= 'a' ) && ( c <= 'z' ) ) || // paranoid mode, encode *all* special chars to avoid problems
00256               ( ( c >= 'A' ) && ( c <= 'Z' ) ) ||  // with "From" & "To" headers
00257               ( ( c >= '0' ) && ( c <= '9' ) ) ) {
00258             result += c;
00259           } else {
00260             result += '=';                 // "stolen" from KMail ;-)
00261             hexcode = ((c & 0xF0) >> 4) + 48;
00262             if ( hexcode >= 58 ) {
00263               hexcode += 7;
00264             }
00265             result += hexcode;
00266             hexcode = (c & 0x0F) + 48;
00267             if ( hexcode >= 58 ) {
00268               hexcode += 7;
00269             }
00270             result += hexcode;
00271           }
00272         }
00273       }
00274     } else {
00275       result += "?B?" + encoded8Bit.mid( start, end - start ).toBase64();
00276     }
00277 
00278     result +="?=";
00279     result += encoded8Bit.right( encoded8Bit.length() - end );
00280   } else {
00281     result = encoded8Bit;
00282   }
00283 
00284   return result;
00285 }
00286 
00287 QByteArray uniqueString()
00288 {
00289   static char chars[] = "0123456789abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
00290   time_t now;
00291   char p[11];
00292   int pos, ran;
00293   unsigned int timeval;
00294 
00295   p[10] = '\0';
00296   now = time( 0 );
00297   ran = 1 + (int)(1000.0*rand() / (RAND_MAX + 1.0));
00298   timeval = (now / ran) + getpid();
00299 
00300   for ( int i=0; i<10; i++ ) {
00301     pos = (int) (61.0*rand() / (RAND_MAX + 1.0));
00302     //kDebug(5320) << pos;
00303     p[i] = chars[pos];
00304   }
00305 
00306   QByteArray ret;
00307   ret.setNum( timeval );
00308   ret += '.';
00309   ret += p;
00310 
00311   return ret;
00312 }
00313 
00314 QByteArray multiPartBoundary()
00315 {
00316   return "nextPart" + uniqueString();
00317 }
00318 
00319 QByteArray unfoldHeader( const QByteArray &header )
00320 {
00321   QByteArray result;
00322   int pos = 0, foldBegin = 0, foldMid = 0, foldEnd = 0;
00323   while ( ( foldMid = header.indexOf( '\n', pos ) ) >= 0 ) {
00324     foldBegin = foldEnd = foldMid;
00325     // find the first space before the line-break
00326     while ( foldBegin > 0 ) {
00327       if ( !QChar( header[foldBegin - 1] ).isSpace() ) {
00328         break;
00329       }
00330       --foldBegin;
00331     }
00332     // find the first non-space after the line-break
00333     while ( foldEnd <= header.length() - 1 ) {
00334       if ( !QChar( header[foldEnd] ).isSpace() ) {
00335         break;
00336       }
00337       ++foldEnd;
00338     }
00339     result += header.mid( pos, foldBegin - pos );
00340     if ( foldEnd < header.length() -1 )
00341       result += ' ';
00342     pos = foldEnd;
00343   }
00344   result += header.mid( pos, header.length() - pos );
00345   return result;
00346 }
00347 
00348 int indexOfHeader( const QByteArray &src, const QByteArray &name, int &end, int &dataBegin, bool *folded )
00349 {
00350   QByteArray n = name;
00351   n.append( ':' );
00352   int begin = -1;
00353 
00354   if ( qstrnicmp( n.constData(), src.constData(), n.length() ) == 0 ) {
00355     begin = 0;
00356   } else {
00357     n.prepend('\n');
00358     const char *p = strcasestr( src.constData(), n.constData() );
00359     if ( !p ) {
00360       begin = -1;
00361     } else {
00362       begin = p - src.constData();
00363       ++begin;
00364     }
00365   }
00366 
00367   if ( begin > -1) {     //there is a header with the given name
00368     dataBegin = begin + name.length() + 1; //skip the name
00369     // skip the usual space after the colon
00370     if ( src.at( dataBegin ) == ' ' ) {
00371       ++dataBegin;
00372     }
00373     end = dataBegin;
00374     int len = src.length() - 1;
00375     if ( folded )
00376       *folded = false;
00377 
00378     if ( src.at(end) != '\n' ) {  // check if the header is not empty
00379       while ( true ) {
00380         end = src.indexOf( '\n', end + 1 );
00381         if ( end == -1 || end == len ||
00382              ( src[end+1] != ' ' && src[end+1] != '\t' ) ) {
00383           //break if we reach the end of the string, honor folded lines
00384           break;
00385         } else {
00386           if ( folded )
00387             *folded = true;
00388         }
00389       }
00390     }
00391 
00392     if ( end < 0 ) {
00393       end = len + 1; //take the rest of the string
00394     }
00395     return begin;
00396 
00397   } else {
00398     dataBegin = -1;
00399     return -1; //header not found
00400   }
00401 }
00402 
00403 QByteArray extractHeader( const QByteArray &src, const QByteArray &name )
00404 {
00405   int begin, end;
00406   bool folded;
00407   indexOfHeader( src, name, end, begin, &folded );
00408 
00409   if ( begin >= 0 ) {
00410     if ( !folded ) {
00411       return src.mid( begin, end - begin );
00412     } else {
00413       QByteArray hdrValue = src.mid( begin, end - begin );
00414       return unfoldHeader( hdrValue );
00415     }
00416   } else {
00417     return QByteArray(); //header not found
00418   }
00419 }
00420 
00421 QList<QByteArray> extractHeaders( const QByteArray &src, const QByteArray &name )
00422 {
00423   int begin, end;
00424   bool folded;
00425   QList<QByteArray> result;
00426   QByteArray copySrc( src );
00427 
00428   indexOfHeader( copySrc, name, end, begin, &folded );
00429   while ( begin >= 0 ) {
00430     if ( !folded ) {
00431       result.append( copySrc.mid( begin, end - begin ) );
00432     } else {
00433       QByteArray hdrValue = copySrc.mid( begin, end - begin );
00434       result.append( unfoldHeader( hdrValue ) );
00435     }
00436 
00437     // get the next one, a tiny bit ugly, but we don't want the previous to be found again...
00438     copySrc = copySrc.mid( end );
00439     indexOfHeader( copySrc, name, end, begin, &folded );
00440   }
00441 
00442   return result;
00443 }
00444 
00445 void removeHeader( QByteArray &header, const QByteArray &name )
00446 {
00447   int begin, end, dummy;
00448   begin = indexOfHeader( header, name, end, dummy );
00449   if ( begin >= 0 ) {
00450     header.remove( begin, end - begin + 1 );
00451   }
00452 }
00453 
00454 QByteArray CRLFtoLF( const QByteArray &s )
00455 {
00456   QByteArray ret = s;
00457   ret.replace( "\r\n", "\n" );
00458   return ret;
00459 }
00460 
00461 QByteArray LFtoCRLF( const QByteArray &s )
00462 {
00463   QByteArray ret = s;
00464   ret.replace( "\n", "\r\n" );
00465   return ret;
00466 }
00467 
namespace {
// Strips every double quote from @p str and, inside a quoted section,
// removes the backslash of each escape sequence while keeping the
// character that follows it.
// T needs length(), operator[]( int ) and remove( int position, int count ).
template < typename T > void removeQuotesGeneric( T & str )
{
  bool insideQuotes = false;
  int pos = 0;
  while ( pos < str.length() ) {
    if ( str[pos] == '"' ) {
      str.remove( pos, 1 );
      insideQuotes = !insideQuotes;
      continue; // the next character has moved to pos
    }
    if ( insideQuotes && str[pos] == '\\' ) {
      // drop the backslash; the increment below skips the escaped character
      str.remove( pos, 1 );
    }
    ++pos;
  }
}
}
00485 
00486 void removeQuots( QByteArray &str )
00487 {
00488   removeQuotesGeneric( str );
00489 }
00490 
00491 void removeQuots( QString &str )
00492 {
00493   removeQuotesGeneric( str );
00494 }
00495 
00496 void addQuotes( QByteArray &str, bool forceQuotes )
00497 {
00498   bool needsQuotes=false;
00499   for ( int i=0; i < str.length(); i++ ) {
00500     if ( strchr("()<>@,.;:[]=\\\"", str[i] ) != 0 ) {
00501       needsQuotes = true;
00502     }
00503     if ( str[i] == '\\' || str[i] == '\"' ) {
00504       str.insert( i, '\\' );
00505       i++;
00506     }
00507   }
00508 
00509   if ( needsQuotes || forceQuotes ) {
00510     str.insert( 0, '\"' );
00511     str.append( "\"" );
00512   }
00513 }
00514 
00515 QString removeBidiControlChars( const QString &input )
00516 {
00517   const int LRO = 0x202D;
00518   const int RLO = 0x202E;
00519   const int LRE = 0x202A;
00520   const int RLE = 0x202B;
00521   QString result = input;
00522   result.remove( LRO );
00523   result.remove( RLO );
00524   result.remove( LRE );
00525   result.remove( RLE );
00526   return result;
00527 }
00528 
00529 } // namespace KMime

KMIME Library

Skip menu "KMIME Library"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

KDE-PIM Libraries

Skip menu "KDE-PIM Libraries"
  • akonadi
  • kabc
  • kblog
  • kcal
  • kholidays
  • kimap
  • kioslave
  •   imap4
  •   mbox
  • kldap
  • kmime
  • kpimidentities
  • kpimtextedit
  •   richtextbuilders
  • kpimutils
  • kresources
  • ktnef
  • kxmlrpcclient
  • mailtransport
  • microblog
  • qgpgme
  • syndication
  •   atom
  •   rdf
  •   rss2
Generated for KDE-PIM Libraries by doxygen 1.6.1
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal