• Skip to content
  • Skip to link menu
KDE 4.3 API Reference
  • KDE API Reference
  • kdelibs
  • Sitemap
  • Contact Us
 

KDECore

nsSBCharSetProber.cpp

Go to the documentation of this file.
00001 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /*  -*- C++ -*-
00003 *  Copyright (C) 1998 <developer@mozilla.org>
00004 *
00005 *
00006 *  Permission is hereby granted, free of charge, to any person obtaining
00007 *  a copy of this software and associated documentation files (the
00008 *  "Software"), to deal in the Software without restriction, including
00009 *  without limitation the rights to use, copy, modify, merge, publish,
00010 *  distribute, sublicense, and/or sell copies of the Software, and to
00011 *  permit persons to whom the Software is furnished to do so, subject to
00012 *  the following conditions:
00013 *
00014 *  The above copyright notice and this permission notice shall be included 
00015 *  in all copies or substantial portions of the Software.
00016 *
00017 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
00018 *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00019 *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
00020 *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
00021 *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
00022 *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
00023 *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
00024 */
00025 
00026 #include "nsSBCharSetProber.h"
00027 
00028 #include <stdio.h>
00029 
00030 namespace kencodingprober {
00031 nsProbingState nsSingleByteCharSetProber::HandleData(const char* aBuf, unsigned int aLen)
00032 {
00033   unsigned char order;
00034 
00035   for (unsigned int i = 0; i < aLen; i++)
00036   {
00037     order = mModel->charToOrderMap[(unsigned char)aBuf[i]];
00038 
00039     if (order < SYMBOL_CAT_ORDER)
00040       mTotalChar++;
00041     if (order < SAMPLE_SIZE)
00042     {
00043         mFreqChar++;
00044 
00045       if (mLastOrder < SAMPLE_SIZE)
00046       {
00047         mTotalSeqs++;
00048         if (!mReversed)
00049           ++(mSeqCounters[(int)mModel->precedenceMatrix[mLastOrder*SAMPLE_SIZE+order]]);
00050         else // reverse the order of the letters in the lookup
00051           ++(mSeqCounters[(int)mModel->precedenceMatrix[order*SAMPLE_SIZE+mLastOrder]]);
00052       }
00053     }
00054     mLastOrder = order;
00055   }
00056 
00057   if (mState == eDetecting)
00058     if (mTotalSeqs > SB_ENOUGH_REL_THRESHOLD)
00059     {
00060       float cf = GetConfidence();
00061       if (cf > POSITIVE_SHORTCUT_THRESHOLD)
00062         mState = eFoundIt;
00063       else if (cf < NEGATIVE_SHORTCUT_THRESHOLD)
00064         mState = eNotMe;
00065     }
00066 
00067   return mState;
00068 }
00069 
00070 void  nsSingleByteCharSetProber::Reset(void)
00071 {
00072   mState = eDetecting;
00073   mLastOrder = 255;
00074   for (unsigned int i = 0; i < NUMBER_OF_SEQ_CAT; i++)
00075     mSeqCounters[i] = 0;
00076   mTotalSeqs = 0;
00077   mTotalChar = 0;
00078   mFreqChar = 0;
00079 }
00080 
00081 //#define NEGATIVE_APPROACH 1
00082 
00083 float nsSingleByteCharSetProber::GetConfidence(void)
00084 {
00085 #ifdef NEGATIVE_APPROACH
00086   if (mTotalSeqs > 0)
00087     if (mTotalSeqs > mSeqCounters[NEGATIVE_CAT]*10 )
00088       return ((float)(mTotalSeqs - mSeqCounters[NEGATIVE_CAT]*10))/mTotalSeqs * mFreqChar / mTotalChar;
00089   return (float)0.01;
00090 #else  //POSITIVE_APPROACH
00091   float r;
00092 
00093   if (mTotalSeqs > 0) {
00094     r = ((float)1.0) * mSeqCounters[POSITIVE_CAT] / mTotalSeqs / mModel->mTypicalPositiveRatio;
00095     r = r*mFreqChar/mTotalChar;
00096     if (r >= (float)1.00)
00097       r = (float)0.99;
00098     return r;
00099   }
00100   return (float)0.01;
00101 #endif
00102 }
00103 
00104 const char* nsSingleByteCharSetProber::GetCharSetName() 
00105 {
00106   if (!mNameProber)
00107     return mModel->charsetName;
00108   return mNameProber->GetCharSetName();
00109 }
00110 
00111 #ifdef DEBUG_PROBE
00112 void nsSingleByteCharSetProber::DumpStatus()
00113 {
00114   printf("  SBCS: %1.3f [%s]\r\n", GetConfidence(), GetCharSetName());
00115 }
00116 #endif
00117 }
00118 
00119 

KDECore

Skip menu "KDECore"
  • Main Page
  • Modules
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

kdelibs

Skip menu "kdelibs"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • Kate
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Generated for kdelibs by doxygen 1.6.1
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal