KDECore
nsSBCharSetProber.h
Go to the documentation of this file.
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* -*- C++ -*-
* Copyright (C) 1998 <developer@mozilla.org>
*
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
00024 */ 00025 00026 #ifndef NSSBCHARSETPROBER_H 00027 #define NSSBCHARSETPROBER_H 00028 00029 #include "nsCharSetProber.h" 00030 00031 #define SAMPLE_SIZE 64 00032 #define SB_ENOUGH_REL_THRESHOLD 1024 00033 #define POSITIVE_SHORTCUT_THRESHOLD (float)0.95 00034 #define NEGATIVE_SHORTCUT_THRESHOLD (float)0.05 00035 #define SYMBOL_CAT_ORDER 250 00036 #define NUMBER_OF_SEQ_CAT 4 00037 #define POSITIVE_CAT (NUMBER_OF_SEQ_CAT-1) 00038 #define NEGATIVE_CAT 0 00039 00040 namespace kencodingprober { 00041 typedef struct 00042 { 00043 const unsigned char *charToOrderMap; // [256] table use to find a char's order 00044 const char *precedenceMatrix; // [SAMPLE_SIZE][SAMPLE_SIZE]; table to find a 2-char sequence's frequency 00045 float mTypicalPositiveRatio; // = freqSeqs / totalSeqs 00046 bool keepEnglishLetter; // says if this script contains English characters (not implemented) 00047 const char* charsetName; 00048 } SequenceModel; 00049 00050 00051 class KDE_NO_EXPORT nsSingleByteCharSetProber : public nsCharSetProber{ 00052 public: 00053 nsSingleByteCharSetProber(SequenceModel *model) 00054 :mModel(model), mReversed(false), mNameProber(0) { Reset(); } 00055 nsSingleByteCharSetProber(SequenceModel *model, bool reversed, nsCharSetProber* nameProber) 00056 :mModel(model), mReversed(reversed), mNameProber(nameProber) { Reset(); } 00057 00058 virtual const char* GetCharSetName(); 00059 virtual nsProbingState HandleData(const char* aBuf, unsigned int aLen); 00060 virtual nsProbingState GetState(void) {return mState;}; 00061 virtual void Reset(void); 00062 virtual float GetConfidence(void); 00063 virtual void SetOpion() {}; 00064 00065 // This feature is not implemented yet. any current language model 00066 // contain this parameter as false. No one is looking at this 00067 // parameter or calling this method. 
00068 // Moreover, the nsSBCSGroupProber which calls the HandleData of this 00069 // prober has a hard-coded call to FilterWithoutEnglishLetters which gets rid 00070 // of the English letters. 00071 bool KeepEnglishLetters() {return mModel->keepEnglishLetter;}; // (not implemented) 00072 00073 #ifdef DEBUG_PROBE 00074 virtual void DumpStatus(); 00075 #endif 00076 00077 protected: 00078 nsProbingState mState; 00079 const SequenceModel *mModel; 00080 const bool mReversed; // true if we need to reverse every pair in the model lookup 00081 00082 //char order of last character 00083 unsigned char mLastOrder; 00084 00085 unsigned int mTotalSeqs; 00086 unsigned int mSeqCounters[NUMBER_OF_SEQ_CAT]; 00087 00088 unsigned int mTotalChar; 00089 //characters that fall in our sampling range 00090 unsigned int mFreqChar; 00091 00092 // Optional auxiliary prober for name decision. created and destroyed by the GroupProber 00093 nsCharSetProber* mNameProber; 00094 00095 }; 00096 00097 00098 extern SequenceModel Koi8rModel; 00099 extern SequenceModel Win1251Model; 00100 extern SequenceModel Latin5Model; 00101 extern SequenceModel MacCyrillicModel; 00102 extern SequenceModel Ibm866Model; 00103 extern SequenceModel Ibm855Model; 00104 extern SequenceModel Latin7Model; 00105 extern SequenceModel Win1253Model; 00106 extern SequenceModel Latin5BulgarianModel; 00107 extern SequenceModel Win1251BulgarianModel; 00108 extern SequenceModel Latin2HungarianModel; 00109 extern SequenceModel Win1250HungarianModel; 00110 extern SequenceModel Win1255Model; 00111 } 00112 #endif /* NSSBCHARSETPROBER_H */ 00113