Commit 33a8c3ba authored by Medicean's avatar Medicean

(Other:node_modules) Add jschardet modules

parent 30b0f4b3
António Afonso <aadsm> - author
Markus Ast <brainafk> - transformed into an npm package, multiple bug fixes
Leon <idealhack> - multiple bug fixes
\ No newline at end of file
This diff is collapsed.
[![NPM](https://nodei.co/npm/jschardet.png?downloads=true&downloadRank=true)](https://nodei.co/npm/jschardet/)
JsChardet
=========
Port of python's chardet (https://github.com/chardet/chardet).
License
-------
LGPL
How To Use It
-------------
### Node
```
npm install jschardet
```
```javascript
var jschardet = require("jschardet")
// "àíàçã" in UTF-8
jschardet.detect("\xc3\xa0\xc3\xad\xc3\xa0\xc3\xa7\xc3\xa3")
// { encoding: "UTF-8", confidence: 0.9690625 }
// "次常用國字標準字體表" in Big5
jschardet.detect("\xa6\xb8\xb1\x60\xa5\xce\xb0\xea\xa6\x72\xbc\xd0\xb7\xc7\xa6\x72\xc5\xe9\xaa\xed")
// { encoding: "Big5", confidence: 0.99 }
```
### Browser
Copy and include [jschardet.min.js](https://github.com/aadsm/jschardet/tree/master/dist/jschardet.min.js) in your web page.
This library is also available in [cdnjs](https://cdnjs.com) at [https://cdnjs.cloudflare.com/ajax/libs/jschardet/1.4.1/jschardet.min.js](https://cdnjs.cloudflare.com/ajax/libs/jschardet/1.4.1/jschardet.min.js)
Options
-------
```javascript
// See all information related to the confidence levels of each encoding.
// This is useful to see why you're not getting the expected encoding.
jschardet.Constants._debug = true;
// The default minimum accepted confidence level is 0.20, but sometimes this is
// too strict, especially when dealing with files that contain mostly numbers.
// Change this to 0 to always get a result, or to any other value that works
// for you.
jschardet.Constants.MINIMUM_THRESHOLD = 0;
```
Supported Charsets
------------------
* Big5, GB2312/GB18030, EUC-TW, HZ-GB-2312, and ISO-2022-CN (Traditional and Simplified Chinese)
* EUC-JP, SHIFT_JIS, and ISO-2022-JP (Japanese)
* EUC-KR and ISO-2022-KR (Korean)
* KOI8-R, MacCyrillic, IBM855, IBM866, ISO-8859-5, and windows-1251 (Russian)
* ISO-8859-2 and windows-1250 (Hungarian)
* ISO-8859-5 and windows-1251 (Bulgarian)
* windows-1252
* ISO-8859-7 and windows-1253 (Greek)
* ISO-8859-8 and windows-1255 (Visual and Logical Hebrew)
* TIS-620 (Thai)
* UTF-32 BE, LE, 3412-ordered, or 2143-ordered (with a BOM)
* UTF-16 BE or LE (with a BOM)
* UTF-8 (with or without a BOM)
* ASCII
Technical Information
---------------------
I haven't been able to create tests to correctly detect:
* ISO-2022-CN
* windows-1250 in Hungarian
* windows-1251 in Bulgarian
* windows-1253 in Greek
* EUC-CN
Development
-----------
Use `npm run dist` to update the distribution files. They're available at https://github.com/aadsm/jschardet/tree/master/dist.
Authors
-------
Ported from python to JavaScript by António Afonso (https://github.com/aadsm/jschardet)
Transformed into an npm package by Markus Ast (https://github.com/brainafk)
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
// Package entry point: hands back whatever the ./src module exports.
module.exports = require('./src');
\ No newline at end of file
{
"_from": "jschardet",
"_id": "jschardet@1.6.0",
"_inBundle": false,
"_integrity": "sha1-x9GnHtz/KDnbL57DD8XV69PBpng=",
"_location": "/jschardet",
"_phantomChildren": {},
"_requested": {
"type": "tag",
"registry": true,
"raw": "jschardet",
"name": "jschardet",
"escapedName": "jschardet",
"rawSpec": "",
"saveSpec": null,
"fetchSpec": "latest"
},
"_requiredBy": [
"#USER",
"/"
],
"_resolved": "http://registry.npm.taobao.org/jschardet/download/jschardet-1.6.0.tgz",
"_shasum": "c7d1a71edcff2839db2f9ec30fc5d5ebd3c1a678",
"_spec": "jschardet",
"_where": "/Users/medicean/workspace/antSword",
"author": {
"name": "António Afonso"
},
"bugs": {
"url": "https://github.com/aadsm/jschardet/issues"
},
"bundleDependencies": false,
"dependencies": {},
"deprecated": false,
"description": "Character encoding auto-detection in JavaScript (port of python's chardet)",
"devDependencies": {
"browserify": "~12.0.1",
"google-closure-compiler": "20151015.0.0"
},
"directories": {
"lib": "./lib",
"test": "./test"
},
"engines": {
"node": ">=0.1.90"
},
"homepage": "https://github.com/aadsm/jschardet#readme",
"keywords": [
"encoding",
"charset"
],
"license": "LGPL-2.1+",
"main": "src/init",
"name": "jschardet",
"repository": {
"type": "git",
"url": "git+https://github.com/aadsm/jschardet.git"
},
"scripts": {
"dist": "npm run dist-dev && java -jar node_modules/google-closure-compiler/compiler.jar --warning_level QUIET --compilation_level SIMPLE_OPTIMIZATIONS --js dist/jschardet.js > dist/jschardet.min.js",
"dist-dev": "mkdir -p dist && browserify index.js -s jschardet --detect-globals false -o dist/jschardet.js"
},
"version": "1.6.0"
}
This diff is collapsed.
/*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* António Afonso (antonio.afonso gmail.com) - port to JavaScript
* Mark Pilgrim - port to Python
* Shy Shalom - original C code
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA
*/
!function(jschardet) {
// Prober for the Big5 charset: a MultiByteCharSetProber instance configured
// with the Big5 state-machine model and the Big5 distribution analysis.
jschardet.Big5Prober = function() {
    jschardet.MultiByteCharSetProber.apply(this);

    // Name of the charset this prober reports.
    this.getCharsetName = function() {
        return "Big5";
    };

    // Wire up the Big5-specific helpers, then start from a clean state.
    this._mCodingSM = new jschardet.CodingStateMachine(jschardet.Big5SMModel);
    this._mDistributionAnalyzer = new jschardet.Big5DistributionAnalysis();
    this.reset();
};
jschardet.Big5Prober.prototype = new jschardet.MultiByteCharSetProber();
}(require('./init'));
This diff is collapsed.
/*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* António Afonso (antonio.afonso gmail.com) - port to JavaScript
* Mark Pilgrim - port to Python
* Shy Shalom - original C code
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA
*/
!function(jschardet) {
// Composite prober: forwards input to every active child prober held in
// `_mProbers` and reports the child with the highest confidence.
// This base constructor starts with an empty `_mProbers` list
// (presumably filled in by subclasses -- confirm against the group probers).
jschardet.CharSetGroupProber = function() {
    jschardet.CharSetProber.apply(this);

    var self = this;

    function init() {
        self._mActiveNum = 0;
        self._mProbers = [];
        self._mBestGuessProber = null;
    }

    // Resets this prober's own state, then resets and re-activates every
    // child prober, recounting how many are active.
    this.reset = function() {
        jschardet.CharSetGroupProber.prototype.reset.apply(this);
        this._mActiveNum = 0;
        // Iterate by index: an assignment-as-condition loop would stop at the
        // first empty slot and never reach the probers after it.
        for( var i = 0; i < this._mProbers.length; i++ ) {
            var prober = this._mProbers[i];
            if( !prober ) continue;
            prober.reset();
            prober.active = true;
            this._mActiveNum++;
        }
        this._mBestGuessProber = null;
    }

    // Returns the charset name of the best-guess child, computing the best
    // guess via getConfidence() if none is recorded yet; null if there is none.
    this.getCharsetName = function() {
        if( !this._mBestGuessProber ) {
            this.getConfidence();
            if( !this._mBestGuessProber ) return null;
        }
        return this._mBestGuessProber.getCharsetName();
    }

    // Feeds aBuf to each active child prober and returns this prober's state.
    // - A child reporting foundIt becomes the best guess and ends the pass
    //   (this prober's own state is left as it was).
    // - A child reporting notMe is deactivated; once no child remains active
    //   this prober's state becomes notMe.
    this.feed = function(aBuf) {
        for( var i = 0; i < this._mProbers.length; i++ ) {
            var prober = this._mProbers[i];
            if( !prober || !prober.active ) continue;
            var st = prober.feed(aBuf);
            // A falsy state (Constants.detecting is 0) means "no verdict yet".
            if( !st ) continue;
            if( st == jschardet.Constants.foundIt ) {
                this._mBestGuessProber = prober;
                return this.getState();
            } else if( st == jschardet.Constants.notMe ) {
                prober.active = false;
                this._mActiveNum--;
                if( this._mActiveNum <= 0 ) {
                    this._mState = jschardet.Constants.notMe;
                    return this.getState();
                }
            }
        }
        return this.getState();
    }

    // Returns 0.99 when this prober's state is foundIt and 0.01 when it is
    // notMe. Otherwise returns the highest confidence among the active
    // children and records that child as the best guess; returns 0.0 when no
    // active child reports a confidence above zero.
    this.getConfidence = function() {
        var st = this.getState();
        if( st == jschardet.Constants.foundIt ) {
            return 0.99;
        } else if( st == jschardet.Constants.notMe ) {
            return 0.01;
        }
        var bestConf = 0.0;
        this._mBestGuessProber = null;
        for( var i = 0; i < this._mProbers.length; i++ ) {
            var prober = this._mProbers[i];
            if( !prober ) continue;
            if( !prober.active ) {
                if( jschardet.Constants._debug ) {
                    jschardet.log(prober.getCharsetName() + " not active\n");
                }
                continue;
            }
            var cf = prober.getConfidence();
            if( jschardet.Constants._debug ) {
                jschardet.log(prober.getCharsetName() + " confidence = " + cf + "\n");
            }
            if( bestConf < cf ) {
                bestConf = cf;
                this._mBestGuessProber = prober;
            }
        }
        if( !this._mBestGuessProber ) return 0.0;
        return bestConf;
    }

    init();
}
jschardet.CharSetGroupProber.prototype = new jschardet.CharSetProber();
}(require('./init'));
/*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* António Afonso (antonio.afonso gmail.com) - port to JavaScript
* Mark Pilgrim - port to Python
* Shy Shalom - original C code
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA
*/
!function(jschardet) {
// Base prober: supplies the default state handling plus the buffer filters
// shared by the concrete probers.
jschardet.CharSetProber = function() {
    // Puts the prober back into the "detecting" state.
    this.reset = function() {
        this._mState = jschardet.Constants.detecting;
    };

    // The base prober has no charset to report.
    this.getCharsetName = function() {
        return null;
    };

    // No-op here; concrete probers provide their own feed().
    this.feed = function(aBuf) {
    };

    // Current prober state.
    this.getState = function() {
        return this._mState;
    };

    // The base prober never claims any confidence.
    this.getConfidence = function() {
        return 0.0;
    };

    // Collapses every run of characters in the \x00-\x7F range into one space.
    this.filterHighBitOnly = function(aBuf) {
        return aBuf.replace(/[\x00-\x7F]+/g, " ");
    };

    // Collapses every run of A-Z / a-z letters into one space.
    this.filterWithoutEnglishLetters = function(aBuf) {
        return aBuf.replace(/[A-Za-z]+/g, " ");
    };

    // Input: aBuf is a string containing all different types of characters
    // Output: a string that contains all alphabetic letters, high-byte characters, and word immediately preceding `>`, but nothing else within `<>`
    // Ex: input - '¡£º <div blah blah> abcdef</div> apples! * and oranges 9jd93jd>'
    //     output - '¡£º blah div apples and oranges jd jd '
    this.filterWithEnglishLetters = function(aBuf) {
        var pieces = [];
        var insideTag = false;
        var runStart = 0;
        for (var pos = 0; pos < aBuf.length; pos++) {
            var ch = aBuf[pos];
            // Track whether we are entering or leaving an HTML tag.
            if (ch == '>') {
                insideTag = false;
            } else if (ch == '<') {
                insideTag = true;
            }
            var isLetter = /[a-zA-Z]/.test(ch);
            var isLowByte = /^[\x00-\x7F]*$/.test(ch);
            // Letters and high-byte characters extend the current run.
            if (isLetter || !isLowByte) {
                continue;
            }
            // Any other character ends the run; keep it only outside a tag.
            if (pos > runStart && !insideTag) {
                pieces.push(aBuf.substring(runStart, pos), ' ');
            }
            runStart = pos + 1;
        }
        // Keep the trailing run unless the buffer ended inside a tag.
        if (!insideTag) {
            pieces.push(aBuf.substring(runStart));
        }
        return pieces.join('');
    };
};
}(require('./init'));
/*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* António Afonso (antonio.afonso gmail.com) - port to JavaScript
* Mark Pilgrim - port to Python
* Shy Shalom - original C code
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA
*/
!function(jschardet) {
// Table-driven state machine. The model `sm` supplies classTable,
// charLenTable, stateTable, classFactor and name.
jschardet.CodingStateMachine = function(sm) {
    // Returns the machine to its start state.
    this.reset = function() {
        this._mCurrentState = jschardet.Constants.start;
    };

    // Consumes one character `c` (only its first char code is used) and
    // returns the state the machine moves to.
    this.nextState = function(c) {
        var model = this._mModel;
        // Every byte maps to a class in the model.
        var cls = model.classTable[c.charCodeAt(0)];
        // At the start state this byte begins a new character, so its class
        // also determines the character's length.
        if( this._mCurrentState == jschardet.Constants.start ) {
            this._mCurrentBytePos = 0;
            this._mCurrentCharLen = model.charLenTable[cls];
        }
        // The transition is looked up from the current state and byte class.
        var next = model.stateTable[this._mCurrentState * model.classFactor + cls];
        this._mCurrentState = next;
        this._mCurrentBytePos += 1;
        return next;
    };

    // Length recorded for the character currently being read.
    this.getCurrentCharLen = function() {
        return this._mCurrentCharLen;
    };

    // Name of the model driving this machine.
    this.getCodingStateMachine = function() {
        return this._mModel.name;
    };

    this._mModel = sm;
    this._mCurrentBytePos = 0;
    this._mCurrentCharLen = 0;
    this.reset();
};
}(require('./init'));
/*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* António Afonso (antonio.afonso gmail.com) - port to JavaScript
* Mark Pilgrim - port to Python
* Shy Shalom - original C code
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA
*/
!function(jschardet) {
var constants = {};

// When true, probers report their progress through jschardet.log.
constants._debug = false;

// Prober states.
constants.detecting = 0;
constants.foundIt = 1;
constants.notMe = 2;

// Coding state machine states.
constants.start = 0;
constants.error = 1;
constants.itsMe = 2;

// Confidence thresholds: the minimum accepted confidence, and the level
// above which a prober may declare foundIt early.
constants.MINIMUM_THRESHOLD = 0.20;
constants.SHORTCUT_THRESHOLD = 0.95;

jschardet.Constants = constants;
}(require('./init'));
/*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* António Afonso (antonio.afonso gmail.com) - port to JavaScript
* Mark Pilgrim - port to Python
* Shy Shalom - original C code
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA
*/
!function(jschardet) {
// Prober that runs four coding state machines (HZ, ISO-2022-CN, ISO-2022-JP,
// ISO-2022-KR) in parallel over the input and reports the first one that
// reaches the itsMe state.
jschardet.EscCharSetProber = function() {
    jschardet.CharSetProber.apply(this);

    var self = this;

    function init() {
        self._mCodingSM = [
            new jschardet.CodingStateMachine(jschardet.HZSMModel),
            new jschardet.CodingStateMachine(jschardet.ISO2022CNSMModel),
            new jschardet.CodingStateMachine(jschardet.ISO2022JPSMModel),
            new jschardet.CodingStateMachine(jschardet.ISO2022KRSMModel)
        ];
        self.reset();
    }

    // Resets the prober state, re-activates and resets every state machine,
    // and clears any previously detected charset.
    this.reset = function() {
        jschardet.EscCharSetProber.prototype.reset.apply(this);
        var activeCount = 0;
        // Iterate by index so an empty slot is skipped instead of ending the
        // loop early.
        for( var i = 0; i < this._mCodingSM.length; i++ ) {
            var codingSM = this._mCodingSM[i];
            if( !codingSM ) continue;
            codingSM.active = true;
            codingSM.reset();
            activeCount++;
        }
        // Count only the machines actually activated, so the "no machine
        // left" check in feed() stays reachable.
        this._mActiveSM = activeCount;
        this._mDetectedCharset = null;
    }

    // Name of the detected charset, or null when nothing has been detected.
    this.getCharsetName = function() {
        return this._mDetectedCharset;
    }

    // 0.99 once a charset has been detected, otherwise 0.00.
    this.getConfidence = function() {
        if( this._mDetectedCharset ) {
            return 0.99;
        } else {
            return 0.00;
        }
    }

    // Feeds aBuf character by character to every active state machine and
    // returns the resulting prober state.
    // - A machine that reports error is deactivated; when none remains the
    //   state becomes notMe.
    // - The first machine that reports itsMe sets the state to foundIt and
    //   supplies the detected charset name.
    this.feed = function(aBuf) {
        for( var i = 0, c; i < aBuf.length; i++ ) {
            c = aBuf[i];
            for( var j = 0; j < this._mCodingSM.length; j++ ) {
                var codingSM = this._mCodingSM[j];
                if( !codingSM || !codingSM.active ) continue;
                var codingState = codingSM.nextState(c);
                if( codingState == jschardet.Constants.error ) {
                    codingSM.active = false;
                    this._mActiveSM--;
                    if( this._mActiveSM <= 0 ) {
                        this._mState = jschardet.Constants.notMe;
                        return this.getState();
                    }
                } else if( codingState == jschardet.Constants.itsMe ) {
                    this._mState = jschardet.Constants.foundIt;
                    this._mDetectedCharset = codingSM.getCodingStateMachine();
                    return this.getState();
                }
            }
        }

        return this.getState();
    }

    init();
}
jschardet.EscCharSetProber.prototype = new jschardet.CharSetProber();
}(require('./init'));
This diff is collapsed.
/*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* António Afonso (antonio.afonso gmail.com) - port to JavaScript
* Mark Pilgrim - port to Python
* Shy Shalom - original C code
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA
*/
!function(jschardet) {
// Prober for EUC-JP. On top of the coding state machine and distribution
// analysis it also feeds a context analysis, and its confidence is the larger
// of the two analysers' confidences.
jschardet.EUCJPProber = function() {
    jschardet.MultiByteCharSetProber.apply(this);

    var self = this;

    function init() {
        self._mCodingSM = new jschardet.CodingStateMachine(jschardet.EUCJPSMModel);
        self._mDistributionAnalyzer = new jschardet.EUCJPDistributionAnalysis();
        self._mContextAnalyzer = new jschardet.EUCJPContextAnalysis();
        self.reset();
    }

    // Runs the inherited reset, then also resets the context analyser.
    this.reset = function() {
        jschardet.EUCJPProber.prototype.reset.apply(this);
        this._mContextAnalyzer.reset();
    }

    // Name of the charset this prober reports.
    this.getCharsetName = function() {
        return "EUC-JP";
    }

    // Feeds aBuf through the coding state machine one character at a time and
    // returns the resulting prober state.
    // - error  -> state becomes notMe and scanning stops.
    // - itsMe  -> state becomes foundIt and scanning stops.
    // - start  -> a character just completed; the two-character window that
    //             ends at the current position goes to both analysers.
    this.feed = function(aBuf) {
        var aLen = aBuf.length;
        for( var i = 0; i < aLen; i++ ) {
            var codingState = this._mCodingSM.nextState(aBuf[i]);
            if( codingState == jschardet.Constants.error ) {
                if( jschardet.Constants._debug ) {
                    jschardet.log(this.getCharsetName() + " prober hit error at byte " + i + "\n");
                }
                this._mState = jschardet.Constants.notMe;
                break;
            } else if( codingState == jschardet.Constants.itsMe ) {
                this._mState = jschardet.Constants.foundIt;
                break;
            } else if( codingState == jschardet.Constants.start ) {
                var charLen = this._mCodingSM.getCurrentCharLen();
                if( i == 0 ) {
                    // The window's first half was saved from the previous
                    // buffer in _mLastChar[0]; complete it with this
                    // buffer's first character.
                    this._mLastChar[1] = aBuf[0];
                    this._mContextAnalyzer.feed(this._mLastChar, charLen);
                    this._mDistributionAnalyzer.feed(this._mLastChar, charLen);
                } else {
                    this._mContextAnalyzer.feed(aBuf.slice(i-1,i+1), charLen);
                    this._mDistributionAnalyzer.feed(aBuf.slice(i-1,i+1), charLen);
                }
            }
        }

        // Save the final character for the next call. An empty buffer has no
        // final character (aBuf[-1] is undefined), so keep the saved one.
        if( aLen > 0 ) {
            this._mLastChar[0] = aBuf[aLen - 1];
        }

        // While still detecting, shortcut to foundIt once the context analyser
        // has enough data and confidence exceeds SHORTCUT_THRESHOLD.
        if( this.getState() == jschardet.Constants.detecting ) {
            if( this._mContextAnalyzer.gotEnoughData() &&
                this.getConfidence() > jschardet.Constants.SHORTCUT_THRESHOLD ) {
                this._mState = jschardet.Constants.foundIt;
            }
        }

        return this.getState();
    }

    // Larger of the context-analysis and distribution-analysis confidences.
    this.getConfidence = function() {
        var contxtCf = this._mContextAnalyzer.getConfidence();
        var distribCf = this._mDistributionAnalyzer.getConfidence();

        return Math.max(contxtCf, distribCf);
    }

    init();
}
jschardet.EUCJPProber.prototype = new jschardet.MultiByteCharSetProber();
}(require('./init'));
This diff is collapsed.
/*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* António Afonso (antonio.afonso gmail.com) - port to JavaScript
* Mark Pilgrim - port to Python
* Shy Shalom - original C code
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA
*/
!function(jschardet) {
// Prober for the EUC-KR charset: a MultiByteCharSetProber instance configured
// with the EUC-KR state-machine model and the EUC-KR distribution analysis.
jschardet.EUCKRProber = function() {
    jschardet.MultiByteCharSetProber.apply(this);

    // Name of the charset this prober reports.
    this.getCharsetName = function() {
        return "EUC-KR";
    };

    // Wire up the EUC-KR-specific helpers, then start from a clean state.
    this._mCodingSM = new jschardet.CodingStateMachine(jschardet.EUCKRSMModel);
    this._mDistributionAnalyzer = new jschardet.EUCKRDistributionAnalysis();
    this.reset();
};
jschardet.EUCKRProber.prototype = new jschardet.MultiByteCharSetProber();
}(require('./init'));
This diff is collapsed.
/*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* António Afonso (antonio.afonso gmail.com) - port to JavaScript
* Mark Pilgrim - port to Python
* Shy Shalom - original C code
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA
*/
!function(jschardet) {
// Prober for the EUC-TW charset: a MultiByteCharSetProber instance configured
// with the EUC-TW state-machine model and the EUC-TW distribution analysis.
jschardet.EUCTWProber = function() {
    jschardet.MultiByteCharSetProber.apply(this);

    // Name of the charset this prober reports.
    this.getCharsetName = function() {
        return "EUC-TW";
    };

    // Wire up the EUC-TW-specific helpers, then start from a clean state.
    this._mCodingSM = new jschardet.CodingStateMachine(jschardet.EUCTWSMModel);
    this._mDistributionAnalyzer = new jschardet.EUCTWDistributionAnalysis();
    this.reset();
};
jschardet.EUCTWProber.prototype = new jschardet.MultiByteCharSetProber();
}(require('./init'));
This diff is collapsed.
/*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* António Afonso (antonio.afonso gmail.com) - port to JavaScript
* Mark Pilgrim - port to Python
* Shy Shalom - original C code
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA
*/
!function(jschardet) {
// Prober for the GB2312 charset: a MultiByteCharSetProber instance configured
// with the GB2312 state-machine model and the GB2312 distribution analysis.
jschardet.GB2312Prober = function() {
    jschardet.MultiByteCharSetProber.apply(this);

    // Name of the charset this prober reports.
    this.getCharsetName = function() {
        return "GB2312";
    };

    // Wire up the GB2312-specific helpers, then start from a clean state.
    this._mCodingSM = new jschardet.CodingStateMachine(jschardet.GB2312SMModel);
    this._mDistributionAnalyzer = new jschardet.GB2312DistributionAnalysis();
    this.reset();
};
jschardet.GB2312Prober.prototype = new jschardet.MultiByteCharSetProber();
}(require('./init'));
This diff is collapsed.
// Directory entry point: hands back whatever the ./init module exports.
module.exports = require('./init');
\ No newline at end of file
/*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* António Afonso (antonio.afonso gmail.com) - port to JavaScript
* Mark Pilgrim - port to Python
* Shy Shalom - original C code
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA
*/
// The library namespace is this module's own exports object. The modules
// visible in this package each call require('./init') and attach their
// constructors and tables to the object they get back.
var jschardet = exports;
// The order of the requires below is significant: several modules build
// their prototype from a parent constructor at load time (for example
// `Big5Prober.prototype = new MultiByteCharSetProber()`), so the parent's
// module has to be loaded first.
require('./constants');
require('./codingstatemachine');
require('./escsm');
require('./mbcssm');
// Base probers come before the probers that inherit from them.
require('./charsetprober');
require('./mbcharsetprober');
require('./jisfreq');
require('./gb2312freq');
require('./euckrfreq');
require('./big5freq');
require('./euctwfreq');
require('./chardistribution');
require('./jpcntx');
require('./sjisprober');
require('./utf8prober');
require('./charsetgroupprober');
require('./eucjpprober');
require('./gb2312prober');
require('./euckrprober');
require('./big5prober');
require('./euctwprober');
require('./mbcsgroupprober');
require('./sbcharsetprober');
require('./langgreekmodel');
require('./langthaimodel');
require('./langbulgarianmodel');
require('./langcyrillicmodel');
require('./hebrewprober');
require('./langhebrewmodel');
require('./langhungarianmodel');
require('./sbcsgroupprober');
require('./latin1prober');
require('./escprober');
require('./universaldetector');
// Library version string; matches the "version" field in this package's
// package.json (1.6.0).
jschardet.VERSION = "1.6.0";
// Runs a fresh UniversalDetector over `buffer` and returns its `result`.
// When a Buffer constructor exists and `buffer` is an instance of it, the
// buffer is converted with toString('binary') first; any other input is fed
// to the detector as given.
jschardet.detect = function(buffer) {
    var detector = new jschardet.UniversalDetector();
    detector.reset();
    var isNodeBuffer = typeof Buffer == 'function' && buffer instanceof Buffer;
    detector.feed(isNodeBuffer ? buffer.toString('binary') : buffer);
    detector.close();
    return detector.result;
};
// Logging hook used by the probers: passes all of its arguments to console.log.
jschardet.log = function() {
    var sink = console;
    sink.log.apply(sink, arguments);
};
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment