[Pkg-javascript-commits] [node-text-encoding] 01/09: Imported Upstream version 0.6.0
Julien Puydt
julien.puydt at laposte.net
Sun May 15 13:45:41 UTC 2016
This is an automated email from the git hooks/post-receive script.
jpuydt-guest pushed a commit to branch master
in repository node-text-encoding.
commit 7eae61c644d78c23fa2cdbb54aae5eae22d628af
Author: Julien Puydt <julien.puydt at laposte.net>
Date: Sun May 15 08:08:35 2016 +0200
Imported Upstream version 0.6.0
---
README.md | 67 +--
bower.json | 12 +-
lib/encoding-indexes.js | 9 +-
lib/encoding.js | 1132 ++++++++++++++++++++++---------------------
package.json | 12 +-
test/test-misc.js | 129 +++--
test/test-utf.js | 23 +-
test/test-x-user-defined.js | 2 +-
8 files changed, 745 insertions(+), 641 deletions(-)
diff --git a/README.md b/README.md
index 9e5acf2..c0f0c18 100644
--- a/README.md
+++ b/README.md
@@ -1,14 +1,16 @@
text-encoding
==============
-This is a polyfill for the [Encoding Living Standard](https://encoding.spec.whatwg.org/)
-API for the Web, allowing encoding and decoding of textual data to and from Typed Array
-buffers for binary data in JavaScript.
+This is a polyfill for the [Encoding Living
+Standard](https://encoding.spec.whatwg.org/) API for the Web, allowing
+encoding and decoding of textual data to and from Typed Array buffers
+for binary data in JavaScript.
-By default it adheres to the spec and does not support *encoding* to non-UTF encodings,
-only *decoding*. It is also implemented to match the specification's algorithms, rather
-than for performance. The intended use is within Web pages, so it has no dependency
-on server frameworks or particular module schemes.
+By default it adheres to the spec and does not support *encoding* to
+legacy encodings, only *decoding*. It is also implemented to match the
+specification's algorithms, rather than for performance. The intended
+use is within Web pages, so it has no dependency on server frameworks
+or particular module schemes.
Basic examples and tests are included.
@@ -49,14 +51,14 @@ Or add it to your `bower.json` dependencies.
Basic Usage
```js
- var uint8array = TextEncoder(encoding).encode(string);
- var string = TextDecoder(encoding).decode(uint8array);
+ var uint8array = new TextEncoder().encode(string);
+ var string = new TextDecoder(encoding).decode(uint8array);
```
Streaming Decode
```js
- var string = "", decoder = TextDecoder(encoding), buffer;
+ var string = "", decoder = new TextDecoder(encoding), buffer;
while (buffer = next_chunk()) {
string += decoder.decode(buffer, {stream:true});
}
@@ -67,30 +69,38 @@ Streaming Decode
All encodings from the Encoding specification are supported:
-utf-8 ibm866 iso-8859-2 iso-8859-3 iso-8859-4 iso-8859-5 iso-8859-6 iso-8859-7 iso-8859-8 iso-8859-8-i iso-8859-10 iso-8859-13 iso-8859-14 iso-8859-15 iso-8859-16 koi8-r koi8-u macintosh windows-874 windows-1250 windows-1251 windows-1252 windows-1253 windows-1254 windows-1255 windows-1256 windows-1257 windows-1258 x-mac-cyrillic gb18030 hz-gb-2312 big5 euc-jp iso-2022-jp shift_jis euc-kr replacement utf-16be utf-16le x-user-defined
-
-(Some encodings may be supported under other names, e.g. ascii, iso-8859-1, etc.
-See [Encoding](https://encoding.spec.whatwg.org/) for additional labels for each encoding.)
-
-Encodings other than **utf-8**, **utf-16le** and **utf-16be** require an additional
-`encoding-indexes.js` file to be included. It is rather large
-(596kB uncompressed, 188kB gzipped); portions may be deleted if
+utf-8 ibm866 iso-8859-2 iso-8859-3 iso-8859-4 iso-8859-5 iso-8859-6
+iso-8859-7 iso-8859-8 iso-8859-8-i iso-8859-10 iso-8859-13 iso-8859-14
+iso-8859-15 iso-8859-16 koi8-r koi8-u macintosh windows-874
+windows-1250 windows-1251 windows-1252 windows-1253 windows-1254
+windows-1255 windows-1256 windows-1257 windows-1258 x-mac-cyrillic
+gb18030 hz-gb-2312 big5 euc-jp iso-2022-jp shift_jis euc-kr
+replacement utf-16be utf-16le x-user-defined
+
+(Some encodings may be supported under other names, e.g. ascii,
+iso-8859-1, etc. See [Encoding](https://encoding.spec.whatwg.org/) for
+additional labels for each encoding.)
+
+Encodings other than **utf-8**, **utf-16le** and **utf-16be** require
+an additional `encoding-indexes.js` file to be included. It is rather
+large (596kB uncompressed, 188kB gzipped); portions may be deleted if
support for some encodings is not required.
### Non-Standard Behavior ###
-As required by the specification, only encoding to **utf-8**,
-**utf-16le** and **utf-16be** is supported. If you want to try it out, you can
-force a non-standard behavior by passing the `NONSTANDARD_allowLegacyEncoding`
-option to TextEncoder. For example:
+As required by the specification, only encoding to **utf-8** is
+supported. If you want to try legacy encodings, you can force
+non-standard behavior by passing the `NONSTANDARD_allowLegacyEncoding`
+option to TextEncoder along with a label. For example:
```js
var uint8array = new TextEncoder(
'windows-1252', { NONSTANDARD_allowLegacyEncoding: true }).encode(text);
```
-But note that the above won't work if you're using the polyfill in a browser that
-natively supports the TextEncoder API natively, since the polyfill won't be used!
+But note that the above won't work if you're using the polyfill in a
+browser that natively supports the TextEncoder API, since the
+polyfill won't be used!
You can force the polyfill to be used by using this before the polyfill:
@@ -100,7 +110,8 @@ window.TextEncoder = window.TextDecoder = null;
</script>
```
-To support the legacy encodings (which may be stateful), the TextEncoder `encode()`
-method accepts an optional dictionary and `stream` option,
-e.g. `encoder.encode(string, {stream: true});` This is not needed for the
-stateless UTF encodings since the input is always in complete code points.
+To support the legacy encodings (which may be stateful), the
+TextEncoder `encode()` method accepts an optional dictionary with a
+`stream` option, e.g. `encoder.encode(string, {stream: true});`. This
+is not needed for the standard UTF-8 encoding since the input is
+always in complete code points.
diff --git a/bower.json b/bower.json
index f595b2a..9c83327 100644
--- a/bower.json
+++ b/bower.json
@@ -1,9 +1,17 @@
{
"name": "text-encoding",
- "version": "0.5.4",
+ "version": "0.6.0",
"homepage": "https://github.com/inexorabletash/text-encoding",
"authors": [
- "Joshua Bell <inexorabletash at gmail.com>"
+ "Joshua Bell <inexorabletash at gmail.com>",
+ "Rick Eyre <rick.eyre at outlook.com>",
+ "Eugen Podaru <eugen.podaru at live.com>",
+ "Filip Dupanović <filip.dupanovic at gmail.com>",
+ "Anne van Kesteren <annevk at annevk.nl>",
+ "Francis Avila <francisga at gmail.com>",
+ "Michael J. Ryan <tracker1 at gmail.com>",
+ "Pierre Queinnec <pierre at queinnec.org>",
+ "Zack Weinberg <zackw at panix.com>"
],
"description": "Polyfill for the Encoding Living Standard's API",
"main": [ "lib/encoding.js", "lib/encoding-indexes.js" ],
diff --git a/lib/encoding-indexes.js b/lib/encoding-indexes.js
index ac4fb8e..4091600 100644
--- a/lib/encoding-indexes.js
+++ b/lib/encoding-indexes.js
@@ -1,9 +1,10 @@
(function(global) {
'use strict';
- global["encoding-indexes"] = {
+ global["encoding-indexes"] =
+{
"big5":[null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,nu [...]
"euc-kr":[44034,44035,44037,44038,44043,44044,44045,44046,44047,44056,44062,44063,44065,44066,44067,44069,44070,44071,44072,44073,44074,44075,44078,44082,44083,44084,null,null,null,null,null,null,44085,44086,44087,44090,44091,44093,44094,44095,44097,44098,44099,44100,44101,44102,44103,44104,44105,44106,44108,44110,44111,44112,44113,44114,44115,44117,null,null,null,null,null,null,44118,44119,44121,44122,44123,44125,44126,44127,44128,44129,44130,44131,44132,44133,44134,44135,44136,44137, [...]
- "gb18030":[19970,19972,19973,19974,19983,19986,19991,19999,20000,20001,20003,20006,20009,20014,20015,20017,20019,20021,20023,20028,20032,20033,20034,20036,20038,20042,20049,20053,20055,20058,20059,20066,20067,20068,20069,20071,20072,20074,20075,20076,20077,20078,20079,20082,20084,20085,20086,20087,20088,20089,20090,20091,20092,20093,20095,20096,20097,20098,20099,20100,20101,20103,20106,20112,20118,20119,20121,20124,20125,20126,20131,20138,20143,20144,20145,20148,20150,20151,20152,20153 [...]
+ "gb18030":[19970,19972,19973,19974,19983,19986,19991,19999,20000,20001,20003,20006,20009,20014,20015,20017,20019,20021,20023,20028,20032,20033,20034,20036,20038,20042,20049,20053,20055,20058,20059,20066,20067,20068,20069,20071,20072,20074,20075,20076,20077,20078,20079,20082,20084,20085,20086,20087,20088,20089,20090,20091,20092,20093,20095,20096,20097,20098,20099,20100,20101,20103,20106,20112,20118,20119,20121,20124,20125,20126,20131,20138,20143,20144,20145,20148,20150,20151,20152,20153 [...]
"gb18030-ranges":[[0,128],[36,165],[38,169],[45,178],[50,184],[81,216],[89,226],[95,235],[96,238],[100,244],[103,248],[104,251],[105,253],[109,258],[126,276],[133,284],[148,300],[172,325],[175,329],[179,334],[208,364],[306,463],[307,465],[308,467],[309,469],[310,471],[311,473],[312,475],[313,477],[341,506],[428,594],[443,610],[544,712],[545,716],[558,730],[741,930],[742,938],[749,962],[750,970],[805,1026],[819,1104],[820,1106],[7922,8209],[7924,8215],[7925,8218],[7927,8222],[7934,8231] [...]
"jis0208":[12288,12289,12290,65292,65294,12539,65306,65307,65311,65281,12443,12444,180,65344,168,65342,65507,65343,12541,12542,12445,12446,12291,20189,12293,12294,12295,12540,8213,8208,65295,65340,65374,8741,65372,8230,8229,8216,8217,8220,8221,65288,65289,12308,12309,65339,65341,65371,65373,12296,12297,12298,12299,12300,12301,12302,12303,12304,12305,65291,65293,177,215,247,65309,8800,65308,65310,8806,8807,8734,8756,9794,9792,176,8242,8243,8451,65509,65284,65504,65505,65285,65283,65286, [...]
"jis0212":[null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null [...]
@@ -34,5 +35,5 @@
"windows-1257":[8364,129,8218,131,8222,8230,8224,8225,136,8240,138,8249,140,168,711,184,144,8216,8217,8220,8221,8226,8211,8212,152,8482,154,8250,156,175,731,159,160,null,162,163,164,null,166,167,216,169,342,171,172,173,174,198,176,177,178,179,180,181,182,183,248,185,343,187,188,189,190,230,260,302,256,262,196,197,280,274,268,201,377,278,290,310,298,315,352,323,325,211,332,213,214,215,370,321,346,362,220,379,381,223,261,303,257,263,228,229,281,275,269,233,378,279,291,311,299,316,353,324 [...]
"windows-1258":[8364,129,8218,402,8222,8230,8224,8225,710,8240,138,8249,338,141,142,143,144,8216,8217,8220,8221,8226,8211,8212,732,8482,154,8250,339,157,158,376,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,258,196,197,198,199,200,201,202,203,768,205,206,207,272,209,777,211,212,416,214,215,216,217,218,219,220,431,771,223,224,225,226,259,228,229,230,231,232,233,234,235,769,237,238,239,273,241,8 [...]
"x-mac-cyrillic":[1040,1041,1042,1043,1044,1045,1046,1047,1048,1049,1050,1051,1052,1053,1054,1055,1056,1057,1058,1059,1060,1061,1062,1063,1064,1065,1066,1067,1068,1069,1070,1071,8224,176,1168,163,167,8226,182,1030,174,169,8482,1026,1106,8800,1027,1107,8734,177,8804,8805,1110,181,1169,1032,1028,1108,1031,1111,1033,1113,1034,1114,1112,1029,172,8730,402,8776,8710,171,187,8230,160,1035,1115,1036,1116,1109,8211,8212,8220,8221,8216,8217,247,8222,1038,1118,1039,1119,8470,1025,1105,1103,1072,1 [...]
-};
-}(this));
+}
+;}(this));
diff --git a/lib/encoding.js b/lib/encoding.js
index 95e7404..1f021d6 100644
--- a/lib/encoding.js
+++ b/lib/encoding.js
@@ -29,21 +29,12 @@ if (typeof module !== "undefined" && module.exports) {
}
/**
- * @param {*} item The item to look for in the list.
- * @param {!Array.<*>} list The list to check.
- * @return {boolean} True if the item appears in the list.
+ * @param {!Array.<*>} array The array to check.
+ * @param {*} item The item to look for in the array.
+ * @return {boolean} True if the item appears in the array.
*/
- function isOneOf(item, list) {
- return list.indexOf(item) !== -1;
- }
-
- /**
- * @param {*} item The item to look for in the list.
- * @param {!Array.<*>} list The list to check.
- * @return {boolean} True if the item does not appear in the list.
- */
- function isNoneOf(item, list) {
- return list.indexOf(item) === -1;
+ function includes(array, item) {
+ return array.indexOf(item) !== -1;
}
var floor = Math.floor;
@@ -274,12 +265,6 @@ if (typeof module !== "undefined" && module.exports) {
/** @const */
var finished = -1;
- /** @const */
- var error_mode_replacement = false;
-
- /** @const */
- var error_mode_fatal = true;
-
/**
* @param {boolean} fatal If true, decoding errors raise an exception.
* @param {number=} opt_code_point Override the standard fallback code point.
@@ -354,459 +339,459 @@ if (typeof module !== "undefined" && module.exports) {
* }>}
*/
var encodings = [
- {
- "encodings": [
- {
- "labels": [
- "unicode-1-1-utf-8",
- "utf-8",
- "utf8"
- ],
- "name": "utf-8"
- }
- ],
- "heading": "The Encoding"
- },
- {
- "encodings": [
- {
- "labels": [
- "866",
- "cp866",
- "csibm866",
- "ibm866"
- ],
- "name": "ibm866"
- },
- {
- "labels": [
- "csisolatin2",
- "iso-8859-2",
- "iso-ir-101",
- "iso8859-2",
- "iso88592",
- "iso_8859-2",
- "iso_8859-2:1987",
- "l2",
- "latin2"
- ],
- "name": "iso-8859-2"
- },
- {
- "labels": [
- "csisolatin3",
- "iso-8859-3",
- "iso-ir-109",
- "iso8859-3",
- "iso88593",
- "iso_8859-3",
- "iso_8859-3:1988",
- "l3",
- "latin3"
- ],
- "name": "iso-8859-3"
- },
- {
- "labels": [
- "csisolatin4",
- "iso-8859-4",
- "iso-ir-110",
- "iso8859-4",
- "iso88594",
- "iso_8859-4",
- "iso_8859-4:1988",
- "l4",
- "latin4"
- ],
- "name": "iso-8859-4"
- },
- {
- "labels": [
- "csisolatincyrillic",
- "cyrillic",
- "iso-8859-5",
- "iso-ir-144",
- "iso8859-5",
- "iso88595",
- "iso_8859-5",
- "iso_8859-5:1988"
- ],
- "name": "iso-8859-5"
- },
- {
- "labels": [
- "arabic",
- "asmo-708",
- "csiso88596e",
- "csiso88596i",
- "csisolatinarabic",
- "ecma-114",
- "iso-8859-6",
- "iso-8859-6-e",
- "iso-8859-6-i",
- "iso-ir-127",
- "iso8859-6",
- "iso88596",
- "iso_8859-6",
- "iso_8859-6:1987"
- ],
- "name": "iso-8859-6"
- },
- {
- "labels": [
- "csisolatingreek",
- "ecma-118",
- "elot_928",
- "greek",
- "greek8",
- "iso-8859-7",
- "iso-ir-126",
- "iso8859-7",
- "iso88597",
- "iso_8859-7",
- "iso_8859-7:1987",
- "sun_eu_greek"
- ],
- "name": "iso-8859-7"
- },
- {
- "labels": [
- "csiso88598e",
- "csisolatinhebrew",
- "hebrew",
- "iso-8859-8",
- "iso-8859-8-e",
- "iso-ir-138",
- "iso8859-8",
- "iso88598",
- "iso_8859-8",
- "iso_8859-8:1988",
- "visual"
- ],
- "name": "iso-8859-8"
- },
- {
- "labels": [
- "csiso88598i",
- "iso-8859-8-i",
- "logical"
- ],
- "name": "iso-8859-8-i"
- },
- {
- "labels": [
- "csisolatin6",
- "iso-8859-10",
- "iso-ir-157",
- "iso8859-10",
- "iso885910",
- "l6",
- "latin6"
- ],
- "name": "iso-8859-10"
- },
- {
- "labels": [
- "iso-8859-13",
- "iso8859-13",
- "iso885913"
- ],
- "name": "iso-8859-13"
- },
- {
- "labels": [
- "iso-8859-14",
- "iso8859-14",
- "iso885914"
- ],
- "name": "iso-8859-14"
- },
- {
- "labels": [
- "csisolatin9",
- "iso-8859-15",
- "iso8859-15",
- "iso885915",
- "iso_8859-15",
- "l9"
- ],
- "name": "iso-8859-15"
- },
- {
- "labels": [
- "iso-8859-16"
- ],
- "name": "iso-8859-16"
- },
- {
- "labels": [
- "cskoi8r",
- "koi",
- "koi8",
- "koi8-r",
- "koi8_r"
- ],
- "name": "koi8-r"
- },
- {
- "labels": [
- "koi8-ru",
- "koi8-u"
- ],
- "name": "koi8-u"
- },
- {
- "labels": [
- "csmacintosh",
- "mac",
- "macintosh",
- "x-mac-roman"
- ],
- "name": "macintosh"
- },
- {
- "labels": [
- "dos-874",
- "iso-8859-11",
- "iso8859-11",
- "iso885911",
- "tis-620",
- "windows-874"
- ],
- "name": "windows-874"
- },
- {
- "labels": [
- "cp1250",
- "windows-1250",
- "x-cp1250"
- ],
- "name": "windows-1250"
- },
- {
- "labels": [
- "cp1251",
- "windows-1251",
- "x-cp1251"
- ],
- "name": "windows-1251"
- },
- {
- "labels": [
- "ansi_x3.4-1968",
- "ascii",
- "cp1252",
- "cp819",
- "csisolatin1",
- "ibm819",
- "iso-8859-1",
- "iso-ir-100",
- "iso8859-1",
- "iso88591",
- "iso_8859-1",
- "iso_8859-1:1987",
- "l1",
- "latin1",
- "us-ascii",
- "windows-1252",
- "x-cp1252"
- ],
- "name": "windows-1252"
- },
- {
- "labels": [
- "cp1253",
- "windows-1253",
- "x-cp1253"
- ],
- "name": "windows-1253"
- },
- {
- "labels": [
- "cp1254",
- "csisolatin5",
- "iso-8859-9",
- "iso-ir-148",
- "iso8859-9",
- "iso88599",
- "iso_8859-9",
- "iso_8859-9:1989",
- "l5",
- "latin5",
- "windows-1254",
- "x-cp1254"
- ],
- "name": "windows-1254"
- },
- {
- "labels": [
- "cp1255",
- "windows-1255",
- "x-cp1255"
- ],
- "name": "windows-1255"
- },
- {
- "labels": [
- "cp1256",
- "windows-1256",
- "x-cp1256"
- ],
- "name": "windows-1256"
- },
- {
- "labels": [
- "cp1257",
- "windows-1257",
- "x-cp1257"
- ],
- "name": "windows-1257"
- },
- {
- "labels": [
- "cp1258",
- "windows-1258",
- "x-cp1258"
- ],
- "name": "windows-1258"
- },
- {
- "labels": [
- "x-mac-cyrillic",
- "x-mac-ukrainian"
- ],
- "name": "x-mac-cyrillic"
- }
- ],
- "heading": "Legacy single-byte encodings"
- },
- {
- "encodings": [
- {
- "labels": [
- "chinese",
- "csgb2312",
- "csiso58gb231280",
- "gb2312",
- "gb_2312",
- "gb_2312-80",
- "gbk",
- "iso-ir-58",
- "x-gbk"
- ],
- "name": "gbk"
- },
- {
- "labels": [
- "gb18030"
- ],
- "name": "gb18030"
- }
- ],
- "heading": "Legacy multi-byte Chinese (simplified) encodings"
- },
- {
- "encodings": [
- {
- "labels": [
- "big5",
- "big5-hkscs",
- "cn-big5",
- "csbig5",
- "x-x-big5"
- ],
- "name": "big5"
- }
- ],
- "heading": "Legacy multi-byte Chinese (traditional) encodings"
- },
- {
- "encodings": [
- {
- "labels": [
- "cseucpkdfmtjapanese",
- "euc-jp",
- "x-euc-jp"
- ],
- "name": "euc-jp"
- },
- {
- "labels": [
- "csiso2022jp",
- "iso-2022-jp"
- ],
- "name": "iso-2022-jp"
- },
- {
- "labels": [
- "csshiftjis",
- "ms932",
- "ms_kanji",
- "shift-jis",
- "shift_jis",
- "sjis",
- "windows-31j",
- "x-sjis"
- ],
- "name": "shift_jis"
- }
- ],
- "heading": "Legacy multi-byte Japanese encodings"
- },
- {
- "encodings": [
- {
- "labels": [
- "cseuckr",
- "csksc56011987",
- "euc-kr",
- "iso-ir-149",
- "korean",
- "ks_c_5601-1987",
- "ks_c_5601-1989",
- "ksc5601",
- "ksc_5601",
- "windows-949"
- ],
- "name": "euc-kr"
- }
- ],
- "heading": "Legacy multi-byte Korean encodings"
- },
- {
- "encodings": [
- {
- "labels": [
- "csiso2022kr",
- "hz-gb-2312",
- "iso-2022-cn",
- "iso-2022-cn-ext",
- "iso-2022-kr"
- ],
- "name": "replacement"
- },
- {
- "labels": [
- "utf-16be"
- ],
- "name": "utf-16be"
- },
- {
- "labels": [
- "utf-16",
- "utf-16le"
- ],
- "name": "utf-16le"
- },
- {
- "labels": [
- "x-user-defined"
- ],
- "name": "x-user-defined"
- }
- ],
- "heading": "Legacy miscellaneous encodings"
- }
+ {
+ "encodings": [
+ {
+ "labels": [
+ "unicode-1-1-utf-8",
+ "utf-8",
+ "utf8"
+ ],
+ "name": "UTF-8"
+ }
+ ],
+ "heading": "The Encoding"
+ },
+ {
+ "encodings": [
+ {
+ "labels": [
+ "866",
+ "cp866",
+ "csibm866",
+ "ibm866"
+ ],
+ "name": "IBM866"
+ },
+ {
+ "labels": [
+ "csisolatin2",
+ "iso-8859-2",
+ "iso-ir-101",
+ "iso8859-2",
+ "iso88592",
+ "iso_8859-2",
+ "iso_8859-2:1987",
+ "l2",
+ "latin2"
+ ],
+ "name": "ISO-8859-2"
+ },
+ {
+ "labels": [
+ "csisolatin3",
+ "iso-8859-3",
+ "iso-ir-109",
+ "iso8859-3",
+ "iso88593",
+ "iso_8859-3",
+ "iso_8859-3:1988",
+ "l3",
+ "latin3"
+ ],
+ "name": "ISO-8859-3"
+ },
+ {
+ "labels": [
+ "csisolatin4",
+ "iso-8859-4",
+ "iso-ir-110",
+ "iso8859-4",
+ "iso88594",
+ "iso_8859-4",
+ "iso_8859-4:1988",
+ "l4",
+ "latin4"
+ ],
+ "name": "ISO-8859-4"
+ },
+ {
+ "labels": [
+ "csisolatincyrillic",
+ "cyrillic",
+ "iso-8859-5",
+ "iso-ir-144",
+ "iso8859-5",
+ "iso88595",
+ "iso_8859-5",
+ "iso_8859-5:1988"
+ ],
+ "name": "ISO-8859-5"
+ },
+ {
+ "labels": [
+ "arabic",
+ "asmo-708",
+ "csiso88596e",
+ "csiso88596i",
+ "csisolatinarabic",
+ "ecma-114",
+ "iso-8859-6",
+ "iso-8859-6-e",
+ "iso-8859-6-i",
+ "iso-ir-127",
+ "iso8859-6",
+ "iso88596",
+ "iso_8859-6",
+ "iso_8859-6:1987"
+ ],
+ "name": "ISO-8859-6"
+ },
+ {
+ "labels": [
+ "csisolatingreek",
+ "ecma-118",
+ "elot_928",
+ "greek",
+ "greek8",
+ "iso-8859-7",
+ "iso-ir-126",
+ "iso8859-7",
+ "iso88597",
+ "iso_8859-7",
+ "iso_8859-7:1987",
+ "sun_eu_greek"
+ ],
+ "name": "ISO-8859-7"
+ },
+ {
+ "labels": [
+ "csiso88598e",
+ "csisolatinhebrew",
+ "hebrew",
+ "iso-8859-8",
+ "iso-8859-8-e",
+ "iso-ir-138",
+ "iso8859-8",
+ "iso88598",
+ "iso_8859-8",
+ "iso_8859-8:1988",
+ "visual"
+ ],
+ "name": "ISO-8859-8"
+ },
+ {
+ "labels": [
+ "csiso88598i",
+ "iso-8859-8-i",
+ "logical"
+ ],
+ "name": "ISO-8859-8-I"
+ },
+ {
+ "labels": [
+ "csisolatin6",
+ "iso-8859-10",
+ "iso-ir-157",
+ "iso8859-10",
+ "iso885910",
+ "l6",
+ "latin6"
+ ],
+ "name": "ISO-8859-10"
+ },
+ {
+ "labels": [
+ "iso-8859-13",
+ "iso8859-13",
+ "iso885913"
+ ],
+ "name": "ISO-8859-13"
+ },
+ {
+ "labels": [
+ "iso-8859-14",
+ "iso8859-14",
+ "iso885914"
+ ],
+ "name": "ISO-8859-14"
+ },
+ {
+ "labels": [
+ "csisolatin9",
+ "iso-8859-15",
+ "iso8859-15",
+ "iso885915",
+ "iso_8859-15",
+ "l9"
+ ],
+ "name": "ISO-8859-15"
+ },
+ {
+ "labels": [
+ "iso-8859-16"
+ ],
+ "name": "ISO-8859-16"
+ },
+ {
+ "labels": [
+ "cskoi8r",
+ "koi",
+ "koi8",
+ "koi8-r",
+ "koi8_r"
+ ],
+ "name": "KOI8-R"
+ },
+ {
+ "labels": [
+ "koi8-ru",
+ "koi8-u"
+ ],
+ "name": "KOI8-U"
+ },
+ {
+ "labels": [
+ "csmacintosh",
+ "mac",
+ "macintosh",
+ "x-mac-roman"
+ ],
+ "name": "macintosh"
+ },
+ {
+ "labels": [
+ "dos-874",
+ "iso-8859-11",
+ "iso8859-11",
+ "iso885911",
+ "tis-620",
+ "windows-874"
+ ],
+ "name": "windows-874"
+ },
+ {
+ "labels": [
+ "cp1250",
+ "windows-1250",
+ "x-cp1250"
+ ],
+ "name": "windows-1250"
+ },
+ {
+ "labels": [
+ "cp1251",
+ "windows-1251",
+ "x-cp1251"
+ ],
+ "name": "windows-1251"
+ },
+ {
+ "labels": [
+ "ansi_x3.4-1968",
+ "ascii",
+ "cp1252",
+ "cp819",
+ "csisolatin1",
+ "ibm819",
+ "iso-8859-1",
+ "iso-ir-100",
+ "iso8859-1",
+ "iso88591",
+ "iso_8859-1",
+ "iso_8859-1:1987",
+ "l1",
+ "latin1",
+ "us-ascii",
+ "windows-1252",
+ "x-cp1252"
+ ],
+ "name": "windows-1252"
+ },
+ {
+ "labels": [
+ "cp1253",
+ "windows-1253",
+ "x-cp1253"
+ ],
+ "name": "windows-1253"
+ },
+ {
+ "labels": [
+ "cp1254",
+ "csisolatin5",
+ "iso-8859-9",
+ "iso-ir-148",
+ "iso8859-9",
+ "iso88599",
+ "iso_8859-9",
+ "iso_8859-9:1989",
+ "l5",
+ "latin5",
+ "windows-1254",
+ "x-cp1254"
+ ],
+ "name": "windows-1254"
+ },
+ {
+ "labels": [
+ "cp1255",
+ "windows-1255",
+ "x-cp1255"
+ ],
+ "name": "windows-1255"
+ },
+ {
+ "labels": [
+ "cp1256",
+ "windows-1256",
+ "x-cp1256"
+ ],
+ "name": "windows-1256"
+ },
+ {
+ "labels": [
+ "cp1257",
+ "windows-1257",
+ "x-cp1257"
+ ],
+ "name": "windows-1257"
+ },
+ {
+ "labels": [
+ "cp1258",
+ "windows-1258",
+ "x-cp1258"
+ ],
+ "name": "windows-1258"
+ },
+ {
+ "labels": [
+ "x-mac-cyrillic",
+ "x-mac-ukrainian"
+ ],
+ "name": "x-mac-cyrillic"
+ }
+ ],
+ "heading": "Legacy single-byte encodings"
+ },
+ {
+ "encodings": [
+ {
+ "labels": [
+ "chinese",
+ "csgb2312",
+ "csiso58gb231280",
+ "gb2312",
+ "gb_2312",
+ "gb_2312-80",
+ "gbk",
+ "iso-ir-58",
+ "x-gbk"
+ ],
+ "name": "GBK"
+ },
+ {
+ "labels": [
+ "gb18030"
+ ],
+ "name": "gb18030"
+ }
+ ],
+ "heading": "Legacy multi-byte Chinese (simplified) encodings"
+ },
+ {
+ "encodings": [
+ {
+ "labels": [
+ "big5",
+ "big5-hkscs",
+ "cn-big5",
+ "csbig5",
+ "x-x-big5"
+ ],
+ "name": "Big5"
+ }
+ ],
+ "heading": "Legacy multi-byte Chinese (traditional) encodings"
+ },
+ {
+ "encodings": [
+ {
+ "labels": [
+ "cseucpkdfmtjapanese",
+ "euc-jp",
+ "x-euc-jp"
+ ],
+ "name": "EUC-JP"
+ },
+ {
+ "labels": [
+ "csiso2022jp",
+ "iso-2022-jp"
+ ],
+ "name": "ISO-2022-JP"
+ },
+ {
+ "labels": [
+ "csshiftjis",
+ "ms932",
+ "ms_kanji",
+ "shift-jis",
+ "shift_jis",
+ "sjis",
+ "windows-31j",
+ "x-sjis"
+ ],
+ "name": "Shift_JIS"
+ }
+ ],
+ "heading": "Legacy multi-byte Japanese encodings"
+ },
+ {
+ "encodings": [
+ {
+ "labels": [
+ "cseuckr",
+ "csksc56011987",
+ "euc-kr",
+ "iso-ir-149",
+ "korean",
+ "ks_c_5601-1987",
+ "ks_c_5601-1989",
+ "ksc5601",
+ "ksc_5601",
+ "windows-949"
+ ],
+ "name": "EUC-KR"
+ }
+ ],
+ "heading": "Legacy multi-byte Korean encodings"
+ },
+ {
+ "encodings": [
+ {
+ "labels": [
+ "csiso2022kr",
+ "hz-gb-2312",
+ "iso-2022-cn",
+ "iso-2022-cn-ext",
+ "iso-2022-kr"
+ ],
+ "name": "replacement"
+ },
+ {
+ "labels": [
+ "utf-16be"
+ ],
+ "name": "UTF-16BE"
+ },
+ {
+ "labels": [
+ "utf-16",
+ "utf-16le"
+ ],
+ "name": "UTF-16LE"
+ },
+ {
+ "labels": [
+ "x-user-defined"
+ ],
+ "name": "x-user-defined"
+ }
+ ],
+ "heading": "Legacy miscellaneous encodings"
+ }
];
// Label to encoding registry.
@@ -875,7 +860,10 @@ if (typeof module !== "undefined" && module.exports) {
if ((pointer > 39419 && pointer < 189000) || (pointer > 1237575))
return null;
- // 2. Let offset be the last pointer in index gb18030 ranges that
+ // 2. If pointer is 7457, return code point U+E7C7.
+ if (pointer === 7457) return 0xE7C7;
+
+ // 3. Let offset be the last pointer in index gb18030 ranges that
// is equal to or less than pointer and let code point offset be
// its corresponding code point.
var offset = 0;
@@ -893,7 +881,7 @@ if (typeof module !== "undefined" && module.exports) {
}
}
- // 3. Return a code point whose value is code point offset +
+ // 4. Return a code point whose value is code point offset +
// pointer − offset.
return code_point_offset + pointer - offset;
}
@@ -904,7 +892,10 @@ if (typeof module !== "undefined" && module.exports) {
* gb18030 index.
*/
function indexGB18030RangesPointerFor(code_point) {
- // 1. Let offset be the last code point in index gb18030 ranges
+ // 1. If code point is U+E7C7, return pointer 7457.
+ if (code_point === 0xE7C7) return 7457;
+
+ // 2. Let offset be the last code point in index gb18030 ranges
// that is equal to or less than code point and let pointer offset
// be its corresponding pointer.
var offset = 0;
@@ -922,7 +913,7 @@ if (typeof module !== "undefined" && module.exports) {
}
}
- // 2. Return a pointer whose value is pointer offset + code point
+ // 3. Return a pointer whose value is pointer offset + code point
// − offset.
return pointer_offset + code_point - offset;
}
@@ -1002,8 +993,8 @@ if (typeof module !== "undefined" && module.exports) {
this._ignoreBOM = false;
/** @private @type {boolean} */
this._BOMseen = false;
- /** @private @type {boolean} */
- this._error_mode = error_mode_replacement;
+ /** @private @type {string} */
+ this._error_mode = 'replacement';
/** @private @type {boolean} */
this._do_not_flush = false;
@@ -1029,7 +1020,7 @@ if (typeof module !== "undefined" && module.exports) {
// 5. If options's fatal member is true, set dec's error mode to
// fatal.
if (Boolean(options['fatal']))
- dec._error_mode = error_mode_fatal;
+ dec._error_mode = 'fatal';
// 6. If options's ignoreBOM member is true, set dec's ignore BOM
// flag.
@@ -1038,8 +1029,8 @@ if (typeof module !== "undefined" && module.exports) {
// For pre-ES5 runtimes:
if (!Object.defineProperty) {
- this.encoding = dec._encoding.name;
- this.fatal = dec._error_mode;
+ this.encoding = dec._encoding.name.toLowerCase();
+ this.fatal = dec._error_mode === 'fatal';
this.ignoreBOM = dec._ignoreBOM;
}
@@ -1051,14 +1042,14 @@ if (typeof module !== "undefined" && module.exports) {
// The encoding attribute's getter must return encoding's name.
Object.defineProperty(TextDecoder.prototype, 'encoding', {
/** @this {TextDecoder} */
- get: function() { return this._encoding.name; }
+ get: function() { return this._encoding.name.toLowerCase(); }
});
// The fatal attribute's getter must return true if error mode
// is fatal, and false otherwise.
Object.defineProperty(TextDecoder.prototype, 'fatal', {
/** @this {TextDecoder} */
- get: function() { return this._error_mode === error_mode_fatal; }
+ get: function() { return this._error_mode === 'fatal'; }
});
// The ignoreBOM attribute's getter must return true if ignore
@@ -1093,7 +1084,8 @@ if (typeof module !== "undefined" && module.exports) {
// encoding's decoder, set stream to a new stream, and unset the
// BOM seen flag.
if (!this._do_not_flush) {
- this._decoder = decoders[this._encoding.name]({fatal: this._error_mode});
+ this._decoder = decoders[this._encoding.name]({
+ fatal: this._error_mode === 'fatal'});
this._BOMseen = false;
}
@@ -1171,10 +1163,9 @@ if (typeof module !== "undefined" && module.exports) {
// 1. Let token be the result of reading from stream.
// (Done in-place on array, rather than as a stream)
- // 2. If encoding is one of utf-8, utf-16be, and utf-16le, and
- // ignore BOM flag and BOM seen flag are unset, run these
- // subsubsteps:
- if (isOneOf(this.encoding, ['utf-8', 'utf-16le', 'utf-16be']) &&
+ // 2. If encoding is UTF-8, UTF-16BE, or UTF-16LE, and ignore
+ // BOM flag and BOM seen flag are unset, run these subsubsteps:
+ if (includes(['UTF-8', 'UTF-16LE', 'UTF-16BE'], this._encoding.name) &&
!this._ignoreBOM && !this._BOMseen) {
if (stream.length > 0 && stream[0] === 0xFEFF) {
// 1. If token is U+FEFF, set BOM seen flag.
@@ -1201,15 +1192,13 @@ if (typeof module !== "undefined" && module.exports) {
/**
* @constructor
- * @param {string=} label The label of the encoding;
- * defaults to 'utf-8'.
- * @param {Object=} options
+ * @param {string=} label The label of the encoding. NONSTANDARD.
+ * @param {Object=} options NONSTANDARD.
*/
function TextEncoder(label, options) {
// Web IDL conventions
if (!(this instanceof TextEncoder))
throw TypeError('Called as a function. Did you forget \'new\'?');
- label = label !== undefined ? String(label) : DEFAULT_ENCODING;
options = ToDictionary(options);
// A TextEncoder object has an associated encoding and encoder.
@@ -1222,37 +1211,39 @@ if (typeof module !== "undefined" && module.exports) {
// Non-standard
/** @private @type {boolean} */
this._do_not_flush = false;
- /** @private @type {boolean} */
- this._fatal = Boolean(options['fatal']);
-
- // 1. Let encoding be the result of getting an encoding from utfLabel.
- var encoding = getEncoding(label);
+ /** @private @type {string} */
+ this._fatal = Boolean(options['fatal']) ? 'fatal' : 'replacement';
- // 2. If encoding is failure, or is none of utf-8, utf-16be, and
- // utf-16le, throw a RangeError.
- if (encoding === null || encoding.name === 'replacement' ||
- (isNoneOf(encoding.name, ['utf-8','utf-16le', 'utf-16be']) &&
- !Boolean(options['NONSTANDARD_allowLegacyEncoding'])))
- throw RangeError('Unknown encoding: ' + label);
- if (!encoders[encoding.name]) {
- throw Error('Encoder not present.' +
- ' Did you forget to include encoding-indexes.js?');
- }
-
- // 3. Let enc be a new TextEncoder object.
+ // 1. Let enc be a new TextEncoder object.
var enc = this;
- // 4. Set enc's encoding to encoding.
- enc._encoding = encoding;
+ // 2. Set enc's encoding to UTF-8's encoder.
+ if (Boolean(options['NONSTANDARD_allowLegacyEncoding'])) {
+ // NONSTANDARD behavior.
+ label = label !== undefined ? String(label) : DEFAULT_ENCODING;
+ var encoding = getEncoding(label);
+ if (encoding === null || encoding.name === 'replacement')
+ throw RangeError('Unknown encoding: ' + label);
+ if (!encoders[encoding.name]) {
+ throw Error('Encoder not present.' +
+ ' Did you forget to include encoding-indexes.js?');
+ }
+ enc._encoding = encoding;
+ } else {
+ // Standard behavior.
+ enc._encoding = getEncoding('utf-8');
- // 5. Set enc's encoder to a new enc's encoding's encoder.
- // (Done during encode itself, due to nonstandard streaming support.)
+ if (label !== undefined && 'console' in global) {
+ console.warn('TextEncoder constructor called with encoding label, '
+ + 'which is ignored.');
+ }
+ }
// For pre-ES5 runtimes:
if (!Object.defineProperty)
- this.encoding = enc._encoding.name;
+ this.encoding = enc._encoding.name.toLowerCase();
- // 6. Return enc.
+ // 3. Return enc.
return enc;
}
@@ -1260,14 +1251,14 @@ if (typeof module !== "undefined" && module.exports) {
// The encoding attribute's getter must return encoding's name.
Object.defineProperty(TextEncoder.prototype, 'encoding', {
/** @this {TextEncoder} */
- get: function() { return this._encoding.name; }
+ get: function() { return this._encoding.name.toLowerCase(); }
});
}
/**
* @param {string=} opt_string The string to encode.
* @param {Object=} options
- * @return {Uint8Array} Encoded bytes, as a Uint8Array.
+ * @return {!Uint8Array} Encoded bytes, as a Uint8Array.
*/
TextEncoder.prototype.encode = function encode(opt_string, options) {
opt_string = opt_string ? String(opt_string) : '';
@@ -1277,7 +1268,8 @@ if (typeof module !== "undefined" && module.exports) {
// permitted for encoding (i.e. UTF-8, UTF-16) are stateful when
// the input is a USVString so streaming is not necessary.
if (!this._do_not_flush)
- this._encoder = encoders[this._encoding.name]({fatal: this._fatal});
+ this._encoder = encoders[this._encoding.name]({
+ fatal: this._fatal === 'fatal'});
this._do_not_flush = Boolean(options['stream']);
// 1. Convert input to a stream.
@@ -1540,11 +1532,11 @@ if (typeof module !== "undefined" && module.exports) {
}
/** @param {{fatal: boolean}} options */
- encoders['utf-8'] = function(options) {
+ encoders['UTF-8'] = function(options) {
return new UTF8Encoder(options);
};
/** @param {{fatal: boolean}} options */
- decoders['utf-8'] = function(options) {
+ decoders['UTF-8'] = function(options) {
return new UTF8Decoder(options);
};
@@ -1636,7 +1628,7 @@ if (typeof module !== "undefined" && module.exports) {
return;
category.encodings.forEach(function(encoding) {
var name = encoding.name;
- var idx = index(name);
+ var idx = index(name.toLowerCase());
/** @param {{fatal: boolean}} options */
decoders[name] = function(options) {
return new SingleByteDecoder(idx, options);
@@ -1658,14 +1650,14 @@ if (typeof module !== "undefined" && module.exports) {
// 11.1.1 gbk decoder
// gbk's decoder is gb18030's decoder.
/** @param {{fatal: boolean}} options */
- decoders['gbk'] = function(options) {
+ decoders['GBK'] = function(options) {
return new GB18030Decoder(options);
};
// 11.1.2 gbk encoder
// gbk's encoder is gb18030's encoder with its gbk flag set.
/** @param {{fatal: boolean}} options */
- encoders['gbk'] = function(options) {
+ encoders['GBK'] = function(options) {
return new GB18030Encoder(options, true);
};
@@ -1852,16 +1844,20 @@ if (typeof module !== "undefined" && module.exports) {
if (isASCIICodePoint(code_point))
return code_point;
- // 3. If the gbk flag is set and code point is U+20AC, return
+ // 3. If code point is U+E5E5, return error with code point.
+ if (code_point === 0xE5E5)
+ return encoderError(code_point);
+
+ // 4. If the gbk flag is set and code point is U+20AC, return
// byte 0x80.
if (gbk_flag && code_point === 0x20AC)
return 0x80;
- // 4. Let pointer be the index pointer for code point in index
+ // 5. Let pointer be the index pointer for code point in index
// gb18030.
var pointer = indexPointerFor(code_point, index('gb18030'));
- // 5. If pointer is not null, run these substeps:
+ // 6. If pointer is not null, run these substeps:
if (pointer !== null) {
// 1. Let lead be floor(pointer / 190) + 0x81.
@@ -1877,33 +1873,33 @@ if (typeof module !== "undefined" && module.exports) {
return [lead, trail + offset];
}
- // 6. If gbk flag is set, return error with code point.
+ // 7. If gbk flag is set, return error with code point.
if (gbk_flag)
return encoderError(code_point);
- // 7. Set pointer to the index gb18030 ranges pointer for code
+ // 8. Set pointer to the index gb18030 ranges pointer for code
// point.
pointer = indexGB18030RangesPointerFor(code_point);
- // 8. Let byte1 be floor(pointer / 10 / 126 / 10).
+ // 9. Let byte1 be floor(pointer / 10 / 126 / 10).
var byte1 = floor(pointer / 10 / 126 / 10);
- // 9. Set pointer to pointer − byte1 × 10 × 126 × 10.
+ // 10. Set pointer to pointer − byte1 × 10 × 126 × 10.
pointer = pointer - byte1 * 10 * 126 * 10;
- // 10. Let byte2 be floor(pointer / 10 / 126).
+ // 11. Let byte2 be floor(pointer / 10 / 126).
var byte2 = floor(pointer / 10 / 126);
- // 11. Set pointer to pointer − byte2 × 10 × 126.
+ // 12. Set pointer to pointer − byte2 × 10 × 126.
pointer = pointer - byte2 * 10 * 126;
- // 12. Let byte3 be floor(pointer / 10).
+ // 13. Let byte3 be floor(pointer / 10).
var byte3 = floor(pointer / 10);
- // 13. Let byte4 be pointer − byte3 × 10.
+ // 14. Let byte4 be pointer − byte3 × 10.
var byte4 = pointer - byte3 * 10;
- // 14. Return four bytes whose values are byte1 + 0x81, byte2 +
+ // 15. Return four bytes whose values are byte1 + 0x81, byte2 +
// 0x30, byte3 + 0x81, byte4 + 0x30.
return [byte1 + 0x81,
byte2 + 0x30,
@@ -2077,11 +2073,11 @@ if (typeof module !== "undefined" && module.exports) {
}
/** @param {{fatal: boolean}} options */
- encoders['big5'] = function(options) {
+ encoders['Big5'] = function(options) {
return new Big5Encoder(options);
};
/** @param {{fatal: boolean}} options */
- decoders['big5'] = function(options) {
+ decoders['Big5'] = function(options) {
return new Big5Decoder(options);
};
@@ -2255,11 +2251,11 @@ if (typeof module !== "undefined" && module.exports) {
}
/** @param {{fatal: boolean}} options */
- encoders['euc-jp'] = function(options) {
+ encoders['EUC-JP'] = function(options) {
return new EUCJPEncoder(options);
};
/** @param {{fatal: boolean}} options */
- decoders['euc-jp'] = function(options) {
+ decoders['EUC-JP'] = function(options) {
return new EUCJPDecoder(options);
};
@@ -2605,6 +2601,7 @@ if (typeof module !== "undefined" && module.exports) {
if (code_point === end_of_stream &&
iso2022jp_state !== states.ASCII) {
stream.prepend(code_point);
+ iso2022jp_state = states.ASCII;
return [0x1B, 0x28, 0x42];
}
@@ -2613,19 +2610,28 @@ if (typeof module !== "undefined" && module.exports) {
if (code_point === end_of_stream && iso2022jp_state === states.ASCII)
return finished;
- // 3. If iso-2022-jp encoder state is ASCII and code point is an
+ // 3. If ISO-2022-JP encoder state is ASCII or Roman, and code
+ // point is U+000E, U+000F, or U+001B, return error with U+FFFD.
+ if ((iso2022jp_state === states.ASCII ||
+ iso2022jp_state === states.Roman) &&
+ (code_point === 0x000E || code_point === 0x000F ||
+ code_point === 0x001B)) {
+ return encoderError(0xFFFD);
+ }
+
+ // 4. If iso-2022-jp encoder state is ASCII and code point is an
// ASCII code point, return a byte whose value is code point.
if (iso2022jp_state === states.ASCII &&
isASCIICodePoint(code_point))
return code_point;
- // 4. If iso-2022-jp encoder state is Roman and code point is an
+ // 5. If iso-2022-jp encoder state is Roman and code point is an
// ASCII code point, excluding U+005C and U+007E, or is U+00A5
// or U+203E, run these substeps:
if (iso2022jp_state === states.Roman &&
- (isASCIICodePoint(code_point) &&
+ ((isASCIICodePoint(code_point) &&
code_point !== 0x005C && code_point !== 0x007E) ||
- (code_point == 0x00A5 || code_point == 0x203E)) {
+ (code_point == 0x00A5 || code_point == 0x203E))) {
// 1. If code point is an ASCII code point, return a byte
// whose value is code point.
@@ -2641,7 +2647,7 @@ if (typeof module !== "undefined" && module.exports) {
return 0x7E;
}
- // 5. If code point is an ASCII code point, and iso-2022-jp
+ // 6. If code point is an ASCII code point, and iso-2022-jp
// encoder state is not ASCII, prepend code point to stream, set
// iso-2022-jp encoder state to ASCII, and return three bytes
// 0x1B 0x28 0x42.
@@ -2652,7 +2658,7 @@ if (typeof module !== "undefined" && module.exports) {
return [0x1B, 0x28, 0x42];
}
- // 6. If code point is either U+00A5 or U+203E, and iso-2022-jp
+ // 7. If code point is either U+00A5 or U+203E, and iso-2022-jp
// encoder state is not Roman, prepend code point to stream, set
// iso-2022-jp encoder state to Roman, and return three bytes
// 0x1B 0x28 0x4A.
@@ -2663,19 +2669,19 @@ if (typeof module !== "undefined" && module.exports) {
return [0x1B, 0x28, 0x4A];
}
- // 7. If code point is U+2212, set it to U+FF0D.
+ // 8. If code point is U+2212, set it to U+FF0D.
if (code_point === 0x2212)
code_point = 0xFF0D;
- // 8. Let pointer be the index pointer for code point in index
+ // 9. Let pointer be the index pointer for code point in index
// jis0208.
var pointer = indexPointerFor(code_point, index('jis0208'));
- // 9. If pointer is null, return error with code point.
+ // 10. If pointer is null, return error with code point.
if (pointer === null)
return encoderError(code_point);
- // 10. If iso-2022-jp encoder state is not jis0208, prepend code
+ // 11. If iso-2022-jp encoder state is not jis0208, prepend code
// point to stream, set iso-2022-jp encoder state to jis0208,
// and return three bytes 0x1B 0x24 0x42.
if (iso2022jp_state !== states.jis0208) {
@@ -2684,23 +2690,23 @@ if (typeof module !== "undefined" && module.exports) {
return [0x1B, 0x24, 0x42];
}
- // 11. Let lead be floor(pointer / 94) + 0x21.
+ // 12. Let lead be floor(pointer / 94) + 0x21.
var lead = floor(pointer / 94) + 0x21;
- // 12. Let trail be pointer % 94 + 0x21.
+ // 13. Let trail be pointer % 94 + 0x21.
var trail = pointer % 94 + 0x21;
- // 13. Return two bytes whose values are lead and trail.
+ // 14. Return two bytes whose values are lead and trail.
return [lead, trail];
};
}
/** @param {{fatal: boolean}} options */
- encoders['iso-2022-jp'] = function(options) {
+ encoders['ISO-2022-JP'] = function(options) {
return new ISO2022JPEncoder(options);
};
/** @param {{fatal: boolean}} options */
- decoders['iso-2022-jp'] = function(options) {
+ decoders['ISO-2022-JP'] = function(options) {
return new ISO2022JPDecoder(options);
};
@@ -2873,11 +2879,11 @@ if (typeof module !== "undefined" && module.exports) {
}
/** @param {{fatal: boolean}} options */
- encoders['shift_jis'] = function(options) {
+ encoders['Shift_JIS'] = function(options) {
return new ShiftJISEncoder(options);
};
/** @param {{fatal: boolean}} options */
- decoders['shift_jis'] = function(options) {
+ decoders['Shift_JIS'] = function(options) {
return new ShiftJISDecoder(options);
};
@@ -3009,11 +3015,11 @@ if (typeof module !== "undefined" && module.exports) {
}
/** @param {{fatal: boolean}} options */
- encoders['euc-kr'] = function(options) {
+ encoders['EUC-KR'] = function(options) {
return new EUCKREncoder(options);
};
/** @param {{fatal: boolean}} options */
- decoders['euc-kr'] = function(options) {
+ decoders['EUC-KR'] = function(options) {
return new EUCKRDecoder(options);
};
@@ -3185,24 +3191,24 @@ if (typeof module !== "undefined" && module.exports) {
// 15.3 utf-16be
// 15.3.1 utf-16be decoder
/** @param {{fatal: boolean}} options */
- encoders['utf-16be'] = function(options) {
+ encoders['UTF-16BE'] = function(options) {
return new UTF16Encoder(true, options);
};
// 15.3.2 utf-16be encoder
/** @param {{fatal: boolean}} options */
- decoders['utf-16be'] = function(options) {
+ decoders['UTF-16BE'] = function(options) {
return new UTF16Decoder(true, options);
};
// 15.4 utf-16le
// 15.4.1 utf-16le decoder
/** @param {{fatal: boolean}} options */
- encoders['utf-16le'] = function(options) {
+ encoders['UTF-16LE'] = function(options) {
return new UTF16Encoder(false, options);
};
// 15.4.2 utf-16le encoder
/** @param {{fatal: boolean}} options */
- decoders['utf-16le'] = function(options) {
+ decoders['UTF-16LE'] = function(options) {
return new UTF16Decoder(false, options);
};
diff --git a/package.json b/package.json
index 3bae282..d7a21cc 100644
--- a/package.json
+++ b/package.json
@@ -2,9 +2,17 @@
"name": "text-encoding",
"author": "Joshua Bell <inexorabletash at gmail.com>",
"contributors": [
- "Rick Eyre <rick.eyre at outlook.com>"
+ "Joshua Bell <inexorabletash at gmail.com>",
+ "Rick Eyre <rick.eyre at outlook.com>",
+ "Eugen Podaru <eugen.podaru at live.com>",
+ "Filip Dupanović <filip.dupanovic at gmail.com>",
+ "Anne van Kesteren <annevk at annevk.nl>",
+ "Author: Francis Avila <francisga at gmail.com>",
+ "Michael J. Ryan <tracker1 at gmail.com>",
+ "Pierre Queinnec <pierre at queinnec.org>",
+ "Zack Weinberg <zackw at panix.com>"
],
- "version": "0.5.4",
+ "version": "0.6.0",
"description": "Polyfill for the Encoding Living Standard's API.",
"main": "index.js",
"files": [
diff --git a/test/test-misc.js b/test/test-misc.js
index c5ababa..dc12036 100644
--- a/test/test-misc.js
+++ b/test/test-misc.js
@@ -1,7 +1,7 @@
// This is free and unencumbered software released into the public domain.
// See LICENSE.md for more information.
-var UTF_ENCODINGS = ['utf-8', 'utf-16le', 'utf-16be'];
+var THE_ENCODING = ['utf-8'];
var LEGACY_ENCODINGS = [
'ibm866', 'iso-8859-2', 'iso-8859-3', 'iso-8859-4', 'iso-8859-5',
@@ -11,9 +11,14 @@ var LEGACY_ENCODINGS = [
'windows-1252', 'windows-1253', 'windows-1254', 'windows-1255',
'windows-1256', 'windows-1257', 'windows-1258', 'x-mac-cyrillic',
'gbk', 'gb18030', 'big5', 'euc-jp', 'iso-2022-jp', 'shift_jis',
- 'euc-kr'
+ 'euc-kr', 'utf-16le', 'utf-16be'
];
+var ASCII_SUPERSETS = THE_ENCODING.concat(LEGACY_ENCODINGS)
+ .filter(function(e) {
+ return e !== 'utf-16le' && e !== 'utf-16be';
+ });
+
// Miscellaneous tests
test(function() {
@@ -29,7 +34,6 @@ test(function() {
test(function() {
assert_true('encoding' in new TextEncoder());
assert_equals(new TextEncoder().encoding, 'utf-8');
- assert_equals(new TextEncoder('utf-16le').encoding, 'utf-16le');
assert_true('encoding' in new TextDecoder());
assert_equals(new TextDecoder().encoding, 'utf-8');
@@ -53,8 +57,8 @@ test(function() {
badStrings.forEach(
function(t) {
- var encoded = new TextEncoder('utf-8').encode(t.input);
- var decoded = new TextDecoder('utf-8').decode(encoded);
+ var encoded = new TextEncoder().encode(t.input);
+ var decoded = new TextDecoder().decode(encoded);
assert_equals(t.expected, decoded);
});
}, 'bad data');
@@ -167,21 +171,40 @@ test(function() {
}, 'Encoding names');
test(function() {
- ['utf-8', 'utf-16le', 'utf-16be'].forEach(function(encoding) {
- var string = '\x00123ABCabc\x80\xFF\u0100\u1000\uFFFD\uD800\uDC00\uDBFF\uDFFF';
- var encoded = new TextEncoder(encoding).encode(string);
+ var string = '\x00123ABCabc\x80\xFF\u0100\u1000\uFFFD\uD800\uDC00\uDBFF\uDFFF';
+ var cases = [
+ {
+ encoding: 'utf-8',
+ encoded: [0, 49, 50, 51, 65, 66, 67, 97, 98, 99, 194, 128, 195, 191, 196,
+ 128, 225, 128, 128, 239, 191, 189, 240, 144, 128, 128, 244, 143,
+ 191, 191]
+ },
+ {
+ encoding: 'utf-16le',
+ encoded: [0, 0, 49, 0, 50, 0, 51, 0, 65, 0, 66, 0, 67, 0, 97, 0, 98, 0,
+ 99, 0, 128, 0, 255, 0, 0, 1, 0, 16, 253, 255, 0, 216, 0, 220,
+ 255, 219, 255, 223]
+ },
+ {
+ encoding: 'utf-16be',
+ encoded: [0, 0, 0, 49, 0, 50, 0, 51, 0, 65, 0, 66, 0, 67, 0, 97, 0, 98, 0,
+ 99, 0, 128, 0, 255, 1, 0, 16, 0, 255, 253, 216, 0, 220, 0, 219,
+ 255, 223, 255]
+ }
+ ];
+ cases.forEach(function(c) {
for (var len = 1; len <= 5; ++len) {
- var out = '', decoder = new TextDecoder(encoding);
- for (var i = 0; i < encoded.length; i += len) {
+ var out = '', decoder = new TextDecoder(c.encoding);
+ for (var i = 0; i < c.encoded.length; i += len) {
var sub = [];
- for (var j = i; j < encoded.length && j < i + len; ++j) {
- sub.push(encoded[j]);
+ for (var j = i; j < c.encoded.length && j < i + len; ++j) {
+ sub.push(c.encoded[j]);
}
out += decoder.decode(new Uint8Array(sub), {stream: true});
}
out += decoder.decode();
- assert_equals(out, string, 'streaming decode ' + encoding);
+ assert_equals(out, string, 'streaming decode ' + c.encoding);
}
});
}, 'Streaming Decode');
@@ -193,9 +216,7 @@ test(function() {
}, 'Shift_JIS Decode');
test(function() {
- var encodings = ['utf-8'].concat(LEGACY_ENCODINGS);
-
- encodings.forEach(function(encoding) {
+ ASCII_SUPERSETS.forEach(function(encoding) {
var string = '', bytes = [];
for (var i = 0; i < 128; ++i) {
@@ -207,9 +228,8 @@ test(function() {
string += String.fromCharCode(i);
bytes.push(i);
}
- var ascii_encoded = new TextEncoder('utf-8').encode(string);
+ var ascii_encoded = new TextEncoder().encode(string);
assert_equals(new TextDecoder(encoding).decode(ascii_encoded), string, encoding);
- //assert_array_equals(new TextEncoder(encoding).encode(string), bytes, encoding);
});
}, 'Supersets of ASCII decode ASCII correctly');
@@ -228,16 +248,11 @@ test(function() {
}, 'Non-fatal errors at EOF');
test(function() {
- UTF_ENCODINGS.forEach(function(encoding) {
- assert_equals(new TextDecoder(encoding).encoding, encoding);
- assert_equals(new TextEncoder(encoding).encoding, encoding);
- });
-
LEGACY_ENCODINGS.forEach(function(encoding) {
assert_equals(new TextDecoder(encoding).encoding, encoding);
- assert_throws({name: 'RangeError'}, function() { new TextEncoder(encoding); });
+ assert_equals(new TextEncoder(encoding).encoding, 'utf-8');
});
-}, 'Non-UTF encodings supported only for decode, not encode');
+}, 'Legacy encodings supported only for decode, not encode');
test(function() {
[
@@ -248,8 +263,7 @@ test(function() {
'iso-2022-kr'
].forEach(function(encoding) {
- assert_throws({name: 'RangeError'},
- function() { new TextEncoder(encoding); });
+ assert_equals(new TextEncoder(encoding).encoding, 'utf-8');
assert_throws({name: 'RangeError'},
function() {
@@ -300,10 +314,6 @@ test(function() {
assert_throws({name: 'TypeError'},
function() { new TextDecoder('utf-8').decode(null, ''); },
'String should not coerce to dictionary.');
-
- assert_throws({name: 'RangeError'},
- function() { new TextEncoder(null); },
- 'Null should coerce to "null" and be invalid encoding name.');
}, 'Invalid parameters');
test(function() {
@@ -312,3 +322,60 @@ test(function() {
new TextEncoder('big5', {NONSTANDARD_allowLegacyEncoding: true})
.encode('\u2550\u255E\u2561\u256A\u5341\u5345'));
}, 'NONSTANDARD - regression tests');
+
+test(function() {
+ // Regression test for https://github.com/whatwg/encoding/issues/22
+ assert_equals(
+ new TextDecoder('gb18030').decode(new Uint8Array([
+ 0xA8, 0xBC,
+ 0x81, 0x35, 0xF4, 0x37
+ ])), '\u1E3F\uE7C7');
+}, 'GB 18030 2000 vs 2005: U+1E3F, U+E7C7 (decoding)');
+
+test(function() {
+ // Regression test for https://github.com/whatwg/encoding/issues/22
+ assert_array_equals(
+ new TextEncoder('gb18030', {NONSTANDARD_allowLegacyEncoding: true})
+ .encode('\u1E3F\uE7C7'),
+ [
+ 0xA8, 0xBC,
+ 0x81, 0x35, 0xF4, 0x37
+ ]);
+}, 'NONSTANDARD - GB 18030 2000 vs 2005: U+1E3F, U+E7C7 (encoding)');
+
+test(function() {
+ // Regression test for https://github.com/whatwg/encoding/issues/17
+ assert_throws(
+ new TypeError,
+ function() {
+ new TextEncoder('gb18030', {NONSTANDARD_allowLegacyEncoding: true})
+ .encode('\uE5E5');
+ });
+}, 'NONSTANDARD - gb18030: U+E5E5 (encoding)');
+
+
+test(function() {
+ // Regression test for https://github.com/whatwg/encoding/issues/15
+ var encoder =
+ new TextEncoder('iso-2022-jp', {NONSTANDARD_allowLegacyEncoding: true});
+
+ [
+ //'\u000E', '\u000F', '\u001B',
+ '\u00A5\u000E', //'\u00A5\u000F', '\u00A5\u001B'
+ ].forEach(function(s) {
+ assert_throws(new TypeError, function() { encoder.encode(s); });
+ });
+
+}, 'NONSTANDARD - iso-2022-jp encoding attack (encoding)');
+
+['utf-16le', 'utf-16be'].forEach(function(encoding) {
+ test(function() {
+ var encoder = new TextEncoder(encoding, {NONSTANDARD_allowLegacyEncoding: true});
+ var decoder = new TextDecoder(encoding);
+
+ var sample = "z\xA2\u6C34\uD834\uDD1E\uDBFF\uDFFD";
+
+ assert_equals(decoder.decode(encoder.encode(sample)), sample);
+
+ }, 'NONSTANDARD - ' + encoding + ' (encoding)');
+});
diff --git a/test/test-utf.js b/test/test-utf.js
index 786c945..e469f5c 100644
--- a/test/test-utf.js
+++ b/test/test-utf.js
@@ -71,19 +71,26 @@ function genblock(from, len, skip) {
return block.join('');
}
+function encode_utf16le(s) { return encode_utf16(s, true); }
+function encode_utf16be(s) { return encode_utf16(s, false); }
+function encode_utf16(s, le) {
+ var a = new Uint8Array(s.length * 2), view = new DataView(a.buffer);
+ s.split('').forEach(function(c, i) {
+ view.setUint16(i * 2, c.charCodeAt(0), le);
+ });
+ return a;
+}
+
function test_utf_roundtrip () {
var MIN_CODEPOINT = 0;
var MAX_CODEPOINT = 0x10FFFF;
var BLOCK_SIZE = 0x1000;
var SKIP_SIZE = 31;
- var TE_U16LE = new TextEncoder("UTF-16LE");
var TD_U16LE = new TextDecoder("UTF-16LE");
-
- var TE_U16BE = new TextEncoder("UTF-16BE");
var TD_U16BE = new TextDecoder("UTF-16BE");
- var TE_U8 = new TextEncoder("UTF-8");
+ var TE_U8 = new TextEncoder();
var TD_U8 = new TextDecoder("UTF-8");
for (var i = MIN_CODEPOINT; i < MAX_CODEPOINT; i += BLOCK_SIZE) {
@@ -91,11 +98,11 @@ function test_utf_roundtrip () {
var block = genblock(i, BLOCK_SIZE, SKIP_SIZE);
// test UTF-16LE, UTF-16BE, and UTF-8 encodings against themselves
- var encoded = TE_U16LE.encode(block);
+ var encoded = encode_utf16le(block);
var decoded = TD_U16LE.decode(encoded);
assert_string_equals(block, decoded, "UTF-16LE round trip " + block_tag);
- encoded = TE_U16BE.encode(block);
+ encoded = encode_utf16be(block);
decoded = TD_U16BE.decode(encoded);
assert_string_equals(block, decoded, "UTF-16BE round trip " + block_tag);
@@ -130,10 +137,6 @@ function test_utf_samples () {
cases.forEach(
function(t) {
- var encoded = new TextEncoder(t.encoding).encode(sample);
- assert_array_equals(encoded, t.expected,
- "expected equal encodings - " + t.encoding);
-
var decoded = new TextDecoder(t.encoding)
.decode(new Uint8Array(t.expected));
assert_equals(decoded, sample,
diff --git a/test/test-x-user-defined.js b/test/test-x-user-defined.js
index 401511a..e8df0da 100644
--- a/test/test-x-user-defined.js
+++ b/test/test-x-user-defined.js
@@ -3,7 +3,7 @@
test(
function() {
- assert_throws({name: 'RangeError'}, function() { new TextEncoder('x-user-defined'); });
+ assert_equals(new TextEncoder('x-user-defined').encoding, 'utf-8');
var decoder = new TextDecoder('x-user-defined');
for (var i = 0; i < 0x80; ++i) {
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/node-text-encoding.git
More information about the Pkg-javascript-commits mailing list