[Pkg-javascript-commits] [node-text-encoding] 01/03: New upstream version 0.6.1

Julien Puydt julien.puydt at laposte.net
Fri Sep 16 04:59:19 UTC 2016


This is an automated email from the git hooks/post-receive script.

jpuydt-guest pushed a commit to branch master
in repository node-text-encoding.

commit 63ce23190944e7cb67b221834b8982eabd02b7bc
Author: Julien Puydt <julien.puydt at laposte.net>
Date:   Thu Sep 15 16:54:52 2016 +0200

    New upstream version 0.6.1
---
 README.md                |   2 +-
 examples-no-indexes.html |   8 +-
 examples.html            |   8 +-
 lib/encoding.js          | 300 ++++++++++++++++++++++++-----------------------
 package.json             |   2 +-
 test/test-gb18030.js     |  13 ++
 test/tests.html          |   3 +-
 7 files changed, 178 insertions(+), 158 deletions(-)

diff --git a/README.md b/README.md
index c0f0c18..b2781aa 100644
--- a/README.md
+++ b/README.md
@@ -88,7 +88,7 @@ support for some encodings is not required.
 
 ### Non-Standard Behavior ###
 
-As required by the specification, only encoding to **utf-8**, is
+As required by the specification, only encoding to **utf-8** is
 supported. If you want to try it out, you can force a non-standard
 behavior by passing the `NONSTANDARD_allowLegacyEncoding` option to
 TextEncoder and a label. For example:
diff --git a/examples-no-indexes.html b/examples-no-indexes.html
index 58d70d5..1c210b9 100644
--- a/examples-no-indexes.html
+++ b/examples-no-indexes.html
@@ -1,16 +1,16 @@
 <!DOCTYPE html>
 <script src="lib/encoding.js"></script>
 <script>
-function encodeArrayOfStrings(strings, encoding) {
+function encodeArrayOfStrings(strings) {
   var encoder, encoded, len, i, bytes, view, offset;
 
-  encoder = new TextEncoder(encoding);
+  encoder = new TextEncoder();
   encoded = [];
 
   len = Uint32Array.BYTES_PER_ELEMENT;
   for (i = 0; i < strings.length; i += 1) {
     len += Uint32Array.BYTES_PER_ELEMENT;
-    encoded[i] = new TextEncoder(encoding).encode(strings[i]);
+    encoded[i] = new TextEncoder().encode(strings[i]);
     len += encoded[i].byteLength;
   }
 
@@ -51,7 +51,7 @@ function decodeArrayOfStrings(buffer, encoding) {
 }
 
 var strings = ["Hello", "string", "encoding!"];
-var buffer = encodeArrayOfStrings(strings, "utf-8");
+var buffer = encodeArrayOfStrings(strings);
 var results = decodeArrayOfStrings(buffer, "utf-8");
 document.write("Encoded " + JSON.stringify(strings) + "<br>");
 document.write("Decoded " + JSON.stringify(results) + "<br>");
diff --git a/examples.html b/examples.html
index b60dd2b..e0f61c8 100644
--- a/examples.html
+++ b/examples.html
@@ -2,16 +2,16 @@
 <script src="lib/encoding-indexes.js"></script>
 <script src="lib/encoding.js"></script>
 <script>
-function encodeArrayOfStrings(strings, encoding) {
+function encodeArrayOfStrings(strings) {
   var encoder, encoded, len, i, bytes, view, offset;
 
-  encoder = new TextEncoder(encoding);
+  encoder = new TextEncoder();
   encoded = [];
 
   len = Uint32Array.BYTES_PER_ELEMENT;
   for (i = 0; i < strings.length; i += 1) {
     len += Uint32Array.BYTES_PER_ELEMENT;
-    encoded[i] = new TextEncoder(encoding).encode(strings[i]);
+    encoded[i] = new TextEncoder().encode(strings[i]);
     len += encoded[i].byteLength;
   }
 
@@ -52,7 +52,7 @@ function decodeArrayOfStrings(buffer, encoding) {
 }
 
 var strings = ["Hello", "string", "encoding!"];
-var buffer = encodeArrayOfStrings(strings, "utf-8");
+var buffer = encodeArrayOfStrings(strings);
 var results = decodeArrayOfStrings(buffer, "utf-8");
 document.write("Encoded " + JSON.stringify(strings) + "<br>");
 document.write("Decoded " + JSON.stringify(results) + "<br>");
diff --git a/lib/encoding.js b/lib/encoding.js
index 1f021d6..2011e9d 100644
--- a/lib/encoding.js
+++ b/lib/encoding.js
@@ -98,7 +98,7 @@ if (typeof module !== "undefined" && module.exports) {
         // 2. Otherwise, i < n−1:
         else {
           // 1. Let d be the code unit in S at index i+1.
-          var d = string.charCodeAt(i + 1);
+          var d = s.charCodeAt(i + 1);
 
           // 2. If 0xDC00 ≤ d ≤ 0xDFFF, then:
           if (0xDC00 <= d && d <= 0xDFFF) {
@@ -868,7 +868,7 @@ if (typeof module !== "undefined" && module.exports) {
     // its corresponding code point.
     var offset = 0;
     var code_point_offset = 0;
-    var idx = index('gb18030');
+    var idx = index('gb18030-ranges');
     var i;
     for (i = 0; i < idx.length; ++i) {
       /** @type {!Array.<number>} */
@@ -900,7 +900,7 @@ if (typeof module !== "undefined" && module.exports) {
     // be its corresponding pointer.
     var offset = 0;
     var pointer_offset = 0;
-    var idx = index('gb18030');
+    var idx = index('gb18030-ranges');
     var i;
     for (i = 0; i < idx.length; ++i) {
       /** @type {!Array.<number>} */
@@ -919,21 +919,24 @@ if (typeof module !== "undefined" && module.exports) {
   }
 
   /**
-   * @param {number} code_point The |code_point| to search for in the shift_jis
+   * @param {number} code_point The |code_point| to search for in the Shift_JIS
    *     index.
    * @return {?number} The code point corresponding to |pointer| in |index|,
-   *     or null if |code point| is not in the shift_jis index.
+   *     or null if |code point| is not in the Shift_JIS index.
    */
   function indexShiftJISPointerFor(code_point) {
-    // 1. Let index be index jis0208 excluding all pointers in the
-    // range 8272 to 8835.
-    var pointer = indexPointerFor(code_point, index('jis0208'));
-    if (pointer === null || inRange(pointer, 8272, 8835))
-      return null;
+    // 1. Let index be index jis0208 excluding all entries whose
+    // pointer is in the range 8272 to 8835, inclusive.
+    shift_jis_index = shift_jis_index ||
+      index('jis0208').map(function(code_point, pointer) {
+        return inRange(pointer, 8272, 8835) ? null : code_point;
+      });
+    var index_ = shift_jis_index;
 
     // 2. Return the index pointer for code point in index.
-    return pointer;
+    return index_.indexOf(code_point);
   }
+  var shift_jis_index;
 
   /**
    * @param {number} code_point The |code_point| to search for in the big5
@@ -942,9 +945,12 @@ if (typeof module !== "undefined" && module.exports) {
    *     or null if |code point| is not in the big5 index.
    */
   function indexBig5PointerFor(code_point) {
-
-    // 1. Let index be index big5.
-    var index_ = index('big5');
+    // 1. Let index be index Big5 excluding all entries whose pointer is less than (0xA1 − 0x81) × 157.
+    big5_index_no_hkscs = big5_index_no_hkscs ||
+      index('big5').map(function(code_point, pointer) {
+        return (pointer < (0xA1 - 0x81) * 157) ? null : code_point;
+      });
+    var index_ = big5_index_no_hkscs;
 
     // 2. If code point is U+2550, U+255E, U+2561, U+256A, U+5341, or
     // U+5345, return the last pointer corresponding to code point in
@@ -958,6 +964,7 @@ if (typeof module !== "undefined" && module.exports) {
     // 3. Return the index pointer for code point in index.
     return indexPointerFor(code_point, index_);
   }
+  var big5_index_no_hkscs;
 
   //
   // 8. API
@@ -1370,11 +1377,12 @@ if (typeof module !== "undefined" && module.exports) {
         }
 
         // 0xC2 to 0xDF
-        if (inRange(bite, 0xC2, 0xDF)) {
-          // Set utf-8 bytes needed to 1 and utf-8 code point to byte
-          // − 0xC0.
+        else if (inRange(bite, 0xC2, 0xDF)) {
+          // 1. Set utf-8 bytes needed to 1.
           utf8_bytes_needed = 1;
-          utf8_code_point = bite - 0xC0;
+
+          // 2. Set UTF-8 code point to byte & 0x1F.
+          utf8_code_point = bite & 0x1F;
         }
 
         // 0xE0 to 0xEF
@@ -1385,10 +1393,10 @@ if (typeof module !== "undefined" && module.exports) {
           // 2. If byte is 0xED, set utf-8 upper boundary to 0x9F.
           if (bite === 0xED)
             utf8_upper_boundary = 0x9F;
-          // 3. Set utf-8 bytes needed to 2 and utf-8 code point to
-          // byte − 0xE0.
+          // 3. Set utf-8 bytes needed to 2.
           utf8_bytes_needed = 2;
-          utf8_code_point = bite - 0xE0;
+          // 4. Set UTF-8 code point to byte & 0xF.
+          utf8_code_point = bite & 0xF;
         }
 
         // 0xF0 to 0xF4
@@ -1399,10 +1407,10 @@ if (typeof module !== "undefined" && module.exports) {
           // 2. If byte is 0xF4, set utf-8 upper boundary to 0x8F.
           if (bite === 0xF4)
             utf8_upper_boundary = 0x8F;
-          // 3. Set utf-8 bytes needed to 3 and utf-8 code point to
-          // byte − 0xF0.
+          // 3. Set utf-8 bytes needed to 3.
           utf8_bytes_needed = 3;
-          utf8_code_point = bite - 0xF0;
+          // 4. Set UTF-8 code point to byte & 0x7.
+          utf8_code_point = bite & 0x7;
         }
 
         // Otherwise
@@ -1411,10 +1419,7 @@ if (typeof module !== "undefined" && module.exports) {
           return decoderError(fatal);
         }
 
-        // Then (byte is in the range 0xC2 to 0xF4, inclusive) set
-        // utf-8 code point to utf-8 code point << (6 × utf-8 bytes
-        // needed) and return continue.
-        utf8_code_point = utf8_code_point << (6 * utf8_bytes_needed);
+        // Return continue.
         return null;
       }
 
@@ -1441,26 +1446,26 @@ if (typeof module !== "undefined" && module.exports) {
       utf8_lower_boundary = 0x80;
       utf8_upper_boundary = 0xBF;
 
-      // 6. Increase utf-8 bytes seen by one and set utf-8 code point
-      // to utf-8 code point + (byte − 0x80) << (6 × (utf-8 bytes
-      // needed − utf-8 bytes seen)).
+      // 6. Set UTF-8 code point to (UTF-8 code point << 6) | (byte &
+      // 0x3F)
+      utf8_code_point = (utf8_code_point << 6) | (bite & 0x3F);
+
+      // 7. Increase utf-8 bytes seen by one.
       utf8_bytes_seen += 1;
-      utf8_code_point += (bite - 0x80) << (6 * (utf8_bytes_needed -
-                                                utf8_bytes_seen));
 
-      // 7. If utf-8 bytes seen is not equal to utf-8 bytes needed,
+      // 8. If utf-8 bytes seen is not equal to utf-8 bytes needed,
       // continue.
       if (utf8_bytes_seen !== utf8_bytes_needed)
         return null;
 
-      // 8. Let code point be utf-8 code point.
+      // 9. Let code point be utf-8 code point.
       var code_point = utf8_code_point;
 
-      // 9. Set utf-8 code point, utf-8 bytes needed, and utf-8 bytes
+      // 10. Set utf-8 code point, utf-8 bytes needed, and utf-8 bytes
       // seen to 0.
       utf8_code_point = utf8_bytes_needed = utf8_bytes_seen = 0;
 
-      // 10. Return a code point whose value is code point.
+      // 11. Return a code point whose value is code point.
       return code_point;
     };
   }
@@ -1483,9 +1488,9 @@ if (typeof module !== "undefined" && module.exports) {
       if (code_point === end_of_stream)
         return finished;
 
-      // 2. If code point is in the range U+0000 to U+007F, return a
-      // byte whose value is code point.
-      if (inRange(code_point, 0x0000, 0x007f))
+      // 2. If code point is an ASCII code point, return a byte whose
+      // value is code point.
+      if (isASCIICodePoint(code_point))
         return code_point;
 
       // 3. Set count and offset based on the range code point is in:
@@ -1509,7 +1514,7 @@ if (typeof module !== "undefined" && module.exports) {
         offset = 0xF0;
       }
 
-      // 4.Let bytes be a byte sequence whose first byte is (code
+      // 4. Let bytes be a byte sequence whose first byte is (code
       // point >> (6 × count)) + offset.
       var bytes = [(code_point >> (6 * count)) + offset];
 
@@ -1706,14 +1711,14 @@ if (typeof module !== "undefined" && module.exports) {
       if (gb18030_third !== 0x00) {
         // 1. Let code point be null.
         code_point = null;
-        // 2. If byte is in the range 0x30 to 0x39, set code point to
-        // the index gb18030 ranges code point for (((gb18030 first −
-        // 0x81) × 10 + gb18030 second − 0x30) × 126 + gb18030 third −
-        // 0x81) × 10 + byte − 0x30.
+        // 2. If byte is in the range 0x30 to 0x39, inclusive, set
+        // code point to the index gb18030 ranges code point for
+        // (((gb18030 first − 0x81) × 10 + gb18030 second − 0x30) ×
+        // 126 + gb18030 third − 0x81) × 10 + byte − 0x30.
         if (inRange(bite, 0x30, 0x39)) {
           code_point = indexGB18030RangesCodePointFor(
-              (((gb18030_first - 0x81) * 10 + (gb18030_second - 0x30)) * 126 +
-               (gb18030_third - 0x81)) * 10 + bite - 0x30);
+              (((gb18030_first - 0x81) * 10 + gb18030_second - 0x30) * 126 +
+               gb18030_third - 0x81) * 10 + bite - 0x30);
         }
 
         // 3. Let buffer be a byte sequence consisting of gb18030
@@ -1740,8 +1745,8 @@ if (typeof module !== "undefined" && module.exports) {
       // 4. If gb18030 second is not 0x00, run these substeps:
       if (gb18030_second !== 0x00) {
 
-        // 1. If byte is in the range 0x81 to 0xFE, set gb18030 third
-        // to byte and return continue.
+        // 1. If byte is in the range 0x81 to 0xFE, inclusive, set
+        // gb18030 third to byte and return continue.
         if (inRange(bite, 0x81, 0xFE)) {
           gb18030_third = bite;
           return null;
@@ -1758,8 +1763,8 @@ if (typeof module !== "undefined" && module.exports) {
       // 5. If gb18030 first is not 0x00, run these substeps:
       if (gb18030_first !== 0x00) {
 
-        // 1. If byte is in the range 0x30 to 0x39, set gb18030 second
-        // to byte and return continue.
+        // 1. If byte is in the range 0x30 to 0x39, inclusive, set
+        // gb18030 second to byte and return continue.
         if (inRange(bite, 0x30, 0x39)) {
           gb18030_second = bite;
           return null;
@@ -1775,8 +1780,9 @@ if (typeof module !== "undefined" && module.exports) {
         // otherwise.
         var offset = bite < 0x7F ? 0x40 : 0x41;
 
-        // 4. If byte is in the range 0x40 to 0x7E or 0x80 to 0xFE,
-        // set pointer to (lead − 0x81) × 190 + (byte − offset).
+        // 4. If byte is in the range 0x40 to 0x7E, inclusive, or 0x80
+        // to 0xFE, inclusive, set pointer to (lead − 0x81) × 190 +
+        // (byte − offset).
         if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0x80, 0xFE))
           pointer = (lead - 0x81) * 190 + (bite - offset);
 
@@ -1807,8 +1813,8 @@ if (typeof module !== "undefined" && module.exports) {
       if (bite === 0x80)
         return 0x20AC;
 
-      // 8. If byte is in the range 0x81 to 0xFE, set gb18030 first to
-      // byte and return continue.
+      // 8. If byte is in the range 0x81 to 0xFE, inclusive, set
+      // gb18030 first to byte and return continue.
       if (inRange(bite, 0x81, 0xFE)) {
         gb18030_first = bite;
         return null;
@@ -1922,9 +1928,9 @@ if (typeof module !== "undefined" && module.exports) {
   // 12. Legacy multi-byte Chinese (traditional) encodings
   //
 
-  // 12.1 big5
+  // 12.1 Big5
 
-  // 12.1.1 big5 decoder
+  // 12.1.1 Big5 decoder
   /**
    * @constructor
    * @implements {Decoder}
@@ -1932,8 +1938,8 @@ if (typeof module !== "undefined" && module.exports) {
    */
   function Big5Decoder(options) {
     var fatal = options.fatal;
-    // big5's decoder has an associated big5 lead (initially 0x00).
-    var /** @type {number} */ big5_lead = 0x00;
+    // Big5's decoder has an associated Big5 lead (initially 0x00).
+    var /** @type {number} */ Big5_lead = 0x00;
 
     /**
      * @param {Stream} stream The stream of bytes being decoded.
@@ -1943,32 +1949,33 @@ if (typeof module !== "undefined" && module.exports) {
      *     stream to decode a complete code point.
      */
     this.handler = function(stream, bite) {
-      // 1. If byte is end-of-stream and big5 lead is not 0x00, set
-      // big5 lead to 0x00 and return error.
-      if (bite === end_of_stream && big5_lead !== 0x00) {
-        big5_lead = 0x00;
+      // 1. If byte is end-of-stream and Big5 lead is not 0x00, set
+      // Big5 lead to 0x00 and return error.
+      if (bite === end_of_stream && Big5_lead !== 0x00) {
+        Big5_lead = 0x00;
         return decoderError(fatal);
       }
 
-      // 2. If byte is end-of-stream and big5 lead is 0x00, return
+      // 2. If byte is end-of-stream and Big5 lead is 0x00, return
       // finished.
-      if (bite === end_of_stream && big5_lead === 0x00)
+      if (bite === end_of_stream && Big5_lead === 0x00)
         return finished;
 
-      // 3. If big5 lead is not 0x00, let lead be big5 lead, let
-      // pointer be null, set big5 lead to 0x00, and then run these
+      // 3. If Big5 lead is not 0x00, let lead be Big5 lead, let
+      // pointer be null, set Big5 lead to 0x00, and then run these
       // substeps:
-      if (big5_lead !== 0x00) {
-        var lead = big5_lead;
+      if (Big5_lead !== 0x00) {
+        var lead = Big5_lead;
         var pointer = null;
-        big5_lead = 0x00;
+        Big5_lead = 0x00;
 
         // 1. Let offset be 0x40 if byte is less than 0x7F and 0x62
         // otherwise.
         var offset = bite < 0x7F ? 0x40 : 0x62;
 
-        // 2. If byte is in the range 0x40 to 0x7E or 0xA1 to 0xFE,
-        // set pointer to (lead − 0x81) × 157 + (byte − offset).
+        // 2. If byte is in the range 0x40 to 0x7E, inclusive, or 0xA1
+        // to 0xFE, inclusive, set pointer to (lead − 0x81) × 157 +
+        // (byte − offset).
         if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0xA1, 0xFE))
           pointer = (lead - 0x81) * 157 + (bite - offset);
 
@@ -1989,7 +1996,7 @@ if (typeof module !== "undefined" && module.exports) {
         }
 
         // 4. Let code point be null if pointer is null and the index
-        // code point for pointer in index big5 otherwise.
+        // code point for pointer in index Big5 otherwise.
         var code_point = (pointer === null) ? null :
             indexCodePointFor(pointer, index('big5'));
 
@@ -2011,10 +2018,10 @@ if (typeof module !== "undefined" && module.exports) {
       if (isASCIIByte(bite))
         return bite;
 
-      // 5. If byte is in the range 0x81 to 0xFE, set big5 lead to
-      // byte and return continue.
+      // 5. If byte is in the range 0x81 to 0xFE, inclusive, set Big5
+      // lead to byte and return continue.
       if (inRange(bite, 0x81, 0xFE)) {
-        big5_lead = bite;
+        Big5_lead = bite;
         return null;
       }
 
@@ -2023,7 +2030,7 @@ if (typeof module !== "undefined" && module.exports) {
     };
   }
 
-  // 12.1.2 big5 encoder
+  // 12.1.2 Big5 encoder
   /**
    * @constructor
    * @implements {Encoder}
@@ -2046,7 +2053,7 @@ if (typeof module !== "undefined" && module.exports) {
       if (isASCIICodePoint(code_point))
         return code_point;
 
-      // 3. Let pointer be the index big5 pointer for code point.
+      // 3. Let pointer be the index Big5 pointer for code point.
       var pointer = indexBig5PointerFor(code_point);
 
       // 4. If pointer is null, return error with code point.
@@ -2123,16 +2130,16 @@ if (typeof module !== "undefined" && module.exports) {
         return finished;
 
       // 3. If euc-jp lead is 0x8E and byte is in the range 0xA1 to
-      // 0xDF, set euc-jp lead to 0x00 and return a code point whose
-      // value is 0xFF61 + byte − 0xA1.
+      // 0xDF, inclusive, set euc-jp lead to 0x00 and return a code
+      // point whose value is 0xFF61 − 0xA1 + byte.
       if (eucjp_lead === 0x8E && inRange(bite, 0xA1, 0xDF)) {
         eucjp_lead = 0x00;
-        return 0xFF61 + bite - 0xA1;
+        return 0xFF61 - 0xA1 + bite;
       }
 
       // 4. If euc-jp lead is 0x8F and byte is in the range 0xA1 to
-      // 0xFE, set the euc-jp jis0212 flag, set euc-jp lead to byte,
-      // and return continue.
+      // 0xFE, inclusive, set the euc-jp jis0212 flag, set euc-jp lead
+      // to byte, and return continue.
       if (eucjp_lead === 0x8F && inRange(bite, 0xA1, 0xFE)) {
         eucjp_jis0212_flag = true;
         eucjp_lead = bite;
@@ -2148,10 +2155,10 @@ if (typeof module !== "undefined" && module.exports) {
         // 1. Let code point be null.
         var code_point = null;
 
-        // 2. If lead and byte are both in the range 0xA1 to 0xFE, set
-        // code point to the index code point for (lead − 0xA1) × 94 +
-        // byte − 0xA1 in index jis0208 if the euc-jp jis0212 flag is
-        // unset and in index jis0212 otherwise.
+        // 2. If lead and byte are both in the range 0xA1 to 0xFE,
+        // inclusive, set code point to the index code point for (lead
+        // − 0xA1) × 94 + byte − 0xA1 in index jis0208 if the euc-jp
+        // jis0212 flag is unset and in index jis0212 otherwise.
         if (inRange(lead, 0xA1, 0xFE) && inRange(bite, 0xA1, 0xFE)) {
           code_point = indexCodePointFor(
             (lead - 0xA1) * 94 + (bite - 0xA1),
@@ -2161,8 +2168,8 @@ if (typeof module !== "undefined" && module.exports) {
         // 3. Unset the euc-jp jis0212 flag.
         eucjp_jis0212_flag = false;
 
-        // 4. If byte is not in the range 0xA1 to 0xFE, prepend byte
-        // to stream.
+        // 4. If byte is not in the range 0xA1 to 0xFE, inclusive,
+        // prepend byte to stream.
         if (!inRange(bite, 0xA1, 0xFE))
           stream.prepend(bite);
 
@@ -2179,8 +2186,8 @@ if (typeof module !== "undefined" && module.exports) {
       if (isASCIIByte(bite))
         return bite;
 
-      // 7. If byte is 0x8E, 0x8F, or in the range 0xA1 to 0xFE, set
-      // euc-jp lead to byte and return continue.
+      // 7. If byte is 0x8E, 0x8F, or in the range 0xA1 to 0xFE,
+      // inclusive, set euc-jp lead to byte and return continue.
       if (bite === 0x8E || bite === 0x8F || inRange(bite, 0xA1, 0xFE)) {
         eucjp_lead = bite;
         return null;
@@ -2222,8 +2229,9 @@ if (typeof module !== "undefined" && module.exports) {
       if (code_point === 0x203E)
         return 0x7E;
 
-      // 5. If code point is in the range U+FF61 to U+FF9F, return two
-      // bytes whose values are 0x8E and code point − 0xFF61 + 0xA1.
+      // 5. If code point is in the range U+FF61 to U+FF9F, inclusive,
+      // return two bytes whose values are 0x8E and code point −
+      // 0xFF61 + 0xA1.
       if (inRange(code_point, 0xFF61, 0xFF9F))
         return [0x8E, code_point - 0xFF61 + 0xA1];
 
@@ -2393,9 +2401,9 @@ if (typeof module !== "undefined" && module.exports) {
         // 0x21 to 0x5F
         if (inRange(bite, 0x21, 0x5F)) {
           // Unset the iso-2022-jp output flag and return a code point
-          // whose value is 0xFF61 + byte − 0x21.
+          // whose value is 0xFF61 − 0x21 + byte.
           iso2022jp_output_flag = false;
-          return 0xFF61 + bite - 0x21;
+          return 0xFF61 - 0x21 + bite;
         }
 
         // end-of-stream
@@ -2710,9 +2718,9 @@ if (typeof module !== "undefined" && module.exports) {
     return new ISO2022JPDecoder(options);
   };
 
-  // 13.3 shift_jis
+  // 13.3 Shift_JIS
 
-  // 13.3.1 shift_jis decoder
+  // 13.3.1 Shift_JIS decoder
   /**
    * @constructor
    * @implements {Decoder}
@@ -2720,9 +2728,9 @@ if (typeof module !== "undefined" && module.exports) {
    */
   function ShiftJISDecoder(options) {
     var fatal = options.fatal;
-    // shift_jis's decoder has an associated shift_jis lead (initially
+    // Shift_JIS's decoder has an associated Shift_JIS lead (initially
     // 0x00).
-    var /** @type {number} */ shiftjis_lead = 0x00;
+    var /** @type {number} */ Shift_JIS_lead = 0x00;
     /**
      * @param {Stream} stream The stream of bytes being decoded.
      * @param {number} bite The next byte read from the stream.
@@ -2731,25 +2739,25 @@ if (typeof module !== "undefined" && module.exports) {
      *     stream to decode a complete code point.
      */
     this.handler = function(stream, bite) {
-      // 1. If byte is end-of-stream and shift_jis lead is not 0x00,
-      // set shift_jis lead to 0x00 and return error.
-      if (bite === end_of_stream && shiftjis_lead !== 0x00) {
-        shiftjis_lead = 0x00;
+      // 1. If byte is end-of-stream and Shift_JIS lead is not 0x00,
+      // set Shift_JIS lead to 0x00 and return error.
+      if (bite === end_of_stream && Shift_JIS_lead !== 0x00) {
+        Shift_JIS_lead = 0x00;
         return decoderError(fatal);
       }
 
-      // 2. If byte is end-of-stream and shift_jis lead is 0x00,
+      // 2. If byte is end-of-stream and Shift_JIS lead is 0x00,
       // return finished.
-      if (bite === end_of_stream && shiftjis_lead === 0x00)
+      if (bite === end_of_stream && Shift_JIS_lead === 0x00)
         return finished;
 
-      // 3. If shift_jis lead is not 0x00, let lead be shift_jis lead,
-      // let pointer be null, set shift_jis lead to 0x00, and then run
+      // 3. If Shift_JIS lead is not 0x00, let lead be Shift_JIS lead,
+      // let pointer be null, set Shift_JIS lead to 0x00, and then run
       // these substeps:
-      if (shiftjis_lead !== 0x00) {
-        var lead = shiftjis_lead;
+      if (Shift_JIS_lead !== 0x00) {
+        var lead = Shift_JIS_lead;
         var pointer = null;
-        shiftjis_lead = 0x00;
+        Shift_JIS_lead = 0x00;
 
         // 1. Let offset be 0x40, if byte is less than 0x7F, and 0x41
         // otherwise.
@@ -2759,23 +2767,22 @@ if (typeof module !== "undefined" && module.exports) {
         // 0xC1 otherwise.
         var lead_offset = (lead < 0xA0) ? 0x81 : 0xC1;
 
-        // 3. If byte is in the range 0x40 to 0x7E or 0x80 to 0xFC,
-        // set pointer to (lead − lead offset) × 188 + byte − offset.
+        // 3. If byte is in the range 0x40 to 0x7E, inclusive, or 0x80
+        // to 0xFC, inclusive, set pointer to (lead − lead offset) ×
+        // 188 + byte − offset.
         if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0x80, 0xFC))
           pointer = (lead - lead_offset) * 188 + bite - offset;
 
-        // 4. Let code point be null, if pointer is null, and the
+        // 4. If pointer is in the range 8836 to 10715, inclusive,
+        // return a code point whose value is 0xE000 − 8836 + pointer.
+        if (inRange(pointer, 8836, 10715))
+          return 0xE000 - 8836 + pointer;
+
+        // 5. Let code point be null, if pointer is null, and the
         // index code point for pointer in index jis0208 otherwise.
         var code_point = (pointer === null) ? null :
               indexCodePointFor(pointer, index('jis0208'));
 
-        // 5. If code point is null and pointer is in the range 8836
-        // to 10528, return a code point whose value is 0xE000 +
-        // pointer − 8836.
-        if (code_point === null && pointer !== null &&
-            inRange(pointer, 8836, 10528))
-          return 0xE000 + pointer - 8836;
-
         // 6. If code point is null and byte is an ASCII byte, prepend
         // byte to stream.
         if (code_point === null && isASCIIByte(bite))
@@ -2794,15 +2801,16 @@ if (typeof module !== "undefined" && module.exports) {
       if (isASCIIByte(bite) || bite === 0x80)
         return bite;
 
-      // 5. If byte is in the range 0xA1 to 0xDF, return a code point
-      // whose value is 0xFF61 + byte − 0xA1.
+      // 5. If byte is in the range 0xA1 to 0xDF, inclusive, return a
+      // code point whose value is 0xFF61 − 0xA1 + byte.
       if (inRange(bite, 0xA1, 0xDF))
-        return 0xFF61 + bite - 0xA1;
+        return 0xFF61 - 0xA1 + bite;
 
-      // 6. If byte is in the range 0x81 to 0x9F or 0xE0 to 0xFC, set
-      // shift_jis lead to byte and return continue.
+      // 6. If byte is in the range 0x81 to 0x9F, inclusive, or 0xE0
+      // to 0xFC, inclusive, set Shift_JIS lead to byte and return
+      // continue.
       if (inRange(bite, 0x81, 0x9F) || inRange(bite, 0xE0, 0xFC)) {
-        shiftjis_lead = bite;
+        Shift_JIS_lead = bite;
         return null;
       }
 
@@ -2811,7 +2819,7 @@ if (typeof module !== "undefined" && module.exports) {
     };
   }
 
-  // 13.3.2 shift_jis encoder
+  // 13.3.2 Shift_JIS encoder
   /**
    * @constructor
    * @implements {Encoder}
@@ -2842,8 +2850,8 @@ if (typeof module !== "undefined" && module.exports) {
       if (code_point === 0x203E)
         return 0x7E;
 
-      // 5. If code point is in the range U+FF61 to U+FF9F, return a
-      // byte whose value is code point − 0xFF61 + 0xA1.
+      // 5. If code point is in the range U+FF61 to U+FF9F, inclusive,
+      // return a byte whose value is code point − 0xFF61 + 0xA1.
       if (inRange(code_point, 0xFF61, 0xFF9F))
         return code_point - 0xFF61 + 0xA1;
 
@@ -2851,7 +2859,7 @@ if (typeof module !== "undefined" && module.exports) {
       if (code_point === 0x2212)
         code_point = 0xFF0D;
 
-      // 7. Let pointer be the index shift_jis pointer for code point.
+      // 7. Let pointer be the index Shift_JIS pointer for code point.
       var pointer = indexShiftJISPointerFor(code_point);
 
       // 8. If pointer is null, return error with code point.
@@ -2932,8 +2940,8 @@ if (typeof module !== "undefined" && module.exports) {
         var pointer = null;
         euckr_lead = 0x00;
 
-        // 1. If byte is in the range 0x41 to 0xFE, set pointer to
-        // (lead − 0x81) × 190 + (byte − 0x41).
+        // 1. If byte is in the range 0x41 to 0xFE, inclusive, set
+        // pointer to (lead − 0x81) × 190 + (byte − 0x41).
         if (inRange(bite, 0x41, 0xFE))
           pointer = (lead - 0x81) * 190 + (bite - 0x41);
 
@@ -2960,8 +2968,8 @@ if (typeof module !== "undefined" && module.exports) {
       if (isASCIIByte(bite))
         return bite;
 
-      // 5. If byte is in the range 0x81 to 0xFE, set euc-kr lead to
-      // byte and return continue.
+      // 5. If byte is in the range 0x81 to 0xFE, inclusive, set
+      // euc-kr lead to byte and return continue.
       if (inRange(bite, 0x81, 0xFE)) {
         euckr_lead = bite;
         return null;
@@ -3116,9 +3124,9 @@ if (typeof module !== "undefined" && module.exports) {
         var lead_surrogate = utf16_lead_surrogate;
         utf16_lead_surrogate = null;
 
-        // 1. If code unit is in the range U+DC00 to U+DFFF, return a
-        // code point whose value is 0x10000 + ((lead surrogate −
-        // 0xD800) << 10) + (code unit − 0xDC00).
+        // 1. If code unit is in the range U+DC00 to U+DFFF,
+        // inclusive, return a code point whose value is 0x10000 +
+        // ((lead surrogate − 0xD800) << 10) + (code unit − 0xDC00).
         if (inRange(code_unit, 0xDC00, 0xDFFF)) {
           return 0x10000 + (lead_surrogate - 0xD800) * 0x400 +
               (code_unit - 0xDC00);
@@ -3131,15 +3139,15 @@ if (typeof module !== "undefined" && module.exports) {
         return decoderError(fatal);
       }
 
-      // 6. If code unit is in the range U+D800 to U+DBFF, set utf-16
-      // lead surrogate to code unit and return continue.
+      // 6. If code unit is in the range U+D800 to U+DBFF, inclusive,
+      // set utf-16 lead surrogate to code unit and return continue.
       if (inRange(code_unit, 0xD800, 0xDBFF)) {
         utf16_lead_surrogate = code_unit;
         return null;
       }
 
-      // 7. If code unit is in the range U+DC00 to U+DFFF, return
-      // error.
+      // 7. If code unit is in the range U+DC00 to U+DFFF, inclusive,
+      // return error.
       if (inRange(code_unit, 0xDC00, 0xDFFF))
         return decoderError(fatal);
 
@@ -3167,9 +3175,9 @@ if (typeof module !== "undefined" && module.exports) {
       if (code_point === end_of_stream)
         return finished;
 
-      // 2. If code point is in the range U+0000 to U+FFFF, return the
-      // sequence resulting of converting code point to bytes using
-      // utf-16be encoder flag.
+      // 2. If code point is in the range U+0000 to U+FFFF, inclusive,
+      // return the sequence resulting of converting code point to
+      // bytes using utf-16be encoder flag.
       if (inRange(code_point, 0x0000, 0xFFFF))
         return convertCodeUnitToBytes(code_point, utf16_be);
 
@@ -3267,8 +3275,8 @@ if (typeof module !== "undefined" && module.exports) {
       if (isASCIICodePoint(code_point))
         return code_point;
 
-      // 3. If code point is in the range U+F780 to U+F7FF, return a
-      // byte whose value is code point − 0xF780 + 0x80.
+      // 3. If code point is in the range U+F780 to U+F7FF, inclusive,
+      // return a byte whose value is code point − 0xF780 + 0x80.
       if (inRange(code_point, 0xF780, 0xF7FF))
         return code_point - 0xF780 + 0x80;
 
diff --git a/package.json b/package.json
index d7a21cc..d46c443 100644
--- a/package.json
+++ b/package.json
@@ -12,7 +12,7 @@
     "Pierre Queinnec <pierre at queinnec.org>",
     "Zack Weinberg <zackw at panix.com>"
   ],
-  "version": "0.6.0",
+  "version": "0.6.1",
   "description": "Polyfill for the Encoding Living Standard's API.",
   "main": "index.js",
   "files": [
diff --git a/test/test-gb18030.js b/test/test-gb18030.js
new file mode 100644
index 0000000..e1184d0
--- /dev/null
+++ b/test/test-gb18030.js
@@ -0,0 +1,13 @@
+// This is free and unencumbered software released into the public domain.
+// See LICENSE.md for more information.
+
+test(function() {
+  var cases = [
+    {bytes: [148, 57, 218, 51], string: '\uD83D\uDCA9' } // U+1F4A9 PILE OF POO
+  ];
+
+  cases.forEach(function(c) {
+    assert_equals(new TextDecoder('gb18030').decode(new Uint8Array(c.bytes)),
+                  c.string);
+  });
+}, 'gb18030 ranges');
diff --git a/test/tests.html b/test/tests.html
index e964ecb..1d0b219 100644
--- a/test/tests.html
+++ b/test/tests.html
@@ -18,11 +18,10 @@ self.TextDecoder = null;
 <script src="test-utf.js"></script>
 <!-- TODO: test for all single-byte encoding indexes -->
 
-<!-- TODO: gb18030 -->
-
 <script src="test-big5.js"></script>
 <script src="test-euc-jp.js"></script>
 <script src="test-iso-2022-jp.js"></script>
 <script src="test-shift_jis.js"></script>
 <script src="test-euc-kr.js"></script>
+<script src="test-gb18030.js"></script>
 <script src="test-x-user-defined.js"></script>

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/node-text-encoding.git



More information about the Pkg-javascript-commits mailing list