[Git][java-team/jcodings][debian/sid] 40 commits: [maven-release-plugin] prepare for next development iteration
Hideki Yamane
gitlab at salsa.debian.org
Mon Sep 17 10:31:11 BST 2018
Hideki Yamane pushed to branch debian/sid at Debian Java Maintainers / jcodings
Commits:
1f71e160 by Thomas E. Enebo at 2018-04-16T21:10:16Z
[maven-release-plugin] prepare for next development iteration
- - - - -
d687283a by lopex at 2018-04-18T08:44:12Z
fix whitespace
- - - - -
82ff7a08 by lopex at 2018-04-18T09:18:24Z
dont use deprecated apis
- - - - -
2ea61407 by lopex at 2018-04-18T09:47:06Z
calculate max property length at generation time
- - - - -
b86ac8c9 by lopex at 2018-04-18T11:56:28Z
better naming
- - - - -
8cb55508 by Marcin Mielzynski at 2018-04-18T20:46:27Z
fix ISO8859 case fold map and make test_iso_8859 clean
- - - - -
e5eb18c0 by Marcin Mielzynski at 2018-04-18T21:08:11Z
realign ISO8859_3 case map
- - - - -
6fde8c5e by Marcin Mielzynski at 2018-04-18T21:15:37Z
add iso8859-4 caseMap
- - - - -
c239f92a by Marcin Mielzynski at 2018-04-18T21:27:30Z
add iso8859-5 caseMap
- - - - -
6f276487 by Marcin Mielzynski at 2018-04-18T21:34:23Z
add iso8859-7 caseMap
- - - - -
e7f99c85 by Marcin Mielzynski at 2018-04-18T21:44:02Z
add iso8859-9 caseMap
- - - - -
0fafb1f2 by Marcin Mielzynski at 2018-04-18T21:48:32Z
add iso8859-10 caseMap
- - - - -
89650fb7 by Marcin Mielzynski at 2018-04-18T21:52:00Z
add iso8859-13 caseMap
- - - - -
4136a057 by Marcin Mielzynski at 2018-04-18T21:55:28Z
add iso8859-14 caseMap
- - - - -
5c3daf05 by Marcin Mielzynski at 2018-04-18T21:59:13Z
add iso8859-15 caseMap
- - - - -
757f7bb1 by Marcin Mielzynski at 2018-04-18T22:01:31Z
add iso8859-16 caseMap
- - - - -
5c6a4d27 by Marcin Mielzynski at 2018-04-18T22:15:55Z
default to specialized singleByteAsciiOnlyCaseMap for all remaining SingleByteEncoding(s)
- - - - -
0149fb1d by lopex at 2018-04-19T08:19:59Z
add windows-1250 caseMap
- - - - -
e296af2e by lopex at 2018-04-19T08:21:59Z
add windows-1251 caseMap
- - - - -
bf2a9415 by lopex at 2018-04-19T08:24:14Z
add windows-1252 caseMap
- - - - -
43e941ab by lopex at 2018-04-19T08:28:44Z
add windows-1253 caseMap
- - - - -
e6989a07 by lopex at 2018-04-19T08:33:34Z
add windows-1254 caseMap
- - - - -
3b27707a by lopex at 2018-04-19T08:36:53Z
add windows-1257 caseMap
- - - - -
588eedf2 by lopex at 2018-04-20T12:07:21Z
move multibyte caseMap to MultiByteEncoding
- - - - -
695e9647 by lopex at 2018-04-20T12:15:09Z
remove indirection for base trans table in BaseBIG5Encoding
- - - - -
8914725b by Thomas E. Enebo at 2018-08-13T15:26:38Z
[maven-release-plugin] prepare release jcodings-1.0.31
- - - - -
a71866ed by Thomas E. Enebo at 2018-08-13T15:26:43Z
[maven-release-plugin] prepare for next development iteration
- - - - -
fee64cd8 by Marcin Mielzynski at 2018-08-13T18:51:29Z
use Encoding.NEW_LINE
- - - - -
bdea837a by Marcin Mielzynski at 2018-08-22T21:08:54Z
Fixes #24
- - - - -
090e1984 by Thomas E. Enebo at 2018-09-06T16:14:07Z
[maven-release-plugin] prepare release jcodings-1.0.32
- - - - -
98e852d8 by Hideki Yamane at 2018-09-17T09:08:01Z
Merge tag 'jcodings-1.0.31' into debian/sid
[maven-release-plugin] copy for tag jcodings-1.0.31
- - - - -
c403e6c4 by Hideki Yamane at 2018-09-17T09:08:25Z
New upstream release
- - - - -
db7ae994 by Hideki Yamane at 2018-09-17T09:12:05Z
Merge tag 'jcodings-1.0.32' into debian/sid
[maven-release-plugin] copy for tag jcodings-1.0.32
- - - - -
d1054806 by Hideki Yamane at 2018-09-17T09:12:19Z
New upstream release
- - - - -
6c53b974 by Hideki Yamane at 2018-09-17T09:16:32Z
drop get-orig-source
- - - - -
2d5e6f16 by Hideki Yamane at 2018-09-17T09:17:17Z
set Standards-Version: 4.2.1
- - - - -
71f743cf by Hideki Yamane at 2018-09-17T09:18:45Z
convert from cdbs to dh
- - - - -
fe387767 by Hideki Yamane at 2018-09-17T09:22:08Z
dh11
- - - - -
8418d9fb by Hideki Yamane at 2018-09-17T09:23:54Z
use pkg-java-maintainers at alioth-lists.debian.net for address
- - - - -
e7f33ac8 by Hideki Yamane at 2018-09-17T09:24:06Z
upload to unstable
- - - - -
30 changed files:
- debian/changelog
- debian/compat
- debian/control
- debian/rules
- pom.xml
- scripts/UnicodePropertiesTemplate.java
- scripts/generate.rb
- src/org/jcodings/AbstractEncoding.java
- src/org/jcodings/MultiByteEncoding.java
- src/org/jcodings/SingleByteEncoding.java
- src/org/jcodings/specific/BaseBIG5Encoding.java
- src/org/jcodings/specific/ISO8859_10Encoding.java
- src/org/jcodings/specific/ISO8859_13Encoding.java
- src/org/jcodings/specific/ISO8859_14Encoding.java
- src/org/jcodings/specific/ISO8859_15Encoding.java
- src/org/jcodings/specific/ISO8859_16Encoding.java
- src/org/jcodings/specific/ISO8859_3Encoding.java
- src/org/jcodings/specific/ISO8859_4Encoding.java
- src/org/jcodings/specific/ISO8859_5Encoding.java
- src/org/jcodings/specific/ISO8859_7Encoding.java
- src/org/jcodings/specific/ISO8859_9Encoding.java
- src/org/jcodings/specific/Windows_1250Encoding.java
- src/org/jcodings/specific/Windows_1251Encoding.java
- src/org/jcodings/specific/Windows_1252Encoding.java
- src/org/jcodings/specific/Windows_1253Encoding.java
- src/org/jcodings/specific/Windows_1254Encoding.java
- src/org/jcodings/specific/Windows_1257Encoding.java
- src/org/jcodings/transcode/TranscodeFunctions.java
- src/org/jcodings/unicode/UnicodeEncoding.java
- src/org/jcodings/unicode/UnicodeProperties.java
Changes:
=====================================
debian/changelog
=====================================
@@ -1,3 +1,19 @@
+jcodings (1.0.32-1) unstable; urgency=medium
+
+ * New upstream release
+ * debian/rules
+ - drop unnecessary get-orig-source target
+ - convert build system from cdbs to dh
+ * debian/control
+ - set Standards-Version: 4.2.1 without changes
+ - drop Build-Depends: cdbs
+ - set Build-Depends: debhelper (>= 11)
+ - use maintainer address: pkg-java-maintainers at alioth-lists.debian.net
+ * debian/compat
+ - set 11
+
+ -- Hideki Yamane <henrich at debian.org> Mon, 17 Sep 2018 18:24:04 +0900
+
jcodings (1.0.30-2) unstable; urgency=medium
* Update Vcs-* to use salsa.debian.org
=====================================
debian/compat
=====================================
@@ -1 +1 @@
-10
+11
=====================================
debian/control
=====================================
@@ -1,11 +1,11 @@
Source: jcodings
Section: java
Priority: optional
-Maintainer: Debian Java Maintainers <pkg-java-maintainers at lists.alioth.debian.org>
+Maintainer: Debian Java Maintainers <pkg-java-maintainers at alioth-lists.debian.net>
Uploaders: Torsten Werner <twerner at debian.org>, Hideki Yamane <henrich at debian.org>
-Build-Depends: default-jdk, debhelper (>= 10), cdbs, maven-debian-helper
+Build-Depends: default-jdk, debhelper (>= 11), maven-debian-helper
Build-Depends-Indep: junit4 (>= 4.10)
-Standards-Version: 4.1.4
+Standards-Version: 4.2.1
Vcs-Git: https://salsa.debian.org/java-team/jcodings.git
Vcs-Browser: https://salsa.debian.org/java-team/jcodings
Homepage: https://github.com/jruby/jcodings
=====================================
debian/rules
=====================================
@@ -1,9 +1,4 @@
#!/usr/bin/make -f
-include /usr/share/cdbs/1/rules/debhelper.mk
-include /usr/share/cdbs/1/class/maven.mk
-
-JAVA_HOME := /usr/lib/jvm/default-java
-
-get-orig-source:
- uscan --download-version $(DEB_UPSTREAM_VERSION) --force-download --rename
+%:
+ dh $@ --buildsystem=maven
=====================================
pom.xml
=====================================
@@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>org.jruby.jcodings</groupId>
<artifactId>jcodings</artifactId>
- <version>1.0.30</version>
+ <version>1.0.32</version>
<name>JCodings</name>
<description>
Byte based encoding support library for java
=====================================
scripts/UnicodePropertiesTemplate.java
=====================================
@@ -25,4 +25,6 @@ public class UnicodeProperties {
static final CodeRangeEntry[]CodeRangeTable = new CodeRangeEntry[] {
%{extcrs}
};
+
+ static final int MAX_WORD_LENGTH = %{max_length};
}
=====================================
scripts/generate.rb
=====================================
@@ -180,9 +180,10 @@ def generate_coderange_list
name = "#{$1}=#{$2}" if name =~ /(graphemeclusterbreak)(.*)/i
([name] + aliases[name].to_a).map{|n|[n, range]}
end.flatten(1)
+ max_length = out.max_by{|name, table|name.length}.first.length.to_s
open("#{SRC_DIR}/unicode/UnicodeProperties.java", "wb") do |f| f <<
- open("UnicodePropertiesTemplate.java", "rb").read.sub(/%\{extcrs\}/, out.map{|name, table| "#{INDENT * 2}" + "new CodeRangeEntry(\"#{name}\", \"CR_#{table}\")"}.join(",\n"))
+ open("UnicodePropertiesTemplate.java", "rb").read.sub(/%\{max_length\}/, max_length).sub(/%\{extcrs\}/, out.map{|name, table| "#{INDENT * 2}" + "new CodeRangeEntry(\"#{name}\", \"CR_#{table}\")"}.join(",\n"))
end
end
=====================================
src/org/jcodings/AbstractEncoding.java
=====================================
@@ -50,7 +50,7 @@ abstract class AbstractEncoding extends Encoding {
*/
@Override
public boolean isNewLine(byte[]bytes, int p, int end) {
- return p < end ? bytes[p] == (byte)0x0a : false;
+ return p < end ? bytes[p] == Encoding.NEW_LINE : false;
}
protected final int asciiMbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
@@ -134,11 +134,28 @@ abstract class AbstractEncoding extends Encoding {
return toP - toStart;
}
- @Override
- public int caseMap(IntHolder flagP, byte[] bytes, IntHolder pp, int end, byte[] to, int toP, int toEnd) {
- return asciiOnlyCaseMap(flagP, bytes, pp, end, to, toP, toEnd);
- }
+ int singleByteAsciiOnlyCaseMap(IntHolder flagP, byte[]bytes, IntHolder pp, int end, byte[]to, int toP, int toEnd) {
+ int toStart = toP;
+ int flags = flagP.value;
+
+ while (pp.value < end && toP < toEnd) {
+ int code = bytes[pp.value++] & 0xff;
+ if (code >= 'a' && code <= 'z' && ((flags & Config.CASE_UPCASE) != 0)) {
+ flags |= Config.CASE_MODIFIED;
+ code += 'A' - 'a';
+ } else if (code >= 'A' && code <= 'Z' && ((flags & (Config.CASE_DOWNCASE | Config.CASE_FOLD)) != 0)) {
+ flags |= Config.CASE_MODIFIED;
+ code += 'a' - 'A';
+ }
+ to[toP++] = (byte)code;
+ if ((flags & Config.CASE_TITLECASE) != 0) {
+ flags ^= (Config.CASE_UPCASE | Config.CASE_DOWNCASE | Config.CASE_TITLECASE);
+ }
+ }
+ flagP.value = flags;
+ return toP - toStart;
+ }
/** onigenc_minimum_property_name_to_ctype
* notably overridden by unicode encodings
@@ -147,6 +164,6 @@ abstract class AbstractEncoding extends Encoding {
public int propertyNameToCType(byte[]bytes, int p, int end) {
Integer ctype = PosixBracket.PBSTableUpper.get(bytes, p, end);
if (ctype != null) return ctype;
- throw new CharacterPropertyException(EncodingError.ERR_INVALID_CHAR_PROPERTY_NAME, new String(bytes, p, end - p));
+ throw new CharacterPropertyException(EncodingError.ERR_INVALID_CHAR_PROPERTY_NAME, bytes, p, end - p);
}
}
=====================================
src/org/jcodings/MultiByteEncoding.java
=====================================
@@ -122,6 +122,11 @@ public abstract class MultiByteEncoding extends AbstractEncoding {
return n;
}
+ @Override
+ public int caseMap(IntHolder flagP, byte[] bytes, IntHolder pp, int end, byte[] to, int toP, int toEnd) {
+ return asciiOnlyCaseMap(flagP, bytes, pp, end, to, toP, toEnd);
+ }
+
protected final int mbnMbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
int p = pp.value;
int lowerP = 0;
=====================================
src/org/jcodings/SingleByteEncoding.java
=====================================
@@ -54,6 +54,11 @@ public abstract class SingleByteEncoding extends AbstractEncoding {
}
// onigenc_is_mbc_newline_0x0a here
+ @Override
+ public int caseMap(IntHolder flagP, byte[] bytes, IntHolder pp, int end, byte[] to, int toP, int toEnd) {
+ return singleByteAsciiOnlyCaseMap(flagP, bytes, pp, end, to, toP, toEnd);
+ }
+
/** onigenc_single_byte_mbc_to_code
*/
@Override
=====================================
src/org/jcodings/specific/BaseBIG5Encoding.java
=====================================
@@ -24,18 +24,17 @@ import org.jcodings.IntHolder;
import org.jcodings.ascii.AsciiTables;
public abstract class BaseBIG5Encoding extends CanBeTrailTableEncoding {
-
- private final int transIndex;
+ private final int[]TransBase;
protected BaseBIG5Encoding(String name, int[]EncLen, int transIndex) {
super(name, 1, 2, EncLen, BIG5Trans, AsciiTables.AsciiCtypeTable, BIG5_CAN_BE_TRAIL_TABLE);
- this.transIndex = transIndex;
+ TransBase = Trans[transIndex];
}
@Override
public int length(byte[]bytes, int p, int end) {
int b = bytes[p++] & 0xff;
- int s = Trans[transIndex][b];
+ int s = TransBase[b];
if (s < 0) return s == A ? 1 : CHAR_INVALID;
if (p == end) return missing(EncLen[b] - 1);
s = Trans[s][bytes[p] & 0xff];
=====================================
src/org/jcodings/specific/ISO8859_10Encoding.java
=====================================
@@ -19,7 +19,10 @@
*/
package org.jcodings.specific;
+import org.jcodings.Config;
import org.jcodings.ISOEncoding;
+import org.jcodings.IntHolder;
+import org.jcodings.constants.CharacterType;
public final class ISO8859_10Encoding extends ISOEncoding {
@@ -27,6 +30,44 @@ public final class ISO8859_10Encoding extends ISOEncoding {
super("ISO-8859-10", ISO8859_10CtypeTable, ISO8859_10ToLowerCaseTable, ISO8859_10CaseFoldMap);
}
+ @Override
+ public int caseMap(IntHolder flagP, byte[] bytes, IntHolder pp, int end, byte[] to, int toP, int toEnd) {
+ int toStart = toP;
+ int flags = flagP.value;
+
+ while (pp.value < end && toP < toEnd) {
+ int code = bytes[pp.value++] & 0xff;
+ if (code == SHARP_s) {
+ if ((flags & Config.CASE_UPCASE) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ to[toP++] = 'S';
+ code = (flags & Config.CASE_TITLECASE) != 0 ? 's' : 'S';
+ } else if ((flags & Config.CASE_FOLD) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ to[toP++] = 's';
+ code = 's';
+ }
+ } else if (code == 0xBD || code == 0xFF) {
+ } else if ((ISO8859_10CtypeTable[code] & CharacterType.BIT_UPPER) != 0 && (flags & (Config.CASE_DOWNCASE | Config.CASE_FOLD)) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ code = LowerCaseTable[code];
+ } else if ((ISO8859_10CtypeTable[code] & CharacterType.BIT_LOWER) != 0 && (flags & Config.CASE_UPCASE) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ if (code >= 0xA0 && code <= 0xBF) {
+ code -= 0x10;
+ } else {
+ code -= 0x20;
+ }
+ }
+ to[toP++] = (byte)code;
+ if ((flags & Config.CASE_TITLECASE) != 0) {
+ flags ^= (Config.CASE_UPCASE | Config.CASE_DOWNCASE | Config.CASE_TITLECASE);
+ }
+ }
+ flagP.value = flags;
+ return toP - toStart;
+ }
+
static final short ISO8859_10CtypeTable[] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
=====================================
src/org/jcodings/specific/ISO8859_13Encoding.java
=====================================
@@ -19,7 +19,10 @@
*/
package org.jcodings.specific;
+import org.jcodings.Config;
import org.jcodings.ISOEncoding;
+import org.jcodings.IntHolder;
+import org.jcodings.constants.CharacterType;
public final class ISO8859_13Encoding extends ISOEncoding {
@@ -27,6 +30,45 @@ public final class ISO8859_13Encoding extends ISOEncoding {
super("ISO-8859-13", ISO8859_13CtypeTable, ISO8859_13ToLowerCaseTable, ISO8859_13CaseFoldMap);
}
+ @Override
+ public int caseMap(IntHolder flagP, byte[] bytes, IntHolder pp, int end, byte[] to, int toP, int toEnd) {
+ int toStart = toP;
+ int flags = flagP.value;
+
+ while (pp.value < end && toP < toEnd) {
+ int code = bytes[pp.value++] & 0xff;
+ if (code == SHARP_s) {
+ if ((flags & Config.CASE_UPCASE) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ to[toP++] = 'S';
+ code = (flags & Config.CASE_TITLECASE) != 0 ? 's' : 'S';
+ } else if ((flags & Config.CASE_FOLD) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ to[toP++] = 's';
+ code = 's';
+ }
+ } else if ((ISO8859_13CtypeTable[code] & CharacterType.BIT_UPPER) != 0 && (flags & (Config.CASE_DOWNCASE | Config.CASE_FOLD)) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ code = LowerCaseTable[code];
+ } else if (code == 0xB5) {
+ } else if ((ISO8859_13CtypeTable[code] & CharacterType.BIT_LOWER) != 0 && (flags & Config.CASE_UPCASE) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ if (code == 0xB8 || code == 0xBA || code == 0xBF) {
+ code -= 0x10;
+ } else {
+ code -= 0x20;
+ }
+ }
+ to[toP++] = (byte)code;
+ if ((flags & Config.CASE_TITLECASE) != 0) {
+ flags ^= (Config.CASE_UPCASE | Config.CASE_DOWNCASE | Config.CASE_TITLECASE);
+ }
+ }
+ flagP.value = flags;
+ return toP - toStart;
+ }
+
+
static final short ISO8859_13CtypeTable[] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
=====================================
src/org/jcodings/specific/ISO8859_14Encoding.java
=====================================
@@ -19,7 +19,10 @@
*/
package org.jcodings.specific;
+import org.jcodings.Config;
import org.jcodings.ISOEncoding;
+import org.jcodings.IntHolder;
+import org.jcodings.constants.CharacterType;
public final class ISO8859_14Encoding extends ISOEncoding {
@@ -27,6 +30,52 @@ public final class ISO8859_14Encoding extends ISOEncoding {
super("ISO-8859-14", ISO8859_14CtypeTable, ISO8859_14ToLowerCaseTable, ISO8859_14CaseFoldMap);
}
+ @Override
+ public int caseMap(IntHolder flagP, byte[] bytes, IntHolder pp, int end, byte[] to, int toP, int toEnd) {
+ int toStart = toP;
+ int flags = flagP.value;
+
+ while (pp.value < end && toP < toEnd) {
+ int code = bytes[pp.value++] & 0xff;
+ if (code == SHARP_s) {
+ if ((flags & Config.CASE_UPCASE) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ to[toP++] = 'S';
+ code = (flags & Config.CASE_TITLECASE) != 0 ? 's' : 'S';
+ } else if ((flags & Config.CASE_FOLD) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ to[toP++] = 's';
+ code = 's';
+ }
+ } else if ((ISO8859_14CtypeTable[code] & CharacterType.BIT_UPPER) != 0 && (flags & (Config.CASE_DOWNCASE | Config.CASE_FOLD)) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ code = LowerCaseTable[code];
+ } else if ((ISO8859_14CtypeTable[code] & CharacterType.BIT_LOWER) != 0 && (flags & Config.CASE_UPCASE) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ if (code == 0xA2 || code == 0xA5 || code == 0xB1 || code == 0xB3 || code == 0xB5 || code == 0xBE)
+ code -= 0x1;
+ else if (code == 0xAB)
+ code -= 0x5;
+ else if (code == 0xFF)
+ code -= 0x50;
+ else if (code == 0xB9)
+ code -= 0x2;
+ else if (code == 0xBF)
+ code -= 0x4;
+ else if (code == 0xB8 || code == 0xBA || code == 0xBC)
+ code -= 0x10;
+ else
+ code -= 0x20;
+ }
+ to[toP++] = (byte)code;
+ if ((flags & Config.CASE_TITLECASE) != 0) {
+ flags ^= (Config.CASE_UPCASE | Config.CASE_DOWNCASE | Config.CASE_TITLECASE);
+ }
+ }
+ flagP.value = flags;
+ return toP - toStart;
+ }
+
static final short ISO8859_14CtypeTable[] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
=====================================
src/org/jcodings/specific/ISO8859_15Encoding.java
=====================================
@@ -19,7 +19,10 @@
*/
package org.jcodings.specific;
+import org.jcodings.Config;
import org.jcodings.ISOEncoding;
+import org.jcodings.IntHolder;
+import org.jcodings.constants.CharacterType;
public final class ISO8859_15Encoding extends ISOEncoding {
@@ -27,6 +30,49 @@ public final class ISO8859_15Encoding extends ISOEncoding {
super("ISO-8859-15", ISO8859_15CtypeTable, ISO8859_15ToLowerCaseTable, ISO8859_15CaseFoldMap);
}
+ @Override
+ public int caseMap(IntHolder flagP, byte[] bytes, IntHolder pp, int end, byte[] to, int toP, int toEnd) {
+ int toStart = toP;
+ int flags = flagP.value;
+
+ while (pp.value < end && toP < toEnd) {
+ int code = bytes[pp.value++] & 0xff;
+ if (code == SHARP_s) {
+ if ((flags & Config.CASE_UPCASE) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ to[toP++] = 'S';
+ code = (flags & Config.CASE_TITLECASE) != 0 ? 's' : 'S';
+ } else if ((flags & Config.CASE_FOLD) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ to[toP++] = 's';
+ code = 's';
+ }
+ } else if (code == 0xAA || code == 0xBA || code == 0xB5) {
+ } else if ((ISO8859_15CtypeTable[code] & CharacterType.BIT_UPPER) != 0 && (flags & (Config.CASE_DOWNCASE | Config.CASE_FOLD)) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ code = LowerCaseTable[code];
+ } else if ((ISO8859_15CtypeTable[code] & CharacterType.BIT_LOWER) != 0 && (flags & Config.CASE_UPCASE) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ if (code == 0xA8)
+ code -= 2;
+ else if (code == 0xB8)
+ code -= 4;
+ else if (code == 0xBD)
+ code -= 1;
+ else if (code == 0xFF)
+ code -= 0x41;
+ else
+ code -= 0x20;
+ }
+ to[toP++] = (byte)code;
+ if ((flags & Config.CASE_TITLECASE) != 0) {
+ flags ^= (Config.CASE_UPCASE | Config.CASE_DOWNCASE | Config.CASE_TITLECASE);
+ }
+ }
+ flagP.value = flags;
+ return toP - toStart;
+ }
+
static final short ISO8859_15CtypeTable[] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
=====================================
src/org/jcodings/specific/ISO8859_16Encoding.java
=====================================
@@ -19,7 +19,10 @@
*/
package org.jcodings.specific;
+import org.jcodings.Config;
import org.jcodings.ISOEncoding;
+import org.jcodings.IntHolder;
+import org.jcodings.constants.CharacterType;
public final class ISO8859_16Encoding extends ISOEncoding {
@@ -27,6 +30,52 @@ public final class ISO8859_16Encoding extends ISOEncoding {
super("ISO-8859-16", ISO8859_16CtypeTable, ISO8859_16ToLowerCaseTable, ISO8859_16CaseFoldMap);
}
+ @Override
+ public int caseMap(IntHolder flagP, byte[] bytes, IntHolder pp, int end, byte[] to, int toP, int toEnd) {
+ int toStart = toP;
+ int flags = flagP.value;
+
+ while (pp.value < end && toP < toEnd) {
+ int code = bytes[pp.value++] & 0xff;
+ if (code == SHARP_s) {
+ if ((flags & Config.CASE_UPCASE) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ to[toP++] = 'S';
+ code = (flags & Config.CASE_TITLECASE) != 0 ? 's' : 'S';
+ } else if ((flags & Config.CASE_FOLD) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ to[toP++] = 's';
+ code = 's';
+ }
+ } else if ((ISO8859_16CtypeTable[code] & CharacterType.BIT_UPPER) != 0 && (flags & (Config.CASE_DOWNCASE | Config.CASE_FOLD)) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ code = LowerCaseTable[code];
+ } else if ((ISO8859_16CtypeTable[code] & CharacterType.BIT_LOWER) != 0 && (flags & Config.CASE_UPCASE) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ if (code == 0xA2 || code == 0xBD)
+ code--;
+ else if (code == 0xB3 || code == 0xBA || code == 0xBF)
+ code -= 0x10;
+ else if (code == 0xA8 || code == 0xAE)
+ code -= 0x02;
+ else if (code == 0xB9)
+ code -= 0x07;
+ else if (code == 0xB8)
+ code -= 0x04;
+ else if (code == 0xFF)
+ code -= 0x41;
+ else
+ code -= 0x20;
+ }
+ to[toP++] = (byte)code;
+ if ((flags & Config.CASE_TITLECASE) != 0) {
+ flags ^= (Config.CASE_UPCASE | Config.CASE_DOWNCASE | Config.CASE_TITLECASE);
+ }
+ }
+ flagP.value = flags;
+ return toP - toStart;
+ }
+
static final short ISO8859_16CtypeTable[] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
=====================================
src/org/jcodings/specific/ISO8859_3Encoding.java
=====================================
@@ -30,8 +30,8 @@ public final class ISO8859_3Encoding extends ISOEncoding {
super("ISO-8859-3", ISO8859_3CtypeTable, ISO8859_3ToLowerCaseTable, ISO8859_3CaseFoldMap);
}
- static final int DOTLESS_i = 0xFD;
- static final int I_WITH_DOT_ABOVE = 0xDD;
+ static final int DOTLESS_i = 0xB9;
+ static final int I_WITH_DOT_ABOVE = 0xA9;
@Override
public int caseMap(IntHolder flagP, byte[] bytes, IntHolder pp, int end, byte[] to, int toP, int toEnd) {
@@ -62,7 +62,7 @@ public final class ISO8859_3Encoding extends ISOEncoding {
} else if ((ISO8859_3CtypeTable[code] & CharacterType.BIT_LOWER) != 0 && (flags & Config.CASE_UPCASE) != 0) {
flags |= Config.CASE_MODIFIED;
if (code == 'i') {
- code = (flags & Config.CASE_FOLD_TURKISH_AZERI) != 0 ? DOTLESS_i : 'I';
+ code = (flags & Config.CASE_FOLD_TURKISH_AZERI) != 0 ? I_WITH_DOT_ABOVE : 'I';
} else if (code == DOTLESS_i) {
code = 'I';
} else if (code >= 0xB0 && code <= 0xBF) {
=====================================
src/org/jcodings/specific/ISO8859_4Encoding.java
=====================================
@@ -19,7 +19,10 @@
*/
package org.jcodings.specific;
+import org.jcodings.Config;
import org.jcodings.ISOEncoding;
+import org.jcodings.IntHolder;
+import org.jcodings.constants.CharacterType;
public final class ISO8859_4Encoding extends ISOEncoding {
@@ -27,6 +30,47 @@ public final class ISO8859_4Encoding extends ISOEncoding {
super("ISO-8859-4", ISO8859_4CtypeTable, ISO8859_4ToLowerCaseTable, ISO8859_4CaseFoldMap);
}
+ @Override
+ public int caseMap(IntHolder flagP, byte[] bytes, IntHolder pp, int end, byte[] to, int toP, int toEnd) {
+ int toStart = toP;
+ int flags = flagP.value;
+
+ while (pp.value < end && toP < toEnd) {
+ int code = bytes[pp.value++] & 0xff;
+ if (code == SHARP_s) {
+ if ((flags & Config.CASE_UPCASE) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ to[toP++] = 'S';
+ code = (flags & Config.CASE_TITLECASE) != 0 ? 's' : 'S';
+ } else if ((flags & Config.CASE_FOLD) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ to[toP++] = 's';
+ code = 's';
+ }
+ } else if ((ISO8859_4CtypeTable[code] & CharacterType.BIT_UPPER) != 0 && (flags & (Config.CASE_DOWNCASE | Config.CASE_FOLD)) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ code = LowerCaseTable[code];
+ } else if (code == 0xA2) {
+ } else if ((ISO8859_4CtypeTable[code] & CharacterType.BIT_LOWER) != 0 && (flags & Config.CASE_UPCASE) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ if (code >= 0xA0 && code <= 0xBF) {
+ if (code == 0xBF)
+ code -= 0x02;
+ else
+ code -= 0x10;
+ } else {
+ code -= 0x20;
+ }
+ }
+ to[toP++] = (byte)code;
+ if ((flags & Config.CASE_TITLECASE) != 0) {
+ flags ^= (Config.CASE_UPCASE | Config.CASE_DOWNCASE | Config.CASE_TITLECASE);
+ }
+ }
+ flagP.value = flags;
+ return toP - toStart;
+ }
+
static final short ISO8859_4CtypeTable[] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
=====================================
src/org/jcodings/specific/ISO8859_5Encoding.java
=====================================
@@ -19,8 +19,10 @@
*/
package org.jcodings.specific;
+import org.jcodings.Config;
import org.jcodings.ISOEncoding;
import org.jcodings.IntHolder;
+import org.jcodings.constants.CharacterType;
public final class ISO8859_5Encoding extends ISOEncoding {
@@ -28,6 +30,34 @@ public final class ISO8859_5Encoding extends ISOEncoding {
super("ISO-8859-5", ISO8859_5CtypeTable, ISO8859_5ToLowerCaseTable, ISO8859_5CaseFoldMap, false);
}
+ @Override
+ public int caseMap(IntHolder flagP, byte[] bytes, IntHolder pp, int end, byte[] to, int toP, int toEnd) {
+ int toStart = toP;
+ int flags = flagP.value;
+
+ while (pp.value < end && toP < toEnd) {
+ int code = bytes[pp.value++] & 0xff;
+ if ((ISO8859_5CtypeTable[code] & CharacterType.BIT_UPPER) != 0 && (flags & (Config.CASE_DOWNCASE | Config.CASE_FOLD)) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ code = LowerCaseTable[code];
+ } else if ((ISO8859_5CtypeTable[code] & CharacterType.BIT_LOWER) != 0 && (flags & Config.CASE_UPCASE) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ if (0xF1 <= code && code <= 0xFF) {
+ code -= 0x50;
+ } else {
+ code -= 0x20;
+ }
+ }
+ to[toP++] = (byte)code;
+ if ((flags & Config.CASE_TITLECASE) != 0) {
+ flags ^= (Config.CASE_UPCASE | Config.CASE_DOWNCASE | Config.CASE_TITLECASE);
+ }
+ }
+ flagP.value = flags;
+ return toP - toStart;
+ }
+
+
@Override
public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
int p = pp.value;
@@ -141,7 +171,7 @@ public final class ISO8859_5Encoding extends ISOEncoding {
{ 0xbb, 0xdb },
{ 0xbc, 0xdc },
{ 0xbd, 0xdd },
- { 0xbe, 0xdf },
+ { 0xbe, 0xde },
{ 0xbf, 0xdf },
{ 0xc0, 0xe0 },
=====================================
src/org/jcodings/specific/ISO8859_7Encoding.java
=====================================
@@ -19,8 +19,10 @@
*/
package org.jcodings.specific;
+import org.jcodings.Config;
import org.jcodings.ISOEncoding;
import org.jcodings.IntHolder;
+import org.jcodings.constants.CharacterType;
public final class ISO8859_7Encoding extends ISOEncoding {
@@ -28,6 +30,49 @@ public final class ISO8859_7Encoding extends ISOEncoding {
super("ISO-8859-7", ISO8859_7CtypeTable, ISO8859_7ToLowerCaseTable, ISO8859_7CaseFoldMap, false);
}
+ @Override
+ public int caseMap(IntHolder flagP, byte[] bytes, IntHolder pp, int end, byte[] to, int toP, int toEnd) {
+ int toStart = toP;
+ int flags = flagP.value;
+
+ while (pp.value < end && toP < toEnd) {
+ int code = bytes[pp.value++] & 0xff;
+ if (code == 0xF2) {
+ if ((flags & Config.CASE_UPCASE) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ code = 0xD3;
+ } else if ((flags & Config.CASE_FOLD) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ code = 0xF3;
+ }
+ } else if ((ISO8859_7CtypeTable[code] & CharacterType.BIT_UPPER) != 0 && (flags & (Config.CASE_DOWNCASE | Config.CASE_FOLD)) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ code = LowerCaseTable[code];
+ } else if (code == 0xC0 || code == 0xE0) {
+ } else if ((ISO8859_7CtypeTable[code] & CharacterType.BIT_LOWER) != 0 && (flags & Config.CASE_UPCASE) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ if (code == 0xDC) {
+ code -= 0x26;
+ } else if (code >= 0xDD && code <= 0xDF) {
+ code -= 0x25;
+ } else if (code == 0xFC) {
+ code -= 0x40;
+ } else if (code == 0xFD || code == 0xFE) {
+ code -= 0x3F;
+ } else {
+ code -= 0x20;
+ }
+ }
+
+ to[toP++] = (byte)code;
+ if ((flags & Config.CASE_TITLECASE) != 0) {
+ flags ^= (Config.CASE_UPCASE | Config.CASE_DOWNCASE | Config.CASE_TITLECASE);
+ }
+ }
+ flagP.value = flags;
+ return toP - toStart;
+ }
+
@Override
public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
int p = pp.value;
=====================================
src/org/jcodings/specific/ISO8859_9Encoding.java
=====================================
@@ -19,7 +19,10 @@
*/
package org.jcodings.specific;
+import org.jcodings.Config;
import org.jcodings.ISOEncoding;
+import org.jcodings.IntHolder;
+import org.jcodings.constants.CharacterType;
public final class ISO8859_9Encoding extends ISOEncoding {
@@ -27,6 +30,54 @@ public final class ISO8859_9Encoding extends ISOEncoding {
super("ISO-8859-9", ISO8859_9CtypeTable, ISO8859_9ToLowerCaseTable, ISO8859_9CaseFoldMap);
}
+ static final int DOTLESS_i = 0xFD;
+ static final int I_WITH_DOT_ABOVE = 0xDD;
+
+ @Override
+ public int caseMap(IntHolder flagP, byte[] bytes, IntHolder pp, int end, byte[] to, int toP, int toEnd) {
+ int toStart = toP;
+ int flags = flagP.value;
+
+ while (pp.value < end && toP < toEnd) {
+ int code = bytes[pp.value++] & 0xff;
+ if (code == SHARP_s) {
+ if ((flags & Config.CASE_UPCASE) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ to[toP++] = 'S';
+ code = (flags & Config.CASE_TITLECASE) != 0 ? 's' : 'S';
+ } else if ((flags & Config.CASE_FOLD) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ to[toP++] = 's';
+ code = 's';
+ }
+ }
+ else if (code == 0xAA || code == 0xB5 || code == 0xBA || code == 0xFF);
+ else if ((ISO8859_9CtypeTable[code] & CharacterType.BIT_UPPER) != 0 && (flags & (Config.CASE_DOWNCASE | Config.CASE_FOLD)) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ if (code == 'I') {
+ code = (flags & Config.CASE_FOLD_TURKISH_AZERI) != 0 ? DOTLESS_i : 'i';
+ } else {
+ code = LowerCaseTable[code];
+ }
+ } else if ((ISO8859_9CtypeTable[code] & CharacterType.BIT_LOWER) != 0 && (flags & Config.CASE_UPCASE) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ if (code == 'i') {
+ code = (flags & Config.CASE_FOLD_TURKISH_AZERI) != 0 ? I_WITH_DOT_ABOVE : 'I';
+ } else if (code == DOTLESS_i) {
+ code = 'I';
+ } else {
+ code -= 0x20;
+ }
+ }
+ to[toP++] = (byte)code;
+ if ((flags & Config.CASE_TITLECASE) != 0) {
+ flags ^= (Config.CASE_UPCASE | Config.CASE_DOWNCASE | Config.CASE_TITLECASE);
+ }
+ }
+ flagP.value = flags;
+ return toP - toStart;
+ }
+
static final short ISO8859_9CtypeTable[] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
=====================================
src/org/jcodings/specific/Windows_1250Encoding.java
=====================================
@@ -20,7 +20,10 @@
package org.jcodings.specific;
import org.jcodings.CaseFoldMapEncoding;
+import org.jcodings.Config;
+import org.jcodings.ISOEncoding;
import org.jcodings.IntHolder;
+import org.jcodings.constants.CharacterType;
final public class Windows_1250Encoding extends CaseFoldMapEncoding {
@@ -28,6 +31,47 @@ final public class Windows_1250Encoding extends CaseFoldMapEncoding {
super("Windows-1250", CP1250_CtypeTable, CP1250_ToLowerCaseTable, CP1250_CaseFoldMap, true);
}
+ @Override
+ public int caseMap(IntHolder flagP, byte[] bytes, IntHolder pp, int end, byte[] to, int toP, int toEnd) {
+ int toStart = toP;
+ int flags = flagP.value;
+
+ while (pp.value < end && toP < toEnd) {
+ int code = bytes[pp.value++] & 0xff;
+ if (code == ISOEncoding.SHARP_s) {
+ if ((flags & Config.CASE_UPCASE) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ to[toP++] = 'S';
+ code = (flags & Config.CASE_TITLECASE) != 0 ? 's' : 'S';
+ } else if ((flags & Config.CASE_FOLD) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ to[toP++] = 's';
+ code = 's';
+ }
+ } else if ((CP1250_CtypeTable[code] & CharacterType.BIT_UPPER) != 0 && (flags & (Config.CASE_DOWNCASE | Config.CASE_FOLD)) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ code = LowerCaseTable[code];
+ } else if (code == 0xB5) {
+ } else if ((CP1250_CtypeTable[code] & CharacterType.BIT_LOWER) != 0 && (flags & Config.CASE_UPCASE) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ if (code == 0xB9)
+ code = 0xA5;
+ else if (code == 0xBE)
+ code = 0xBC;
+ else if (code >= 0x8A && code <= 0xBF && code != 0xB9)
+ code -= 0x10;
+ else
+ code -= 0x20;
+ }
+ to[toP++] = (byte)code;
+ if ((flags & Config.CASE_TITLECASE) != 0) {
+ flags ^= (Config.CASE_UPCASE | Config.CASE_DOWNCASE | Config.CASE_TITLECASE);
+ }
+ }
+ flagP.value = flags;
+ return toP - toStart;
+ }
+
@Override
public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
int p = pp.value;
=====================================
src/org/jcodings/specific/Windows_1251Encoding.java
=====================================
@@ -20,7 +20,10 @@
package org.jcodings.specific;
import org.jcodings.CaseFoldMapEncoding;
+import org.jcodings.Config;
+import org.jcodings.ISOEncoding;
import org.jcodings.IntHolder;
+import org.jcodings.constants.CharacterType;
final public class Windows_1251Encoding extends CaseFoldMapEncoding {
@@ -28,6 +31,42 @@ final public class Windows_1251Encoding extends CaseFoldMapEncoding {
super("Windows-1251", CP1251_CtypeTable, CP1251_ToLowerCaseTable, CP1251_CaseFoldMap, false);
}
+ @Override
+ public int caseMap(IntHolder flagP, byte[] bytes, IntHolder pp, int end, byte[] to, int toP, int toEnd) {
+ int toStart = toP;
+ int flags = flagP.value;
+
+ while (pp.value < end && toP < toEnd) {
+ int code = bytes[pp.value++] & 0xff;
+ if ((CP1251_CtypeTable[code] & CharacterType.BIT_UPPER) != 0 && (flags & (Config.CASE_DOWNCASE | Config.CASE_FOLD)) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ code = LowerCaseTable[code];
+ } else if (code == 0xB5) {
+ } else if ((CP1251_CtypeTable[code] & CharacterType.BIT_LOWER) != 0 && (flags & Config.CASE_UPCASE) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ if ((0x61 <= code && code <= 0x7A) || (0xE0 <= code && code <= 0xFF))
+ code -= 0x20;
+ else if (code == 0xA2 || code == 0xB3 || code == 0xBE)
+ code -= 0x01;
+ else if (code == 0x83)
+ code = 0x81;
+ else if (code == 0xBC)
+ code = 0xA3;
+ else if (code == 0xB4)
+ code = 0xA5;
+ else
+ code -= 0x10;
+ }
+ to[toP++] = (byte)code;
+ if ((flags & Config.CASE_TITLECASE) != 0) {
+ flags ^= (Config.CASE_UPCASE | Config.CASE_DOWNCASE | Config.CASE_TITLECASE);
+ }
+ }
+ flagP.value = flags;
+ return toP - toStart;
+ }
+
+
@Override
public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
int p = pp.value;
=====================================
src/org/jcodings/specific/Windows_1252Encoding.java
=====================================
@@ -20,7 +20,10 @@
package org.jcodings.specific;
import org.jcodings.CaseFoldMapEncoding;
+import org.jcodings.Config;
+import org.jcodings.ISOEncoding;
import org.jcodings.IntHolder;
+import org.jcodings.constants.CharacterType;
final public class Windows_1252Encoding extends CaseFoldMapEncoding {
@@ -28,6 +31,45 @@ final public class Windows_1252Encoding extends CaseFoldMapEncoding {
super("Windows-1252", CP1252_CtypeTable, CP1252_ToLowerCaseTable, CP1252_CaseFoldMap, true);
}
+ @Override
+ public int caseMap(IntHolder flagP, byte[] bytes, IntHolder pp, int end, byte[] to, int toP, int toEnd) {
+ int toStart = toP;
+ int flags = flagP.value;
+
+ while (pp.value < end && toP < toEnd) {
+ int code = bytes[pp.value++] & 0xff;
+ if (code == ISOEncoding.SHARP_s) {
+ if ((flags & Config.CASE_UPCASE) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ to[toP++] = 'S';
+ code = (flags & Config.CASE_TITLECASE) != 0 ? 's' : 'S';
+ } else if ((flags & Config.CASE_FOLD) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ to[toP++] = 's';
+ code = 's';
+ }
+ } else if ((CP1252_CtypeTable[code] & CharacterType.BIT_UPPER) != 0 && (flags & (Config.CASE_DOWNCASE | Config.CASE_FOLD)) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ code = LowerCaseTable[code];
+ } else if (code == 0x83 || code == 0xAA || code == 0xBA || code == 0xB5) {
+ } else if ((CP1252_CtypeTable[code] & CharacterType.BIT_LOWER) != 0 && (flags & Config.CASE_UPCASE) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ if (code == 0x9A || code == 0x9C || code == 0x9E)
+ code -= 0x10;
+ else if (code == 0xFF)
+ code -= 0x60;
+ else
+ code -= 0x20;
+ }
+ to[toP++] = (byte)code;
+ if ((flags & Config.CASE_TITLECASE) != 0) {
+ flags ^= (Config.CASE_UPCASE | Config.CASE_DOWNCASE | Config.CASE_TITLECASE);
+ }
+ }
+ flagP.value = flags;
+ return toP - toStart;
+ }
+
@Override
public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
int p = pp.value;
=====================================
src/org/jcodings/specific/Windows_1253Encoding.java
=====================================
@@ -20,7 +20,9 @@
package org.jcodings.specific;
import org.jcodings.CaseFoldMapEncoding;
+import org.jcodings.Config;
import org.jcodings.IntHolder;
+import org.jcodings.constants.CharacterType;
final public class Windows_1253Encoding extends CaseFoldMapEncoding {
@@ -28,6 +30,57 @@ final public class Windows_1253Encoding extends CaseFoldMapEncoding {
super("Windows-1253", CP1253_CtypeTable, CP1253_ToLowerCaseTable, CP1253_CaseFoldMap, true);
}
+ @Override
+ public int caseMap(IntHolder flagP, byte[] bytes, IntHolder pp, int end, byte[] to, int toP, int toEnd) {
+ int toStart = toP;
+ int flags = flagP.value;
+
+ while (pp.value < end && toP < toEnd) {
+ int code = bytes[pp.value++] & 0xff;
+ if (code == 0xF2) {
+ if ((flags & Config.CASE_UPCASE) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ code = 0xD3;
+ } else if ((flags & Config.CASE_FOLD) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ code = 0xF3;
+ }
+ } else if (code == 0xB5) {
+ if ((flags & Config.CASE_UPCASE) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ code = 0xCC;
+ } else if ((flags & Config.CASE_FOLD) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ code = 0xEC;
+ }
+ } else if (code == 0xC0 || code == 0xE0 || code == 0xB6) {
+ } else if ((CP1253_CtypeTable[code] & CharacterType.BIT_UPPER) != 0 && (flags & (Config.CASE_DOWNCASE | Config.CASE_FOLD)) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ code = LowerCaseTable[code];
+ } else if (code == 0xC0 || code == 0xE0) {
+ } else if ((CP1253_CtypeTable[code] & CharacterType.BIT_LOWER) != 0 && (flags & Config.CASE_UPCASE) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ if (code == 0xDC)
+ code = 0xA2;
+ else if (code >= 0xDD && code <= 0xDF)
+ code -= 0x25;
+ else if (code == 0xFC)
+ code = 0xBC;
+ else if (code == 0xFD || code == 0xFE)
+ code -= 0x3F;
+ else
+ code -= 0x20;
+ }
+
+ to[toP++] = (byte)code;
+ if ((flags & Config.CASE_TITLECASE) != 0) {
+ flags ^= (Config.CASE_UPCASE | Config.CASE_DOWNCASE | Config.CASE_TITLECASE);
+ }
+ }
+ flagP.value = flags;
+ return toP - toStart;
+ }
+
@Override
public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
int p = pp.value;
=====================================
src/org/jcodings/specific/Windows_1254Encoding.java
=====================================
@@ -20,7 +20,10 @@
package org.jcodings.specific;
import org.jcodings.CaseFoldMapEncoding;
+import org.jcodings.Config;
+import org.jcodings.ISOEncoding;
import org.jcodings.IntHolder;
+import org.jcodings.constants.CharacterType;
final public class Windows_1254Encoding extends CaseFoldMapEncoding {
@@ -28,6 +31,56 @@ final public class Windows_1254Encoding extends CaseFoldMapEncoding {
super("Windows-1254", CP1254_CtypeTable, CP1254_ToLowerCaseTable, CP1254_CaseFoldMap, true);
}
+ static final int DOTLESS_i = 0xFD;
+ static final int I_WITH_DOT_ABOVE = 0xDD;
+
+ @Override
+ public int caseMap(IntHolder flagP, byte[] bytes, IntHolder pp, int end, byte[] to, int toP, int toEnd) {
+ int toStart = toP;
+ int flags = flagP.value;
+
+ while (pp.value < end && toP < toEnd) {
+ int code = bytes[pp.value++] & 0xff;
+ if (code == ISOEncoding.SHARP_s) {
+ if ((flags & Config.CASE_UPCASE) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ to[toP++] = 'S';
+ code = (flags & Config.CASE_TITLECASE) != 0 ? 's' : 'S';
+ } else if ((flags & Config.CASE_FOLD) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ to[toP++] = 's';
+ code = 's';
+ }
+ } else if ((CP1254_CtypeTable[code] & CharacterType.BIT_UPPER) != 0 && (flags & (Config.CASE_DOWNCASE | Config.CASE_FOLD)) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ if (code == 'I') {
+ code = (flags & Config.CASE_FOLD_TURKISH_AZERI) != 0 ? DOTLESS_i : 'i';
+ } else {
+ code = LowerCaseTable[code];
+ }
+ } else if (code == 0x83 || code == 0xAA || code == 0xBA || code == 0xB5) {
+ } else if ((CP1254_CtypeTable[code] & CharacterType.BIT_LOWER) != 0 && (flags & Config.CASE_UPCASE) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ if (code == 'i') {
+ code = (flags & Config.CASE_FOLD_TURKISH_AZERI) != 0 ? I_WITH_DOT_ABOVE : 'I';
+ } else if (code == DOTLESS_i) {
+ code = 'I';
+ } else if (code == 0x9A || code == 0x9C || code == 0x9E)
+ code -= 0x10;
+ else if (code == 0xFF)
+ code -= 0x60;
+ else
+ code -= 0x20;
+ }
+ to[toP++] = (byte)code;
+ if ((flags & Config.CASE_TITLECASE) != 0) {
+ flags ^= (Config.CASE_UPCASE | Config.CASE_DOWNCASE | Config.CASE_TITLECASE);
+ }
+ }
+ flagP.value = flags;
+ return toP - toStart;
+ }
+
@Override
public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
int p = pp.value;
=====================================
src/org/jcodings/specific/Windows_1257Encoding.java
=====================================
@@ -20,7 +20,10 @@
package org.jcodings.specific;
import org.jcodings.CaseFoldMapEncoding;
+import org.jcodings.Config;
+import org.jcodings.ISOEncoding;
import org.jcodings.IntHolder;
+import org.jcodings.constants.CharacterType;
final public class Windows_1257Encoding extends CaseFoldMapEncoding {
@@ -28,6 +31,54 @@ final public class Windows_1257Encoding extends CaseFoldMapEncoding {
super("Windows-1257", CP1257_CtypeTable, CP1257_ToLowerCaseTable, CP1257_CaseFoldMap, true);
}
+ static final int DOTLESS_i = 0xB9;
+ static final int I_WITH_DOT_ABOVE = 0xA9;
+
+ @Override
+ public int caseMap(IntHolder flagP, byte[] bytes, IntHolder pp, int end, byte[] to, int toP, int toEnd) {
+ int toStart = toP;
+ int flags = flagP.value;
+
+ while (pp.value < end && toP < toEnd) {
+ int code = bytes[pp.value++] & 0xff;
+ if (code == ISOEncoding.SHARP_s) {
+ if ((flags & Config.CASE_UPCASE) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ to[toP++] = 'S';
+ code = (flags & Config.CASE_TITLECASE) != 0 ? 's' : 'S';
+ } else if ((flags & Config.CASE_FOLD) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ to[toP++] = 's';
+ code = 's';
+ }
+ } else if (code == 0xB5) {
+ } else if ((CP1257_CtypeTable[code] & CharacterType.BIT_UPPER) != 0 && (flags & (Config.CASE_DOWNCASE | Config.CASE_FOLD)) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ if (code == 'I') {
+ code = (flags & Config.CASE_FOLD_TURKISH_AZERI) != 0 ? DOTLESS_i : 'i';
+ } else {
+ code = LowerCaseTable[code];
+ }
+ } else if ((CP1257_CtypeTable[code] & CharacterType.BIT_LOWER) != 0 && (flags & Config.CASE_UPCASE) != 0) {
+ flags |= Config.CASE_MODIFIED;
+ if (code == 'i') {
+ code = (flags & Config.CASE_FOLD_TURKISH_AZERI) != 0 ? I_WITH_DOT_ABOVE : 'I';
+ } else if (code == DOTLESS_i) {
+ code = 'I';
+ } else if (code >= 0xB0 && code <= 0xBF)
+ code -= 0x10;
+ else
+ code -= 0x20;
+ }
+ to[toP++] = (byte)code;
+ if ((flags & Config.CASE_TITLECASE) != 0) {
+ flags ^= (Config.CASE_UPCASE | Config.CASE_DOWNCASE | Config.CASE_TITLECASE);
+ }
+ }
+ flagP.value = flags;
+ return toP - toStart;
+ }
+
@Override
public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
int p = pp.value;
=====================================
src/org/jcodings/transcode/TranscodeFunctions.java
=====================================
@@ -420,11 +420,11 @@ public class TranscodeFunctions {
{
long s0 = s[sStart] & 0xFF;
long s1 = s[sStart+1] & 0xFF;
- long s2 = s[sStart+2] & 0xFF;
- long s3 = s[sStart+3] & 0xFF;
long diff = info >> 8;
long u; /* Unicode Scalar Value */
if ((diff & 0x20000) != 0) { /* GB18030 4 bytes */
+ long s2 = s[sStart+2] & 0xFF;
+ long s3 = s[sStart+3] & 0xFF;
u = (((s0 * 10 + s1) * 126 + s2) * 10 + s3 - diff - 0x170000) & 0xFFFFFFFFL;
}
else { /* GB18030 2 bytes */
@@ -498,7 +498,7 @@ public class TranscodeFunctions {
public static final int EMACS_MULE_LEADING_CODE_JISX0208_1978 = 0220;
public static final int EMACS_MULE_LEADING_CODE_JISX0208_1983 = 0222;
-
+
public static final byte[] tbl0208 = {
(byte)0x21, (byte)0x23, (byte)0x21, (byte)0x56, (byte)0x21, (byte)0x57, (byte)0x21, (byte)0x22, (byte)0x21, (byte)0x26, (byte)0x25, (byte)0x72, (byte)0x25, (byte)0x21, (byte)0x25, (byte)0x23,
(byte)0x25, (byte)0x25, (byte)0x25, (byte)0x27, (byte)0x25, (byte)0x29, (byte)0x25, (byte)0x63, (byte)0x25, (byte)0x65, (byte)0x25, (byte)0x67, (byte)0x25, (byte)0x43, (byte)0x21, (byte)0x3C,
=====================================
src/org/jcodings/unicode/UnicodeEncoding.java
=====================================
@@ -21,7 +21,6 @@ package org.jcodings.unicode;
import java.io.DataInputStream;
import java.io.IOException;
-import java.util.ArrayList;
import org.jcodings.ApplyAllCaseFoldFunction;
import org.jcodings.CaseFoldCodeItem;
@@ -31,17 +30,15 @@ import org.jcodings.IntHolder;
import org.jcodings.MultiByteEncoding;
import org.jcodings.constants.CharacterType;
import org.jcodings.exception.CharacterPropertyException;
+import org.jcodings.exception.EncodingError;
import org.jcodings.exception.ErrorMessages;
import org.jcodings.util.ArrayReader;
import org.jcodings.util.CaseInsensitiveBytesHash;
import org.jcodings.util.IntArrayHash;
import org.jcodings.util.IntHash;
-
public abstract class UnicodeEncoding extends MultiByteEncoding {
-
- private static final int MAX_WORD_LENGTH = Config.USE_UNICODE_PROPERTIES ? 44 : 6;
- private static final int PROPERTY_NAME_MAX_SIZE = MAX_WORD_LENGTH + 1;
+ private static final int PROPERTY_NAME_MAX_SIZE = UnicodeProperties.MAX_WORD_LENGTH + 1;
static final int I_WITH_DOT_ABOVE = 0x0130;
static final int DOTLESS_i = 0x0131;
static final int DOT_ABOVE = 0x0307;
@@ -93,13 +90,13 @@ public abstract class UnicodeEncoding extends MultiByteEncoding {
for(int p_ = p; p_ < end; p_+= length(name, p_, end)) {
int code = mbcToCode(name, p_, end);
if (code == ' ' || code == '-' || code == '_') continue;
- if (code >= 0x80) throw new CharacterPropertyException(ErrorMessages.ERR_INVALID_CHAR_PROPERTY_NAME);
+ if (code >= 0x80) throw new CharacterPropertyException(EncodingError.ERR_INVALID_CHAR_PROPERTY_NAME, name, p, end);
buf[len++] = (byte)code;
- if (len >= PROPERTY_NAME_MAX_SIZE) throw new CharacterPropertyException(ErrorMessages.ERR_INVALID_CHAR_PROPERTY_NAME, name, p, end);
+ if (len >= PROPERTY_NAME_MAX_SIZE) throw new CharacterPropertyException(EncodingError.ERR_INVALID_CHAR_PROPERTY_NAME, name, p, end);
}
- Integer ctype = CTypeName.CTypeNameHash.get(buf, 0, len);
- if (ctype == null) throw new CharacterPropertyException(ErrorMessages.ERR_INVALID_CHAR_PROPERTY_NAME, name, p, end);
+ Integer ctype = CTypeName.Values.get(buf, 0, len);
+ if (ctype == null) throw new CharacterPropertyException(EncodingError.ERR_INVALID_CHAR_PROPERTY_NAME, name, p, end);
return ctype;
}
@@ -123,7 +120,7 @@ public abstract class UnicodeEncoding extends MultiByteEncoding {
}
}
- CodeList to = CaseFold.Hash.get(code);
+ CodeList to = CaseFold.Values.get(code);
if (to != null) {
if (to.codes.length == 1) {
return codeToMbc(to.codes[0], fold, foldP);
@@ -275,7 +272,7 @@ public abstract class UnicodeEncoding extends MultiByteEncoding {
int n = 0;
int fn = 0;
- CodeList to = CaseFold.Hash.get(code);
+ CodeList to = CaseFold.Values.get(code);
CaseFoldCodeItem[]items = null;
if (to != null) {
items = new CaseFoldCodeItem[Config.ENC_GET_CASE_FOLD_CODES_MAX_NUM];
@@ -287,7 +284,7 @@ public abstract class UnicodeEncoding extends MultiByteEncoding {
n++;
code = to.codes[0];
- to = CaseUnfold11.Hash.get(code);
+ to = CaseUnfold11.Values.get(code);
if (to != null) {
for (int i=0; i<to.codes.length; i++) {
@@ -303,7 +300,7 @@ public abstract class UnicodeEncoding extends MultiByteEncoding {
for (fn=0; fn<to.codes.length; fn++) {
cs[fn][0] = to.codes[fn];
- CodeList z3 = CaseUnfold11.Hash.get(cs[fn][0]);
+ CodeList z3 = CaseUnfold11.Values.get(cs[fn][0]);
if (z3 != null) {
for (int i=0; i<z3.codes.length; i++) {
cs[fn][i+1] = z3.codes[i];
@@ -322,7 +319,7 @@ public abstract class UnicodeEncoding extends MultiByteEncoding {
}
}
- CodeList z2 = CaseUnfold12.Hash.get(to.codes);
+ CodeList z2 = CaseUnfold12.Values.get(to.codes);
if (z2 != null) {
for (int i=0; i<z2.codes.length; i++) {
if (z2.codes[i] == code) continue;
@@ -339,7 +336,7 @@ public abstract class UnicodeEncoding extends MultiByteEncoding {
}
}
}
- CodeList z2 = CaseUnfold13.Hash.get(to.codes);
+ CodeList z2 = CaseUnfold13.Values.get(to.codes);
if (z2 != null) {
for (int i=0; i<z2.codes.length; i++) {
if (z2.codes[i] == code) continue;
@@ -352,7 +349,7 @@ public abstract class UnicodeEncoding extends MultiByteEncoding {
flag = 0; /* DISABLE_CASE_FOLD_MULTI_CHAR(flag); */
}
} else {
- to = CaseUnfold11.Hash.get(code);
+ to = CaseUnfold11.Values.get(code);
if (to != null) {
items = new CaseFoldCodeItem[Config.ENC_GET_CASE_FOLD_CODES_MAX_NUM];
for (int i=0; i<to.codes.length; i++) {
@@ -370,7 +367,7 @@ public abstract class UnicodeEncoding extends MultiByteEncoding {
final int codes0 = code;
final int codes1;
code = mbcToCode(bytes, p, end);
- to = CaseFold.Hash.get(code);
+ to = CaseFold.Values.get(code);
if (to != null && to.codes.length == 1) {
codes1 = to.codes[0];
} else {
@@ -379,7 +376,7 @@ public abstract class UnicodeEncoding extends MultiByteEncoding {
int clen = length(bytes, p, end);
len += clen;
- CodeList z2 = CaseUnfold12.Hash.get(codes0, codes1);
+ CodeList z2 = CaseUnfold12.Values.get(codes0, codes1);
if (z2 != null) {
for (int i=0; i<z2.codes.length; i++) {
items[n] = CaseFoldCodeItem.create(len, z2.codes[i]);
@@ -391,7 +388,7 @@ public abstract class UnicodeEncoding extends MultiByteEncoding {
if (p < end) {
final int codes2;
code = mbcToCode(bytes, p, end);
- to = CaseFold.Hash.get(code);
+ to = CaseFold.Values.get(code);
if (to != null && to.codes.length == 1) {
codes2 = to.codes[0];
} else {
@@ -399,7 +396,7 @@ public abstract class UnicodeEncoding extends MultiByteEncoding {
}
clen = length(bytes, p, end);
len += clen;
- z2 = CaseUnfold13.Hash.get(codes0, codes1, codes2);
+ z2 = CaseUnfold13.Values.get(codes0, codes1, codes2);
if (z2 != null) {
for (int i=0; i<z2.codes.length; i++) {
items[n] = CaseFoldCodeItem.create(len, z2.codes[i]);
@@ -464,7 +461,7 @@ public abstract class UnicodeEncoding extends MultiByteEncoding {
flags |= Config.CASE_MODIFIED;
code = 'I';
}
- } else if ((folded = CaseFold.Hash.get(code)) != null) {
+ } else if ((folded = CaseFold.Values.get(code)) != null) {
if ((flags & Config.CASE_TITLECASE) != 0 && (folded.flags & Config.CASE_IS_TITLECASE) != 0) {
} else if ((flags & folded.flags) != 0) {
@@ -507,14 +504,15 @@ public abstract class UnicodeEncoding extends MultiByteEncoding {
code = codes[i];
}
}
- } else if ((folded = CaseUnfold11.Hash.get(code)) != null && (flags & folded.flags) != 0) {
+ } else if ((folded = CaseUnfold11.Values.get(code)) != null && (flags & folded.flags) != 0) {
flags |= Config.CASE_MODIFIED;
code = folded.codes[(flags & folded.flags & Config.CASE_TITLECASE) != 0 ? 1 : 0];
}
}
toP += codeToMbc(code, to, toP);
if ((flags & Config.CASE_TITLECASE) != 0) {
- flags ^= (Config.CASE_UPCASE | Config.CASE_DOWNCASE | Config.CASE_TITLECASE | Config.CASE_UP_SPECIAL | Config.CASE_DOWN_SPECIAL);}
+ flags ^= (Config.CASE_UPCASE | Config.CASE_DOWNCASE | Config.CASE_TITLECASE | Config.CASE_UP_SPECIAL | Config.CASE_DOWN_SPECIAL);
+ }
} // while
flagP.value = flags;
@@ -572,7 +570,7 @@ public abstract class UnicodeEncoding extends MultiByteEncoding {
}
static class CTypeName {
- private static final CaseInsensitiveBytesHash<Integer> CTypeNameHash = initializeCTypeNameTable();
+ private static final CaseInsensitiveBytesHash<Integer> Values = initializeCTypeNameTable();
private static CaseInsensitiveBytesHash<Integer> initializeCTypeNameTable() {
CaseInsensitiveBytesHash<Integer> table = new CaseInsensitiveBytesHash<Integer>();
@@ -613,7 +611,7 @@ public abstract class UnicodeEncoding extends MultiByteEncoding {
}
}
- static final IntHash<CodeList>Hash = read("CaseFold");
+ static final IntHash<CodeList>Values = read("CaseFold");
}
private static class CaseUnfold11 {
@@ -659,7 +657,7 @@ public abstract class UnicodeEncoding extends MultiByteEncoding {
}
return hash;
}
- static final IntHash<CodeList> Hash = initializeUnfold1Hash();
+ static final IntHash<CodeList> Values = initializeUnfold1Hash();
}
private static Object[] readFoldN(int fromSize, String table) {
@@ -709,7 +707,7 @@ public abstract class UnicodeEncoding extends MultiByteEncoding {
return unfold2;
}
- static final IntArrayHash<CodeList> Hash = initializeUnfold2Hash();
+ static final IntArrayHash<CodeList> Values = initializeUnfold2Hash();
}
private static class CaseUnfold13 {
@@ -731,7 +729,7 @@ public abstract class UnicodeEncoding extends MultiByteEncoding {
return unfold3;
}
- static final IntArrayHash<CodeList> Hash = initializeUnfold3Hash();
+ static final IntArrayHash<CodeList> Values = initializeUnfold3Hash();
}
private static int extractLength(int packed) {
=====================================
src/org/jcodings/unicode/UnicodeProperties.java
=====================================
@@ -819,4 +819,6 @@ public class UnicodeProperties {
new CodeRangeEntry("insupplementaryprivateuseareab", "CR_In_Supplementary_Private_Use_Area_B"),
new CodeRangeEntry("innoblock", "CR_In_No_Block")
};
+
+ static final int MAX_WORD_LENGTH = 44;
}
View it on GitLab: https://salsa.debian.org/java-team/jcodings/compare/5982972a2801b3660dca1490cf8ac2248742040f...e7f33ac81e5d98470df0c0a3c40000b5d8e7d4f0
--
View it on GitLab: https://salsa.debian.org/java-team/jcodings/compare/5982972a2801b3660dca1490cf8ac2248742040f...e7f33ac81e5d98470df0c0a3c40000b5d8e7d4f0
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/pkg-java-commits/attachments/20180917/1305b152/attachment.html>
More information about the pkg-java-commits mailing list