[Git][debian-gis-team/mkgmap][upstream] New upstream version 0.0.0+svn4821

Bas Couwenberg (@sebastic) gitlab at salsa.debian.org
Wed Dec 1 06:54:44 GMT 2021



Bas Couwenberg pushed to branch upstream at Debian GIS Project / mkgmap


Commits:
7d50ef82 by Bas Couwenberg at 2021-12-01T07:46:48+01:00
New upstream version 0.0.0+svn4821
- - - - -


10 changed files:

- doc/options.txt
- resources/mkgmap-version.properties
- src/uk/me/parabola/imgfmt/app/labelenc/AnyCharsetEncoder.java
- src/uk/me/parabola/imgfmt/app/labelenc/Format6Encoder.java
- src/uk/me/parabola/imgfmt/app/labelenc/Utf8Encoder.java
- src/uk/me/parabola/imgfmt/app/mdr/Mdr11.java
- src/uk/me/parabola/imgfmt/app/mdr/Mdr25.java
- src/uk/me/parabola/imgfmt/app/srt/Sort.java
- src/uk/me/parabola/mkgmap/reader/osm/POIGeneratorHook.java
- test/uk/me/parabola/imgfmt/app/labelenc/CodeFunctionsTest.java


Changes:

=====================================
doc/options.txt
=====================================
@@ -74,8 +74,8 @@ The directory name is --family-name with extension .gmap.
 
 ;--gmapi-minimal[=<include-pattern>]
 : 	Special option for map providers to reduce disk writes when updating. Works like
---gmapi but does not write Product data for input files which are provided as
-*.img. It is assumed that the content of those files wasn't changed and thus
+--gmapi but does not write Product data for input files which are provided
+as *.img. It is assumed that the content of those files wasn't changed and thus
 doesn't need a rewrite. The optional include-pattern is a regular expression
 which can be used to specify *.img files for which a write should be forced. The
 pattern is used on the full path to the input file. The global index files and


=====================================
resources/mkgmap-version.properties
=====================================
@@ -1,2 +1,2 @@
-svn.version: 4810
-build.timestamp: 2021-10-25T08:27:43+0100
+svn.version: 4821
+build.timestamp: 2021-11-30T13:17:18+0000


=====================================
src/uk/me/parabola/imgfmt/app/labelenc/AnyCharsetEncoder.java
=====================================
@@ -22,6 +22,7 @@ import java.nio.charset.Charset;
 import java.nio.charset.CharsetEncoder;
 import java.nio.charset.CoderResult;
 import java.nio.charset.CodingErrorAction;
+import java.text.Normalizer;
 import java.util.Arrays;
 import java.util.Locale;
 
@@ -59,6 +60,7 @@ public class AnyCharsetEncoder extends BaseEncoder implements CharacterEncoder {
 			ucText = text.toUpperCase(Locale.ENGLISH);
 		else
 			ucText = text;
+		ucText = Normalizer.normalize(ucText, Normalizer.Form.NFC);
 
 		// Allocate a buffer for the encoded text. This will be large enough in almost all cases,
 		// but the code below allocates more space if necessary.
@@ -74,9 +76,9 @@ public class AnyCharsetEncoder extends BaseEncoder implements CharacterEncoder {
 				// There is a character that cannot be represented in the target code page.
 				// Read the character(s), transliterate them, and add them to the output.
 				// We then continue onward with the rest of the string.
-				String s;
+				String s0;
 				if (result.length() == 1) {
-					s = String.valueOf(charBuffer.get());
+					s0 = String.valueOf(charBuffer.get());
 				} else {
 					// Don't know under what circumstances this will be called and may not be the
 					// correct thing to do when it does happen.
@@ -84,17 +86,23 @@ public class AnyCharsetEncoder extends BaseEncoder implements CharacterEncoder {
 					for (int i = 0; i < result.length(); i++)
 						sb.append(charBuffer.get());
 
-					s = sb.toString();
+					s0 = sb.toString();
 				}
 
-				s = transliterator.transliterate(s);
+				String s = transliterator.transliterate(s0);
 
 				// Make sure that there is enough space for the transliterated string
 				while (outBuf.limit() < outBuf.position() + s.length())
 					outBuf = reallocBuf(outBuf);
 
-				for (int i = 0; i < s.length(); i++)
-					outBuf.put((byte) s.charAt(i));
+				if (s.equals(s0)) {
+					// string is still unmappable
+					outBuf.put(encoder.replacement()); //typically '?'
+				} else {
+					for (int i = 0; i < s.length(); i++) {
+						outBuf.put((byte) s.charAt(i));
+					}
+				}
 
 			} else if (result == CoderResult.OVERFLOW) {
 				// Ran out of space in the output


=====================================
src/uk/me/parabola/imgfmt/app/labelenc/Format6Encoder.java
=====================================
@@ -16,6 +16,7 @@
  */
 package uk.me.parabola.imgfmt.app.labelenc;
 
+import java.text.Normalizer;
 import java.util.Locale;
 
 /**
@@ -60,8 +61,8 @@ public class Format6Encoder extends BaseEncoder implements CharacterEncoder {
 	public EncodedText encodeText(String text) {
 		if (text == null || text.isEmpty())
 			return NO_TEXT;
-
-		String s = transliterator.transliterate(text).toUpperCase(Locale.ENGLISH);
+		String normalisedText = Normalizer.normalize(text, Normalizer.Form.NFC);
+		String s = transliterator.transliterate(normalisedText).toUpperCase(Locale.ENGLISH);
 
 		// Allocate more than enough space on average for the label.
 		// if you overdo it then it will waste a lot of space , but


=====================================
src/uk/me/parabola/imgfmt/app/labelenc/Utf8Encoder.java
=====================================
@@ -18,6 +18,7 @@ package uk.me.parabola.imgfmt.app.labelenc;
 
 import java.util.Locale;
 import java.nio.charset.StandardCharsets;
+import java.text.Normalizer;
 
 /**
  * Encoder for labels in utf-8.
@@ -35,7 +36,8 @@ public class Utf8Encoder extends BaseEncoder implements CharacterEncoder {
 			uctext = text.toUpperCase(Locale.ENGLISH);
 		else
 			uctext = text;
-
+		uctext = Normalizer.normalize(uctext, Normalizer.Form.NFC);
+		
 		EncodedText et;
 		byte[] buf = uctext.getBytes(StandardCharsets.UTF_8);
 		byte[] res = new byte[buf.length + 1];


=====================================
src/uk/me/parabola/imgfmt/app/mdr/Mdr11.java
=====================================
@@ -13,13 +13,14 @@
 
 package uk.me.parabola.imgfmt.app.mdr;
 
+import java.text.Collator;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 
 import uk.me.parabola.imgfmt.app.ImgFileWriter;
 import uk.me.parabola.imgfmt.app.srt.Sort;
+import uk.me.parabola.imgfmt.app.srt.Sort.SrtCollator;
 import uk.me.parabola.imgfmt.app.srt.SortKey;
 import uk.me.parabola.imgfmt.app.trergn.Point;
 
@@ -131,6 +132,7 @@ public class Mdr11 extends MdrMapSection {
 
 	public List<Mdr8Record> getIndex() {
 		List<Mdr8Record> list = new ArrayList<>();
+		Sort.SrtCollator collator = (SrtCollator) getConfig().getSort().getCollator();
 		for (int number = 1; number <= pois.size(); number += 10240) {
 			char[] prefix = getPrefixForRecord(number);
 
@@ -138,7 +140,9 @@ public class Mdr11 extends MdrMapSection {
 			int rec = number;
 			while (rec > 1) {
 				char[] p = getPrefixForRecord(rec);
-				if (!Arrays.equals(p, prefix)) {
+				int cmp = collator.compareOneStrengthWithLength(prefix, p, Collator.PRIMARY,
+						MdrUtils.POI_INDEX_PREFIX_LEN);
+				if (cmp != 0) {
 					rec++;
 					break;
 				}


=====================================
src/uk/me/parabola/imgfmt/app/mdr/Mdr25.java
=====================================
@@ -52,9 +52,9 @@ public class Mdr25 extends MdrSection {
 		for (SortKey<Mdr5Record> key : keys) {
 			Mdr5Record city = key.getObject();
 
-			if (lastCity == null ||
-					(!city.getName().equals(lastCity.getName()) || !(city.getRegionName().equals(lastCity.getRegionName()))))
-			{
+			if (lastCity == null || !city.getName().equals(lastCity.getName())
+					|| !city.getRegionName().equals(lastCity.getRegionName())
+					|| !city.getCountryName().equals(lastCity.getCountryName())) {
 				record++;
 
 				// Record in the 29 index if there is one for this record


=====================================
src/uk/me/parabola/imgfmt/app/srt/Sort.java
=====================================
@@ -72,6 +72,8 @@ public class Sort {
 	private int headerLen = SRTHeader.HEADER_LEN; 
 	private int header3Len = -1;
 
+	private int maxPrimary = 0;  // max seen while loading resource/sort/cp*.txt file. == 10690 for cp65001.txt on 18Oct2021
+
 	public Sort() {
 		pages[0] = new Page();
 	}
@@ -80,9 +82,11 @@ public class Sort {
 		ensurePage(ch >>> 8);
 		if (getPrimary(ch) != 0)
 			throw new ExitException(String.format("Repeated primary index 0x%x", ch & 0xff));
-		setPrimary (ch, primary);
+		if (primary > maxPrimary)
+			maxPrimary = primary;
+		setPrimary(ch, primary);
 		setSecondary(ch, secondary);
-		setTertiary( ch, tertiary);
+		setTertiary(ch, tertiary);
 
 		setFlags(ch, flags);
 		int numExp = (flags >> 4) & 0xf;
@@ -393,6 +397,13 @@ public class Sort {
 		return fillKey(Collator.TERTIARY, bVal, key, start);
 	}
 
+	private static int writeSort(int strength, int pos, byte[] outKey, int start) {
+		if (strength == Collator.PRIMARY)
+			outKey[start++] = (byte) ((pos >> 8) & 0xff); // for 2 byte charsets
+		outKey[start++] = (byte) (pos & 0xff);
+		return start;
+	}
+
 	/**
 	 * Fill in the output key for a given strength.
 	 *
@@ -405,8 +416,11 @@ public class Sort {
 		int index = start;
 		for (char c : input) {
 
-			if (!hasPage(c >>> 8))
+			if (!hasPage(c >>> 8)) {
+				if (isMulti() && type == Collator.PRIMARY) // attempt to avoid conflict with defined sorts. Be consistent with SrtCollator
+					index = writeSort(type, c + maxPrimary, outKey, index);
 				continue;
+			}
 
 			int exp = (getFlags(c) >> 4) & 0xf;
 			if (exp == 0) {
@@ -416,11 +430,7 @@ public class Sort {
 				int idx = getPrimary(c);
 				for (int i = idx - 1; i < idx + exp; i++) {
 					int pos = expansions.get(i).getPosition(type);
-					if (pos != 0) {
-						if (type == Collator.PRIMARY)
-							outKey[index++] = (byte) ((pos >>> 8) & 0xff);
-						outKey[index++] = (byte) pos;
-					}
+					index = writeSort(type, pos, outKey, index);
 				}
 			}
 		}
@@ -681,11 +691,8 @@ public class Sort {
 		 */
 		public int writePos(int strength, int ch, byte[] outKey, int start) {
 			int pos = getPos(strength, ch);
-			if (pos != 0) {
-				if (strength == Collator.PRIMARY)
-					outKey[start++] = (byte) ((pos >> 8) & 0xff); // for 2 byte charsets
-				outKey[start++] = (byte) (pos & 0xff);
-			}
+			if (pos != 0)
+				start = writeSort(strength, pos, outKey, start);
 			return start;
 		}
 	}
@@ -851,8 +858,10 @@ public class Sort {
 						}
 
 						// Get the first non-ignorable at this level
-						int c = chars[pos++ & 0xff];
+						int c = chars[pos++];
 						if (!hasPage(c >>> 8)) {
+							if (isMulti() && type == Collator.PRIMARY) // order by char itself if no sortpos
+								return (c + maxPrimary) & 0xffff; // attempt to avoid conflict with defined sorts. Be consistent with fillKey
 							next = 0;
 							continue;
 						}


=====================================
src/uk/me/parabola/mkgmap/reader/osm/POIGeneratorHook.java
=====================================
@@ -452,7 +452,7 @@ public class POIGeneratorHook implements OsmReadingHooks {
 								// boundary relations may have a node with role admin_centre, if yes, use the 
 								// location of it
 								String pName = nameFinder.getName(el);
-								if (relName.equals(pName)){
+								if (relName.equals(pName) || pName == null){
 									adminCentre = (Node) el;
 									if (log.isDebugEnabled())
 										log.debug("using admin_centre node as location for POI for rel",r.getId(),relName,"at",((Node) el).getLocation());
@@ -460,12 +460,12 @@ public class POIGeneratorHook implements OsmReadingHooks {
 							}
 						} else if ("label".equals(role)){
 							String label = nameFinder.getName(el);
-							if (relName.equals(label)){
+							if (relName.equals(label) || label == null){
 								labelPOI = (Node) el;
 								log.debug("using label node as location for POI for rel", r.getId(), relName, "at", ((Node) el).getLocation());
 								break;
 							} else {
-								log.warn("rel",r.toBrowseURL(),",node with role label is ignored because it has a different name");
+								log.warn("role label", el.toBrowseURL(), label, "is ignored because it has a different name than" ,r.toBrowseURL(), relName);
 							}
 						}
 					}


=====================================
test/uk/me/parabola/imgfmt/app/labelenc/CodeFunctionsTest.java
=====================================
@@ -58,17 +58,18 @@ public class CodeFunctionsTest {
 		CodeFunctions functions = CodeFunctions.createEncoderForLBL(6, 0);
 
 		CharacterEncoder encoder = functions.getEncoder();
-		Transliterator transliterator = new TableTransliterator("ascii");
-		EncodedText text = encoder.encodeText(transliterator.transliterate("Körnerstraße, Velkomezeříčská, Skólavörðustigur"));
+		// Twülpstedt contains u + "COMBINING DIAERESIS" (0x75 + 0x308)
+		EncodedText text = encoder.encodeText("Körnerstraße, Twülpstedt, Velkomezeříčská, Skólavörðustigur");
 
 		CharacterDecoder decoder = functions.getDecoder();
 		byte[] ctext = text.getCtext();
-		for (int i = 0; i < text.getLength(); i++) {
-			decoder.addByte(ctext[i]);
+		boolean finished = false;
+		int i = 0;
+		while (!finished) {
+			finished = decoder.addByte(ctext[i++]);
 		}
-		decoder.addByte(0xff);
 		String result = decoder.getText().getText();
-		assertEquals("transliterated text", "KORNERSTRASSE, VELKOMEZERICSKA, SKOLAVORDUSTIGUR", result);
+		assertEquals("transliterated text", "KORNERSTRASSE, TWULPSTEDT, VELKOMEZERICSKA, SKOLAVORDUSTIGUR", result);
 	}
 
 	/**
@@ -80,17 +81,20 @@ public class CodeFunctionsTest {
 		CodeFunctions functions = CodeFunctions.createEncoderForLBL("latin1");
 
 		CharacterEncoder encoder = functions.getEncoder();
-		Transliterator transliterator = new TableTransliterator("latin1");
-		EncodedText text = encoder.encodeText(transliterator.transliterate("Körnerstraße, Velkomezeříčská, Skólavörðustigur"));
+		// Twülpstedt contains u + "COMBINING DIAERESIS" (0x75 + 0x308)
+		EncodedText text = encoder.encodeText("Körnerstraße, Twülpstedt, Velkomezeříčská, Skólavörðustigur");
 
 		CharacterDecoder decoder = functions.getDecoder();
 		byte[] ctext = text.getCtext();
-		for (int i = 0; i < text.getLength(); i++) {
-			decoder.addByte(ctext[i]);
+		boolean finished = false;
+		int i = 0;
+		while (!finished) {
+			finished = decoder.addByte(ctext[i++]);
 		}
 
 		String result = decoder.getText().getText();
-		assertEquals("transliterated text", "Körnerstraße, Velkomezerícská, Skólavörðustigur", result);
+		// Twülpstedt now contains LATIN SMALL LETTER U WITH DIAERESIS (u+00fc)
+		assertEquals("transliterated text", "Körnerstraße, Twülpstedt, Velkomezerícská, Skólavörðustigur", result);
 	}
 
 	/**
@@ -122,4 +126,6 @@ public class CodeFunctionsTest {
 			assertEquals("character", i, text.getCtext()[i-1] & 0xff);
 		}
 	}
+	
+	
 }



View it on GitLab: https://salsa.debian.org/debian-gis-team/mkgmap/-/commit/7d50ef8207da080160e716b9dfd35ad849bc5511

-- 
View it on GitLab: https://salsa.debian.org/debian-gis-team/mkgmap/-/commit/7d50ef8207da080160e716b9dfd35ad849bc5511
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/pkg-grass-devel/attachments/20211201/b029eb28/attachment-0001.htm>


More information about the Pkg-grass-devel mailing list