[Git][java-team/libdsiutils-java][upstream] New upstream version 2.6.7

Pierre Gruet gitlab at salsa.debian.org
Thu Oct 22 21:02:26 BST 2020



Pierre Gruet pushed to branch upstream at Debian Java Maintainers / libdsiutils-java


Commits:
240adfa8 by Pierre Gruet at 2020-10-21T18:53:43+02:00
New upstream version 2.6.7
- - - - -


8 changed files:

- CHANGES
- build.properties
- src/it/unimi/dsi/Util.java
- + src/it/unimi/dsi/big/util/FrontCodedStringBigList.java
- + src/it/unimi/dsi/big/util/PermutedFrontCodedStringBigList.java
- src/it/unimi/dsi/util/PermutedFrontCodedStringList.java
- test/it/unimi/dsi/UtilTest.java
- + test/it/unimi/dsi/big/util/FrontCodedStringBigListTest.java


Changes:

=====================================
CHANGES
=====================================
@@ -1,3 +1,9 @@
+2.6.7
+
+- New methods for composing permutations.
+
+- Big versions of front-coded string lists.
+
 2.6.6
 
 - Fixed SLF4J dependencies (again).


=====================================
build.properties
=====================================
@@ -1,4 +1,4 @@
-version=2.6.6
+version=2.6.7
 
 build.sysclasspath=ignore
 
@@ -19,7 +19,7 @@ instrumented=instr
 j2se.apiurl=https://docs.oracle.com/javase/8/docs/api/
 fastutil.apiurl=http://fastutil.di.unimi.it/docs/
 jsap.apiurl=http://www.martiansoftware.com/jsap/doc/javadoc/
-junit.apiurl=http://junit.sourceforge.net/javadoc_40/
+junit.apiurl=https://junit.org/junit4/javadoc/latest/
 log4j.apiurl=http://logging.apache.org/log4j/1.2/apidocs/
 slf4j.apiurl=http://www.slf4j.org/apidocs/
 commons-configuration.apiurl=http://commons.apache.org/proper/commons-configuration/javadocs/v1.10/apidocs/


=====================================
src/it/unimi/dsi/Util.java
=====================================
@@ -349,6 +349,72 @@ public final class Util {
 		return identity(new int[n]);
 	}
 
+	/**
+	 * Computes the composition of two permutations expressed as arrays of <var>n</var> distinct
+	 * integers in [0 .. <var>n</var>).
+	 *
+	 * <p>
+	 * <strong>Warning</strong>: if the arguments are not permutations, essentially anything can happen.
+	 *
+	 * @param p the first permutation.
+	 * @param q the second permutation.
+	 * @param r an array that will store the resulting permutation: {@code r[i] = q[p[i]]}.
+	 * @return {@code r}.
+	 */
+	public static int[] composePermutations(final int[] p, final int[] q, final int[] r) {
+		final int length = p.length;
+		for (int i = 0; i < length; i++) r[i] = q[p[i]];
+		return r;
+	}
+
+	/**
+	 * Computes the composition of two permutations expressed as arrays of <var>n</var> distinct
+	 * integers in [0 .. <var>n</var>).
+	 *
+	 * <p>
+	 * <strong>Warning</strong>: if the arguments are not permutations, essentially anything can happen.
+	 *
+	 * @param p the first permutation.
+	 * @param q the second permutation.
+	 * @return an array {@code r} containing the resulting permutation: {@code r[i] = q[p[i]]}.
+	 * @see Util#composePermutations(int[], int[], int[])
+	 */
+	public static int[] composePermutations(final int[] p, final int[] q) {
+		final int[] r = p.clone();
+		composePermutations(p, q, r);
+		return r;
+	}
+
+	/**
+	 * Computes in place the composition of two permutations expressed as arrays of <var>n</var>
+	 * distinct integers in [0 .. <var>n</var>).
+	 *
+	 * <p>
+	 * <strong>Warning</strong>: if the arguments are not permutations, essentially anything can happen.
+	 *
+	 * @param p the first permutation.
+	 * @param q the second permutation, which will contain the result at the end.
+	 * @return {@code q}.
+	 * @see Util#composePermutations(int[], int[], int[])
+	 */
+	public static int[] composePermutationsInPlace(final int[] p, final int[] q) {
+		final int length = p.length;
+		for (int i = 0; i < length; i++) {
+			if (q[i] < 0) continue;
+			final int firstIndex = i;
+			final int firstElement = q[i];
+			assert firstElement >= 0;
+			int j = i;
+			while (p[j] != firstIndex) {
+				assert q[p[j]] >= 0;
+				q[j] = -q[p[j]] - 1;
+				j = p[j];
+			}
+			q[j] = -firstElement - 1;
+		}
+		for (int i = 0; i < length; i++) q[i] = -q[i] - 1;
+		return q;
+	}
 
 	/** Computes in place the inverse of a permutation expressed
 	 * as a {@linkplain BigArrays big array} of <var>n</var> distinct long integers in [0 .. <var>n</var>).
@@ -440,4 +506,79 @@ public final class Util {
 	public static long[][] identity(final long n) {
 		return identity(LongBigArrays.newBigArray(n));
 	}
+
+	/**
+	 * Computes the composition of two permutations expressed as {@linkplain BigArrays big arrays} of
+	 * <var>n</var> distinct long integers in [0 .. <var>n</var>).
+	 *
+	 * <p>
+	 * <strong>Warning</strong>: if the arguments are not permutations, essentially anything can happen.
+	 *
+	 * @param p the first permutation.
+	 * @param q the second permutation.
+	 * @param r an array that will store the resulting permutation: {@code r[i] = q[p[i]]}.
+	 * @return {@code r}.
+	 */
+	public static long[][] composePermutations(final long[][] p, final long[][] q, final long[][] r) {
+		final long length = length(p);
+		for (long i = 0; i < length; i++) set(r, i, get(q, get(p, i)));
+		return r;
+	}
+
+	/**
+	 * Computes the composition of two permutations expressed as {@linkplain BigArrays big arrays} of
+	 * <var>n</var> distinct long integers in [0 .. <var>n</var>).
+	 *
+	 * <p>
+	 * <strong>Warning</strong>: if the arguments are not permutations, essentially anything can happen.
+	 *
+	 * @param p the first permutation.
+	 * @param q the second permutation.
+	 * @return an array {@code r} containing the resulting permutation: {@code r[i] = q[p[i]]}.
+	 * @see Util#composePermutations(long[][], long[][], long[][])
+	 */
+	public static long[][] composePermutations(final long[][] p, final long[][] q) {
+		final long[][] r = LongBigArrays.newBigArray(length(p));
+		composePermutations(p, q, r);
+		return r;
+	}
+
+	/**
+	 * Computes in place the composition of two permutations expressed as {@linkplain BigArrays big
+	 * arrays} of <var>n</var> distinct long integers in [0 .. <var>n</var>).
+	 *
+	 * <p>
+	 * <strong>Warning</strong>: if the arguments are not permutations, essentially anything can happen.
+	 *
+	 * @param p the first permutation.
+	 * @param q the second permutation, which will contain the result at the end.
+	 * @return {@code q}.
+	 * @see Util#composePermutations(long[][], long[][], long[][])
+	 */
+	public static long[][] composePermutationsInPlace(final long[][] p, final long[][] q) {
+		final long length = length(p);
+		for (long i = 0; i < length; i++) {
+			if (get(q, i) < 0) continue;
+			final long firstIndex = i;
+			final long firstElement = get(q, i);
+			assert firstElement >= 0;
+			long j = i;
+			while (get(p, j) != firstIndex) {
+				assert get(q, get(p, j)) >= 0;
+				set(q, j, -get(q, get(p, j)) - 1);
+				j = get(p, j);
+			}
+			set(q, j, -firstElement - 1);
+		}
+
+		for (final long[] element : q) {
+		     final long[] t = element;
+		     final int l = t.length;
+		     for(int d = 0; d < l; d++) {
+		          t[d] = -t[d] - 1;
+		     }
+		 }
+		return q;
+	}
+
 }


=====================================
src/it/unimi/dsi/big/util/FrontCodedStringBigList.java
=====================================
@@ -0,0 +1,327 @@
+package it.unimi.dsi.big.util;
+
+/*
+ * DSI utilities
+ *
+ * Copyright (C) 2002-2020 Sebastiano Vigna
+ *
+ *  This library is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU Lesser General Public License as published by the Free
+ *  Software Foundation; either version 3 of the License, or (at your option)
+ *  any later version.
+ *
+ *  This library is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Serializable;
+import java.nio.charset.Charset;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.RandomAccess;
+import java.util.zip.GZIPInputStream;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Charsets;
+import com.martiansoftware.jsap.FlaggedOption;
+import com.martiansoftware.jsap.JSAP;
+import com.martiansoftware.jsap.JSAPException;
+import com.martiansoftware.jsap.JSAPResult;
+import com.martiansoftware.jsap.Parameter;
+import com.martiansoftware.jsap.SimpleJSAP;
+import com.martiansoftware.jsap.Switch;
+import com.martiansoftware.jsap.UnflaggedOption;
+import com.martiansoftware.jsap.stringparsers.ForNameStringParser;
+import com.martiansoftware.jsap.stringparsers.IntSizeStringParser;
+
+import it.unimi.dsi.fastutil.bytes.ByteArrayFrontCodedBigList;
+import it.unimi.dsi.fastutil.chars.CharArrayFrontCodedBigList;
+import it.unimi.dsi.fastutil.io.BinIO;
+import it.unimi.dsi.fastutil.objects.AbstractObjectBigList;
+import it.unimi.dsi.fastutil.objects.ObjectBigListIterator;
+import it.unimi.dsi.fastutil.objects.ObjectIterator;
+import it.unimi.dsi.io.FastBufferedReader;
+import it.unimi.dsi.io.LineIterator;
+import it.unimi.dsi.lang.MutableString;
+import it.unimi.dsi.logging.ProgressLogger;
+import it.unimi.dsi.util.FrontCodedStringList;
+
+/**
+ * Compact storage of strings using front-coding compression (a.k.a. compression by
+ * prefix omission).
+ *
+ * <P>
+ * This class is functionally identical to {@link FrontCodedStringList}, except for the larger size
+ * allowed.
+ *
+ * @see FrontCodedStringList
+ */
+
+public class FrontCodedStringBigList extends AbstractObjectBigList<MutableString> implements RandomAccess, Serializable {
+
+	public static final long serialVersionUID = 1;
+
+	/** The underlying {@link ByteArrayFrontCodedBigList}, or {@code null}. */
+	protected final ByteArrayFrontCodedBigList byteFrontCodedBigList;
+
+	/** The underlying {@link CharArrayFrontCodedBigList}, or {@code null}. */
+	protected final CharArrayFrontCodedBigList charFrontCodedBigList;
+
+	/** Whether this front-coded list is UTF-8 encoded. */
+	protected final boolean utf8;
+
+	/**
+	 * Creates a new front-coded string list containing the character sequences returned by the given
+	 * iterator.
+	 *
+	 * @param words an iterator returning {@linkplain CharSequence character sequences}.
+	 * @param ratio the desired ratio.
+	 * @param utf8 if true, the strings will be stored as UTF-8 byte arrays.
+	 */
+
+	public FrontCodedStringBigList(final Iterator<? extends CharSequence> words, final int ratio, final boolean utf8) {
+		this.utf8 = utf8;
+		if (utf8) {
+			byteFrontCodedBigList = new ByteArrayFrontCodedBigList(new ObjectIterator<byte[]>() {
+				@Override
+				public boolean hasNext() {
+					return words.hasNext();
+				}
+
+				@SuppressWarnings("null")
+				@Override
+				public byte[] next() {
+					return words.next().toString().getBytes(Charsets.UTF_8);
+				}
+			}, ratio);
+			charFrontCodedBigList = null;
+		} else {
+			charFrontCodedBigList = new CharArrayFrontCodedBigList(new ObjectIterator<char[]>() {
+				@Override
+				public boolean hasNext() {
+					return words.hasNext();
+				}
+
+				@Override
+				public char[] next() {
+					final CharSequence s = words.next();
+					int i = s.length();
+					final char[] a = new char[i];
+					while (i-- != 0) a[i] = s.charAt(i);
+					return a;
+				}
+			}, ratio);
+			byteFrontCodedBigList = null;
+		}
+
+	}
+
+	/**
+	 * Creates a new front-coded string list containing the character sequences contained in the given
+	 * collection.
+	 *
+	 * @param c a collection containing {@linkplain CharSequence character sequences}.
+	 * @param ratio the desired ratio.
+	 * @param utf8 if true, the strings will be stored as UTF-8 byte arrays.
+	 */
+	public FrontCodedStringBigList(final Collection<? extends CharSequence> c, final int ratio, final boolean utf8) {
+		this(c.iterator(), ratio, utf8);
+	}
+
+	/**
+	 * Returns whether this front-coded string list is storing its strings as UTF-8 encoded bytes.
+	 *
+	 * @return true if this front-coded string list is keeping its data as an array of UTF-8 encoded
+	 *         bytes.
+	 */
+	public boolean utf8() {
+		return utf8;
+	}
+
+	/**
+	 * Returns the ratio of the underlying front-coded list.
+	 *
+	 * @return the ratio of the underlying front-coded list.
+	 */
+	public int ratio() {
+		return utf8 ? byteFrontCodedBigList.ratio() : charFrontCodedBigList.ratio();
+	}
+
+	/**
+	 * Returns the element at the specified position in this front-coded list as a mutable string.
+	 *
+	 * @param index an index in the list.
+	 * @return a {@link MutableString} that will contain the string at the specified position. The
+	 *         string may be freely modified.
+	 */
+	@Override
+	public MutableString get(final long index) {
+		return MutableString.wrap(utf8 ? byte2Char(byteFrontCodedBigList.getArray(index), null) : charFrontCodedBigList.getArray(index));
+	}
+
+	/**
+	 * Returns the element at the specified position in this front-coded list by storing it in a mutable
+	 * string.
+	 *
+	 * @param index an index in the list.
+	 * @param s a mutable string that will contain the string at the specified position.
+	 */
+	public void get(final long index, final MutableString s) {
+		if (utf8) {
+			final byte[] a = byteFrontCodedBigList.getArray(index);
+			s.length(countUTF8Chars(a));
+			byte2Char(a, s.array());
+		} else {
+			s.length(s.array().length);
+			int res = charFrontCodedBigList.get(index, s.array());
+			if (res < 0) {
+				s.length(s.array().length - res);
+				res = charFrontCodedBigList.get(index, s.array());
+			} else s.length(res);
+		}
+	}
+
+	/*
+	 * The following methods are highly optimized UTF-8 converters. They exploit the fact that we
+	 * created the encoding ourselves in the first place, so we can be sure it is correct.
+	 */
+
+	protected static int countUTF8Chars(final byte[] a) {
+		final int length = a.length;
+		int result = 0, b;
+		for (int i = 0; i < length; i++) {
+			b = (a[i] & 0xFF) >> 4;
+			if (b < 8) result++;
+			else if (b < 14) {
+				result++;
+				i++;
+			} else if (b < 15) {
+				result++;
+				i += 2;
+			} else {
+				// Surrogate pair (yuck!)
+				result += 2;
+				i += 4;
+			}
+		}
+
+		return result;
+	}
+
+	protected static char[] byte2Char(final byte[] a, char[] s) {
+		final int length = a.length;
+		if (s == null) s = new char[countUTF8Chars(a)];
+		int b, c, d, t;
+
+		for (int i = 0, j = 0; i < length; i++) {
+			b = a[i] & 0xFF;
+			t = b >> 4;
+
+			if (t < 8) s[j++] = (char)b;
+			else if (t < 14) {
+				c = a[++i] & 0xFF;
+				if ((c & 0xC0) != 0x80) throw new IllegalStateException("Malformed internal UTF-8 encoding");
+				s[j++] = (char)(((b & 0x1F) << 6) | (c & 0x3F));
+			} else if (t < 15) {
+				c = a[++i] & 0xFF;
+				d = a[++i];
+				if ((c & 0xC0) != 0x80 || (d & 0xC0) != 0x80) throw new IllegalStateException("Malformed internal UTF-8 encoding");
+				s[j++] = (char)(((b & 0x0F) << 12) | ((c & 0x3F) << 6) | ((d & 0x3F) << 0));
+			} else {
+				// Surrogate pair (yuck!)
+				final String surrogatePair = new String(a, i, 4, Charsets.UTF_8);
+				s[j++] = surrogatePair.charAt(0);
+				s[j++] = surrogatePair.charAt(1);
+				i += 3;
+			}
+		}
+
+		return s;
+	}
+
+	@Override
+	public ObjectBigListIterator<MutableString> listIterator(final long k) {
+		return new ObjectBigListIterator<MutableString>() {
+			ObjectBigListIterator<?> i = utf8 ? byteFrontCodedBigList.listIterator(k) : charFrontCodedBigList.listIterator(k);
+
+			@Override
+			public boolean hasNext() {
+				return i.hasNext();
+			}
+
+			@Override
+			public boolean hasPrevious() {
+				return i.hasPrevious();
+			}
+
+			@Override
+			public MutableString next() {
+				return MutableString.wrap(utf8 ? byte2Char((byte[])i.next(), null) : (char[])i.next());
+			}
+
+			@Override
+			public MutableString previous() {
+				return MutableString.wrap(utf8 ? byte2Char((byte[])i.previous(), null) : (char[])i.previous());
+			}
+
+			@Override
+			public long nextIndex() {
+				return i.nextIndex();
+			}
+
+			@Override
+			public long previousIndex() {
+				return i.previousIndex();
+			}
+		};
+	}
+
+	@Override
+	public long size64() {
+		return utf8 ? byteFrontCodedBigList.size64() : charFrontCodedBigList.size64();
+	}
+
+	public static void main(final String[] arg) throws IOException, JSAPException, NoSuchMethodException {
+
+		final SimpleJSAP jsap = new SimpleJSAP(FrontCodedStringBigList.class.getName(), "Builds a front-coded string list reading from standard input a newline-separated ordered list of strings.", new Parameter[] {
+				new FlaggedOption("bufferSize", IntSizeStringParser.getParser(), "64Ki", JSAP.NOT_REQUIRED, 'b', "buffer-size", "The size of the I/O buffer used to read strings."),
+				new FlaggedOption("encoding", ForNameStringParser.getParser(Charset.class), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The file encoding."),
+				new FlaggedOption("ratio", IntSizeStringParser.getParser(), "4", JSAP.NOT_REQUIRED, 'r', "ratio", "The compression ratio."),
+				new Switch("utf8", 'u', "utf8", "Store the strings as UTF-8 byte arrays."),
+				new Switch("zipped", 'z', "zipped", "The string list is compressed in gzip format."),
+				new UnflaggedOption("frontCodedList", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised front-coded list.") });
+
+		final JSAPResult jsapResult = jsap.parse(arg);
+		if (jsap.messagePrinted()) return;
+
+		final int bufferSize = jsapResult.getInt("bufferSize");
+		final int ratio = jsapResult.getInt("ratio");
+		final boolean utf8 = jsapResult.getBoolean("utf8");
+		final boolean zipped = jsapResult.getBoolean("zipped");
+		final String listName = jsapResult.getString("frontCodedList");
+		final Charset encoding = (Charset)jsapResult.getObject("encoding");
+
+		final Logger logger = LoggerFactory.getLogger(FrontCodedStringBigList.class);
+		final ProgressLogger pl = new ProgressLogger(logger);
+		pl.displayFreeMemory = true;
+		pl.displayLocalSpeed = true;
+		pl.itemsName = "strings";
+		pl.start("Reading strings...");
+		final FrontCodedStringBigList frontCodedStringBigList = new FrontCodedStringBigList(new LineIterator(new FastBufferedReader(new InputStreamReader(zipped ? new GZIPInputStream(System.in) : System.in, encoding), bufferSize), pl), ratio, utf8);
+		pl.done();
+
+		logger.info("Writing front-coded list to file...");
+		BinIO.storeObject(frontCodedStringBigList, listName);
+		logger.info("Completed.");
+	}
+}


=====================================
src/it/unimi/dsi/big/util/PermutedFrontCodedStringBigList.java
=====================================
@@ -0,0 +1,153 @@
+package it.unimi.dsi.big.util;
+
+import java.io.IOException;
+import java.io.Serializable;
+
+import com.martiansoftware.jsap.JSAP;
+import com.martiansoftware.jsap.JSAPException;
+import com.martiansoftware.jsap.JSAPResult;
+import com.martiansoftware.jsap.Parameter;
+import com.martiansoftware.jsap.SimpleJSAP;
+import com.martiansoftware.jsap.Switch;
+import com.martiansoftware.jsap.UnflaggedOption;
+
+/*
+ * DSI utilities
+ *
+ * Copyright (C) 2002-2020 Sebastiano Vigna
+ *
+ *  This library is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU Lesser General Public License as published by the Free
+ *  Software Foundation; either version 3 of the License, or (at your option)
+ *  any later version.
+ *
+ *  This library is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+import it.unimi.dsi.Util;
+import it.unimi.dsi.fastutil.BigArrays;
+import it.unimi.dsi.fastutil.io.BinIO;
+import it.unimi.dsi.fastutil.io.TextIO;
+import it.unimi.dsi.fastutil.longs.LongBidirectionalIterator;
+import it.unimi.dsi.fastutil.longs.LongIterators;
+import it.unimi.dsi.fastutil.objects.AbstractObjectBigList;
+import it.unimi.dsi.fastutil.objects.ObjectBigListIterator;
+import it.unimi.dsi.lang.MutableString;
+import it.unimi.dsi.util.FrontCodedStringList;
+import it.unimi.dsi.util.PermutedFrontCodedStringList;
+
+/**
+ * A {@link it.unimi.dsi.big.util.FrontCodedStringBigList} whose indices are permuted.
+ *
+ * <P>
+ * This class is functionally identical to {@link PermutedFrontCodedStringList}, except for the
+ * larger size allowed.
+ *
+ * @see FrontCodedStringList
+ * @see PermutedFrontCodedStringList
+ */
+
+public class PermutedFrontCodedStringBigList extends AbstractObjectBigList<CharSequence> implements Serializable {
+
+	public static final long serialVersionUID = 1;
+
+	/** The underlying front-coded string list. */
+	final protected FrontCodedStringBigList frontCodedStringBigList;
+	/** The permutation. */
+	final protected long[][] permutation;
+
+	/**
+	 * Creates a new permuted front-coded string list using a given front-coded string list and
+	 * permutation.
+	 *
+	 * @param frontCodedStringBihList the underlying front-coded string big list.
+	 * @param permutation the underlying permutation (a {@linkplain BigArrays big array} of longs).
+	 */
+
+	public PermutedFrontCodedStringBigList(final FrontCodedStringBigList frontCodedStringBihList, final long[][] permutation) {
+		if (frontCodedStringBihList.size64() != BigArrays.length(permutation)) throw new IllegalArgumentException("The front-coded string big list contains " + frontCodedStringBihList.size64() + " strings, but the permutation is on " + BigArrays.length(permutation) + " elements.");
+		this.frontCodedStringBigList = frontCodedStringBihList;
+		this.permutation = permutation;
+	}
+
+	@Override
+	public MutableString get(final long index) {
+		return frontCodedStringBigList.get(BigArrays.get(permutation, index));
+	}
+
+	/** Returns the element at the specified position in this front-coded list by storing it in a mutable string.
+	 *
+	 * @param index an index in the list.
+	 * @param s a mutable string that will contain the string at the specified position.
+	 */
+	public void get(final long index, final MutableString s) {
+		frontCodedStringBigList.get(BigArrays.get(permutation, index), s);
+	}
+
+	@Override
+	public long size64() {
+		return frontCodedStringBigList.size64();
+	}
+
+	@Override
+	public ObjectBigListIterator<CharSequence> listIterator(final long k) {
+		return new ObjectBigListIterator<CharSequence>() {
+			final LongBidirectionalIterator i = LongIterators.fromTo(0, frontCodedStringBigList.size64());
+			long p = 0;
+
+			@Override
+			public boolean hasNext() { return i.hasNext(); }
+			@Override
+			public boolean hasPrevious() { return i.hasPrevious(); }
+			@Override
+			public CharSequence next() {
+				p++;
+				return frontCodedStringBigList.get(BigArrays.get(permutation, i.nextLong()));
+			}
+			@Override
+			public CharSequence previous() {
+				p--;
+				return frontCodedStringBigList.get(BigArrays.get(permutation, i.previousLong()));
+			}
+			@Override
+			public long nextIndex() {
+				return p;
+			}
+			@Override
+			public long previousIndex() {
+				return p - 1;
+			}
+		};
+	}
+
+	public static void main(final String[] arg) throws IOException, ClassNotFoundException, JSAPException {
+
+		final SimpleJSAP jsap = new SimpleJSAP(PermutedFrontCodedStringList.class.getName(), "Builds a permuted front-coded list of strings using a given front-coded string list and a permutation (either in text or binary format).",
+				new Parameter[] {
+						new Switch("invert", 'i', "invert", "Invert permutation before creating the permuted list."),
+						new Switch("text", 't', "text", "The permutation is a text file."),
+						new UnflaggedOption("list", JSAP.STRING_PARSER, JSAP.REQUIRED, "A front-coded string big list."),
+						new UnflaggedOption("permutation", JSAP.STRING_PARSER, JSAP.REQUIRED, "A permutation for the indices of the list (longs in DataInput format, unless you specify --text)."),
+						new UnflaggedOption("permutedList", JSAP.STRING_PARSER, JSAP.REQUIRED, "A the filename for the resulting permuted big list."),
+			});
+
+		final JSAPResult jsapResult = jsap.parse(arg);
+		if (jsap.messagePrinted()) return;
+
+		final String permutationFile = jsapResult.getString("permutation");
+		final long[][] permutation = jsapResult.userSpecified("text") ? LongIterators.unwrapBig(TextIO.asLongIterator(permutationFile)) : BinIO.loadLongsBig(permutationFile);
+		if (jsapResult.getBoolean("invert")) Util.invertPermutationInPlace(permutation);
+
+		BinIO.storeObject(
+				new PermutedFrontCodedStringBigList((FrontCodedStringBigList)BinIO.loadObject(jsapResult.getString("list")), permutation),
+				jsapResult.getString("permutedList")
+		);
+	}
+}


=====================================
src/it/unimi/dsi/util/PermutedFrontCodedStringList.java
=====================================
@@ -94,7 +94,7 @@ public class PermutedFrontCodedStringList extends AbstractObjectList<CharSequenc
 	}
 
 	@Override
-	public CharSequence get(final int index) {
+	public MutableString get(final int index) {
 		return frontCodedStringList.get(permutation[index]);
 	}
 
@@ -133,16 +133,16 @@ public class PermutedFrontCodedStringList extends AbstractObjectList<CharSequenc
 
 	public static void main(final String[] arg) throws IOException, ClassNotFoundException, JSAPException {
 
-		SimpleJSAP jsap = new SimpleJSAP(PermutedFrontCodedStringList.class.getName(), "Builds a permuted front-coded list of strings using a given front-coded string list and a permutation (either in text or binary format).",
+		final SimpleJSAP jsap = new SimpleJSAP(PermutedFrontCodedStringList.class.getName(), "Builds a permuted front-coded list of strings using a given front-coded string list and a permutation (either in text or binary format).",
 				new Parameter[] {
-					new Switch("invert", 'i', "invert", "Invert permutation before creating the permuted list."),
-					new Switch("text", 't', "text", "The permutation is a text file."),
-					new UnflaggedOption("list", JSAP.STRING_PARSER, JSAP.REQUIRED, "A front-coded string list."),
-					new UnflaggedOption("permutation", JSAP.STRING_PARSER, JSAP.REQUIRED, "A permutation for the indices of the list (in DataInput format, unless you specify --text)."),
-					new UnflaggedOption("permutedList", JSAP.STRING_PARSER, JSAP.REQUIRED, "A the filename for the resulting permuted list."),
+						new Switch("invert", 'i', "invert", "Invert permutation before creating the permuted list."),
+						new Switch("text", 't', "text", "The permutation is a text file."),
+						new UnflaggedOption("list", JSAP.STRING_PARSER, JSAP.REQUIRED, "A front-coded string list."),
+						new UnflaggedOption("permutation", JSAP.STRING_PARSER, JSAP.REQUIRED, "A permutation for the indices of the list (ints in DataInput format, unless you specify --text)."),
+						new UnflaggedOption("permutedList", JSAP.STRING_PARSER, JSAP.REQUIRED, "A the filename for the resulting permuted list."),
 			});
 
-		JSAPResult jsapResult = jsap.parse(arg);
+		final JSAPResult jsapResult = jsap.parse(arg);
 		if (jsap.messagePrinted()) return;
 
 		final String permutationFile = jsapResult.getString("permutation");


=====================================
test/it/unimi/dsi/UtilTest.java
=====================================
@@ -24,15 +24,17 @@ package it.unimi.dsi;
 import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
-import it.unimi.dsi.fastutil.ints.IntArrayList;
-import it.unimi.dsi.fastutil.ints.IntArrays;
-import it.unimi.dsi.fastutil.longs.LongBigArrays;
-import it.unimi.dsi.util.SplitMix64Random;
 
 import java.util.Collections;
 
 import org.junit.Test;
 
+import it.unimi.dsi.fastutil.ints.IntArrayList;
+import it.unimi.dsi.fastutil.ints.IntArrays;
+import it.unimi.dsi.fastutil.longs.LongBigArrays;
+import it.unimi.dsi.util.SplitMix64Random;
+import it.unimi.dsi.util.XoRoShiRo128PlusPlusRandom;
+
 public class UtilTest {
 
 	@Test
@@ -43,7 +45,7 @@ public class UtilTest {
 		try {
 			Util.formatBinarySize(6);
 		}
-		catch(IllegalArgumentException e) {
+		catch(final IllegalArgumentException e) {
 			ok = true;
 		}
 		assertTrue(ok);
@@ -128,4 +130,46 @@ public class UtilTest {
 			assertArrayEquals(q, p);
 		}
 	}
+
+	@Test
+	public void testComposePermutation() {
+		final XoRoShiRo128PlusPlusRandom r = new XoRoShiRo128PlusPlusRandom(0);
+		for (final int s : new int[] { 10, 100, 1000 }) {
+			final int[] identity = Util.identity(s);
+			final int[] shuffle = Util.identity(s);
+			IntArrays.shuffle(shuffle, r);
+			assertArrayEquals(shuffle, Util.composePermutations(identity, shuffle));
+			assertArrayEquals(shuffle, Util.composePermutations(shuffle, identity));
+			assertArrayEquals(identity, Util.composePermutations(shuffle, Util.invertPermutation(shuffle)));
+			assertArrayEquals(identity, Util.composePermutations(Util.invertPermutation(shuffle), shuffle));
+
+			final int[] shuffle2 = Util.identity(s);
+			IntArrays.shuffle(shuffle2, r);
+
+			final int[] result = Util.composePermutations(shuffle, shuffle2);
+			Util.composePermutationsInPlace(shuffle, shuffle2);
+			assertArrayEquals(result, shuffle2);
+		}
+	}
+
+	@Test
+	public void testComposePermutationBig() {
+		final XoRoShiRo128PlusPlusRandom r = new XoRoShiRo128PlusPlusRandom(0);
+		for (final long s : new int[] { 10, 100, 1000 }) {
+			final long[][] identity = Util.identity(s);
+			final long[][] shuffle = Util.identity(s);
+			LongBigArrays.shuffle(shuffle, r);
+			assertArrayEquals(shuffle, Util.composePermutations(identity, shuffle));
+			assertArrayEquals(shuffle, Util.composePermutations(shuffle, identity));
+			assertArrayEquals(identity, Util.composePermutations(shuffle, Util.invertPermutation(shuffle)));
+			assertArrayEquals(identity, Util.composePermutations(Util.invertPermutation(shuffle), shuffle));
+
+			final long[][] shuffle2 = Util.identity(s);
+			LongBigArrays.shuffle(shuffle2, r);
+
+			final long[][] result = Util.composePermutations(shuffle, shuffle2);
+			Util.composePermutationsInPlace(shuffle, shuffle2);
+			assertArrayEquals(result, shuffle2);
+		}
+	}
 }


=====================================
test/it/unimi/dsi/big/util/FrontCodedStringBigListTest.java
=====================================
@@ -0,0 +1,100 @@
+package it.unimi.dsi.big.util;
+
+/*
+ * DSI utilities
+ *
+ * Copyright (C) 2010-2020 Sebastiano Vigna
+ *
+ *  This library is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU Lesser General Public License as published by the Free
+ *  Software Foundation; either version 3 of the License, or (at your option)
+ *  any later version.
+ *
+ *  This library is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+import static org.junit.Assert.assertEquals;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.SplittableRandom;
+
+import org.junit.Test;
+
+import it.unimi.dsi.lang.MutableString;
+
+public class FrontCodedStringBigListTest {
+
+	@Test
+	public void testLargeSet() {
+		final List<String> c = Arrays.asList(TernaryIntervalSearchTreeTest.WORDS.clone());
+		final MutableString s = new MutableString();
+		for(int p = 0; p < 2; p++) {
+			for(final boolean utf8: new boolean[] { false, true })
+				for(int ratio = 1; ratio < 8; ratio++) {
+					final FrontCodedStringBigList fcl = new FrontCodedStringBigList(c.iterator(), ratio, utf8);
+					for (int i = 0; i < fcl.size64(); i++) {
+						assertEquals(Integer.toString(i), c.get(i), fcl.get(i).toString());
+						fcl.get(i, s);
+						assertEquals(Integer.toString(i), c.get(i), s.toString());
+					}
+				}
+
+			Collections.sort(c);
+		}
+	}
+
+	@Test
+	public void testSurrogatePairs() {
+		final List<String> c = Arrays.asList(new String[] { "a", "AB\uE000AB", "\uD800\uDF02", "\uD800\uDF03", "b" });
+		for(final boolean utf8: new boolean[] { false, true })
+			for(int ratio = 1; ratio < 8; ratio++) {
+				final FrontCodedStringBigList fcl = new FrontCodedStringBigList(c.iterator(), ratio, utf8);
+				for (int i = 0; i < fcl.size64(); i++) {
+					assertEquals(Integer.toString(i), c.get(i), fcl.get(i).toString());
+				}
+			}
+	}
+
+	// @Ignore("Needs a lot of memory")
+	@Test
+	public void testbig() {
+		final long size = (1L << 31) + 10000;
+		final FrontCodedStringBigList byteArrayFrontCodedBigList = new FrontCodedStringBigList(new Iterator<String>() {
+			SplittableRandom r = new SplittableRandom(0);
+			long i = 0;
+
+			@Override
+			public boolean hasNext() {
+				return i < size;
+			}
+
+			@Override
+			public String next() {
+				i++;
+				return new String(new byte[] { (byte)r.nextLong() }, StandardCharsets.ISO_8859_1);
+			}
+		}, 10, true);
+		SplittableRandom r = new SplittableRandom(0);
+		for (long i = 0; i < size; i++) {
+			assertEquals(new String(new byte[] { (byte)r.nextLong() }, StandardCharsets.ISO_8859_1), byteArrayFrontCodedBigList.get(i));
+		}
+		r = new SplittableRandom(0);
+		final MutableString s = new MutableString();
+		for (long i = 0; i < size; i++) {
+			byteArrayFrontCodedBigList.get(i, s);
+			assertEquals(new String(new byte[] { (byte)r.nextLong() }, StandardCharsets.ISO_8859_1), s);
+		}
+	}
+
+}



View it on GitLab: https://salsa.debian.org/java-team/libdsiutils-java/-/commit/240adfa801e9e99460ba5f919960166b879d2b1e

-- 
View it on GitLab: https://salsa.debian.org/java-team/libdsiutils-java/-/commit/240adfa801e9e99460ba5f919960166b879d2b1e
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/pkg-java-commits/attachments/20201022/c742ea82/attachment.html>


More information about the pkg-java-commits mailing list