[Git][java-team/libdsiutils-java][upstream] New upstream version 2.6.7
Pierre Gruet
gitlab at salsa.debian.org
Thu Oct 22 21:02:26 BST 2020
Pierre Gruet pushed to branch upstream at Debian Java Maintainers / libdsiutils-java
Commits:
240adfa8 by Pierre Gruet at 2020-10-21T18:53:43+02:00
New upstream version 2.6.7
- - - - -
8 changed files:
- CHANGES
- build.properties
- src/it/unimi/dsi/Util.java
- + src/it/unimi/dsi/big/util/FrontCodedStringBigList.java
- + src/it/unimi/dsi/big/util/PermutedFrontCodedStringBigList.java
- src/it/unimi/dsi/util/PermutedFrontCodedStringList.java
- test/it/unimi/dsi/UtilTest.java
- + test/it/unimi/dsi/big/util/FrontCodedStringBigListTest.java
Changes:
=====================================
CHANGES
=====================================
@@ -1,3 +1,9 @@
+2.6.7
+
+- New methods for composing permutations.
+
+- Big versions of front-coded string lists.
+
2.6.6
- Fixed SLF4J dependencies (again).
=====================================
build.properties
=====================================
@@ -1,4 +1,4 @@
-version=2.6.6
+version=2.6.7
build.sysclasspath=ignore
@@ -19,7 +19,7 @@ instrumented=instr
j2se.apiurl=https://docs.oracle.com/javase/8/docs/api/
fastutil.apiurl=http://fastutil.di.unimi.it/docs/
jsap.apiurl=http://www.martiansoftware.com/jsap/doc/javadoc/
-junit.apiurl=http://junit.sourceforge.net/javadoc_40/
+junit.apiurl=https://junit.org/junit4/javadoc/latest/
log4j.apiurl=http://logging.apache.org/log4j/1.2/apidocs/
slf4j.apiurl=http://www.slf4j.org/apidocs/
commons-configuration.apiurl=http://commons.apache.org/proper/commons-configuration/javadocs/v1.10/apidocs/
=====================================
src/it/unimi/dsi/Util.java
=====================================
@@ -349,6 +349,72 @@ public final class Util {
return identity(new int[n]);
}
+ /**
+  * Composes two permutations of [0 .. <var>n</var>), each given as an array of <var>n</var>
+  * distinct integers, storing the outcome in a caller-supplied array.
+  *
+  * <p>
+  * <strong>Warning</strong>: no check is performed on the arguments; if they are not permutations,
+  * essentially anything can happen.
+  *
+  * @param p the first permutation.
+  * @param q the second permutation.
+  * @param r the array that receives the composition, filled so that {@code r[i] = q[p[i]]}.
+  * @return {@code r}.
+  */
+ public static int[] composePermutations(final int[] p, final int[] q, final int[] r) {
+     int i = 0;
+     while (i < p.length) {
+         final int image = p[i];
+         r[i] = q[image];
+         i++;
+     }
+     return r;
+ }
+
+ /**
+  * Composes two permutations of [0 .. <var>n</var>), each given as an array of <var>n</var>
+  * distinct integers, returning the outcome in a newly allocated array.
+  *
+  * <p>
+  * <strong>Warning</strong>: no check is performed on the arguments; if they are not permutations,
+  * essentially anything can happen.
+  *
+  * @param p the first permutation.
+  * @param q the second permutation.
+  * @return a new array {@code r}, as long as {@code p}, such that {@code r[i] = q[p[i]]}.
+  * @see Util#composePermutations(int[], int[], int[])
+  */
+ public static int[] composePermutations(final int[] p, final int[] q) {
+     // Every position of the destination is overwritten, so a blank array of the same length suffices.
+     return composePermutations(p, q, new int[p.length]);
+ }
+
+ /**
+  * Computes in place the composition of two permutations expressed as arrays of <var>n</var>
+  * distinct integers in [0 .. <var>n</var>).
+  *
+  * <p>
+  * On return, {@code q[i]} holds the value that {@code q[p[i]]} had on entry. No auxiliary array is
+  * used: the method walks each cycle of {@code p}, and every entry of {@code q} that has already
+  * been rewritten is temporarily stored as {@code -x - 1} (a negative value) so that it can be told
+  * apart from entries still to be processed. A final pass restores all entries to their
+  * nonnegative form.
+  *
+  * <p>
+  * <strong>Warning</strong>: if the arguments are not permutations, essentially anything can happen.
+  *
+  * @param p the first permutation.
+  * @param q the second permutation, which will contain the result at the end.
+  * @return {@code q}.
+  * @see Util#composePermutations(int[], int[], int[])
+  */
+ public static int[] composePermutationsInPlace(final int[] p, final int[] q) {
+ final int length = p.length;
+ for (int i = 0; i < length; i++) {
+ // A negative entry marks a position already rewritten while walking an earlier cycle.
+ if (q[i] < 0) continue;
+ final int firstIndex = i;
+ // Saved because q[i] is overwritten before the cycle closes and needs this value.
+ final int firstElement = q[i];
+ assert firstElement >= 0;
+ int j = i;
+ // Follow the cycle of p through i; q[p[j]] is still untouched when it is read.
+ while (p[j] != firstIndex) {
+ assert q[p[j]] >= 0;
+ q[j] = -q[p[j]] - 1;
+ j = p[j];
+ }
+ // Here p[j] == firstIndex, so the last position of the cycle receives the saved q[firstIndex].
+ q[j] = -firstElement - 1;
+ }
+ // Undo the -x - 1 marking on every entry.
+ for (int i = 0; i < length; i++) q[i] = -q[i] - 1;
+ return q;
+ }
/** Computes in place the inverse of a permutation expressed
* as a {@linkplain BigArrays big array} of <var>n</var> distinct long integers in [0 .. <var>n</var>).
@@ -440,4 +506,79 @@ public final class Util {
public static long[][] identity(final long n) {
return identity(LongBigArrays.newBigArray(n));
}
+
+ /**
+  * Composes two permutations of [0 .. <var>n</var>), each given as a {@linkplain BigArrays big
+  * array} of <var>n</var> distinct long integers, storing the outcome in a caller-supplied big
+  * array.
+  *
+  * <p>
+  * <strong>Warning</strong>: no check is performed on the arguments; if they are not permutations,
+  * essentially anything can happen.
+  *
+  * @param p the first permutation.
+  * @param q the second permutation.
+  * @param r the big array that receives the composition, filled so that {@code r[i] = q[p[i]]}.
+  * @return {@code r}.
+  */
+ public static long[][] composePermutations(final long[][] p, final long[][] q, final long[][] r) {
+     final long n = length(p);
+     long i = 0;
+     while (i < n) {
+         final long image = get(p, i);
+         set(r, i, get(q, image));
+         i++;
+     }
+     return r;
+ }
+
+ /**
+  * Composes two permutations of [0 .. <var>n</var>), each given as a {@linkplain BigArrays big
+  * array} of <var>n</var> distinct long integers, returning the outcome in a newly allocated big
+  * array.
+  *
+  * <p>
+  * <strong>Warning</strong>: no check is performed on the arguments; if they are not permutations,
+  * essentially anything can happen.
+  *
+  * @param p the first permutation.
+  * @param q the second permutation.
+  * @return a new big array {@code r}, as long as {@code p}, such that {@code r[i] = q[p[i]]}.
+  * @see Util#composePermutations(long[][], long[][], long[][])
+  */
+ public static long[][] composePermutations(final long[][] p, final long[][] q) {
+     final long n = length(p);
+     return composePermutations(p, q, LongBigArrays.newBigArray(n));
+ }
+
+ /**
+  * Computes in place the composition of two permutations expressed as {@linkplain BigArrays big
+  * arrays} of <var>n</var> distinct long integers in [0 .. <var>n</var>).
+  *
+  * <p>
+  * On return, element {@code i} of {@code q} holds the value that element {@code p[i]} of
+  * {@code q} had on entry. The method walks each cycle of {@code p}; entries of {@code q} that have
+  * already been rewritten are temporarily stored as {@code -x - 1} (a negative value), and a final
+  * pass over every segment of {@code q} restores them.
+  *
+  * <p>
+  * <strong>Warning</strong>: if the arguments are not permutations, essentially anything can happen.
+  *
+  * @param p the first permutation.
+  * @param q the second permutation, which will contain the result at the end.
+  * @return {@code q}.
+  * @see Util#composePermutations(long[][], long[][], long[][])
+  */
+ public static long[][] composePermutationsInPlace(final long[][] p, final long[][] q) {
+ final long length = length(p);
+ for (long i = 0; i < length; i++) {
+ // A negative entry marks a position already rewritten while walking an earlier cycle.
+ if (get(q, i) < 0) continue;
+ final long firstIndex = i;
+ // Saved because position i is overwritten before the cycle closes and needs this value.
+ final long firstElement = get(q, i);
+ assert firstElement >= 0;
+ long j = i;
+ // Follow the cycle of p through i; the entry of q at p[j] is still untouched when it is read.
+ while (get(p, j) != firstIndex) {
+ assert get(q, get(p, j)) >= 0;
+ set(q, j, -get(q, get(p, j)) - 1);
+ j = get(p, j);
+ }
+ // Here p[j] == firstIndex, so the last position of the cycle receives the saved first element.
+ set(q, j, -firstElement - 1);
+ }
+
+ // Undo the -x - 1 marking, segment by segment; this touches every element of every segment of q.
+ for (final long[] element : q) {
+ final long[] t = element;
+ final int l = t.length;
+ for(int d = 0; d < l; d++) {
+ t[d] = -t[d] - 1;
+ }
+ }
+ return q;
+ }
+
}
=====================================
src/it/unimi/dsi/big/util/FrontCodedStringBigList.java
=====================================
@@ -0,0 +1,327 @@
+package it.unimi.dsi.big.util;
+
+/*
+ * DSI utilities
+ *
+ * Copyright (C) 2002-2020 Sebastiano Vigna
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Serializable;
+import java.nio.charset.Charset;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.RandomAccess;
+import java.util.zip.GZIPInputStream;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Charsets;
+import com.martiansoftware.jsap.FlaggedOption;
+import com.martiansoftware.jsap.JSAP;
+import com.martiansoftware.jsap.JSAPException;
+import com.martiansoftware.jsap.JSAPResult;
+import com.martiansoftware.jsap.Parameter;
+import com.martiansoftware.jsap.SimpleJSAP;
+import com.martiansoftware.jsap.Switch;
+import com.martiansoftware.jsap.UnflaggedOption;
+import com.martiansoftware.jsap.stringparsers.ForNameStringParser;
+import com.martiansoftware.jsap.stringparsers.IntSizeStringParser;
+
+import it.unimi.dsi.fastutil.bytes.ByteArrayFrontCodedBigList;
+import it.unimi.dsi.fastutil.chars.CharArrayFrontCodedBigList;
+import it.unimi.dsi.fastutil.io.BinIO;
+import it.unimi.dsi.fastutil.objects.AbstractObjectBigList;
+import it.unimi.dsi.fastutil.objects.ObjectBigListIterator;
+import it.unimi.dsi.fastutil.objects.ObjectIterator;
+import it.unimi.dsi.io.FastBufferedReader;
+import it.unimi.dsi.io.LineIterator;
+import it.unimi.dsi.lang.MutableString;
+import it.unimi.dsi.logging.ProgressLogger;
+import it.unimi.dsi.util.FrontCodedStringList;
+
+/**
+ * Compact storage of strings using front-coding compression (a.k.a. compression by
+ * prefix omission).
+ *
+ * <P>
+ * This class is functionally identical to {@link FrontCodedStringList}, except for the larger size
+ * allowed.
+ *
+ * @see FrontCodedStringList
+ */
+
+public class FrontCodedStringBigList extends AbstractObjectBigList<MutableString> implements RandomAccess, Serializable {
+
+ public static final long serialVersionUID = 1;
+
+ /** The underlying {@link ByteArrayFrontCodedBigList}, or {@code null}. */
+ protected final ByteArrayFrontCodedBigList byteFrontCodedBigList;
+
+ /** The underlying {@link CharArrayFrontCodedBigList}, or {@code null}. */
+ protected final CharArrayFrontCodedBigList charFrontCodedBigList;
+
+ /** Whether this front-coded list is UTF-8 encoded. */
+ protected final boolean utf8;
+
+ /**
+  * Builds a front-coded string big list from the character sequences returned by an iterator.
+  *
+  * <p>
+  * Depending on {@code utf8}, each sequence is handed to the underlying front-coded list either as
+  * a UTF-8 byte array or as a character array; only the corresponding underlying list is created,
+  * and the other field is set to {@code null}.
+  *
+  * @param words an iterator returning {@linkplain CharSequence character sequences}.
+  * @param ratio the desired ratio.
+  * @param utf8 if true, the strings will be stored as UTF-8 byte arrays.
+  */
+ public FrontCodedStringBigList(final Iterator<? extends CharSequence> words, final int ratio, final boolean utf8) {
+     this.utf8 = utf8;
+     if (!utf8) {
+         byteFrontCodedBigList = null;
+         charFrontCodedBigList = new CharArrayFrontCodedBigList(new ObjectIterator<char[]>() {
+             @Override
+             public boolean hasNext() {
+                 return words.hasNext();
+             }
+
+             @Override
+             public char[] next() {
+                 final CharSequence sequence = words.next();
+                 final int n = sequence.length();
+                 final char[] chars = new char[n];
+                 for (int pos = n - 1; pos >= 0; pos--) chars[pos] = sequence.charAt(pos);
+                 return chars;
+             }
+         }, ratio);
+     } else {
+         charFrontCodedBigList = null;
+         byteFrontCodedBigList = new ByteArrayFrontCodedBigList(new ObjectIterator<byte[]>() {
+             @Override
+             public boolean hasNext() {
+                 return words.hasNext();
+             }
+
+             @SuppressWarnings("null")
+             @Override
+             public byte[] next() {
+                 final String string = words.next().toString();
+                 return string.getBytes(Charsets.UTF_8);
+             }
+         }, ratio);
+     }
+ }
+
+ /**
+  * Creates a new front-coded string list containing the character sequences contained in the given
+  * collection.
+  *
+  * <p>
+  * This constructor delegates to the iterator-based constructor, passing {@code c.iterator()}, so
+  * the strings are stored in the iteration order of the collection.
+  *
+  * @param c a collection containing {@linkplain CharSequence character sequences}.
+  * @param ratio the desired ratio.
+  * @param utf8 if true, the strings will be stored as UTF-8 byte arrays.
+  */
+ public FrontCodedStringBigList(final Collection<? extends CharSequence> c, final int ratio, final boolean utf8) {
+ this(c.iterator(), ratio, utf8);
+ }
+
+ /**
+  * Returns whether this front-coded string list is storing its strings as UTF-8 encoded bytes.
+  *
+  * <p>
+  * The value is the {@code utf8} flag that was passed at construction time.
+  *
+  * @return true if this front-coded string list is keeping its data as an array of UTF-8 encoded
+  * bytes.
+  */
+ public boolean utf8() {
+ return utf8;
+ }
+
+ /**
+  * Returns the ratio of the underlying front-coded list.
+  *
+  * <p>
+  * The value is read from the byte-based list if this list is UTF-8 encoded, and from the
+  * character-based list otherwise.
+  *
+  * @return the ratio of the underlying front-coded list.
+  */
+ public int ratio() {
+     if (utf8) return byteFrontCodedBigList.ratio();
+     return charFrontCodedBigList.ratio();
+ }
+
+ /**
+  * Returns the element at the specified position in this front-coded list as a mutable string.
+  *
+  * @param index an index in the list.
+  * @return a {@link MutableString} wrapping the characters of the string at the specified
+  * position. The string may be freely modified.
+  */
+ @Override
+ public MutableString get(final long index) {
+     final char[] chars;
+     if (utf8) chars = byte2Char(byteFrontCodedBigList.getArray(index), null);
+     else chars = charFrontCodedBigList.getArray(index);
+     return MutableString.wrap(chars);
+ }
+
+ /**
+  * Returns the element at the specified position in this front-coded list by storing it in a mutable
+  * string.
+  *
+  * <p>
+  * The previous content of {@code s} is overwritten, and its length is set to the length of the
+  * retrieved string.
+  *
+  * @param index an index in the list.
+  * @param s a mutable string that will contain the string at the specified position.
+  */
+ public void get(final long index, final MutableString s) {
+ if (utf8) {
+ final byte[] a = byteFrontCodedBigList.getArray(index);
+ // Set the final length first (presumably this also makes the backing array large enough), then decode into it.
+ s.length(countUTF8Chars(a));
+ byte2Char(a, s.array());
+ } else {
+ // Make the whole current backing array part of the string, and try to extract into it.
+ s.length(s.array().length);
+ int res = charFrontCodedBigList.get(index, s.array());
+ if (res < 0) {
+ // NOTE(review): a negative result presumably reports that the array was too short by -res
+ // characters (confirm against the fastutil documentation); lengthen by that amount and retry.
+ s.length(s.array().length - res);
+ res = charFrontCodedBigList.get(index, s.array());
+ } else s.length(res);
+ }
+ }
+
+ /*
+  * The following methods are highly optimized UTF-8 converters exploiting the fact that since it was
+  * ourselves in the first place who created the coding, we can be sure it is correct.
+  */
+
+ /**
+  * Counts the UTF-16 characters needed to decode a UTF-8 byte array.
+  *
+  * <p>
+  * Only the upper four bits of the first byte of each sequence are examined: a value below 8
+  * denotes a single-byte sequence, a value below 14 a two-byte sequence, 14 a three-byte sequence
+  * and 15 a four-byte sequence. The first three kinds decode to one character each; a four-byte
+  * sequence decodes to a surrogate pair, that is, two characters. No validation is performed.
+  *
+  * @param a a UTF-8 encoded byte array.
+  * @return the number of {@code char} values that decoding {@code a} produces.
+  */
+ protected static int countUTF8Chars(final byte[] a) {
+     final int length = a.length;
+     int result = 0;
+     // The loop increment consumes the first byte of each sequence; each branch skips the rest.
+     for (int i = 0; i < length; i++) {
+         final int b = (a[i] & 0xFF) >> 4;
+         if (b < 8) result++;
+         else if (b < 14) {
+             result++;
+             i++;
+         } else if (b < 15) {
+             result++;
+             i += 2;
+         } else {
+             // Surrogate pair (yuck!): four bytes in all, so three remain to be skipped.
+             // Skipping four would swallow the first byte of the following sequence.
+             result += 2;
+             i += 3;
+         }
+     }
+
+     return result;
+ }
+
+ /**
+  * Decodes a UTF-8 byte array into UTF-16 characters.
+  *
+  * <p>
+  * The upper four bits of the first byte of each sequence select the decoding branch: a value
+  * below 8 is copied as a single character, a value below 14 starts a two-byte sequence, 14 starts
+  * a three-byte sequence, and 15 starts a four-byte sequence, which is decoded through
+  * {@link String} into a surrogate pair.
+  *
+  * @param a a UTF-8 encoded byte array.
+  * @param s the array that will receive the characters, or {@code null} to have a new array of
+  * {@link #countUTF8Chars(byte[])} characters allocated.
+  * @return {@code s}, or the newly allocated array if {@code s} was {@code null}.
+  * @throws IllegalStateException if a continuation byte of a two- or three-byte sequence does not
+  * have the form {@code 10xxxxxx}.
+  */
+ protected static char[] byte2Char(final byte[] a, char[] s) {
+ final int length = a.length;
+ if (s == null) s = new char[countUTF8Chars(a)];
+ int b, c, d, t;
+
+ // i scans the bytes of a, j is the next free position in s.
+ for (int i = 0, j = 0; i < length; i++) {
+ b = a[i] & 0xFF;
+ t = b >> 4;
+
+ if (t < 8) s[j++] = (char)b;
+ else if (t < 14) {
+ c = a[++i] & 0xFF;
+ if ((c & 0xC0) != 0x80) throw new IllegalStateException("Malformed internal UTF-8 encoding");
+ s[j++] = (char)(((b & 0x1F) << 6) | (c & 0x3F));
+ } else if (t < 15) {
+ c = a[++i] & 0xFF;
+ // Not masked with 0xFF: d is only read through the masks 0xC0 and 0x3F below, so sign extension is harmless.
+ d = a[++i];
+ if ((c & 0xC0) != 0x80 || (d & 0xC0) != 0x80) throw new IllegalStateException("Malformed internal UTF-8 encoding");
+ s[j++] = (char)(((b & 0x0F) << 12) | ((c & 0x3F) << 6) | ((d & 0x3F) << 0));
+ } else {
+ // Surrogate pair (yuck!)
+ final String surrogatePair = new String(a, i, 4, Charsets.UTF_8);
+ s[j++] = surrogatePair.charAt(0);
+ s[j++] = surrogatePair.charAt(1);
+ // Three more bytes belong to this sequence; the loop increment accounts for the first one.
+ i += 3;
+ }
+ }
+
+ return s;
+ }
+
+ /**
+  * Returns a big-list iterator over the strings of this list, starting at the given position.
+  *
+  * <p>
+  * The returned iterator delegates to the iterator of the underlying front-coded list and wraps
+  * each array it returns into a {@link MutableString}, decoding it from UTF-8 first when this list
+  * is UTF-8 encoded.
+  *
+  * @param k the starting position, passed on to the underlying list iterator.
+  * @return a big-list iterator over the strings of this list.
+  */
+ @Override
+ public ObjectBigListIterator<MutableString> listIterator(final long k) {
+     return new ObjectBigListIterator<MutableString>() {
+         /** The iterator over the raw (byte or character) arrays of the underlying list. */
+         final ObjectBigListIterator<?> delegate = utf8 ? byteFrontCodedBigList.listIterator(k) : charFrontCodedBigList.listIterator(k);
+
+         @Override
+         public boolean hasNext() {
+             return delegate.hasNext();
+         }
+
+         @Override
+         public boolean hasPrevious() {
+             return delegate.hasPrevious();
+         }
+
+         @Override
+         public MutableString next() {
+             final Object array = delegate.next();
+             if (utf8) return MutableString.wrap(byte2Char((byte[])array, null));
+             return MutableString.wrap((char[])array);
+         }
+
+         @Override
+         public MutableString previous() {
+             final Object array = delegate.previous();
+             if (utf8) return MutableString.wrap(byte2Char((byte[])array, null));
+             return MutableString.wrap((char[])array);
+         }
+
+         @Override
+         public long nextIndex() {
+             return delegate.nextIndex();
+         }
+
+         @Override
+         public long previousIndex() {
+             return delegate.previousIndex();
+         }
+     };
+ }
+
+ /**
+  * Returns the number of strings in this list, as reported by the underlying front-coded list.
+  *
+  * @return the number of strings in this list.
+  */
+ @Override
+ public long size64() {
+     if (utf8) return byteFrontCodedBigList.size64();
+     return charFrontCodedBigList.size64();
+ }
+
+ /**
+  * Command-line entry point: reads newline-separated strings from standard input, builds a
+  * front-coded string big list from them and serialises it to the file named on the command line.
+  *
+  * @param arg the command-line arguments, parsed with JSAP (see the option definitions below).
+  * @throws IOException declared for the stream construction and the serialisation of the list.
+  * @throws JSAPException declared for the construction of the command-line parser.
+  * @throws NoSuchMethodException declared by this method; the visible code does not show which
+  * call requires it.
+  */
+ public static void main(final String[] arg) throws IOException, JSAPException, NoSuchMethodException {
+
+ final SimpleJSAP jsap = new SimpleJSAP(FrontCodedStringBigList.class.getName(), "Builds a front-coded string list reading from standard input a newline-separated ordered list of strings.", new Parameter[] {
+ new FlaggedOption("bufferSize", IntSizeStringParser.getParser(), "64Ki", JSAP.NOT_REQUIRED, 'b', "buffer-size", "The size of the I/O buffer used to read strings."),
+ new FlaggedOption("encoding", ForNameStringParser.getParser(Charset.class), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The file encoding."),
+ new FlaggedOption("ratio", IntSizeStringParser.getParser(), "4", JSAP.NOT_REQUIRED, 'r', "ratio", "The compression ratio."),
+ new Switch("utf8", 'u', "utf8", "Store the strings as UTF-8 byte arrays."),
+ new Switch("zipped", 'z', "zipped", "The string list is compressed in gzip format."),
+ new UnflaggedOption("frontCodedList", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised front-coded list.") });
+
+ final JSAPResult jsapResult = jsap.parse(arg);
+ // Nothing to do if the parser has already printed a message (presumably usage or an error).
+ if (jsap.messagePrinted()) return;
+
+ final int bufferSize = jsapResult.getInt("bufferSize");
+ final int ratio = jsapResult.getInt("ratio");
+ final boolean utf8 = jsapResult.getBoolean("utf8");
+ final boolean zipped = jsapResult.getBoolean("zipped");
+ final String listName = jsapResult.getString("frontCodedList");
+ final Charset encoding = (Charset)jsapResult.getObject("encoding");
+
+ final Logger logger = LoggerFactory.getLogger(FrontCodedStringBigList.class);
+ final ProgressLogger pl = new ProgressLogger(logger);
+ pl.displayFreeMemory = true;
+ pl.displayLocalSpeed = true;
+ pl.itemsName = "strings";
+ pl.start("Reading strings...");
+ // Standard input is optionally gunzipped, decoded with the requested charset and read line by line; the list is built while reading.
+ final FrontCodedStringBigList frontCodedStringBigList = new FrontCodedStringBigList(new LineIterator(new FastBufferedReader(new InputStreamReader(zipped ? new GZIPInputStream(System.in) : System.in, encoding), bufferSize), pl), ratio, utf8);
+ pl.done();
+
+ logger.info("Writing front-coded list to file...");
+ BinIO.storeObject(frontCodedStringBigList, listName);
+ logger.info("Completed.");
+ }
+}
=====================================
src/it/unimi/dsi/big/util/PermutedFrontCodedStringBigList.java
=====================================
@@ -0,0 +1,153 @@
+package it.unimi.dsi.big.util;
+
+import java.io.IOException;
+import java.io.Serializable;
+
+import com.martiansoftware.jsap.JSAP;
+import com.martiansoftware.jsap.JSAPException;
+import com.martiansoftware.jsap.JSAPResult;
+import com.martiansoftware.jsap.Parameter;
+import com.martiansoftware.jsap.SimpleJSAP;
+import com.martiansoftware.jsap.Switch;
+import com.martiansoftware.jsap.UnflaggedOption;
+
+/*
+ * DSI utilities
+ *
+ * Copyright (C) 2002-2020 Sebastiano Vigna
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+import it.unimi.dsi.Util;
+import it.unimi.dsi.fastutil.BigArrays;
+import it.unimi.dsi.fastutil.io.BinIO;
+import it.unimi.dsi.fastutil.io.TextIO;
+import it.unimi.dsi.fastutil.longs.LongBidirectionalIterator;
+import it.unimi.dsi.fastutil.longs.LongIterators;
+import it.unimi.dsi.fastutil.objects.AbstractObjectBigList;
+import it.unimi.dsi.fastutil.objects.ObjectBigListIterator;
+import it.unimi.dsi.lang.MutableString;
+import it.unimi.dsi.util.FrontCodedStringList;
+import it.unimi.dsi.util.PermutedFrontCodedStringList;
+
+/**
+ * A {@link it.unimi.dsi.big.util.FrontCodedStringBigList} whose indices are permuted.
+ *
+ * <P>
+ * This class is functionally identical to {@link PermutedFrontCodedStringList}, except for the
+ * larger size allowed.
+ *
+ * @see FrontCodedStringList
+ * @see PermutedFrontCodedStringList
+ */
+
+public class PermutedFrontCodedStringBigList extends AbstractObjectBigList<CharSequence> implements Serializable {
+
+ public static final long serialVersionUID = 1;
+
+ /** The underlying front-coded string list. */
+ final protected FrontCodedStringBigList frontCodedStringBigList;
+ /** The permutation. */
+ final protected long[][] permutation;
+
+ /**
+  * Creates a new permuted front-coded string big list using a given front-coded string big list
+  * and permutation.
+  *
+  * <p>
+  * Both arguments are stored by reference; the permutation is not copied.
+  *
+  * @param frontCodedStringBigList the underlying front-coded string big list.
+  * @param permutation the underlying permutation (a {@linkplain BigArrays big array} of longs).
+  * @throws IllegalArgumentException if the number of strings in the list differs from the length
+  * of the permutation.
+  */
+ public PermutedFrontCodedStringBigList(final FrontCodedStringBigList frontCodedStringBigList, final long[][] permutation) {
+     final long size = frontCodedStringBigList.size64();
+     final long permutationLength = BigArrays.length(permutation);
+     if (size != permutationLength) throw new IllegalArgumentException("The front-coded string big list contains " + size + " strings, but the permutation is on " + permutationLength + " elements.");
+     this.frontCodedStringBigList = frontCodedStringBigList;
+     this.permutation = permutation;
+ }
+
+ /**
+  * Returns the string at the given position of this permuted list, that is, the string of the
+  * underlying list whose index is the value of the permutation at {@code index}.
+  *
+  * @param index an index in the list.
+  * @return the corresponding string of the underlying front-coded string big list.
+  */
+ @Override
+ public MutableString get(final long index) {
+     final long permutedIndex = BigArrays.get(permutation, index);
+     return frontCodedStringBigList.get(permutedIndex);
+ }
+
+ /**
+  * Returns the element at the specified position in this permuted list by storing it in a mutable
+  * string. The position is first mapped through the permutation, and the underlying list then
+  * fills the string.
+  *
+  * @param index an index in the list.
+  * @param s a mutable string that will contain the string at the specified position.
+  */
+ public void get(final long index, final MutableString s) {
+     final long permutedIndex = BigArrays.get(permutation, index);
+     frontCodedStringBigList.get(permutedIndex, s);
+ }
+
+ /**
+  * Returns the number of strings in this list, which is the size of the underlying front-coded
+  * string big list (the constructor checks that the permutation has the same length).
+  *
+  * @return the number of strings in this list.
+  */
+ @Override
+ public long size64() {
+ return frontCodedStringBigList.size64();
+ }
+
+ /**
+  * Returns a big-list iterator over the strings of this permuted list, starting at the given
+  * position.
+  *
+  * <p>
+  * The iterator keeps a single cursor, initialised to {@code k}; each step maps the cursor through
+  * the permutation and fetches the corresponding string from the underlying list.
+  *
+  * @param k the index of the first element that {@code next()} will return.
+  * @return a big-list iterator positioned at {@code k}.
+  * @throws IndexOutOfBoundsException if {@code k} is negative or greater than the size of this
+  * list.
+  */
+ @Override
+ public ObjectBigListIterator<CharSequence> listIterator(final long k) {
+     final long size = frontCodedStringBigList.size64();
+     if (k < 0 || k > size) throw new IndexOutOfBoundsException("Index (" + k + ") is not in [0 .. " + size + "]");
+     return new ObjectBigListIterator<CharSequence>() {
+         /** The index of the element that {@code next()} would return. */
+         long p = k;
+
+         @Override
+         public boolean hasNext() {
+             return p < size;
+         }
+
+         @Override
+         public boolean hasPrevious() {
+             return p > 0;
+         }
+
+         @Override
+         public CharSequence next() {
+             // Check before moving the cursor, so that a failed call leaves the iterator untouched.
+             if (!hasNext()) throw new java.util.NoSuchElementException();
+             return frontCodedStringBigList.get(BigArrays.get(permutation, p++));
+         }
+
+         @Override
+         public CharSequence previous() {
+             if (!hasPrevious()) throw new java.util.NoSuchElementException();
+             return frontCodedStringBigList.get(BigArrays.get(permutation, --p));
+         }
+
+         @Override
+         public long nextIndex() {
+             return p;
+         }
+
+         @Override
+         public long previousIndex() {
+             return p - 1;
+         }
+     };
+ }
+
+ /**
+  * Command-line entry point: loads a serialised front-coded string big list and a permutation,
+  * optionally inverts the permutation, and serialises the resulting permuted big list.
+  *
+  * @param arg the command-line arguments, parsed with JSAP (see the option definitions below).
+  * @throws IOException declared for loading the permutation and the list and for storing the result.
+  * @throws ClassNotFoundException declared for the deserialisation of the list.
+  * @throws JSAPException declared for the construction of the command-line parser.
+  */
+ public static void main(final String[] arg) throws IOException, ClassNotFoundException, JSAPException {
+
+     // The usage message must name this class, not the int-indexed PermutedFrontCodedStringList.
+     final SimpleJSAP jsap = new SimpleJSAP(PermutedFrontCodedStringBigList.class.getName(), "Builds a permuted front-coded list of strings using a given front-coded string list and a permutation (either in text or binary format).",
+         new Parameter[] {
+             new Switch("invert", 'i', "invert", "Invert permutation before creating the permuted list."),
+             new Switch("text", 't', "text", "The permutation is a text file."),
+             new UnflaggedOption("list", JSAP.STRING_PARSER, JSAP.REQUIRED, "A front-coded string big list."),
+             new UnflaggedOption("permutation", JSAP.STRING_PARSER, JSAP.REQUIRED, "A permutation for the indices of the list (longs in DataInput format, unless you specify --text)."),
+             new UnflaggedOption("permutedList", JSAP.STRING_PARSER, JSAP.REQUIRED, "A the filename for the resulting permuted big list."),
+         });
+
+     final JSAPResult jsapResult = jsap.parse(arg);
+     if (jsap.messagePrinted()) return;
+
+     final String permutationFile = jsapResult.getString("permutation");
+     // The permutation is read as text when --text is given, and as binary longs otherwise.
+     final long[][] permutation = jsapResult.userSpecified("text") ? LongIterators.unwrapBig(TextIO.asLongIterator(permutationFile)) : BinIO.loadLongsBig(permutationFile);
+     if (jsapResult.getBoolean("invert")) Util.invertPermutationInPlace(permutation);
+
+     BinIO.storeObject(
+         new PermutedFrontCodedStringBigList((FrontCodedStringBigList)BinIO.loadObject(jsapResult.getString("list")), permutation),
+         jsapResult.getString("permutedList")
+     );
+ }
+}
=====================================
src/it/unimi/dsi/util/PermutedFrontCodedStringList.java
=====================================
@@ -94,7 +94,7 @@ public class PermutedFrontCodedStringList extends AbstractObjectList<CharSequenc
}
@Override
- public CharSequence get(final int index) {
+ public MutableString get(final int index) {
return frontCodedStringList.get(permutation[index]);
}
@@ -133,16 +133,16 @@ public class PermutedFrontCodedStringList extends AbstractObjectList<CharSequenc
public static void main(final String[] arg) throws IOException, ClassNotFoundException, JSAPException {
- SimpleJSAP jsap = new SimpleJSAP(PermutedFrontCodedStringList.class.getName(), "Builds a permuted front-coded list of strings using a given front-coded string list and a permutation (either in text or binary format).",
+ final SimpleJSAP jsap = new SimpleJSAP(PermutedFrontCodedStringList.class.getName(), "Builds a permuted front-coded list of strings using a given front-coded string list and a permutation (either in text or binary format).",
new Parameter[] {
- new Switch("invert", 'i', "invert", "Invert permutation before creating the permuted list."),
- new Switch("text", 't', "text", "The permutation is a text file."),
- new UnflaggedOption("list", JSAP.STRING_PARSER, JSAP.REQUIRED, "A front-coded string list."),
- new UnflaggedOption("permutation", JSAP.STRING_PARSER, JSAP.REQUIRED, "A permutation for the indices of the list (in DataInput format, unless you specify --text)."),
- new UnflaggedOption("permutedList", JSAP.STRING_PARSER, JSAP.REQUIRED, "A the filename for the resulting permuted list."),
+ new Switch("invert", 'i', "invert", "Invert permutation before creating the permuted list."),
+ new Switch("text", 't', "text", "The permutation is a text file."),
+ new UnflaggedOption("list", JSAP.STRING_PARSER, JSAP.REQUIRED, "A front-coded string list."),
+ new UnflaggedOption("permutation", JSAP.STRING_PARSER, JSAP.REQUIRED, "A permutation for the indices of the list (ints in DataInput format, unless you specify --text)."),
+ new UnflaggedOption("permutedList", JSAP.STRING_PARSER, JSAP.REQUIRED, "A the filename for the resulting permuted list."),
});
- JSAPResult jsapResult = jsap.parse(arg);
+ final JSAPResult jsapResult = jsap.parse(arg);
if (jsap.messagePrinted()) return;
final String permutationFile = jsapResult.getString("permutation");
=====================================
test/it/unimi/dsi/UtilTest.java
=====================================
@@ -24,15 +24,17 @@ package it.unimi.dsi;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
-import it.unimi.dsi.fastutil.ints.IntArrayList;
-import it.unimi.dsi.fastutil.ints.IntArrays;
-import it.unimi.dsi.fastutil.longs.LongBigArrays;
-import it.unimi.dsi.util.SplitMix64Random;
import java.util.Collections;
import org.junit.Test;
+import it.unimi.dsi.fastutil.ints.IntArrayList;
+import it.unimi.dsi.fastutil.ints.IntArrays;
+import it.unimi.dsi.fastutil.longs.LongBigArrays;
+import it.unimi.dsi.util.SplitMix64Random;
+import it.unimi.dsi.util.XoRoShiRo128PlusPlusRandom;
+
public class UtilTest {
@Test
@@ -43,7 +45,7 @@ public class UtilTest {
try {
Util.formatBinarySize(6);
}
- catch(IllegalArgumentException e) {
+ catch(final IllegalArgumentException e) {
ok = true;
}
assertTrue(ok);
@@ -128,4 +130,46 @@ public class UtilTest {
assertArrayEquals(q, p);
}
}
+
+ /**
+  * Checks composition of int permutations on random shuffles of 10, 100 and 1000 elements:
+  * composing with the identity leaves a shuffle unchanged, composing a shuffle with its inverse
+  * (in either order) gives the identity, and the in-place version agrees with the allocating one.
+  */
+ @Test
+ public void testComposePermutation() {
+ final XoRoShiRo128PlusPlusRandom r = new XoRoShiRo128PlusPlusRandom(0);
+ for (final int s : new int[] { 10, 100, 1000 }) {
+ final int[] identity = Util.identity(s);
+ final int[] shuffle = Util.identity(s);
+ IntArrays.shuffle(shuffle, r);
+ assertArrayEquals(shuffle, Util.composePermutations(identity, shuffle));
+ assertArrayEquals(shuffle, Util.composePermutations(shuffle, identity));
+ assertArrayEquals(identity, Util.composePermutations(shuffle, Util.invertPermutation(shuffle)));
+ assertArrayEquals(identity, Util.composePermutations(Util.invertPermutation(shuffle), shuffle));
+
+ final int[] shuffle2 = Util.identity(s);
+ IntArrays.shuffle(shuffle2, r);
+
+ // The expected result must be computed first: the in-place call overwrites shuffle2.
+ final int[] result = Util.composePermutations(shuffle, shuffle2);
+ Util.composePermutationsInPlace(shuffle, shuffle2);
+ assertArrayEquals(result, shuffle2);
+ }
+ }
+
+ /**
+  * Big-array counterpart of the int composition test: the same identities are checked on random
+  * shuffles of 10, 100 and 1000 elements stored as big arrays of longs.
+  */
+ @Test
+ public void testComposePermutationBig() {
+ final XoRoShiRo128PlusPlusRandom r = new XoRoShiRo128PlusPlusRandom(0);
+ for (final long s : new int[] { 10, 100, 1000 }) {
+ final long[][] identity = Util.identity(s);
+ final long[][] shuffle = Util.identity(s);
+ LongBigArrays.shuffle(shuffle, r);
+ assertArrayEquals(shuffle, Util.composePermutations(identity, shuffle));
+ assertArrayEquals(shuffle, Util.composePermutations(shuffle, identity));
+ assertArrayEquals(identity, Util.composePermutations(shuffle, Util.invertPermutation(shuffle)));
+ assertArrayEquals(identity, Util.composePermutations(Util.invertPermutation(shuffle), shuffle));
+
+ final long[][] shuffle2 = Util.identity(s);
+ LongBigArrays.shuffle(shuffle2, r);
+
+ // The expected result must be computed first: the in-place call overwrites shuffle2.
+ final long[][] result = Util.composePermutations(shuffle, shuffle2);
+ Util.composePermutationsInPlace(shuffle, shuffle2);
+ assertArrayEquals(result, shuffle2);
+ }
+ }
}
=====================================
test/it/unimi/dsi/big/util/FrontCodedStringBigListTest.java
=====================================
@@ -0,0 +1,100 @@
+package it.unimi.dsi.big.util;
+
+/*
+ * DSI utilities
+ *
+ * Copyright (C) 2010-2020 Sebastiano Vigna
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+import static org.junit.Assert.assertEquals;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.SplittableRandom;
+
+import org.junit.Test;
+
+import it.unimi.dsi.lang.MutableString;
+
+public class FrontCodedStringBigListTest {
+
+ /**
+  * Builds lists from a fixed word set for every combination of encoding (characters or UTF-8) and
+  * ratio from 1 to 7, and checks that both {@code get()} variants return each word unchanged. The
+  * whole check is run twice: first on the words in their original order, then after sorting them.
+  */
+ @Test
+ public void testLargeSet() {
+ final List<String> c = Arrays.asList(TernaryIntervalSearchTreeTest.WORDS.clone());
+ // Reused across all calls to the two-argument get().
+ final MutableString s = new MutableString();
+ for(int p = 0; p < 2; p++) {
+ for(final boolean utf8: new boolean[] { false, true })
+ for(int ratio = 1; ratio < 8; ratio++) {
+ final FrontCodedStringBigList fcl = new FrontCodedStringBigList(c.iterator(), ratio, utf8);
+ for (int i = 0; i < fcl.size64(); i++) {
+ assertEquals(Integer.toString(i), c.get(i), fcl.get(i).toString());
+ fcl.get(i, s);
+ assertEquals(Integer.toString(i), c.get(i), s.toString());
+ }
+ }
+
+ // Second pass runs on the sorted words.
+ Collections.sort(c);
+ }
+ }
+
+ /**
+  * Checks that strings containing a private-use character (three bytes in UTF-8) or consisting of
+  * a single surrogate pair (four bytes in UTF-8) are returned unchanged, for both encodings and
+  * for ratios from 1 to 7.
+  */
+ @Test
+ public void testSurrogatePairs() {
+ final List<String> c = Arrays.asList(new String[] { "a", "AB\uE000AB", "\uD800\uDF02", "\uD800\uDF03", "b" });
+ for(final boolean utf8: new boolean[] { false, true })
+ for(int ratio = 1; ratio < 8; ratio++) {
+ final FrontCodedStringBigList fcl = new FrontCodedStringBigList(c.iterator(), ratio, utf8);
+ for (int i = 0; i < fcl.size64(); i++) {
+ assertEquals(Integer.toString(i), c.get(i), fcl.get(i).toString());
+ }
+ }
+ }
+
+ /**
+  * Builds a UTF-8 list of more than 2<sup>31</sup> one-character strings generated by a seeded
+  * {@link SplittableRandom}, and checks both {@code get()} variants against a second generator
+  * started from the same seed.
+  *
+  * <p>
+  * Retrieved values are converted with {@code toString()} before comparison: {@code get()} returns
+  * a {@link MutableString}, and {@code String.equals()} is never true for an argument that is not
+  * a {@code String}.
+  */
+ // @Ignore("Needs a lot of memory")
+ @Test
+ public void testbig() {
+     final long size = (1L << 31) + 10000;
+     final FrontCodedStringBigList byteArrayFrontCodedBigList = new FrontCodedStringBigList(new Iterator<String>() {
+         SplittableRandom r = new SplittableRandom(0);
+         long i = 0;
+
+         @Override
+         public boolean hasNext() {
+             return i < size;
+         }
+
+         @Override
+         public String next() {
+             i++;
+             return new String(new byte[] { (byte)r.nextLong() }, StandardCharsets.ISO_8859_1);
+         }
+     }, 10, true);
+     SplittableRandom r = new SplittableRandom(0);
+     for (long i = 0; i < size; i++) {
+         assertEquals(new String(new byte[] { (byte)r.nextLong() }, StandardCharsets.ISO_8859_1), byteArrayFrontCodedBigList.get(i).toString());
+     }
+     // Restart the generator so that the second pass sees the same sequence.
+     r = new SplittableRandom(0);
+     final MutableString s = new MutableString();
+     for (long i = 0; i < size; i++) {
+         byteArrayFrontCodedBigList.get(i, s);
+         assertEquals(new String(new byte[] { (byte)r.nextLong() }, StandardCharsets.ISO_8859_1), s.toString());
+     }
+ }
+
+}
View it on GitLab: https://salsa.debian.org/java-team/libdsiutils-java/-/commit/240adfa801e9e99460ba5f919960166b879d2b1e
--
View it on GitLab: https://salsa.debian.org/java-team/libdsiutils-java/-/commit/240adfa801e9e99460ba5f919960166b879d2b1e
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/pkg-java-commits/attachments/20201022/c742ea82/attachment.html>
More information about the pkg-java-commits
mailing list