[Git][java-team/libexternalsortinginjava-java][upstream] New upstream version 0.2.4

Andreas Tille gitlab at salsa.debian.org
Wed May 2 07:42:21 BST 2018


Andreas Tille pushed to branch upstream at Debian Java Maintainers / libexternalsortinginjava-java


Commits:
51baca56 by Andreas Tille at 2018-05-02T08:03:42+02:00
New upstream version 0.2.4
- - - - -


4 changed files:

- README.md
- pom.xml
- src/main/java/com/google/code/externalsorting/ExternalSort.java
- src/test/java/com/google/code/externalsorting/ExternalSortTest.java


Changes:

=====================================
README.md
=====================================
--- a/README.md
+++ b/README.md
@@ -12,6 +12,8 @@ External-Memory Sorting in Java: useful to sort very large files using multiple 
 The versions 0.1 of the library are compatible with Java 6 and above. Versions 0.2 and above
 require at least Java 8. 
 
+This code is used in [Apache Jackrabbit Oak](https://github.com/apache/jackrabbit-oak).
+
 Code sample
 ------------
 


=====================================
pom.xml
=====================================
--- a/pom.xml
+++ b/pom.xml
@@ -3,7 +3,7 @@
     <groupId>com.google.code.externalsortinginjava</groupId>
     <artifactId>externalsortinginjava</artifactId>
     <packaging>jar</packaging>
-    <version>0.2.3</version>
+    <version>0.2.4</version>
     <name>externalsortinginjava</name>
     <url>http://github.com/lemire/externalsortinginjava/</url>
     <description>Sometimes, you want to sort large file without first loading them into memory. The solution is to use External Sorting. You divide the files into small blocks, sort each block in RAM, and then merge the result.
@@ -198,6 +198,6 @@
         <connection>scm:git:git@github.com:lemire/externalsortinginjava.git</connection>
         <url>scm:git:git@github.com:lemire/externalsortinginjava.git</url>
         <developerConnection>scm:git:git@github.com:lemire/externalsortinginjava.git</developerConnection>
-        <tag>externalsortinginjava-0.2.3</tag>
+        <tag>externalsortinginjava-0.2.4</tag>
     </scm>
 </project>


=====================================
src/main/java/com/google/code/externalsorting/ExternalSort.java
=====================================
--- a/src/main/java/com/google/code/externalsorting/ExternalSort.java
+++ b/src/main/java/com/google/code/externalsorting/ExternalSort.java
@@ -210,7 +210,7 @@ public class ExternalSort {
          * @throws IOException generic IO exception
          *
          */
-        public static int mergeSortedFiles(BufferedWriter fbw,
+        public static long mergeSortedFiles(BufferedWriter fbw,
                 final Comparator<String> cmp, boolean distinct,
                 List<BinaryFileBuffer> buffers) throws IOException {
                 PriorityQueue<BinaryFileBuffer> pq = new PriorityQueue<>(
@@ -226,7 +226,7 @@ public class ExternalSort {
                                 pq.add(bfb);
                         }
                 }
-                int rowcounter = 0;
+                long rowcounter = 0;
                 try {
                         if (!distinct) {
                             while (pq.size() > 0) {
@@ -290,7 +290,7 @@ public class ExternalSort {
          * @return The number of lines sorted.
          * @throws IOException generic IO exception
          */
-        public static int mergeSortedFiles(List<File> files, File outputfile)
+        public static long mergeSortedFiles(List<File> files, File outputfile)
                 throws IOException {
                 return mergeSortedFiles(files, outputfile, defaultcomparator,
                         Charset.defaultCharset());
@@ -306,7 +306,7 @@ public class ExternalSort {
          * @return The number of lines sorted.
          * @throws IOException generic IO exception
          */
-        public static int mergeSortedFiles(List<File> files, File outputfile,
+        public static long mergeSortedFiles(List<File> files, File outputfile,
                 final Comparator<String> cmp) throws IOException {
                 return mergeSortedFiles(files, outputfile, cmp,
                         Charset.defaultCharset());
@@ -324,7 +324,7 @@ public class ExternalSort {
          * @return The number of lines sorted.
          * @throws IOException generic IO exception
          */
-        public static int mergeSortedFiles(List<File> files, File outputfile,
+        public static long mergeSortedFiles(List<File> files, File outputfile,
                 final Comparator<String> cmp, boolean distinct)
                 throws IOException {
                 return mergeSortedFiles(files, outputfile, cmp,
@@ -343,7 +343,7 @@ public class ExternalSort {
          * @return The number of lines sorted.
          * @throws IOException generic IO exception
          */
-        public static int mergeSortedFiles(List<File> files, File outputfile,
+        public static long mergeSortedFiles(List<File> files, File outputfile,
                 final Comparator<String> cmp, Charset cs) throws IOException {
                 return mergeSortedFiles(files, outputfile, cmp, cs, false);
         }
@@ -363,7 +363,7 @@ public class ExternalSort {
          * @throws IOException generic IO exception
          * @since v0.1.2
          */
-        public static int mergeSortedFiles(List<File> files, File outputfile,
+        public static long mergeSortedFiles(List<File> files, File outputfile,
                 final Comparator<String> cmp, Charset cs, boolean distinct)
                 throws IOException {
                 return mergeSortedFiles(files, outputfile, cmp, cs, distinct,
@@ -389,7 +389,7 @@ public class ExternalSort {
          * @throws IOException generic IO exception
          * @since v0.1.4
          */
-        public static int mergeSortedFiles(List<File> files, File outputfile,
+        public static long mergeSortedFiles(List<File> files, File outputfile,
                 final Comparator<String> cmp, Charset cs, boolean distinct,
                 boolean append, boolean usegzip) throws IOException {
                 ArrayList<BinaryFileBuffer> bfbs = new ArrayList<>();
@@ -412,7 +412,7 @@ public class ExternalSort {
                 }
                 BufferedWriter fbw = new BufferedWriter(new OutputStreamWriter(
                         new FileOutputStream(outputfile, append), cs));
-                int rowcounter = mergeSortedFiles(fbw, cmp, distinct, bfbs);
+                long rowcounter = mergeSortedFiles(fbw, cmp, distinct, bfbs);
                 for (File f : files) {
                         f.delete();
                 }


=====================================
src/test/java/com/google/code/externalsorting/ExternalSortTest.java
=====================================
--- a/src/test/java/com/google/code/externalsorting/ExternalSortTest.java
+++ b/src/test/java/com/google/code/externalsorting/ExternalSortTest.java
@@ -13,13 +13,21 @@ import java.io.FileReader;
 import java.io.IOException;
 import java.nio.channels.FileChannel;
 import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
 import java.util.List;
 import java.util.Scanner;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
 import org.junit.After;
 import org.junit.Before;
+import org.junit.Ignore;
 import org.junit.Test;
 import org.github.jamm.*;
 
@@ -407,4 +415,47 @@ public class ExternalSortTest {
         }
     }
 
+    /**
+     * Sort a text file with more lines than {@link Integer#MAX_VALUE}.
+     *
+     * @throws IOException
+     */
+    @Ignore("This test takes too long to execute")
+    @Test
+    public void sortVeryLargeFile() throws IOException {
+        final Path veryLargeFile = getTestFile();
+        final Path outputFile = Files.createTempFile("Merged-File", ".tmp");
+        final long sortedLines = ExternalSort.mergeSortedFiles(ExternalSort.sortInBatch(veryLargeFile.toFile()), outputFile.toFile());
+        final long expectedLines = 2148L * 1000000L;
+        assertEquals(expectedLines, sortedLines);
+    }
+
+    /**
+     * Generate a test file with 2148 million lines.
+     *
+     * @throws IOException
+     */
+    private Path getTestFile() throws IOException {
+        System.out.println("Temp File Creation: Started");
+        final Path path = Files.createTempFile("IntegrationTestFile", ".txt");
+        final List<String> idList = new ArrayList<>();
+        final int saneLimit = 1000000;
+        IntStream.range(0, saneLimit)
+                .forEach(i -> idList.add("A"));
+        final String content = idList.stream().collect(Collectors.joining("\n"));
+        Files.write(path, content.getBytes(StandardCharsets.UTF_8), StandardOpenOption.TRUNCATE_EXISTING);
+        final String newLine = "\n";
+        IntStream.range(1, 2148)
+                .forEach(i -> {
+                    try {
+                        Files.write(path, newLine.getBytes(StandardCharsets.UTF_8), StandardOpenOption.APPEND);
+                        Files.write(path, content.getBytes(StandardCharsets.UTF_8), StandardOpenOption.APPEND);
+                    } catch (IOException e) {
+                        throw new RuntimeException(e.getMessage());
+                    }
+                });
+        System.out.println("Temp File Creation: Finished");
+        return path;
+    }
+
 }



View it on GitLab: https://salsa.debian.org/java-team/libexternalsortinginjava-java/commit/51baca56c4552d1985d40fb4c24fa5651a13f4d8

---
View it on GitLab: https://salsa.debian.org/java-team/libexternalsortinginjava-java/commit/51baca56c4552d1985d40fb4c24fa5651a13f4d8
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/pkg-java-commits/attachments/20180502/75ca733d/attachment.html>


More information about the pkg-java-commits mailing list