[med-svn] [Git][med-team/picard-tools][upstream] New upstream version 2.27.2+dfsg
Pierre Gruet (@pgt)
gitlab at salsa.debian.org
Sun Jun 26 21:37:39 BST 2022
Pierre Gruet pushed to branch upstream at Debian Med / picard-tools
Commits:
39c820da by Pierre Gruet at 2022-05-26T13:53:05+02:00
New upstream version 2.27.2+dfsg
- - - - -
7 changed files:
- src/main/java/picard/cmdline/CommandLineProgram.java
- src/main/java/picard/cmdline/CommandLineSyntaxTranslater.java
- src/main/java/picard/sam/FastqToSam.java
- src/main/java/picard/sam/RevertSam.java
- src/test/java/picard/cmdline/PicardCommandLineTest.java
- src/test/java/picard/sam/AbstractAlignmentMergerTest.java
- src/test/java/picard/sam/FastqToSamTest.java
Changes:
=====================================
src/main/java/picard/cmdline/CommandLineProgram.java
=====================================
@@ -40,7 +40,6 @@ import htsjdk.samtools.util.BlockCompressedOutputStream;
import htsjdk.samtools.util.BlockGunzipper;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
-import htsjdk.samtools.util.zip.DeflaterFactory;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
import org.broadinstitute.barclay.argparser.Argument;
@@ -51,7 +50,6 @@ import org.broadinstitute.barclay.argparser.CommandLineParser;
import org.broadinstitute.barclay.argparser.CommandLineParserOptions;
import org.broadinstitute.barclay.argparser.LegacyCommandLineArgumentParser;
import org.broadinstitute.barclay.argparser.SpecialArgumentsCollection;
-import picard.PicardException;
import picard.cmdline.argumentcollections.OptionalReferenceArgumentCollection;
import picard.cmdline.argumentcollections.ReferenceArgumentCollection;
import picard.cmdline.argumentcollections.RequiredReferenceArgumentCollection;
@@ -88,6 +86,8 @@ public abstract class CommandLineProgram {
private static String PROPERTY_USE_LEGACY_PARSER = "picard.useLegacyParser";
private static String PROPERTY_CONVERT_LEGACY_COMMAND_LINE = "picard.convertCommandLine";
private static Boolean useLegacyParser;
+ public static String SYNTAX_TRANSITION_URL =
+ "https://github.com/broadinstitute/picard/wiki/Command-Line-Syntax-Transition-For-Users-(Pre-Transition)";
/**
* CommandLineProgramProperties oneLineSummary attribute must be shorter than this in order to maintain
@@ -207,7 +207,7 @@ public abstract class CommandLineProgram {
"********** NOTE: Picard's command line syntax is changing.",
"**********",
"********** For more information, please see:",
- "********** https://github.com/broadinstitute/picard/wiki/Command-Line-Syntax-Transition-For-Users-(Pre-Transition)",
+ "********** ", SYNTAX_TRANSITION_URL,
"**********",
"********** The command line looks like this in the new syntax:",
"**********",
=====================================
src/main/java/picard/cmdline/CommandLineSyntaxTranslater.java
=====================================
@@ -1,5 +1,7 @@
package picard.cmdline;
+import htsjdk.samtools.util.Log;
+
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
@@ -9,6 +11,7 @@ import java.util.stream.Collectors;
* used for running tests written with Picard style syntax against the Barclay command line parser.
*/
public class CommandLineSyntaxTranslater {
+ private final static Log log = Log.getInstance(CommandLineSyntaxTranslater.class);
// Prefixes used by the Barclay parser for short/long prefixes
private static final String BARCLAY_SHORT_OPTION_PREFIX = "-";
@@ -18,12 +21,23 @@ public class CommandLineSyntaxTranslater {
// Return true when the command line arguments appear to use Picard's legacy syntax.
public static boolean isLegacyPicardStyle(final String argv[]) {
- return Arrays.stream(argv).anyMatch(
- putativeLegacyArg ->
- !putativeLegacyArg.startsWith(BARCLAY_SHORT_OPTION_PREFIX) &&
- !putativeLegacyArg.startsWith(BARCLAY_LONG_OPTION_PREFIX) &&
- putativeLegacyArg.contains(LEGACY_VALUE_SEPARATOR)
+ final boolean anyLegacy = Arrays.stream(argv).anyMatch(
+ arg -> !arg.startsWith(BARCLAY_SHORT_OPTION_PREFIX) &&
+ !arg.startsWith(BARCLAY_LONG_OPTION_PREFIX) &&
+ arg.contains(LEGACY_VALUE_SEPARATOR)
);
+ if (anyLegacy && Arrays.stream(argv).anyMatch(
+ arg -> arg.startsWith(BARCLAY_SHORT_OPTION_PREFIX) || arg.startsWith(BARCLAY_LONG_OPTION_PREFIX))) {
+ // There appear to be both legacy and posix style args. Prefer/choose posix in this case since there are
+ // legitimate cases where argument values might contain embedded "=" (e.g.,
+ // "--INPUT path/to/some.bam --SOME_ARG date=01/01/2022"), which makes them appear to be
+ // legacy style args even though they are not, whereas it's very unlikely to encounter a legitimate
+ // legacy option that starts with a posix prefix ("--" or "-").
+ log.warn("!!!!!!Possible mixed (legacy and new style) arguments detected!!!!!!!\n"
+ + "Assuming new-style arguments are intended. See: " + CommandLineProgram.SYNTAX_TRANSITION_URL);
+ return false;
+ }
+ return anyLegacy;
}
public static String[] convertPicardStyleToPosixStyle(final String argv[]) {
=====================================
src/main/java/picard/sam/FastqToSam.java
=====================================
@@ -51,9 +51,15 @@ import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.ReadDataManipulationProgramGroup;
+import picard.nio.PicardHtsPath;
import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.List;
/**
@@ -150,10 +156,10 @@ public class FastqToSam extends CommandLineProgram {
private static final Log LOG = Log.getInstance(FastqToSam.class);
@Argument(shortName="F1", doc="Input fastq file (optionally gzipped) for single end data, or first read in paired end data.")
- public File FASTQ;
+ public PicardHtsPath FASTQ;
@Argument(shortName="F2", doc="Input fastq file (optionally gzipped) for the second read of paired end data.", optional=true)
- public File FASTQ2;
+ public PicardHtsPath FASTQ2;
@Argument(doc="Use sequential fastq files with the suffix <prefix>_###.fastq or <prefix>_###.fastq.gz." +
"The files should be named:\n" +
@@ -274,8 +280,8 @@ public class FastqToSam extends CommandLineProgram {
* RUNNAME_S8_L005_R1_004.fastq
* where `baseFastq` is the first in that list.
*/
- protected static List<File> getSequentialFileList(final File baseFastq) {
- final List<File> files = new ArrayList<>();
+ protected static List<Path> getSequentialFileList(final Path baseFastq) {
+ final List<Path> files = new ArrayList<>();
files.add(baseFastq);
// Find the correct extension used in the base FASTQ
@@ -283,25 +289,25 @@ public class FastqToSam extends CommandLineProgram {
String suffix = null; // store the suffix including the extension
for (final FastqExtensions ext : FastqExtensions.values()) {
suffix = "_001" + ext.getExtension();
- if (baseFastq.getAbsolutePath().endsWith(suffix)) {
+ if (baseFastq.toString().endsWith(suffix)) {
fastqExtensions = ext;
break;
}
}
if (null == fastqExtensions) {
- throw new PicardException(String.format("Could not parse the FASTQ extension (expected '_001' + '%s'): %s", FastqExtensions.values().toString(), baseFastq));
+ throw new PicardException(String.format("Could not parse the FASTQ extension (expected '_001' + '%s'): %s", Arrays.toString(FastqExtensions.values()), baseFastq));
}
// Find all the files
for (int idx = 2; true; idx++) {
- String fastq = baseFastq.getAbsolutePath();
+ String fastq = baseFastq.toAbsolutePath().toString();
fastq = String.format("%s_%03d%s", fastq.substring(0, fastq.length() - suffix.length()), idx, fastqExtensions.getExtension());
try {
- IOUtil.assertFileIsReadable(new File(fastq));
+ IOUtil.assertFileIsReadable(Paths.get(fastq));
} catch (final SAMException e) { // the file is not readable, so do not continue
break;
}
- files.add(new File(fastq));
+ files.add(Paths.get(fastq));
}
return files;
@@ -309,9 +315,9 @@ public class FastqToSam extends CommandLineProgram {
/* Simply invokes the right method for unpaired or paired data. */
protected int doWork() {
- IOUtil.assertFileIsReadable(FASTQ);
+ IOUtil.assertFileIsReadable(FASTQ.toPath());
if (FASTQ2 != null) {
- IOUtil.assertFileIsReadable(FASTQ2);
+ IOUtil.assertFileIsReadable(FASTQ2.toPath());
}
IOUtil.assertFileIsWritable(OUTPUT);
@@ -319,8 +325,8 @@ public class FastqToSam extends CommandLineProgram {
final SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(header, false, OUTPUT, REFERENCE_SEQUENCE);
// Set the quality format
- QUALITY_FORMAT = FastqToSam.determineQualityFormat(fileToFastqReader(FASTQ),
- (FASTQ2 == null) ? null : fileToFastqReader(FASTQ2),
+ QUALITY_FORMAT = FastqToSam.determineQualityFormat(fileToFastqReader(FASTQ.toPath()),
+ (FASTQ2 == null) ? null : fileToFastqReader(FASTQ2.toPath()),
QUALITY_FORMAT);
// Lists for sequential files, but also used when not sequential
@@ -329,11 +335,11 @@ public class FastqToSam extends CommandLineProgram {
if (USE_SEQUENTIAL_FASTQS) {
// Get all the files
- for (final File fastq : getSequentialFileList(FASTQ)) {
+ for (final Path fastq : getSequentialFileList(FASTQ.toPath())) {
readers1.add(fileToFastqReader(fastq));
}
if (null != FASTQ2) {
- for (final File fastq : getSequentialFileList(FASTQ2)) {
+ for (final Path fastq : getSequentialFileList(FASTQ2.toPath())) {
readers2.add(fileToFastqReader(fastq));
}
if (readers1.size() != readers2.size()) {
@@ -342,9 +348,9 @@ public class FastqToSam extends CommandLineProgram {
}
}
else {
- readers1.add(fileToFastqReader(FASTQ));
+ readers1.add(fileToFastqReader(FASTQ.toPath()));
if (FASTQ2 != null) {
- readers2.add(fileToFastqReader(FASTQ2));
+ readers2.add(fileToFastqReader(FASTQ2.toPath()));
}
}
@@ -428,8 +434,12 @@ public class FastqToSam extends CommandLineProgram {
return readCount;
}
- private FastqReader fileToFastqReader(final File file) {
- return new FastqReader(file, ALLOW_AND_IGNORE_EMPTY_LINES);
+ private FastqReader fileToFastqReader(final Path path) throws PicardException {
+ try {
+ return new FastqReader(null, Files.newBufferedReader(path), ALLOW_AND_IGNORE_EMPTY_LINES);
+ } catch (IOException e){
+ throw new PicardException("cannot create a reader for " + path, e);
+ }
}
private SAMRecord createSamRecord(final SAMFileHeader header, final String baseName, final FastqRecord frec, final boolean paired) {
=====================================
src/main/java/picard/sam/RevertSam.java
=====================================
@@ -57,6 +57,7 @@ import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.ReadDataManipulationProgramGroup;
+import picard.nio.PicardHtsPath;
import picard.util.TabbedTextFileWithHeaderParser;
import java.io.File;
@@ -144,7 +145,7 @@ public class RevertSam extends CommandLineProgram {
"(e.g. invalid alignment information will be obviated when the REMOVE_ALIGNMENT_INFORMATION option is used).\n" +
"";
@Argument(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "The input SAM/BAM/CRAM file to revert the state of.")
- public File INPUT;
+ public PicardHtsPath INPUT;
@Argument(mutex = {"OUTPUT_MAP"}, shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "The output SAM/BAM/CRAM file to create, or an output directory if OUTPUT_BY_READGROUP is true.")
public File OUTPUT;
@@ -249,11 +250,11 @@ public class RevertSam extends CommandLineProgram {
}
protected int doWork() {
- IOUtil.assertFileIsReadable(INPUT);
+ IOUtil.assertFileIsReadable(INPUT.toPath());
ValidationUtil.assertWritable(OUTPUT, OUTPUT_BY_READGROUP);
final boolean sanitizing = SANITIZE;
- final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).validationStringency(VALIDATION_STRINGENCY).open(INPUT);
+ final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).validationStringency(VALIDATION_STRINGENCY).open(INPUT.toPath());
final SAMFileHeader inHeader = in.getFileHeader();
ValidationUtil.validateHeaderOverrides(inHeader, SAMPLE_ALIAS, LIBRARY_NAME);
@@ -326,8 +327,14 @@ public class RevertSam extends CommandLineProgram {
out.close();
} else {
final Map<SAMReadGroupRecord, FastqQualityFormat> readGroupToFormat;
+ final Path referenceSequencePath;
try {
- readGroupToFormat = createReadGroupFormatMap(inHeader, REFERENCE_SEQUENCE, VALIDATION_STRINGENCY, INPUT, RESTORE_ORIGINAL_QUALITIES);
+ if (REFERENCE_SEQUENCE != null) {
+ referenceSequencePath = REFERENCE_SEQUENCE.toPath();
+ } else {
+ referenceSequencePath = null;
+ }
+ readGroupToFormat = createReadGroupFormatMap(inHeader, referenceSequencePath, VALIDATION_STRINGENCY, INPUT.toPath(), RESTORE_ORIGINAL_QUALITIES);
} catch (final PicardException e) {
log.error(e.getMessage());
return -1;
@@ -613,9 +620,9 @@ public class RevertSam extends CommandLineProgram {
private Map<SAMReadGroupRecord, FastqQualityFormat> createReadGroupFormatMap(
final SAMFileHeader inHeader,
- final File referenceSequence,
+ final Path referenceSequence,
final ValidationStringency validationStringency,
- final File input,
+ final Path input,
final boolean restoreOriginalQualities) {
final Map<SAMReadGroupRecord, FastqQualityFormat> readGroupToFormat = new HashMap<>();
=====================================
src/test/java/picard/cmdline/PicardCommandLineTest.java
=====================================
@@ -5,6 +5,7 @@ import org.broadinstitute.barclay.argparser.CommandLineException;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
+import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
@@ -81,4 +82,38 @@ public class PicardCommandLineTest {
});
}
+ @DataProvider(name="isLegacyPicardStyleTests")
+ public final Object[][] getIsLegacyPicardStyle() {
+ return new Object[][] {
+ //arg list, is legacy style
+
+ // posix base cases
+ {Arrays.asList("--INPUT", "path/to/some.bam"), false},
+ {Arrays.asList("--INPUT", "path/to/some.bam", "--VALIDATION_STRINGENCY", "LENIENT"), false},
+
+ // legacy base cases
+ {Arrays.asList("INPUT=path/to/some.bam"), true },
+ {Arrays.asList("INPUT=path/to/some.bam", "VALIDATION_STRINGENCY=LENIENT"), true},
+
+ // mixed syntax cases
+
+ // APPEARS to isLegacyPicardStyle to contain a mix of styles, but is actually (in theory) a legitimate
+ // posix style arg list, so select the posix parser, but issue a warning about possible mixed
+ // args set
+ {Arrays.asList("--INPUT", "path/to/some.bam", "--SOME_ARG", "date=01/01/2022"), false},
+
+ // appears to isLegacyPicardStyle to contain a mix of styles, but is probably not valid, so select the
+ // posix parser, issue a warning, and let the parser decide if it's legitimate
+ {Arrays.asList("--INPUT", "path/to/some.bam", "VALIDATION_STRINGENCY=LENIENT"), false},
+
+ // appears to isLegacyPicardStyle to contain a mix of styles, but is probably not valid, so select the
+ // posix parser, issue a warning, and let the parser decide if it's legitimate
+ {Arrays.asList("INPUT=path/to/some.bam", "--ARG=somevalue"), false},
+ };
+ }
+
+ @Test(dataProvider="isLegacyPicardStyleTests")
+ public void testIsLegacyPicardStyle(final List<String> args, final boolean isLegacy) {
+ Assert.assertEquals(CommandLineSyntaxTranslater.isLegacyPicardStyle(args.toArray(new String[0])), isLegacy);
+ }
}
\ No newline at end of file
=====================================
src/test/java/picard/sam/AbstractAlignmentMergerTest.java
=====================================
@@ -15,9 +15,11 @@ import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import picard.cmdline.CommandLineProgramTest;
import picard.cmdline.argumentcollections.RequiredReferenceArgumentCollection;
+import picard.nio.PicardHtsPath;
import java.io.File;
import java.io.IOException;
+import java.nio.file.Path;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
@@ -675,9 +677,9 @@ public class AbstractAlignmentMergerTest extends CommandLineProgramTest {
// builder.addFrag("frag3",1,500,false,false,"20S20M60S",null, 45);
// builder.addFrag("frag4",1,500,true,false,"20S20M60S",null, 45);
- final File file = newTempSamFile("aligned");
+ final PicardHtsPath file = PicardHtsPath.fromPath(newTempSamFile("aligned").toPath());
- try (SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(builder.getHeader(), true, file, null)) {
+ try (SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(builder.getHeader(), true, file.toPath(), (Path) null)) {
builder.getRecords().forEach(writer::addAlignment);
}
@@ -697,7 +699,7 @@ public class AbstractAlignmentMergerTest extends CommandLineProgramTest {
MergeBamAlignment mergeBamAlignment = new MergeBamAlignment();
- mergeBamAlignment.ALIGNED_BAM = Collections.singletonList(file);
+ mergeBamAlignment.ALIGNED_BAM = Collections.singletonList(file.toPath().toFile()); // TODO update to use Path when MergeBamAlignment is updated to use Path
mergeBamAlignment.UNMAPPED_BAM = fileUnaligned;
mergeBamAlignment.UNMAP_CONTAMINANT_READS = true;
=====================================
src/test/java/picard/sam/FastqToSamTest.java
=====================================
@@ -38,6 +38,7 @@ import picard.PicardException;
import java.io.File;
import java.io.IOException;
+import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
@@ -185,7 +186,7 @@ public class FastqToSamTest extends CommandLineProgramTest {
final File fastq2 = (fastqFilename2 != null) ? new File(TEST_DATA_DIR, fastqFilename2) : null;
final File samFile = newTempSamFile(fastq1.getName());
- final List<String> args =new ArrayList<String>();
+ final List<String> args = new ArrayList<>();
args.add("FASTQ=" + fastq1.getAbsolutePath());
args.add("OUTPUT=" + samFile.getAbsolutePath());
@@ -296,9 +297,9 @@ public class FastqToSamTest extends CommandLineProgramTest {
final String pairedEnd1 = "sequential-files/paired_end_R1_001.fastq";
final String pairedEnd2 = "sequential-files/paired_end_R2_001.fastq";
- Assert.assertEquals(FastqToSam.getSequentialFileList(new File(TEST_DATA_DIR, "/" + singleEnd)).size(), 2);
- Assert.assertEquals(FastqToSam.getSequentialFileList(new File(TEST_DATA_DIR, "/" + pairedEnd1)).size(), 2);
- Assert.assertEquals(FastqToSam.getSequentialFileList(new File(TEST_DATA_DIR, "/" + pairedEnd2)).size(), 2);
+ Assert.assertEquals(FastqToSam.getSequentialFileList(Paths.get(TEST_DATA_DIR.getPath(), singleEnd)).size(), 2);
+ Assert.assertEquals(FastqToSam.getSequentialFileList(Paths.get(TEST_DATA_DIR.getPath(), pairedEnd1)).size(), 2);
+ Assert.assertEquals(FastqToSam.getSequentialFileList(Paths.get(TEST_DATA_DIR.getPath(), pairedEnd2)).size(), 2);
convertFileAndVerifyRecordCount(1, singleEnd, null, FastqQualityFormat.Illumina, true, false);
convertFileAndVerifyRecordCount(2, singleEnd, null, FastqQualityFormat.Illumina, true, true);
View it on GitLab: https://salsa.debian.org/med-team/picard-tools/-/commit/39c820da8705e5ebb3fbae505fc70aa7b651891c
--
View it on GitLab: https://salsa.debian.org/med-team/picard-tools/-/commit/39c820da8705e5ebb3fbae505fc70aa7b651891c
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20220626/846e6541/attachment-0001.htm>
More information about the debian-med-commit
mailing list