[med-svn] [Git][med-team/drop-seq][master] 4 commits: New upstream version 2.5.3+dfsg
Pierre Gruet (@pgt)
gitlab at salsa.debian.org
Sun Jun 25 20:33:02 BST 2023
Pierre Gruet pushed to branch master at Debian Med / drop-seq
Commits:
f0cb7659 by Pierre Gruet at 2023-06-25T14:43:52+02:00
New upstream version 2.5.3+dfsg
- - - - -
9637b53d by Pierre Gruet at 2023-06-25T14:45:17+02:00
Update upstream source from tag 'upstream/2.5.3+dfsg'
Update to upstream version '2.5.3+dfsg'
with Debian dir 9667b6075506ad7994b70b2442371eeaeb1cfc84
- - - - -
1ad4b2ad by Pierre Gruet at 2023-06-25T14:46:18+02:00
Updating changelog
- - - - -
8887ed0e by Pierre Gruet at 2023-06-25T14:46:36+02:00
Upload to unstable
- - - - -
24 changed files:
- build.xml
- debian/changelog
- src/ant/defs.xml
- + src/java/org/broadinstitute/dropseqrna/barnyard/ChimericUmi.java
- + src/java/org/broadinstitute/dropseqrna/barnyard/ChimericUmiCollection.java
- + src/java/org/broadinstitute/dropseqrna/barnyard/MarkChimericReads.java
- + src/tests/java/org/broadinstitute/dropseqrna/barnyard/MarkChimericReadsTest.java
- + testdata/org/broadinstitute/dropseq/barnyard/6T.chimeric_read_metrics
- + testdata/org/broadinstitute/dropseq/barnyard/6T.chimeric_report.txt
- + testdata/org/broadinstitute/dropseq/barnyard/MarkChimericReads.6T.sam
- + testdata/org/broadinstitute/dropseq/barnyard/MarkChimericReads.both.sam
- + testdata/org/broadinstitute/dropseq/barnyard/MarkChimericReads.input.sam
- + testdata/org/broadinstitute/dropseq/barnyard/MarkChimericReads.input2.sam
- + testdata/org/broadinstitute/dropseq/barnyard/MarkChimericReads.umiReuse.sam
- + testdata/org/broadinstitute/dropseq/barnyard/MarkChimericReads.umiReuse10x.sam
- + testdata/org/broadinstitute/dropseq/barnyard/both.chimeric_read_metrics
- + testdata/org/broadinstitute/dropseq/barnyard/both.chimeric_report.txt
- + testdata/org/broadinstitute/dropseq/barnyard/ed_collapse.chimeric_report.txt
- + testdata/org/broadinstitute/dropseq/barnyard/selectedCellBarcodes.txt
- + testdata/org/broadinstitute/dropseq/barnyard/two_cells.chimeric_report.txt
- + testdata/org/broadinstitute/dropseq/barnyard/umiReuse.chimeric_read_metrics
- + testdata/org/broadinstitute/dropseq/barnyard/umiReuse.chimeric_report.txt
- + testdata/org/broadinstitute/dropseq/barnyard/umiReuse10x.chimeric_read_metrics
- + testdata/org/broadinstitute/dropseq/barnyard/umiReuse10x.chimeric_report.txt
Changes:
=====================================
build.xml
=====================================
@@ -139,6 +139,7 @@
<package-command visibility="public" title="BaseDistributionAtReadPosition"/>
<package-command visibility="public" title="TagReadWithInterval"/>
<package-command visibility="public" title="DigitalExpression"/>
+ <package-command visibility="public" title="MarkChimericReads"/>
<package-command visibility="public" title="GatherMolecularBarcodeDistributionByGene"/>
<package-command visibility="public" title="SingleCellRnaSeqMetricsCollector"/>
<package-command visibility="public" title="PolyATrimmer"/>
=====================================
debian/changelog
=====================================
@@ -1,3 +1,9 @@
+drop-seq (2.5.3+dfsg-1) unstable; urgency=medium
+
+ * New upstream version 2.5.3+dfsg
+
+ -- Pierre Gruet <pgt at debian.org> Sun, 25 Jun 2023 14:46:21 +0200
+
drop-seq (2.5.2+dfsg-1) unstable; urgency=medium
* New upstream version 2.5.2+dfsg
=====================================
src/ant/defs.xml
=====================================
@@ -32,7 +32,7 @@
<property name="repository.revision" value=""/>
-<property name="Drop-seq.version" value="2.5.2"/>
+<property name="Drop-seq.version" value="2.5.3"/>
<property name="zip.label" value="Drop-seq_tools-${Drop-seq.version}"/>
<property name="javac.debug" value="true"/>
<property name="javac.target" value="1.8"/>
=====================================
src/java/org/broadinstitute/dropseqrna/barnyard/ChimericUmi.java
=====================================
@@ -0,0 +1,87 @@
+package org.broadinstitute.dropseqrna.barnyard;
+
+import java.util.Collections;
+import java.util.Set;
+
+import org.broadinstitute.dropseqrna.utils.ObjectCounter;
+
+public class ChimericUmi {
+
+ private final String molecularBarcode;
+ private final Set<String> chimericGenes;
+
+ /**
+ * Strategies for deciding which genes that share a UMI are chimeric.
+ * REMOVE_ALL: when more than one gene shares the UMI, every gene is chimeric.
+ * RETAIN_MOST_SUPPORTED: the gene with the highest read count is kept and the others are chimeric;
+ * when several genes tie for the highest count, every gene is chimeric.
+ */
+ public enum CHIMERIC_STRATEGY {
+ REMOVE_ALL, RETAIN_MOST_SUPPORTED;
+ }
+
+ /**
+ * Construct a ChimericUmi object for a single cell.
+ * This object receives the number of reads supporting each gene for a cell and UMI sequence, and based on the strategy
+ * remembers the set of genes that are chimeric.
+ * @param molecularBarcode The molecular barcode
+ * @param genes A collection of gene symbols with the number of supporting reads for this cell/molecular barcode.
+ * @param strategy The strategy to discover chimeric reads. REMOVE_ALL removes all genes for this UMI if there is more than 1 gene. RETAIN_MOST_SUPPORTED retains
+ * the gene with the highest read count and flags all other genes as chimeric. If the most supported gene is ambiguous (there are multiple genes with the highest count) then
+ * all genes are chimeric.
+ */
+ public ChimericUmi(String molecularBarcode, ObjectCounter<String> genes, CHIMERIC_STRATEGY strategy) {
+ this.molecularBarcode=molecularBarcode;
+ this.chimericGenes=getChimericGenes(genes,strategy);
+ }
+
+ /**
+ * @return The molecular barcode this object was constructed with.
+ */
+ public String getMolecularBarcode() {
+ return molecularBarcode;
+ }
+
+ /**
+ * @return The set of genes determined to be chimeric for this molecular barcode at construction time.
+ * The stored set itself is returned, not a copy.
+ */
+ public Set<String> getChimericGenes() {
+ return chimericGenes;
+ }
+
+ /**
+ * Find the set of 0 or more genes where the UMI
+ * @return
+ */
+ static Set<String> getChimericGenes (ObjectCounter<String> genes, CHIMERIC_STRATEGY strategy) {
+ switch (strategy) {
+ case REMOVE_ALL: return (getChimericGenesRemoveAll(genes));
+ case RETAIN_MOST_SUPPORTED: return(getChimericGenesRetainMostSupported(genes));
+ default:
+ throw new IllegalArgumentException("Chimeric Strategy not supported");
+ }
+ }
+
+ /**
+  * Simple strategy to filter chimeric genes. If multiple genes share the same UMI, all genes are chimeric.
+  * The result is an independent copy, so it does not alias the key collection of the supplied counter
+  * and does not rely on an unchecked cast of that collection to a Set.
+  * @param genes Gene symbols with the number of supporting reads for one cell/molecular barcode.
+  * @return An empty set when fewer than 2 genes are present, otherwise a new set holding every gene.
+  */
+ static Set <String> getChimericGenesRemoveAll(ObjectCounter<String> genes) {
+     if (genes.getSize() < 2) {
+         return Collections.emptySet();
+     }
+     // Copy rather than expose the counter's own keys to callers.
+     return new java.util.HashSet<>(genes.getKeys());
+ }
+
+ /**
+ * If there are multiple genes with varying degrees of support, flag all but the most
+ * supported gene as chimeric. If multiple genes are the most supported, the result is ambiguous
+ * and all genes for this UMI are considered chimeric.
+ * @param genes A set of genes with read counts
+ * @return A set of chimeric gene symbols. This set can be empty.
+ */
+ static Set<String> getChimericGenesRetainMostSupported(ObjectCounter<String> genes) {
+ // no chimeric genes.
+ if (genes.getSize()<2) return Collections.emptySet();
+ // at least one chimeric gene.
+ String mostSupportedGene=genes.getMax();
+ int counts = genes.getCountForKey(mostSupportedGene);
+ int genesAtSupport=genes.getNumberOfSize(counts);
+ if (genesAtSupport==1) {
+ Set<String> chimeric = (Set<String>) genes.getKeys();
+ chimeric.remove(mostSupportedGene);
+ return chimeric;
+ }
+ // ambiguous, return all genes as chimeric.
+ return (Set<String>)genes.getKeys();
+ }
+
+
+}
\ No newline at end of file
=====================================
src/java/org/broadinstitute/dropseqrna/barnyard/ChimericUmiCollection.java
=====================================
@@ -0,0 +1,164 @@
+package org.broadinstitute.dropseqrna.barnyard;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+import org.broadinstitute.dropseqrna.barnyard.ChimericUmi.CHIMERIC_STRATEGY;
+import org.broadinstitute.dropseqrna.barnyard.digitalexpression.UMICollection;
+import org.broadinstitute.dropseqrna.utils.ObjectCounter;
+
+import htsjdk.samtools.util.Log;
+
+/**
+ * A collection of all chimeric UMIs / genes for a single cell.
+ * @author nemesh
+ *
+ */
+public class ChimericUmiCollection {
+
+ private static final Log log = Log.getInstance(ChimericUmiCollection.class);
+
+ // The cell barcode this collection was created for.
+ private final String cellBarcode;
+ // map from a UMI to the genes that are chimeric for that UMI.
+ private Map<String, ChimericUmi> map;
+
+ // UMIs registered through registerProblematicUmi(), i.e. flagged by a test other than the chimeric test.
+ private Set<String> problematicUmis;
+
+ // key is the molecular barcode, value is the number of reads per gene.
+ // Set to null by buildChimericUmis().
+ private Map<String, ObjectCounter<String>> umiGeneSupport;
+ // Becomes true in buildChimericUmis(); add() then refuses further data.
+ private boolean isFinal = false;
+ private final ChimericUmi.CHIMERIC_STRATEGY strategy;
+ // Incremented once for every molecular barcode of every gene passed to add().
+ private int totalUMIs=0;
+
+
+ /**
+  * Creates an empty, not yet finalized collection for one cell.
+  * @param cellBarcode The cell barcode this collection belongs to.
+  * @param strategy The strategy used to decide which genes are chimeric when the collection is finalized.
+  */
+ public ChimericUmiCollection (String cellBarcode, ChimericUmi.CHIMERIC_STRATEGY strategy) {
+     this.cellBarcode = cellBarcode;
+     this.strategy = strategy;
+     this.totalUMIs = 0;
+     // UMI sequence -> chimeric result; populated by buildChimericUmis().
+     this.map = new HashMap<>();
+     this.problematicUmis = new HashSet<>();
+     // UMI sequence -> genes seen with that UMI and the number of reads per gene.
+     // Dropped when the data is finalized to save memory.
+     this.umiGeneSupport = new HashMap<>();
+ }
+
+ /**
+  * Convenience overload: registers the gene name and the per-UMI read counts held by a UMICollection.
+  * @param u The gene-level UMI data to add.
+  */
+ public void add (UMICollection u) {
+     final String gene = u.getGeneName();
+     final ObjectCounter<String> molBCCounts = u.getMolecularBarcodeCounts();
+     add(gene, molBCCounts);
+ }
+
+ /**
+  * Records, for one gene, the reads supporting each of its molecular barcodes.
+  * Each molecular barcode of the gene also increases the total UMI count by one.
+  * @param gene The gene symbol.
+  * @param molBCCounts The molecular barcodes observed for the gene with their read counts.
+  * @throws IllegalStateException if the collection has already been finalized.
+  */
+ public void add (String gene, ObjectCounter<String> molBCCounts) {
+     if (isFinal) {
+         throw new IllegalStateException("This object has been finalized, no more data can be added");
+     }
+
+     for (final String umi : molBCCounts.getKeys()) {
+         final int readCount = molBCCounts.getCountForKey(umi);
+         // Fetch the per-gene counter for this UMI, creating it on first sight.
+         final ObjectCounter<String> perGene = umiGeneSupport.computeIfAbsent(umi, k -> new ObjectCounter<String>());
+         perGene.incrementByCount(gene, readCount);
+         totalUMIs = getTotalUMIs() + 1;
+     }
+ }
+
+ /**
+  * Looks up the genes that are chimeric for a UMI in this cell.
+  * Calling this method finalizes the collection if that has not happened yet, after which no more data can be added.
+  * @param umi A molecular barcode to query
+  * @return A set with 0 or more chimeric genes. An empty set is returned when the UMI is unknown for this cell.
+  */
+ public Set<String> getChimericGenes (String umi) {
+     if (!isFinal) {
+         buildChimericUmis();
+     }
+     final ChimericUmi entry = map.get(umi);
+     return (entry == null) ? Collections.<String>emptySet() : entry.getChimericGenes();
+ }
+
+ /**
+  * Tests a UMI/gene pair against both the registered problematic UMIs and the chimerism result.
+  * Calling this method finalizes the collection if that has not happened yet.
+  * @param umi The molecular barcode to test.
+  * @param gene The gene to look for among the chimeric genes of the UMI.
+  * @return true if the UMI has been registered as problematic, or if the gene is chimeric for the UMI.
+  */
+ public boolean isChimericOrProblematic (String umi, String gene) {
+     if (!isFinal) {
+         buildChimericUmis();
+     }
+     // A problematic UMI is flagged regardless of the gene.
+     return this.problematicUmis.contains(umi) || getChimericGenes(umi).contains(gene);
+ }
+
+ /**
+ * Record that a UMI is problematic for this cell by a different test than the chimeric read test.
+ * The UMI is added to the problematic set whether or not the collection has been finalized.
+ * @param umi The molecular barcode to register.
+ */
+ public void registerProblematicUmi (String umi) {
+ this.problematicUmis.add(umi);
+ }
+
+ /**
+ * Returns the key set of the UMI map. This method does not finalize the collection, so the set is
+ * empty until buildChimericUmis() has run.
+ * NOTE(review): buildChimericUmis() stores an entry for every UMI that was added, including UMIs whose
+ * chimeric gene set is empty, so this looks like it returns all UMIs rather than only chimeric ones - confirm intent.
+ * @return The UMIs that have an entry in the map.
+ */
+ public Set<String> getChimericUmis () {
+ return map.keySet();
+ }
+
+ /**
+ * @return The UMIs registered via registerProblematicUmi(). The internal set itself is returned, not a copy.
+ */
+ public Set<String> getProblematicUmis() {
+ return problematicUmis;
+ }
+
+
+
+ /**
+  * Finalize the data set. Chimeric UMIs are discovered from the input data.
+  * Input data is discarded to save memory, and no more data can be added.
+  * It is recommended to finalize each cell barcode's data as it is entered to save memory
+  * since only the genes that are chimeric for a given molecular barcode are retained.
+  * Calling this method again after the collection has been finalized is a no-op.
+  */
+ public void buildChimericUmis () {
+     // The input data is gone after the first call; without this guard a second call
+     // would dereference the discarded map.
+     if (isFinal) {
+         return;
+     }
+     for (final Map.Entry<String, ObjectCounter<String>> support : umiGeneSupport.entrySet()) {
+         final String molBC = support.getKey();
+         map.put(molBC, new ChimericUmi(molBC, support.getValue(), this.strategy));
+     }
+     this.isFinal = true;
+     this.umiGeneSupport = null;
+ }
+
+ /**
+ * @return The cell barcode this collection was constructed with.
+ */
+ public String getCellBarcode() {
+ return cellBarcode;
+ }
+
+ /**
+ * @return The running count maintained by add(): one for each molecular barcode of each gene added.
+ */
+ public int getTotalUMIs() {
+ return totalUMIs;
+ }
+
+ /**
+  * Counts the chimeric UMI/gene pairs for this cell by summing, over all UMIs, the number of
+  * genes flagged as chimeric. Finalizes the collection if that has not happened yet.
+  * @return The number of chimeric UMI/gene pairs.
+  */
+ public int getTotalUMisChimeric() {
+     if (!isFinal) {
+         buildChimericUmis();
+     }
+     int pairs = 0;
+     for (final ChimericUmi cu : this.map.values()) {
+         pairs += cu.getChimericGenes().size();
+     }
+     return pairs;
+ }
+
+
+
+}
=====================================
src/java/org/broadinstitute/dropseqrna/barnyard/MarkChimericReads.java
=====================================
@@ -0,0 +1,347 @@
+/*
+ * MIT License
+ *
+ * Copyright 2021 Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.broadinstitute.dropseqrna.barnyard;
+
+import com.google.common.base.CharMatcher;
+import htsjdk.samtools.SAMFileWriter;
+import htsjdk.samtools.SAMFileWriterFactory;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SamReaderFactory;
+import htsjdk.samtools.metrics.MetricBase;
+import htsjdk.samtools.metrics.MetricsFile;
+import htsjdk.samtools.util.*;
+import org.apache.commons.lang.StringUtils;
+import org.broadinstitute.barclay.argparser.Argument;
+import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
+import org.broadinstitute.dropseqrna.barnyard.ChimericUmi.CHIMERIC_STRATEGY;
+import org.broadinstitute.dropseqrna.barnyard.digitalexpression.UMICollection;
+import org.broadinstitute.dropseqrna.cmdline.CustomCommandLineValidationHelper;
+import org.broadinstitute.dropseqrna.cmdline.DropSeq;
+import org.broadinstitute.dropseqrna.utils.FileListParsingUtils;
+import org.broadinstitute.dropseqrna.utils.ObjectCounter;
+import org.broadinstitute.dropseqrna.utils.OutputWriterUtil;
+import org.broadinstitute.dropseqrna.utils.SamHeaderUtil;
+import org.broadinstitute.dropseqrna.utils.StringInterner;
+import org.broadinstitute.dropseqrna.utils.readiterators.GeneFunctionProcessor;
+import org.broadinstitute.dropseqrna.utils.readiterators.SamFileMergeUtil;
+import org.broadinstitute.dropseqrna.utils.readiterators.SamHeaderAndIterator;
+import org.broadinstitute.dropseqrna.utils.readiterators.StrandStrategy;
+import org.broadinstitute.dropseqrna.utils.readiterators.UMIIterator;
+import picard.cmdline.StandardOptionDefinitions;
+import picard.illumina.BarcodeMetric;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.IOException;
+import java.util.*;
+import java.util.stream.Collectors;
+
+@CommandLineProgramProperties(summary = "Identify UMIs assigned to multiple genes in the same cell, then mark reads for such cell and UMI "
+ + "by adjusting the MAPQ, and/or generate a report for genes and UMIs in selected cells.", oneLineSummary = "Mark reads with UMIs that are assigned to multiple genes.", programGroup = DropSeq.class)
+public class MarkChimericReads extends GeneFunctionCommandLineBase {
+ private static final Log log = Log.getInstance(MarkChimericReads.class);
+
+ private static final CHIMERIC_STRATEGY DEFAULT_STRATEGY = CHIMERIC_STRATEGY.RETAIN_MOST_SUPPORTED;
+ public static final String CHIMERIC_COLUMN = "CHIMERIC";
+
+ @Argument(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "The input SAM or BAM file to analyze. This argument can accept wildcards, or a file with the "
+ + "suffix .bam_list that contains the locations of multiple BAM files", minElements = 1)
+ public List<File> INPUT;
+
+ @Argument(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "SAM or BAM file with MAPQ of putative "
+ + "chimeric reads adjusted.", optional = true)
+ public File OUTPUT;
+
+ // Help text fixed: "suported" typo and the missing space where the string literals are concatenated.
+ @Argument(doc="The strategy to use to detect and filter chimeric reads. REMOVE_ALL marks all genes that share the same UMI on the same cell. "
+         + "RETAIN_MOST_SUPPORTED retains the gene with the largest number of reads for a UMI/cell. With RETAIN_MOST_SUPPORTED, if the most supported gene is ambiguous, "
+         + "then all gene/UMI pairs are flagged as chimeric.", optional=false)
+ public ChimericUmi.CHIMERIC_STRATEGY STRATEGY=DEFAULT_STRATEGY;
+
+ @Argument(doc = "List of CELL_BARCODES to evaluate for chimeric reads.", optional=true)
+ public File CELL_BC_FILE;
+
+ @Argument(doc = "Tab-separated file with a row for each {CELL_BARCODE, MOLECULAR_BARCODE} found in selected cells, "
+ + "with the number of observations and chimeric status. This file can end with .gz to be gzipped, which is recommended for larger data sets.", optional = true)
+ public File OUTPUT_REPORT;
+
+ @Argument(doc = "Produce report of number of unique {CELL_BARCODE, MOLECULAR_BARCODE}, and number marked.", shortName = StandardOptionDefinitions.METRICS_FILE_SHORT_NAME, optional = true)
+ public File METRICS;
+
+ @Argument(doc = "Mark as chimeric all reads in which the {CELL_BARCODE, MOLECULAR_BARCODE} is assigned to multiple genes.")
+ public boolean MARK_UMI_REUSE = true;
+
+ @Argument(doc = "Mark as chimeric reads with UMIs containing at least this many Ts.", optional = true)
+ public Integer T_RICH_THRESHOLD;
+
+ @Argument(doc = "Ignore reads with MAPQ < this when making chimeric determination.")
+ public Integer READ_MQ = 10;
+
+ @Argument(doc = "Tag in which to store original MAPQ, if read is marked as chimeric. " + "Set to NULL to suppress tag creation.", optional = true)
+ public String MAPQ_TAG = "YM";
+
+ @Argument(doc = "Chimeric reads are tagged by setting MAPQ to this value.")
+ public int CHIMERIC_MAPQ = 0;
+
+ @Argument(doc = "The cell barcode tag.")
+ public String CELL_BARCODE_TAG = "XC";
+
+ @Argument(doc = "The molecular barcode tag.")
+ public String MOLECULAR_BARCODE_TAG = "XM";
+
+ /**
+  * Checks that any requested output file is writable and that at least one of
+  * OUTPUT and OUTPUT_REPORT has been requested.
+  * @return The validation messages of the superclass combined with the ones produced here.
+  */
+ @Override
+ protected String[] customCommandLineValidation() {
+     // Writability is asserted first, for whichever outputs were requested.
+     if (OUTPUT != null) IOUtil.assertFileIsWritable(OUTPUT);
+     if (OUTPUT_REPORT != null) IOUtil.assertFileIsWritable(OUTPUT_REPORT);
+
+     final ArrayList<String> errors = new ArrayList<>(1);
+     final boolean nothingRequested = (OUTPUT == null && OUTPUT_REPORT == null);
+     if (nothingRequested) {
+         errors.add("It does not make sense for neither OUTPUT nor OUTPUT_REPORT to be set.");
+     }
+     return CustomCommandLineValidationHelper.makeValue(super.customCommandLineValidation(), errors);
+ }
+
+ /**
+  * Runs the two passes of the program: chimeric UMI discovery (with optional report and metrics),
+  * then, only when OUTPUT is set, rewriting the reads with chimeric ones marked.
+  * @return 0 always.
+  */
+ @Override
+ protected int doWork() {
+     INPUT = FileListParsingUtils.expandFileList(INPUT);
+
+     // FIRST PASS: accumulate chimeric UMIs for each CBC, and optionally write report
+     final Map<String, ChimericUmiCollection> chimerics = identifyChimericsAndWriteReport(MARK_UMI_REUSE, T_RICH_THRESHOLD);
+
+     if (OUTPUT == null) {
+         return 0;
+     }
+
+     // PASS 2: Write BAM with chimeric reads marked.
+     final long numMarked = markReads(chimerics);
+     log.info(numMarked + " reads marked chimeric.");
+     return 0;
+ }
+
+ /**
+  * Second pass: copies every input read to OUTPUT, marking the reads whose cell barcode, UMI and gene
+  * are reported as chimeric or problematic by the supplied collections.
+  * The writer and the input iterator are closed even when processing fails part-way through.
+  * @param chimerics Map from cell barcode to the chimeric UMI collection for that cell.
+  * @return The number of reads marked chimeric.
+  */
+ private long markReads(final Map<String, ChimericUmiCollection> chimerics) {
+     final GeneFunctionProcessor p = new GeneFunctionProcessor(GENE_NAME_TAG, GENE_STRAND_TAG, GENE_FUNCTION_TAG, false, STRAND_STRATEGY, LOCUS_FUNCTION_LIST);
+
+     long numMarked = 0;
+     SamHeaderAndIterator headerAndIter = SamFileMergeUtil.mergeInputs(this.INPUT, false, SamReaderFactory.makeDefault());
+     SamHeaderUtil.addPgRecord(headerAndIter.header, this);
+     SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(headerAndIter.header, true, OUTPUT);
+     ProgressLogger progLog = new ProgressLogger(log, 1000000, "marked");
+     log.info("Marking chimeric UMIs in BAM");
+     try {
+         for (final SAMRecord rec : new IterableAdapter<>(headerAndIter.iterator)) {
+             progLog.record(rec);
+             // Parse the gene function tags, and get the interpreted tags for the given filters.
+             // Using a deep copy of the record because otherwise processRead alters the record as a side effect.
+             // TODO: should processRead be making a deep copy internally to avoid the side effect?
+             List<SAMRecord> processedRec = p.processRead (rec.deepCopy());
+
+             // only process the record for chimeric tags if the read can be interpreted with one consistent gene.
+             if (processedRec.size()==1) {
+                 String geneName = processedRec.get(0) .getStringAttribute(GENE_NAME_TAG);
+                 String umi = rec.getStringAttribute(MOLECULAR_BARCODE_TAG);
+                 ChimericUmiCollection cuc = chimerics.get(rec.getStringAttribute(CELL_BARCODE_TAG));
+                 if (cuc!=null && cuc.isChimericOrProblematic(umi, geneName)) {
+                     markChimeric(rec);
+                     ++numMarked;
+                 }
+             }
+             // every read is written, marked or not.
+             out.addAlignment(rec);
+         }
+     } finally {
+         // Release both resources on every exit path; the iterator is closed even if closing the writer fails.
+         try {
+             out.close();
+         } finally {
+             CloserUtil.close(headerAndIter.iterator);
+         }
+     }
+     return numMarked;
+ }
+
+ private void markChimeric(final SAMRecord rec) {
+ if (MAPQ_TAG != null) {
+ rec.setAttribute(MAPQ_TAG, rec.getMappingQuality());
+ }
+ rec.setMappingQuality(CHIMERIC_MAPQ);
+ }
+
+ private static void writePerTranscriptHeader(final BufferedWriter out) {
+ String[] header = { GatherMolecularBarcodeDistributionByGene.COLUMN_LABEL.CELL_BARCODE.toString(),
+ GatherMolecularBarcodeDistributionByGene.COLUMN_LABEL.GENE.toString(),
+ GatherMolecularBarcodeDistributionByGene.COLUMN_LABEL.MOLECULAR_BARCODE.toString(),
+ GatherMolecularBarcodeDistributionByGene.COLUMN_LABEL.NUM_OBS.toString(), CHIMERIC_COLUMN};
+ String h = StringUtils.join(header, "\t");
+ OutputWriterUtil.writeResult(h, out);
+ }
+
+ private void writePerTranscriptStats(final String gene, final String cellBarcode, final ObjectCounter<String> counts, final ChimericUmiCollection chimerics,
+ final BufferedWriter out, final MarkChimericReadMetrics metrics) {
+
+ for (String key : counts.getKeys()) {
+ int value = counts.getCountForKey(key);
+ boolean chimeric = chimerics.isChimericOrProblematic(key, gene);
+ String[] line = { cellBarcode, gene, key, Integer.toString(value), Boolean.toString(chimeric) };
+ if (chimeric) {
+ ++metrics.NUM_MARKED_UMIS;
+ }
+ String h = StringUtils.join(line, "\t");
+ OutputWriterUtil.writeResult(h, out);
+ }
+ }
+
+
+ /**
+ * First pass over the input: builds one ChimericUmiCollection per cell barcode, optionally registers
+ * T-rich UMIs as problematic, optionally writes the per-transcript report (OUTPUT_REPORT) and the
+ * metrics file (METRICS).
+ * NOTE(review): NUM_MARKED_UMIS is only incremented inside writePerTranscriptStats, which is called only
+ * when OUTPUT_REPORT is set; without a report the logged and written value stays 0 - confirm this is intended.
+ * @param markUmiReuse If true, gene/UMI support is added to each collection so UMIs shared by genes can be found.
+ * @param tRichThreshold If not null, UMIs containing at least this many Ts are registered as problematic.
+ * @return Map with key=CBC, value=the ChimericUmiCollection built for that CBC
+ */
+ private Map<String, ChimericUmiCollection> identifyChimericsAndWriteReport(boolean markUmiReuse, Integer tRichThreshold) {
+ final BufferedWriter report_out;
+ if (OUTPUT_REPORT != null) {
+ IOUtil.assertFileIsWritable(OUTPUT_REPORT);
+ report_out = IOUtil.openFileForBufferedWriting(OUTPUT_REPORT);
+
+ writePerTranscriptHeader(report_out);
+ } else {
+ report_out = null;
+ }
+
+ // Set up the cell barcodes. Can be null to try and repair the whole BAM.
+ final Set<String> cellBarcodes=getCellBarcodes();
+
+ // Peekable so the cell barcode of the next UMICollection can be inspected before consuming it.
+ PeekableIterator<UMICollection> umiIterator = new PeekableIterator<>(
+ new UMIIterator(SamFileMergeUtil.mergeInputs(this.INPUT, false),
+ GENE_NAME_TAG, GENE_STRAND_TAG, GENE_FUNCTION_TAG,
+ this.STRAND_STRATEGY, this.LOCUS_FUNCTION_LIST, this.CELL_BARCODE_TAG, this.MOLECULAR_BARCODE_TAG,
+ this.READ_MQ, false, cellBarcodes, true, false));
+
+ // Remember {CBC, UMI, Gene} pairs to be marked chimeric
+ final Map<String, ChimericUmiCollection> chimerics = new HashMap<>();
+ final MarkChimericReadMetrics metrics = new MarkChimericReadMetrics();
+
+ // TODO: Is this premature optimization? Genes will be reused across many cells so should be useful.
+ StringInterner geneStringCache = new StringInterner();
+
+ // progress logger
+ ProgressLogger progLog = new ProgressLogger(log, 100, "cells chimeric marked");
+
+ // Outer loop: one iteration per run of consecutive UMICollections that share a cell barcode.
+ // presumably the iterator yields all entries of a cell consecutively - verify against UMIIterator;
+ // a cell barcode seen again later would replace its earlier entry in the result map.
+ while (umiIterator.hasNext()) {
+
+ final String cellBarcode = umiIterator.peek().getCellBarcode();
+ progLog.record(cellBarcode, 0);
+ ChimericUmiCollection cuc = new ChimericUmiCollection(cellBarcode, this.STRATEGY);
+
+ // All gene-level entries of this cell are kept until the report rows for the cell are written.
+ final List<UMICollection> umiCollectionsForCell = new ArrayList<>();
+
+ // retain Alec's comments about slurping up data.
+ while (umiIterator.hasNext() && cellBarcode.equals(umiIterator.peek().getCellBarcode())) {
+ UMICollection umiC= umiIterator.next();
+ // only add items if you want to find chimeras later. If polyA only then this is false.
+ if (markUmiReuse)
+ cuc.add(geneStringCache.intern(umiC.getGeneName()), umiC.getMolecularBarcodeCounts());
+ // cuc.add(umiC);
+ umiCollectionsForCell.add(umiC);
+ }
+ // finalize the collection.
+ cuc.buildChimericUmis();
+
+ if (markUmiReuse) {
+ // Count number of UMIs for this CBC in case emitting metrics file.
+ metrics.NUM_UMIS += cuc.getTotalUMIs();
+ metrics.NUM_REUSED_UMIS += cuc.getTotalUMisChimeric();
+ }
+
+ if (tRichThreshold != null) {
+ // Distinct UMI sequences of this cell, pooled across genes.
+ final Set<String> umisForCellBarcode = new HashSet<>();
+ umiCollectionsForCell.forEach(umiCollection -> umisForCellBarcode.addAll(umiCollection.getMolecularBarcodeCounts().getKeys()));
+ if (!markUmiReuse && METRICS != null) {
+ // Count number of UMIs for this CBC in case emitting metrics file, and not counted above
+ metrics.NUM_UMIS += umiCollectionsForCell.stream().mapToLong(umiCollection -> umiCollection.getMolecularBarcodeCounts().getSize()).sum();
+ }
+ final CharMatcher tMatcher = CharMatcher.is('T');
+ // A UMI is T-rich when it contains at least tRichThreshold 'T' characters.
+ final Collection<String> polyTUmis = umisForCellBarcode.stream().filter(umi -> tMatcher.countIn(umi) >= tRichThreshold).collect(Collectors.toList());
+ metrics.NUM_T_RICH_UMIS += polyTUmis.size();
+ polyTUmis.forEach(cuc::registerProblematicUmi);
+ }
+
+ chimerics.put(cellBarcode, cuc);
+ if (report_out != null) {
+ for (final UMICollection batch : umiCollectionsForCell) {
+
+ writePerTranscriptStats(batch.getGeneName(), batch.getCellBarcode(),
+ batch.getMolecularBarcodeCounts(), cuc, report_out, metrics);
+ }
+ }
+ }
+ CloserUtil.close(umiIterator);
+
+ log.info("Total cell/UMIs observed [" +metrics.NUM_UMIS+"] Marked Chimeric [" + metrics.NUM_MARKED_UMIS+"] % ["+ String.format("%.2f%%",metrics.getPercentMarked())+"]");
+
+ try {
+ if (report_out != null) {
+ report_out.close();
+ }
+ } catch (IOException e) {
+ throw new RuntimeIOException("Exception writing " + OUTPUT_REPORT, e);
+ }
+
+ if (METRICS != null) {
+ final MetricsFile<MarkChimericReadMetrics, Integer> metricsFile = new MetricsFile<>();
+ metricsFile.addMetric(metrics);
+ metricsFile.write(METRICS);
+ }
+
+ return chimerics;
+ }
+
+ private Set<String> getCellBarcodes () {
+ if (CELL_BC_FILE==null) {
+ log.info("No cell barcode file - will process all cell barcodes");
+ return null;
+ }
+
+ Set<String> cellBarcodes=new HashSet<>(ParseBarcodeFile.readCellBarcodeFile(CELL_BC_FILE));
+ log.info("Found " + cellBarcodes.size()+ " cell barcodes in file");
+ return cellBarcodes;
+ }
+
+ /**
+  * Counters emitted to the metrics file and used for the summary log line.
+  */
+ public static class MarkChimericReadMetrics extends MetricBase {
+     public long NUM_UMIS;
+     public long NUM_MARKED_UMIS;
+     public long NUM_T_RICH_UMIS;
+     public long NUM_REUSED_UMIS;
+
+     /**
+      * @return NUM_MARKED_UMIS as a percentage of NUM_UMIS, or 0 when NUM_UMIS is 0
+      * (instead of the NaN that the division would produce).
+      */
+     public double getPercentMarked () {
+         if (NUM_UMIS == 0) {
+             return 0d;
+         }
+         return ((double) NUM_MARKED_UMIS / (double) NUM_UMIS)*100;
+     }
+
+     /**
+      * Adds the non-calculated metrics (which is all of them)
+      * @param metric The metrics whose counters are added to this object.
+      */
+     public void merge(final MarkChimericReadMetrics metric) {
+         this.NUM_UMIS += metric.NUM_UMIS;
+         this.NUM_MARKED_UMIS += metric.NUM_MARKED_UMIS;
+         this.NUM_T_RICH_UMIS += metric.NUM_T_RICH_UMIS;
+         this.NUM_REUSED_UMIS += metric.NUM_REUSED_UMIS;
+     }
+ }
+
+ /**
+ * Stock main method. Creates a MarkChimericReads instance and hands the arguments to instanceMainWithExit.
+ * @param args The command line arguments.
+ */
+ public static void main(final String[] args) {
+ new MarkChimericReads().instanceMainWithExit(args);
+ }
+}
=====================================
src/tests/java/org/broadinstitute/dropseqrna/barnyard/MarkChimericReadsTest.java
=====================================
@@ -0,0 +1,116 @@
+/*
+ * MIT License
+ *
+ * Copyright 2021 Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.broadinstitute.dropseqrna.barnyard;
+
+import org.broadinstitute.dropseqrna.barnyard.MarkChimericReads;
+import org.broadinstitute.dropseqrna.barnyard.ChimericUmi.CHIMERIC_STRATEGY;
+import org.broadinstitute.dropseqrna.utils.TestUtils;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.io.File;
+import java.util.Arrays;
+
+public class MarkChimericReadsTest {
+ public static File TEST_DATA_DIR = new File("testdata/org/broadinstitute/dropseq/barnyard");
+ public static final File CELL_BC_FILE = new File(TEST_DATA_DIR, "selectedCellBarcodes.txt");
+ public static final File EXPECTED_CHIMERIC_REPORT = new File(TEST_DATA_DIR, "umiReuse.chimeric_report.txt");
+
+ // UMI-reuse marking with the program defaults: STRATEGY and MARK_UMI_REUSE are left untouched and
+ // T_RICH_THRESHOLD is not set. doWork() is invoked directly, so command line validation is not exercised.
+ // The SAM output, report and metrics are compared with the expected umiReuse files.
+ @Test(enabled = true)
+ public void testUmiReuse() {
+ final MarkChimericReads clp = new MarkChimericReads();
+ clp.OUTPUT = TestUtils.getTempReportFile("MarkChimericReads.", ".sam");
+ clp.OUTPUT.deleteOnExit();
+ clp.OUTPUT_REPORT = TestUtils.getTempReportFile("chimeric_report.", ".txt");
+ clp.OUTPUT_REPORT.deleteOnExit();
+ clp.METRICS = TestUtils.getTempReportFile("MarkChimericReads.", ".chimeric_read_metrics");
+ clp.METRICS.deleteOnExit();
+ clp.INPUT = Arrays.asList(new File(TEST_DATA_DIR, "MarkChimericReads.input.sam"));
+ clp.CELL_BC_FILE = CELL_BC_FILE;
+ Assert.assertEquals(clp.doWork(), 0);
+ TestUtils.assertSamFilesSame(clp.OUTPUT, new File(TEST_DATA_DIR, "MarkChimericReads.umiReuse.sam"), false);
+ Assert.assertTrue(TestUtils.testFilesSame(clp.OUTPUT_REPORT, EXPECTED_CHIMERIC_REPORT));
+ Assert.assertTrue(TestUtils.testFilesSame(clp.METRICS, new File(TEST_DATA_DIR, "umiReuse.chimeric_read_metrics")));
+ }
+
+ // MarkChimericReads.input2.sam
+ // Runs with STRATEGY explicitly set to RETAIN_MOST_SUPPORTED on the input2 data and compares the
+ // SAM output, report and metrics with the expected umiReuse10x files.
+ @Test(enabled = true)
+ public void testUmiReuse10xStrategy() {
+ // in this case, for molecular barcode ATTTAGTAAATG, there are 2 reads for USMG5 but only one for CXorf56.
+ // USMG5 is retained, CXorf56 is marked as chimeric.
+ // This is the 10x strategy for removing chimeric reads
+ final MarkChimericReads clp = new MarkChimericReads();
+ clp.OUTPUT = TestUtils.getTempReportFile("MarkChimericReads.", ".sam");
+ clp.OUTPUT.deleteOnExit();
+ clp.OUTPUT_REPORT = TestUtils.getTempReportFile("chimeric_report.", ".txt");
+ clp.OUTPUT_REPORT.deleteOnExit();
+ clp.METRICS = TestUtils.getTempReportFile("MarkChimericReads.", ".chimeric_read_metrics");
+ clp.METRICS.deleteOnExit();
+ clp.INPUT = Arrays.asList(new File(TEST_DATA_DIR, "MarkChimericReads.input2.sam"));
+ clp.CELL_BC_FILE = new File(TEST_DATA_DIR, "selectedCellBarcodes.txt");
+ clp.STRATEGY = CHIMERIC_STRATEGY.RETAIN_MOST_SUPPORTED;
+ Assert.assertEquals(clp.doWork(), 0);
+ TestUtils.assertSamFilesSame(clp.OUTPUT, new File(TEST_DATA_DIR, "MarkChimericReads.umiReuse10x.sam"), false);
+ Assert.assertTrue(TestUtils.testFilesSame(clp.OUTPUT_REPORT, new File(TEST_DATA_DIR, "umiReuse10x.chimeric_report.txt")));
+ Assert.assertTrue(TestUtils.testFilesSame(clp.METRICS, new File(TEST_DATA_DIR, "umiReuse10x.chimeric_read_metrics")));
+ }
+
+ // T-rich marking only: MARK_UMI_REUSE is switched off and UMIs with at least 6 Ts are flagged.
+ // The SAM output, report and metrics are compared with the expected 6T files.
+ @Test(enabled = true)
+ public void testPolyT() {
+ final MarkChimericReads clp = new MarkChimericReads();
+ clp.OUTPUT = TestUtils.getTempReportFile("MarkChimericReads.", ".sam");
+ clp.OUTPUT.deleteOnExit();
+ clp.OUTPUT_REPORT = TestUtils.getTempReportFile("chimeric_report.", ".txt");
+ clp.OUTPUT_REPORT.deleteOnExit();
+ clp.METRICS = TestUtils.getTempReportFile("MarkChimericReads.", ".chimeric_read_metrics");
+ clp.METRICS.deleteOnExit();
+ clp.INPUT = Arrays.asList(new File(TEST_DATA_DIR, "MarkChimericReads.input.sam"));
+ clp.CELL_BC_FILE = new File(TEST_DATA_DIR, "selectedCellBarcodes.txt");
+ clp.MARK_UMI_REUSE = false;
+ clp.T_RICH_THRESHOLD = 6;
+ Assert.assertEquals(clp.doWork(), 0);
+ TestUtils.assertSamFilesSame(clp.OUTPUT, new File(TEST_DATA_DIR, "MarkChimericReads.6T.sam"), false);
+ Assert.assertTrue(TestUtils.testFilesSame(clp.OUTPUT_REPORT, new File(TEST_DATA_DIR, "6T.chimeric_report.txt")));
+ Assert.assertTrue(TestUtils.testFilesSame(clp.METRICS, new File(TEST_DATA_DIR, "6T.chimeric_read_metrics")));
+ }
+
+ // Both checks together: MARK_UMI_REUSE keeps its default and T_RICH_THRESHOLD is set to 6.
+ // The SAM output, report and metrics are compared with the expected "both" files.
+ @Test(enabled = true)
+ public void testBoth() {
+ final MarkChimericReads clp = new MarkChimericReads();
+ clp.OUTPUT = TestUtils.getTempReportFile("MarkChimericReads.", ".sam");
+ clp.OUTPUT.deleteOnExit();
+ clp.OUTPUT_REPORT = TestUtils.getTempReportFile("chimeric_report.", ".txt");
+ clp.OUTPUT_REPORT.deleteOnExit();
+ clp.METRICS = TestUtils.getTempReportFile("MarkChimericReads.", ".chimeric_read_metrics");
+ clp.METRICS.deleteOnExit();
+ clp.INPUT = Arrays.asList(new File(TEST_DATA_DIR, "MarkChimericReads.input.sam"));
+ clp.CELL_BC_FILE = new File(TEST_DATA_DIR, "selectedCellBarcodes.txt");
+ clp.T_RICH_THRESHOLD = 6;
+ Assert.assertEquals(clp.doWork(), 0);
+ TestUtils.assertSamFilesSame(clp.OUTPUT, new File(TEST_DATA_DIR, "MarkChimericReads.both.sam"), false);
+ Assert.assertTrue(TestUtils.testFilesSame(clp.OUTPUT_REPORT, new File(TEST_DATA_DIR, "both.chimeric_report.txt")));
+ Assert.assertTrue(TestUtils.testFilesSame(clp.METRICS, new File(TEST_DATA_DIR, "both.chimeric_read_metrics")));
+ }
+
+}
=====================================
testdata/org/broadinstitute/dropseq/barnyard/6T.chimeric_read_metrics
=====================================
@@ -0,0 +1,6 @@
+
+## METRICS CLASS org.broadinstitute.dropseqrna.priv.barnyard.MarkChimericReads$MarkChimericReadMetrics
+NUM_UMIS NUM_MARKED_UMIS NUM_T_RICH_UMIS NUM_REUSED_UMIS
+163 20 20 0
+
+
=====================================
testdata/org/broadinstitute/dropseq/barnyard/6T.chimeric_report.txt
=====================================
@@ -0,0 +1,165 @@
+CELL_BARCODE GENE MOLECULAR_BARCODE NUM_OBS CHIMERIC
+ACCAAACGTTCTCACC AC025594.3 TACATTACAGCT 1 false
+ACCAAACGTTCTCACC ACOT13 AGAGTAATATGA 1 false
+ACCAAACGTTCTCACC AFF4 TAAACTGTTTAA 1 false
+ACCAAACGTTCTCACC AJAP1 CCGGATTATTGC 1 false
+ACCAAACGTTCTCACC AL645728.1 CTGCGATTACGG 1 false
+ACCAAACGTTCTCACC ALDOA CTTCTGCAATGG 1 false
+ACCAAACGTTCTCACC ALG1L9P CCAGCGCTAATT 1 false
+ACCAAACGTTCTCACC ANKRD10 AATTTTATGAAT 1 true
+ACCAAACGTTCTCACC ANKRD10-IT1 ATCCTCGTTCTG 1 false
+ACCAAACGTTCTCACC AP3B2 ACTCGGATACCT 1 false
+ACCAAACGTTCTCACC APBA2 CATCCTGCGGGC 1 false
+ACCAAACGTTCTCACC ARGLU1 ACGAGTATTGCC 1 false
+ACCAAACGTTCTCACC ARHGEF3 TATTTGTCTGAG 1 true
+ACCAAACGTTCTCACC ARNT2 TGCTATCTTAAC 1 false
+ACCAAACGTTCTCACC ARPP21 CTGTGTTAATTG 1 true
+ACCAAACGTTCTCACC ATAD1 AGAAGACGGCCG 1 false
+ACCAAACGTTCTCACC ATP9B AGCAAGCACCGA 1 false
+ACCAAACGTTCTCACC BRD1 GCACTCATTTCG 1 false
+ACCAAACGTTCTCACC C19orf66 AAAATTTGGCAA 1 false
+ACCAAACGTTCTCACC CACNA1A TCACCTCCATAC 1 false
+ACCAAACGTTCTCACC CAMK1D ATGATGGTAACA 1 false
+ACCAAACGTTCTCACC CAPZB GCATGCTTAGAT 1 false
+ACCAAACGTTCTCACC CBL TAACCCTATGAT 1 false
+ACCAAACGTTCTCACC CCDC3 GACATGGCTCAC 1 false
+ACCAAACGTTCTCACC CCDC84 TCTCTAGGGTCC 1 false
+ACCAAACGTTCTCACC CDK16 CAGGGACCATAC 1 false
+ACCAAACGTTCTCACC CELF2 GCAAGGCATTGT 1 false
+ACCAAACGTTCTCACC CFDP1 GAGTAGTACGAC 1 false
+ACCAAACGTTCTCACC CIAO1 ATCTTAAAGTAT 1 false
+ACCAAACGTTCTCACC CREBZF ATTCACCACCGA 1 false
+ACCAAACGTTCTCACC CRYM GCCCATCCCCAG 1 false
+ACCAAACGTTCTCACC CSMD3 TTTATCGACTAG 1 false
+ACCAAACGTTCTCACC CTXN1 CCACGCAACTGT 1 false
+ACCAAACGTTCTCACC CXorf56 ATTTAGTAAATG 1 false
+ACCAAACGTTCTCACC DAAM1 CATATCTACATG 1 false
+ACCAAACGTTCTCACC DCLK1 GTCACCTTAGCC 1 false
+ACCAAACGTTCTCACC DDHD1 AATACGACATTA 1 false
+ACCAAACGTTCTCACC DDX5 ACTCGAAAGTCC 1 false
+ACCAAACGTTCTCACC DNAJB14 CGGAATTGATAC 1 false
+ACCAAACGTTCTCACC DOCK4 TTCATTTCAGGA 1 false
+ACCAAACGTTCTCACC DOCK7 CGGCTCCGGTCA 1 false
+ACCAAACGTTCTCACC DOK6 TGCCCTCTCACT 1 false
+ACCAAACGTTCTCACC EPM2AIP1 TAGACCGTGTTG 1 false
+ACCAAACGTTCTCACC ETFRF1 ACTACCCCGGGA 1 false
+ACCAAACGTTCTCACC FANCA TCACCTGCCTCC 1 false
+ACCAAACGTTCTCACC FKBP2 GTATCAAATGTA 1 false
+ACCAAACGTTCTCACC GGA3 TCTCACGGGTTT 1 false
+ACCAAACGTTCTCACC GNG2 GATTGCACCCAT 1 false
+ACCAAACGTTCTCACC GPM6A AAACCCCACGGC 1 false
+ACCAAACGTTCTCACC GRIN2B GGCTTTAATTCA 1 false
+ACCAAACGTTCTCACC GRIN2B GTTAAACGCATC 1 false
+ACCAAACGTTCTCACC HAGH TTTTCAACGCAC 1 false
+ACCAAACGTTCTCACC KCNIP4-IT1 ACGCATATTCCC 1 false
+ACCAAACGTTCTCACC KIAA0586 CATCCCCACCCC 1 false
+ACCAAACGTTCTCACC KIDINS220 TGTTGATCATCT 1 true
+ACCAAACGTTCTCACC LARS ACGCACCATCAC 1 false
+ACCAAACGTTCTCACC LIMCH1 TAGACCTATTGC 1 false
+ACCAAACGTTCTCACC LINC-PINT CCCCCCTATCGT 1 false
+ACCAAACGTTCTCACC LINC01352 TTAGACTGACCA 1 false
+ACCAAACGTTCTCACC LUC7L3 CGTATCATAACA 1 false
+ACCAAACGTTCTCACC MALAT1 ACCACAAAATAA 1 false
+ACCAAACGTTCTCACC MALAT1 ACACATATTTAC 1 false
+ACCAAACGTTCTCACC MALAT1 GATCTATAGTTC 1 false
+ACCAAACGTTCTCACC MALAT1 AATCTTTTTCAA 1 true
+ACCAAACGTTCTCACC MALAT1 TTCGGGTCTAAC 1 false
+ACCAAACGTTCTCACC MALAT1 AAATATCTGGGC 1 false
+ACCAAACGTTCTCACC MALAT1 ATTTTAGGATTT 1 true
+ACCAAACGTTCTCACC MALAT1 GGATAATGAATT 1 false
+ACCAAACGTTCTCACC MALAT1 TGGCCGACTACA 1 false
+ACCAAACGTTCTCACC MALAT1 TTTACATGCACA 1 false
+ACCAAACGTTCTCACC MALAT1 CTTATTAGAATA 1 false
+ACCAAACGTTCTCACC MALAT1 AAATAGTAGATA 1 false
+ACCAAACGTTCTCACC MALAT1 GATGGAGTATGA 1 false
+ACCAAACGTTCTCACC MALAT1 AAATTAATGAAG 1 false
+ACCAAACGTTCTCACC MALAT1 ATGATTTCCTAT 1 true
+ACCAAACGTTCTCACC MALAT1 AAGCTTTTTTCA 1 true
+ACCAAACGTTCTCACC MALAT1 GTAATCCAGCAC 1 false
+ACCAAACGTTCTCACC MALAT1 CACTGCTCAACT 1 false
+ACCAAACGTTCTCACC MALAT1 TTCCTTAGGGCA 1 false
+ACCAAACGTTCTCACC MALAT1 GGGATTTTACTA 1 false
+ACCAAACGTTCTCACC MAP1A CGTTCTAGGAAA 1 false
+ACCAAACGTTCTCACC MAP1B CATGGCAATACG 1 false
+ACCAAACGTTCTCACC MAP4 GCCTGTTAGCAT 1 false
+ACCAAACGTTCTCACC MAP6 ACACACATCTCT 1 false
+ACCAAACGTTCTCACC MDM4 TGTACCACTGAT 1 false
+ACCAAACGTTCTCACC MEG3 GGAATGGAAGTA 1 false
+ACCAAACGTTCTCACC MEG3 TTCTTCGCATTG 1 true
+ACCAAACGTTCTCACC MIS18BP1 ACTATCAGCTGT 1 false
+ACCAAACGTTCTCACC MOG TATAACATTTGC 1 false
+ACCAAACGTTCTCACC MPHOSPH8 GGTTCCATAGCC 1 false
+ACCAAACGTTCTCACC MT-ATP6 AAAAGGCACGGC 1 false
+ACCAAACGTTCTCACC MT-ATP6 TTATTACATAAC 1 false
+ACCAAACGTTCTCACC MT-ATP6 TCTCTCCTCTAT 1 true
+ACCAAACGTTCTCACC MT-ND5 GTTCATAAGCTG 1 false
+ACCAAACGTTCTCACC MTX2 TAGCGATTTAAT 1 false
+ACCAAACGTTCTCACC MYO5A CCGTCCCGCAGG 1 false
+ACCAAACGTTCTCACC MYO5C GACAATAGCCTC 1 false
+ACCAAACGTTCTCACC NAB1 CTGGTTTTGTTG 1 true
+ACCAAACGTTCTCACC NBAS TTCCCGCAAGCT 1 false
+ACCAAACGTTCTCACC NDFIP1 CTGCCATTATGG 1 false
+ACCAAACGTTCTCACC NPTN ATGGATTATAGG 1 false
+ACCAAACGTTCTCACC NR3C1 GTGTTGTTAATG 1 true
+ACCAAACGTTCTCACC ORAOV1 GAGTTCCTCGAA 1 false
+ACCAAACGTTCTCACC PFN2 AACTACTACCTA 1 false
+ACCAAACGTTCTCACC PGAP1 CAAGTATCCGGC 1 false
+ACCAAACGTTCTCACC PRKAR1B CTAAGATTGTTT 1 true
+ACCAAACGTTCTCACC PRKDC AGCACCTCAGCT 1 false
+ACCAAACGTTCTCACC PSIP1 TTTATGAAACGG 1 false
+ACCAAACGTTCTCACC PSMA1 TCCCGTTACACA 1 false
+ACCAAACGTTCTCACC PTPN18 AAATTCTAATAC 1 false
+ACCAAACGTTCTCACC RAP1GAP2 ATGCTCGTCCAG 1 false
+ACCAAACGTTCTCACC RIMS1 TCTTGGTACAAA 1 false
+ACCAAACGTTCTCACC RN7SK TCATTGCTAATT 1 true
+ACCAAACGTTCTCACC RPL10A ACGAGTAACATC 1 false
+ACCAAACGTTCTCACC RPL30 CTGGAAGCGTAA 1 false
+ACCAAACGTTCTCACC RPL34 TGCACCACAAGG 1 false
+ACCAAACGTTCTCACC RPLP1 AGAGCTTATCTA 1 false
+ACCAAACGTTCTCACC RYR2 CAGCAGTCCGTA 1 false
+ACCAAACGTTCTCACC SAMD8 TCCGCCATATTA 1 false
+ACCAAACGTTCTCACC SDF4 TTGAGTGTAAGT 1 false
+ACCAAACGTTCTCACC SH3PXD2A GATTTGGGGCGT 1 false
+ACCAAACGTTCTCACC SLC17A7 AATTGATTAAGT 1 false
+ACCAAACGTTCTCACC SLC26A10 TAGCGTCTTTAG 1 false
+ACCAAACGTTCTCACC SLC8A1 CCGCATTCCAGG 1 false
+ACCAAACGTTCTCACC SLC8A1-AS1 CCTTACAGGCAA 1 false
+ACCAAACGTTCTCACC SLF1 TAATCGTTCGGC 1 false
+ACCAAACGTTCTCACC SMC1A CTCATGATTGTT 1 true
+ACCAAACGTTCTCACC SMURF1 GGCTCCATTGAC 1 false
+ACCAAACGTTCTCACC SNRNP70 CGAACTTGCTGC 1 false
+ACCAAACGTTCTCACC SPATA18 CATCAACATCGG 1 false
+ACCAAACGTTCTCACC SPTBN1 TCTTATGTCTCG 1 true
+ACCAAACGTTCTCACC SRSF11 TCCAGGCTGCAT 2 false
+ACCAAACGTTCTCACC SSTR2 AATTGTATCTTA 1 true
+ACCAAACGTTCTCACC STRN TGATAGTCCCCG 1 false
+ACCAAACGTTCTCACC SV2B TACAAAAATGTT 1 false
+ACCAAACGTTCTCACC SYNE1 GATTTCTAAGTC 1 false
+ACCAAACGTTCTCACC TMED4 ACTTCGTGTTCA 1 false
+ACCAAACGTTCTCACC TMEM117 GGTATGCGTGTA 1 false
+ACCAAACGTTCTCACC TMEM246 GACATAGCTAAA 1 false
+ACCAAACGTTCTCACC TMEM87A TGCCAGCTGACT 1 false
+ACCAAACGTTCTCACC TMOD2 AATTAAGAGTTA 1 false
+ACCAAACGTTCTCACC TOMM7 GAATACTCTGGT 1 false
+ACCAAACGTTCTCACC TRIM27 CCAAGTCCGGAT 1 false
+ACCAAACGTTCTCACC TRIM44 GATCTGCCCGAC 1 false
+ACCAAACGTTCTCACC TRIM59 TATATATGCGAT 1 false
+ACCAAACGTTCTCACC TTC28 ATTCTAGACCCT 1 false
+ACCAAACGTTCTCACC TTC9B GCCTCCCTTAGT 1 false
+ACCAAACGTTCTCACC TVP23A TTCTCTTTTTCC 1 true
+ACCAAACGTTCTCACC UNC119 GCCGGAGTTTGC 1 false
+ACCAAACGTTCTCACC UQCRH TCACTCATATGT 1 false
+ACCAAACGTTCTCACC USMG5 ATTTAGTAAATG 1 false
+ACCAAACGTTCTCACC USP25 AGGACCCGATCA 1 false
+ACCAAACGTTCTCACC USP25 TGCAGTCCCCGG 1 false
+ACCAAACGTTCTCACC UTP6 CCCAGGCAGCTA 1 false
+ACCAAACGTTCTCACC VGLL4 CCGACGGACACA 1 false
+ACCAAACGTTCTCACC YBEY TTGCTGGATTTA 1 true
+ACCAAACGTTCTCACC YEATS2 CTCAACATCATG 1 false
+ACCAAACGTTCTCACC YWHAH GATATTGGGTCA 1 false
+ACCAAACGTTCTCACC ZC2HC1A AGATGGAAATCC 1 false
+ACCAAACGTTCTCACC ZFP28 CCTTTTACTGTA 1 true
+ACCAAACGTTCTCACC ZFYVE16 ATGCAACTTCGG 2 false
+ACCAAACGTTCTCACC ZNF516 GTGATTGTGCGC 1 false
+ACCAAACGTTCTCACC ZNF528 CATCCGGACTTA 1 false
+ACCAAACGTTCTCACC ZRANB3 CTGACTGGATTC 1 false
=====================================
testdata/org/broadinstitute/dropseq/barnyard/MarkChimericReads.6T.sam
=====================================
The diff for this file was not included because it is too large.
=====================================
testdata/org/broadinstitute/dropseq/barnyard/MarkChimericReads.both.sam
=====================================
The diff for this file was not included because it is too large.
=====================================
testdata/org/broadinstitute/dropseq/barnyard/MarkChimericReads.input.sam
=====================================
The diff for this file was not included because it is too large.
=====================================
testdata/org/broadinstitute/dropseq/barnyard/MarkChimericReads.input2.sam
=====================================
The diff for this file was not included because it is too large.
=====================================
testdata/org/broadinstitute/dropseq/barnyard/MarkChimericReads.umiReuse.sam
=====================================
The diff for this file was not included because it is too large.
=====================================
testdata/org/broadinstitute/dropseq/barnyard/MarkChimericReads.umiReuse10x.sam
=====================================
The diff for this file was not included because it is too large.
=====================================
testdata/org/broadinstitute/dropseq/barnyard/both.chimeric_read_metrics
=====================================
@@ -0,0 +1,6 @@
+
+## METRICS CLASS org.broadinstitute.dropseqrna.priv.barnyard.MarkChimericReads$MarkChimericReadMetrics
+NUM_UMIS NUM_MARKED_UMIS NUM_T_RICH_UMIS NUM_REUSED_UMIS
+164 22 20 2
+
+
=====================================
testdata/org/broadinstitute/dropseq/barnyard/both.chimeric_report.txt
=====================================
@@ -0,0 +1,165 @@
+CELL_BARCODE GENE MOLECULAR_BARCODE NUM_OBS CHIMERIC
+ACCAAACGTTCTCACC AC025594.3 TACATTACAGCT 1 false
+ACCAAACGTTCTCACC ACOT13 AGAGTAATATGA 1 false
+ACCAAACGTTCTCACC AFF4 TAAACTGTTTAA 1 false
+ACCAAACGTTCTCACC AJAP1 CCGGATTATTGC 1 false
+ACCAAACGTTCTCACC AL645728.1 CTGCGATTACGG 1 false
+ACCAAACGTTCTCACC ALDOA CTTCTGCAATGG 1 false
+ACCAAACGTTCTCACC ALG1L9P CCAGCGCTAATT 1 false
+ACCAAACGTTCTCACC ANKRD10 AATTTTATGAAT 1 true
+ACCAAACGTTCTCACC ANKRD10-IT1 ATCCTCGTTCTG 1 false
+ACCAAACGTTCTCACC AP3B2 ACTCGGATACCT 1 false
+ACCAAACGTTCTCACC APBA2 CATCCTGCGGGC 1 false
+ACCAAACGTTCTCACC ARGLU1 ACGAGTATTGCC 1 false
+ACCAAACGTTCTCACC ARHGEF3 TATTTGTCTGAG 1 true
+ACCAAACGTTCTCACC ARNT2 TGCTATCTTAAC 1 false
+ACCAAACGTTCTCACC ARPP21 CTGTGTTAATTG 1 true
+ACCAAACGTTCTCACC ATAD1 AGAAGACGGCCG 1 false
+ACCAAACGTTCTCACC ATP9B AGCAAGCACCGA 1 false
+ACCAAACGTTCTCACC BRD1 GCACTCATTTCG 1 false
+ACCAAACGTTCTCACC C19orf66 AAAATTTGGCAA 1 false
+ACCAAACGTTCTCACC CACNA1A TCACCTCCATAC 1 false
+ACCAAACGTTCTCACC CAMK1D ATGATGGTAACA 1 false
+ACCAAACGTTCTCACC CAPZB GCATGCTTAGAT 1 false
+ACCAAACGTTCTCACC CBL TAACCCTATGAT 1 false
+ACCAAACGTTCTCACC CCDC3 GACATGGCTCAC 1 false
+ACCAAACGTTCTCACC CCDC84 TCTCTAGGGTCC 1 false
+ACCAAACGTTCTCACC CDK16 CAGGGACCATAC 1 false
+ACCAAACGTTCTCACC CELF2 GCAAGGCATTGT 1 false
+ACCAAACGTTCTCACC CFDP1 GAGTAGTACGAC 1 false
+ACCAAACGTTCTCACC CIAO1 ATCTTAAAGTAT 1 false
+ACCAAACGTTCTCACC CREBZF ATTCACCACCGA 1 false
+ACCAAACGTTCTCACC CRYM GCCCATCCCCAG 1 false
+ACCAAACGTTCTCACC CSMD3 TTTATCGACTAG 1 false
+ACCAAACGTTCTCACC CTXN1 CCACGCAACTGT 1 false
+ACCAAACGTTCTCACC CXorf56 ATTTAGTAAATG 1 true
+ACCAAACGTTCTCACC DAAM1 CATATCTACATG 1 false
+ACCAAACGTTCTCACC DCLK1 GTCACCTTAGCC 1 false
+ACCAAACGTTCTCACC DDHD1 AATACGACATTA 1 false
+ACCAAACGTTCTCACC DDX5 ACTCGAAAGTCC 1 false
+ACCAAACGTTCTCACC DNAJB14 CGGAATTGATAC 1 false
+ACCAAACGTTCTCACC DOCK4 TTCATTTCAGGA 1 false
+ACCAAACGTTCTCACC DOCK7 CGGCTCCGGTCA 1 false
+ACCAAACGTTCTCACC DOK6 TGCCCTCTCACT 1 false
+ACCAAACGTTCTCACC EPM2AIP1 TAGACCGTGTTG 1 false
+ACCAAACGTTCTCACC ETFRF1 ACTACCCCGGGA 1 false
+ACCAAACGTTCTCACC FANCA TCACCTGCCTCC 1 false
+ACCAAACGTTCTCACC FKBP2 GTATCAAATGTA 1 false
+ACCAAACGTTCTCACC GGA3 TCTCACGGGTTT 1 false
+ACCAAACGTTCTCACC GNG2 GATTGCACCCAT 1 false
+ACCAAACGTTCTCACC GPM6A AAACCCCACGGC 1 false
+ACCAAACGTTCTCACC GRIN2B GGCTTTAATTCA 1 false
+ACCAAACGTTCTCACC GRIN2B GTTAAACGCATC 1 false
+ACCAAACGTTCTCACC HAGH TTTTCAACGCAC 1 false
+ACCAAACGTTCTCACC KCNIP4-IT1 ACGCATATTCCC 1 false
+ACCAAACGTTCTCACC KIAA0586 CATCCCCACCCC 1 false
+ACCAAACGTTCTCACC KIDINS220 TGTTGATCATCT 1 true
+ACCAAACGTTCTCACC LARS ACGCACCATCAC 1 false
+ACCAAACGTTCTCACC LIMCH1 TAGACCTATTGC 1 false
+ACCAAACGTTCTCACC LINC-PINT CCCCCCTATCGT 1 false
+ACCAAACGTTCTCACC LINC01352 TTAGACTGACCA 1 false
+ACCAAACGTTCTCACC LUC7L3 CGTATCATAACA 1 false
+ACCAAACGTTCTCACC MALAT1 ACCACAAAATAA 1 false
+ACCAAACGTTCTCACC MALAT1 ACACATATTTAC 1 false
+ACCAAACGTTCTCACC MALAT1 GATCTATAGTTC 1 false
+ACCAAACGTTCTCACC MALAT1 AATCTTTTTCAA 1 true
+ACCAAACGTTCTCACC MALAT1 TTCGGGTCTAAC 1 false
+ACCAAACGTTCTCACC MALAT1 AAATATCTGGGC 1 false
+ACCAAACGTTCTCACC MALAT1 ATTTTAGGATTT 1 true
+ACCAAACGTTCTCACC MALAT1 GGATAATGAATT 1 false
+ACCAAACGTTCTCACC MALAT1 TGGCCGACTACA 1 false
+ACCAAACGTTCTCACC MALAT1 TTTACATGCACA 1 false
+ACCAAACGTTCTCACC MALAT1 CTTATTAGAATA 1 false
+ACCAAACGTTCTCACC MALAT1 AAATAGTAGATA 1 false
+ACCAAACGTTCTCACC MALAT1 GATGGAGTATGA 1 false
+ACCAAACGTTCTCACC MALAT1 AAATTAATGAAG 1 false
+ACCAAACGTTCTCACC MALAT1 ATGATTTCCTAT 1 true
+ACCAAACGTTCTCACC MALAT1 AAGCTTTTTTCA 1 true
+ACCAAACGTTCTCACC MALAT1 GTAATCCAGCAC 1 false
+ACCAAACGTTCTCACC MALAT1 CACTGCTCAACT 1 false
+ACCAAACGTTCTCACC MALAT1 TTCCTTAGGGCA 1 false
+ACCAAACGTTCTCACC MALAT1 GGGATTTTACTA 1 false
+ACCAAACGTTCTCACC MAP1A CGTTCTAGGAAA 1 false
+ACCAAACGTTCTCACC MAP1B CATGGCAATACG 1 false
+ACCAAACGTTCTCACC MAP4 GCCTGTTAGCAT 1 false
+ACCAAACGTTCTCACC MAP6 ACACACATCTCT 1 false
+ACCAAACGTTCTCACC MDM4 TGTACCACTGAT 1 false
+ACCAAACGTTCTCACC MEG3 GGAATGGAAGTA 1 false
+ACCAAACGTTCTCACC MEG3 TTCTTCGCATTG 1 true
+ACCAAACGTTCTCACC MIS18BP1 ACTATCAGCTGT 1 false
+ACCAAACGTTCTCACC MOG TATAACATTTGC 1 false
+ACCAAACGTTCTCACC MPHOSPH8 GGTTCCATAGCC 1 false
+ACCAAACGTTCTCACC MT-ATP6 AAAAGGCACGGC 1 false
+ACCAAACGTTCTCACC MT-ATP6 TTATTACATAAC 1 false
+ACCAAACGTTCTCACC MT-ATP6 TCTCTCCTCTAT 1 true
+ACCAAACGTTCTCACC MT-ND5 GTTCATAAGCTG 1 false
+ACCAAACGTTCTCACC MTX2 TAGCGATTTAAT 1 false
+ACCAAACGTTCTCACC MYO5A CCGTCCCGCAGG 1 false
+ACCAAACGTTCTCACC MYO5C GACAATAGCCTC 1 false
+ACCAAACGTTCTCACC NAB1 CTGGTTTTGTTG 1 true
+ACCAAACGTTCTCACC NBAS TTCCCGCAAGCT 1 false
+ACCAAACGTTCTCACC NDFIP1 CTGCCATTATGG 1 false
+ACCAAACGTTCTCACC NPTN ATGGATTATAGG 1 false
+ACCAAACGTTCTCACC NR3C1 GTGTTGTTAATG 1 true
+ACCAAACGTTCTCACC ORAOV1 GAGTTCCTCGAA 1 false
+ACCAAACGTTCTCACC PFN2 AACTACTACCTA 1 false
+ACCAAACGTTCTCACC PGAP1 CAAGTATCCGGC 1 false
+ACCAAACGTTCTCACC PRKAR1B CTAAGATTGTTT 1 true
+ACCAAACGTTCTCACC PRKDC AGCACCTCAGCT 1 false
+ACCAAACGTTCTCACC PSIP1 TTTATGAAACGG 1 false
+ACCAAACGTTCTCACC PSMA1 TCCCGTTACACA 1 false
+ACCAAACGTTCTCACC PTPN18 AAATTCTAATAC 1 false
+ACCAAACGTTCTCACC RAP1GAP2 ATGCTCGTCCAG 1 false
+ACCAAACGTTCTCACC RIMS1 TCTTGGTACAAA 1 false
+ACCAAACGTTCTCACC RN7SK TCATTGCTAATT 1 true
+ACCAAACGTTCTCACC RPL10A ACGAGTAACATC 1 false
+ACCAAACGTTCTCACC RPL30 CTGGAAGCGTAA 1 false
+ACCAAACGTTCTCACC RPL34 TGCACCACAAGG 1 false
+ACCAAACGTTCTCACC RPLP1 AGAGCTTATCTA 1 false
+ACCAAACGTTCTCACC RYR2 CAGCAGTCCGTA 1 false
+ACCAAACGTTCTCACC SAMD8 TCCGCCATATTA 1 false
+ACCAAACGTTCTCACC SDF4 TTGAGTGTAAGT 1 false
+ACCAAACGTTCTCACC SH3PXD2A GATTTGGGGCGT 1 false
+ACCAAACGTTCTCACC SLC17A7 AATTGATTAAGT 1 false
+ACCAAACGTTCTCACC SLC26A10 TAGCGTCTTTAG 1 false
+ACCAAACGTTCTCACC SLC8A1 CCGCATTCCAGG 1 false
+ACCAAACGTTCTCACC SLC8A1-AS1 CCTTACAGGCAA 1 false
+ACCAAACGTTCTCACC SLF1 TAATCGTTCGGC 1 false
+ACCAAACGTTCTCACC SMC1A CTCATGATTGTT 1 true
+ACCAAACGTTCTCACC SMURF1 GGCTCCATTGAC 1 false
+ACCAAACGTTCTCACC SNRNP70 CGAACTTGCTGC 1 false
+ACCAAACGTTCTCACC SPATA18 CATCAACATCGG 1 false
+ACCAAACGTTCTCACC SPTBN1 TCTTATGTCTCG 1 true
+ACCAAACGTTCTCACC SRSF11 TCCAGGCTGCAT 2 false
+ACCAAACGTTCTCACC SSTR2 AATTGTATCTTA 1 true
+ACCAAACGTTCTCACC STRN TGATAGTCCCCG 1 false
+ACCAAACGTTCTCACC SV2B TACAAAAATGTT 1 false
+ACCAAACGTTCTCACC SYNE1 GATTTCTAAGTC 1 false
+ACCAAACGTTCTCACC TMED4 ACTTCGTGTTCA 1 false
+ACCAAACGTTCTCACC TMEM117 GGTATGCGTGTA 1 false
+ACCAAACGTTCTCACC TMEM246 GACATAGCTAAA 1 false
+ACCAAACGTTCTCACC TMEM87A TGCCAGCTGACT 1 false
+ACCAAACGTTCTCACC TMOD2 AATTAAGAGTTA 1 false
+ACCAAACGTTCTCACC TOMM7 GAATACTCTGGT 1 false
+ACCAAACGTTCTCACC TRIM27 CCAAGTCCGGAT 1 false
+ACCAAACGTTCTCACC TRIM44 GATCTGCCCGAC 1 false
+ACCAAACGTTCTCACC TRIM59 TATATATGCGAT 1 false
+ACCAAACGTTCTCACC TTC28 ATTCTAGACCCT 1 false
+ACCAAACGTTCTCACC TTC9B GCCTCCCTTAGT 1 false
+ACCAAACGTTCTCACC TVP23A TTCTCTTTTTCC 1 true
+ACCAAACGTTCTCACC UNC119 GCCGGAGTTTGC 1 false
+ACCAAACGTTCTCACC UQCRH TCACTCATATGT 1 false
+ACCAAACGTTCTCACC USMG5 ATTTAGTAAATG 1 true
+ACCAAACGTTCTCACC USP25 AGGACCCGATCA 1 false
+ACCAAACGTTCTCACC USP25 TGCAGTCCCCGG 1 false
+ACCAAACGTTCTCACC UTP6 CCCAGGCAGCTA 1 false
+ACCAAACGTTCTCACC VGLL4 CCGACGGACACA 1 false
+ACCAAACGTTCTCACC YBEY TTGCTGGATTTA 1 true
+ACCAAACGTTCTCACC YEATS2 CTCAACATCATG 1 false
+ACCAAACGTTCTCACC YWHAH GATATTGGGTCA 1 false
+ACCAAACGTTCTCACC ZC2HC1A AGATGGAAATCC 1 false
+ACCAAACGTTCTCACC ZFP28 CCTTTTACTGTA 1 true
+ACCAAACGTTCTCACC ZFYVE16 ATGCAACTTCGG 2 false
+ACCAAACGTTCTCACC ZNF516 GTGATTGTGCGC 1 false
+ACCAAACGTTCTCACC ZNF528 CATCCGGACTTA 1 false
+ACCAAACGTTCTCACC ZRANB3 CTGACTGGATTC 1 false
=====================================
testdata/org/broadinstitute/dropseq/barnyard/ed_collapse.chimeric_report.txt
=====================================
@@ -0,0 +1,3 @@
+CELL_BARCODE GENE MOLECULAR_BARCODE NUM_OBS CHIMERIC
+AAAAGGTTATGT AC018832.1 CCTAGGGG 18 false
+AAAAGGTTATGT AC018832.1 CCTAGGGT 1 true
=====================================
testdata/org/broadinstitute/dropseq/barnyard/selectedCellBarcodes.txt
=====================================
@@ -0,0 +1 @@
+ACCAAACGTTCTCACC
=====================================
testdata/org/broadinstitute/dropseq/barnyard/two_cells.chimeric_report.txt
=====================================
@@ -0,0 +1,3 @@
+CELL_BARCODE GENE MOLECULAR_BARCODE NUM_OBS CHIMERIC
+ACCAAACGTTCTCACC AC025594.3 TACATTACAGCT 1 false
+ACGTAACGTTCTCACC ACOT13 AGAGTAATATGA 1 false
=====================================
testdata/org/broadinstitute/dropseq/barnyard/umiReuse.chimeric_read_metrics
=====================================
@@ -0,0 +1,6 @@
+
+## METRICS CLASS org.broadinstitute.dropseqrna.priv.barnyard.MarkChimericReads$MarkChimericReadMetrics
+NUM_UMIS NUM_MARKED_UMIS NUM_T_RICH_UMIS NUM_REUSED_UMIS
+164 2 0 2
+
+
=====================================
testdata/org/broadinstitute/dropseq/barnyard/umiReuse.chimeric_report.txt
=====================================
@@ -0,0 +1,165 @@
+CELL_BARCODE GENE MOLECULAR_BARCODE NUM_OBS CHIMERIC
+ACCAAACGTTCTCACC AC025594.3 TACATTACAGCT 1 false
+ACCAAACGTTCTCACC ACOT13 AGAGTAATATGA 1 false
+ACCAAACGTTCTCACC AFF4 TAAACTGTTTAA 1 false
+ACCAAACGTTCTCACC AJAP1 CCGGATTATTGC 1 false
+ACCAAACGTTCTCACC AL645728.1 CTGCGATTACGG 1 false
+ACCAAACGTTCTCACC ALDOA CTTCTGCAATGG 1 false
+ACCAAACGTTCTCACC ALG1L9P CCAGCGCTAATT 1 false
+ACCAAACGTTCTCACC ANKRD10 AATTTTATGAAT 1 false
+ACCAAACGTTCTCACC ANKRD10-IT1 ATCCTCGTTCTG 1 false
+ACCAAACGTTCTCACC AP3B2 ACTCGGATACCT 1 false
+ACCAAACGTTCTCACC APBA2 CATCCTGCGGGC 1 false
+ACCAAACGTTCTCACC ARGLU1 ACGAGTATTGCC 1 false
+ACCAAACGTTCTCACC ARHGEF3 TATTTGTCTGAG 1 false
+ACCAAACGTTCTCACC ARNT2 TGCTATCTTAAC 1 false
+ACCAAACGTTCTCACC ARPP21 CTGTGTTAATTG 1 false
+ACCAAACGTTCTCACC ATAD1 AGAAGACGGCCG 1 false
+ACCAAACGTTCTCACC ATP9B AGCAAGCACCGA 1 false
+ACCAAACGTTCTCACC BRD1 GCACTCATTTCG 1 false
+ACCAAACGTTCTCACC C19orf66 AAAATTTGGCAA 1 false
+ACCAAACGTTCTCACC CACNA1A TCACCTCCATAC 1 false
+ACCAAACGTTCTCACC CAMK1D ATGATGGTAACA 1 false
+ACCAAACGTTCTCACC CAPZB GCATGCTTAGAT 1 false
+ACCAAACGTTCTCACC CBL TAACCCTATGAT 1 false
+ACCAAACGTTCTCACC CCDC3 GACATGGCTCAC 1 false
+ACCAAACGTTCTCACC CCDC84 TCTCTAGGGTCC 1 false
+ACCAAACGTTCTCACC CDK16 CAGGGACCATAC 1 false
+ACCAAACGTTCTCACC CELF2 GCAAGGCATTGT 1 false
+ACCAAACGTTCTCACC CFDP1 GAGTAGTACGAC 1 false
+ACCAAACGTTCTCACC CIAO1 ATCTTAAAGTAT 1 false
+ACCAAACGTTCTCACC CREBZF ATTCACCACCGA 1 false
+ACCAAACGTTCTCACC CRYM GCCCATCCCCAG 1 false
+ACCAAACGTTCTCACC CSMD3 TTTATCGACTAG 1 false
+ACCAAACGTTCTCACC CTXN1 CCACGCAACTGT 1 false
+ACCAAACGTTCTCACC CXorf56 ATTTAGTAAATG 1 true
+ACCAAACGTTCTCACC DAAM1 CATATCTACATG 1 false
+ACCAAACGTTCTCACC DCLK1 GTCACCTTAGCC 1 false
+ACCAAACGTTCTCACC DDHD1 AATACGACATTA 1 false
+ACCAAACGTTCTCACC DDX5 ACTCGAAAGTCC 1 false
+ACCAAACGTTCTCACC DNAJB14 CGGAATTGATAC 1 false
+ACCAAACGTTCTCACC DOCK4 TTCATTTCAGGA 1 false
+ACCAAACGTTCTCACC DOCK7 CGGCTCCGGTCA 1 false
+ACCAAACGTTCTCACC DOK6 TGCCCTCTCACT 1 false
+ACCAAACGTTCTCACC EPM2AIP1 TAGACCGTGTTG 1 false
+ACCAAACGTTCTCACC ETFRF1 ACTACCCCGGGA 1 false
+ACCAAACGTTCTCACC FANCA TCACCTGCCTCC 1 false
+ACCAAACGTTCTCACC FKBP2 GTATCAAATGTA 1 false
+ACCAAACGTTCTCACC GGA3 TCTCACGGGTTT 1 false
+ACCAAACGTTCTCACC GNG2 GATTGCACCCAT 1 false
+ACCAAACGTTCTCACC GPM6A AAACCCCACGGC 1 false
+ACCAAACGTTCTCACC GRIN2B GGCTTTAATTCA 1 false
+ACCAAACGTTCTCACC GRIN2B GTTAAACGCATC 1 false
+ACCAAACGTTCTCACC HAGH TTTTCAACGCAC 1 false
+ACCAAACGTTCTCACC KCNIP4-IT1 ACGCATATTCCC 1 false
+ACCAAACGTTCTCACC KIAA0586 CATCCCCACCCC 1 false
+ACCAAACGTTCTCACC KIDINS220 TGTTGATCATCT 1 false
+ACCAAACGTTCTCACC LARS ACGCACCATCAC 1 false
+ACCAAACGTTCTCACC LIMCH1 TAGACCTATTGC 1 false
+ACCAAACGTTCTCACC LINC-PINT CCCCCCTATCGT 1 false
+ACCAAACGTTCTCACC LINC01352 TTAGACTGACCA 1 false
+ACCAAACGTTCTCACC LUC7L3 CGTATCATAACA 1 false
+ACCAAACGTTCTCACC MALAT1 ACCACAAAATAA 1 false
+ACCAAACGTTCTCACC MALAT1 ACACATATTTAC 1 false
+ACCAAACGTTCTCACC MALAT1 GATCTATAGTTC 1 false
+ACCAAACGTTCTCACC MALAT1 AATCTTTTTCAA 1 false
+ACCAAACGTTCTCACC MALAT1 TTCGGGTCTAAC 1 false
+ACCAAACGTTCTCACC MALAT1 AAATATCTGGGC 1 false
+ACCAAACGTTCTCACC MALAT1 ATTTTAGGATTT 1 false
+ACCAAACGTTCTCACC MALAT1 GGATAATGAATT 1 false
+ACCAAACGTTCTCACC MALAT1 TGGCCGACTACA 1 false
+ACCAAACGTTCTCACC MALAT1 TTTACATGCACA 1 false
+ACCAAACGTTCTCACC MALAT1 CTTATTAGAATA 1 false
+ACCAAACGTTCTCACC MALAT1 AAATAGTAGATA 1 false
+ACCAAACGTTCTCACC MALAT1 GATGGAGTATGA 1 false
+ACCAAACGTTCTCACC MALAT1 AAATTAATGAAG 1 false
+ACCAAACGTTCTCACC MALAT1 ATGATTTCCTAT 1 false
+ACCAAACGTTCTCACC MALAT1 AAGCTTTTTTCA 1 false
+ACCAAACGTTCTCACC MALAT1 GTAATCCAGCAC 1 false
+ACCAAACGTTCTCACC MALAT1 CACTGCTCAACT 1 false
+ACCAAACGTTCTCACC MALAT1 TTCCTTAGGGCA 1 false
+ACCAAACGTTCTCACC MALAT1 GGGATTTTACTA 1 false
+ACCAAACGTTCTCACC MAP1A CGTTCTAGGAAA 1 false
+ACCAAACGTTCTCACC MAP1B CATGGCAATACG 1 false
+ACCAAACGTTCTCACC MAP4 GCCTGTTAGCAT 1 false
+ACCAAACGTTCTCACC MAP6 ACACACATCTCT 1 false
+ACCAAACGTTCTCACC MDM4 TGTACCACTGAT 1 false
+ACCAAACGTTCTCACC MEG3 GGAATGGAAGTA 1 false
+ACCAAACGTTCTCACC MEG3 TTCTTCGCATTG 1 false
+ACCAAACGTTCTCACC MIS18BP1 ACTATCAGCTGT 1 false
+ACCAAACGTTCTCACC MOG TATAACATTTGC 1 false
+ACCAAACGTTCTCACC MPHOSPH8 GGTTCCATAGCC 1 false
+ACCAAACGTTCTCACC MT-ATP6 AAAAGGCACGGC 1 false
+ACCAAACGTTCTCACC MT-ATP6 TTATTACATAAC 1 false
+ACCAAACGTTCTCACC MT-ATP6 TCTCTCCTCTAT 1 false
+ACCAAACGTTCTCACC MT-ND5 GTTCATAAGCTG 1 false
+ACCAAACGTTCTCACC MTX2 TAGCGATTTAAT 1 false
+ACCAAACGTTCTCACC MYO5A CCGTCCCGCAGG 1 false
+ACCAAACGTTCTCACC MYO5C GACAATAGCCTC 1 false
+ACCAAACGTTCTCACC NAB1 CTGGTTTTGTTG 1 false
+ACCAAACGTTCTCACC NBAS TTCCCGCAAGCT 1 false
+ACCAAACGTTCTCACC NDFIP1 CTGCCATTATGG 1 false
+ACCAAACGTTCTCACC NPTN ATGGATTATAGG 1 false
+ACCAAACGTTCTCACC NR3C1 GTGTTGTTAATG 1 false
+ACCAAACGTTCTCACC ORAOV1 GAGTTCCTCGAA 1 false
+ACCAAACGTTCTCACC PFN2 AACTACTACCTA 1 false
+ACCAAACGTTCTCACC PGAP1 CAAGTATCCGGC 1 false
+ACCAAACGTTCTCACC PRKAR1B CTAAGATTGTTT 1 false
+ACCAAACGTTCTCACC PRKDC AGCACCTCAGCT 1 false
+ACCAAACGTTCTCACC PSIP1 TTTATGAAACGG 1 false
+ACCAAACGTTCTCACC PSMA1 TCCCGTTACACA 1 false
+ACCAAACGTTCTCACC PTPN18 AAATTCTAATAC 1 false
+ACCAAACGTTCTCACC RAP1GAP2 ATGCTCGTCCAG 1 false
+ACCAAACGTTCTCACC RIMS1 TCTTGGTACAAA 1 false
+ACCAAACGTTCTCACC RN7SK TCATTGCTAATT 1 false
+ACCAAACGTTCTCACC RPL10A ACGAGTAACATC 1 false
+ACCAAACGTTCTCACC RPL30 CTGGAAGCGTAA 1 false
+ACCAAACGTTCTCACC RPL34 TGCACCACAAGG 1 false
+ACCAAACGTTCTCACC RPLP1 AGAGCTTATCTA 1 false
+ACCAAACGTTCTCACC RYR2 CAGCAGTCCGTA 1 false
+ACCAAACGTTCTCACC SAMD8 TCCGCCATATTA 1 false
+ACCAAACGTTCTCACC SDF4 TTGAGTGTAAGT 1 false
+ACCAAACGTTCTCACC SH3PXD2A GATTTGGGGCGT 1 false
+ACCAAACGTTCTCACC SLC17A7 AATTGATTAAGT 1 false
+ACCAAACGTTCTCACC SLC26A10 TAGCGTCTTTAG 1 false
+ACCAAACGTTCTCACC SLC8A1 CCGCATTCCAGG 1 false
+ACCAAACGTTCTCACC SLC8A1-AS1 CCTTACAGGCAA 1 false
+ACCAAACGTTCTCACC SLF1 TAATCGTTCGGC 1 false
+ACCAAACGTTCTCACC SMC1A CTCATGATTGTT 1 false
+ACCAAACGTTCTCACC SMURF1 GGCTCCATTGAC 1 false
+ACCAAACGTTCTCACC SNRNP70 CGAACTTGCTGC 1 false
+ACCAAACGTTCTCACC SPATA18 CATCAACATCGG 1 false
+ACCAAACGTTCTCACC SPTBN1 TCTTATGTCTCG 1 false
+ACCAAACGTTCTCACC SRSF11 TCCAGGCTGCAT 2 false
+ACCAAACGTTCTCACC SSTR2 AATTGTATCTTA 1 false
+ACCAAACGTTCTCACC STRN TGATAGTCCCCG 1 false
+ACCAAACGTTCTCACC SV2B TACAAAAATGTT 1 false
+ACCAAACGTTCTCACC SYNE1 GATTTCTAAGTC 1 false
+ACCAAACGTTCTCACC TMED4 ACTTCGTGTTCA 1 false
+ACCAAACGTTCTCACC TMEM117 GGTATGCGTGTA 1 false
+ACCAAACGTTCTCACC TMEM246 GACATAGCTAAA 1 false
+ACCAAACGTTCTCACC TMEM87A TGCCAGCTGACT 1 false
+ACCAAACGTTCTCACC TMOD2 AATTAAGAGTTA 1 false
+ACCAAACGTTCTCACC TOMM7 GAATACTCTGGT 1 false
+ACCAAACGTTCTCACC TRIM27 CCAAGTCCGGAT 1 false
+ACCAAACGTTCTCACC TRIM44 GATCTGCCCGAC 1 false
+ACCAAACGTTCTCACC TRIM59 TATATATGCGAT 1 false
+ACCAAACGTTCTCACC TTC28 ATTCTAGACCCT 1 false
+ACCAAACGTTCTCACC TTC9B GCCTCCCTTAGT 1 false
+ACCAAACGTTCTCACC TVP23A TTCTCTTTTTCC 1 false
+ACCAAACGTTCTCACC UNC119 GCCGGAGTTTGC 1 false
+ACCAAACGTTCTCACC UQCRH TCACTCATATGT 1 false
+ACCAAACGTTCTCACC USMG5 ATTTAGTAAATG 1 true
+ACCAAACGTTCTCACC USP25 AGGACCCGATCA 1 false
+ACCAAACGTTCTCACC USP25 TGCAGTCCCCGG 1 false
+ACCAAACGTTCTCACC UTP6 CCCAGGCAGCTA 1 false
+ACCAAACGTTCTCACC VGLL4 CCGACGGACACA 1 false
+ACCAAACGTTCTCACC YBEY TTGCTGGATTTA 1 false
+ACCAAACGTTCTCACC YEATS2 CTCAACATCATG 1 false
+ACCAAACGTTCTCACC YWHAH GATATTGGGTCA 1 false
+ACCAAACGTTCTCACC ZC2HC1A AGATGGAAATCC 1 false
+ACCAAACGTTCTCACC ZFP28 CCTTTTACTGTA 1 false
+ACCAAACGTTCTCACC ZFYVE16 ATGCAACTTCGG 2 false
+ACCAAACGTTCTCACC ZNF516 GTGATTGTGCGC 1 false
+ACCAAACGTTCTCACC ZNF528 CATCCGGACTTA 1 false
+ACCAAACGTTCTCACC ZRANB3 CTGACTGGATTC 1 false
=====================================
testdata/org/broadinstitute/dropseq/barnyard/umiReuse10x.chimeric_read_metrics
=====================================
@@ -0,0 +1,6 @@
+
+## METRICS CLASS org.broadinstitute.dropseqrna.priv.barnyard.MarkChimericReads$MarkChimericReadMetrics
+NUM_UMIS NUM_MARKED_UMIS NUM_T_RICH_UMIS NUM_REUSED_UMIS
+164 1 0 1
+
+
=====================================
testdata/org/broadinstitute/dropseq/barnyard/umiReuse10x.chimeric_report.txt
=====================================
@@ -0,0 +1,165 @@
+CELL_BARCODE GENE MOLECULAR_BARCODE NUM_OBS CHIMERIC
+ACCAAACGTTCTCACC AC025594.3 TACATTACAGCT 1 false
+ACCAAACGTTCTCACC ACOT13 AGAGTAATATGA 1 false
+ACCAAACGTTCTCACC AFF4 TAAACTGTTTAA 1 false
+ACCAAACGTTCTCACC AJAP1 CCGGATTATTGC 1 false
+ACCAAACGTTCTCACC AL645728.1 CTGCGATTACGG 1 false
+ACCAAACGTTCTCACC ALDOA CTTCTGCAATGG 1 false
+ACCAAACGTTCTCACC ALG1L9P CCAGCGCTAATT 1 false
+ACCAAACGTTCTCACC ANKRD10 AATTTTATGAAT 1 false
+ACCAAACGTTCTCACC ANKRD10-IT1 ATCCTCGTTCTG 1 false
+ACCAAACGTTCTCACC AP3B2 ACTCGGATACCT 1 false
+ACCAAACGTTCTCACC APBA2 CATCCTGCGGGC 1 false
+ACCAAACGTTCTCACC ARGLU1 ACGAGTATTGCC 1 false
+ACCAAACGTTCTCACC ARHGEF3 TATTTGTCTGAG 1 false
+ACCAAACGTTCTCACC ARNT2 TGCTATCTTAAC 1 false
+ACCAAACGTTCTCACC ARPP21 CTGTGTTAATTG 1 false
+ACCAAACGTTCTCACC ATAD1 AGAAGACGGCCG 1 false
+ACCAAACGTTCTCACC ATP9B AGCAAGCACCGA 1 false
+ACCAAACGTTCTCACC BRD1 GCACTCATTTCG 1 false
+ACCAAACGTTCTCACC C19orf66 AAAATTTGGCAA 1 false
+ACCAAACGTTCTCACC CACNA1A TCACCTCCATAC 1 false
+ACCAAACGTTCTCACC CAMK1D ATGATGGTAACA 1 false
+ACCAAACGTTCTCACC CAPZB GCATGCTTAGAT 1 false
+ACCAAACGTTCTCACC CBL TAACCCTATGAT 1 false
+ACCAAACGTTCTCACC CCDC3 GACATGGCTCAC 1 false
+ACCAAACGTTCTCACC CCDC84 TCTCTAGGGTCC 1 false
+ACCAAACGTTCTCACC CDK16 CAGGGACCATAC 1 false
+ACCAAACGTTCTCACC CELF2 GCAAGGCATTGT 1 false
+ACCAAACGTTCTCACC CFDP1 GAGTAGTACGAC 1 false
+ACCAAACGTTCTCACC CIAO1 ATCTTAAAGTAT 1 false
+ACCAAACGTTCTCACC CREBZF ATTCACCACCGA 1 false
+ACCAAACGTTCTCACC CRYM GCCCATCCCCAG 1 false
+ACCAAACGTTCTCACC CSMD3 TTTATCGACTAG 1 false
+ACCAAACGTTCTCACC CTXN1 CCACGCAACTGT 1 false
+ACCAAACGTTCTCACC CXorf56 ATTTAGTAAATG 1 true
+ACCAAACGTTCTCACC DAAM1 CATATCTACATG 1 false
+ACCAAACGTTCTCACC DCLK1 GTCACCTTAGCC 1 false
+ACCAAACGTTCTCACC DDHD1 AATACGACATTA 1 false
+ACCAAACGTTCTCACC DDX5 ACTCGAAAGTCC 1 false
+ACCAAACGTTCTCACC DNAJB14 CGGAATTGATAC 1 false
+ACCAAACGTTCTCACC DOCK4 TTCATTTCAGGA 1 false
+ACCAAACGTTCTCACC DOCK7 CGGCTCCGGTCA 1 false
+ACCAAACGTTCTCACC DOK6 TGCCCTCTCACT 1 false
+ACCAAACGTTCTCACC EPM2AIP1 TAGACCGTGTTG 1 false
+ACCAAACGTTCTCACC ETFRF1 ACTACCCCGGGA 1 false
+ACCAAACGTTCTCACC FANCA TCACCTGCCTCC 1 false
+ACCAAACGTTCTCACC FKBP2 GTATCAAATGTA 1 false
+ACCAAACGTTCTCACC GGA3 TCTCACGGGTTT 1 false
+ACCAAACGTTCTCACC GNG2 GATTGCACCCAT 1 false
+ACCAAACGTTCTCACC GPM6A AAACCCCACGGC 1 false
+ACCAAACGTTCTCACC GRIN2B GGCTTTAATTCA 1 false
+ACCAAACGTTCTCACC GRIN2B GTTAAACGCATC 1 false
+ACCAAACGTTCTCACC HAGH TTTTCAACGCAC 1 false
+ACCAAACGTTCTCACC KCNIP4-IT1 ACGCATATTCCC 1 false
+ACCAAACGTTCTCACC KIAA0586 CATCCCCACCCC 1 false
+ACCAAACGTTCTCACC KIDINS220 TGTTGATCATCT 1 false
+ACCAAACGTTCTCACC LARS ACGCACCATCAC 1 false
+ACCAAACGTTCTCACC LIMCH1 TAGACCTATTGC 1 false
+ACCAAACGTTCTCACC LINC-PINT CCCCCCTATCGT 1 false
+ACCAAACGTTCTCACC LINC01352 TTAGACTGACCA 1 false
+ACCAAACGTTCTCACC LUC7L3 CGTATCATAACA 1 false
+ACCAAACGTTCTCACC MALAT1 ACCACAAAATAA 1 false
+ACCAAACGTTCTCACC MALAT1 ACACATATTTAC 1 false
+ACCAAACGTTCTCACC MALAT1 GATCTATAGTTC 1 false
+ACCAAACGTTCTCACC MALAT1 AATCTTTTTCAA 1 false
+ACCAAACGTTCTCACC MALAT1 TTCGGGTCTAAC 1 false
+ACCAAACGTTCTCACC MALAT1 AAATATCTGGGC 1 false
+ACCAAACGTTCTCACC MALAT1 ATTTTAGGATTT 1 false
+ACCAAACGTTCTCACC MALAT1 GGATAATGAATT 1 false
+ACCAAACGTTCTCACC MALAT1 TGGCCGACTACA 1 false
+ACCAAACGTTCTCACC MALAT1 TTTACATGCACA 1 false
+ACCAAACGTTCTCACC MALAT1 CTTATTAGAATA 1 false
+ACCAAACGTTCTCACC MALAT1 AAATAGTAGATA 1 false
+ACCAAACGTTCTCACC MALAT1 GATGGAGTATGA 1 false
+ACCAAACGTTCTCACC MALAT1 AAATTAATGAAG 1 false
+ACCAAACGTTCTCACC MALAT1 ATGATTTCCTAT 1 false
+ACCAAACGTTCTCACC MALAT1 AAGCTTTTTTCA 1 false
+ACCAAACGTTCTCACC MALAT1 GTAATCCAGCAC 1 false
+ACCAAACGTTCTCACC MALAT1 CACTGCTCAACT 1 false
+ACCAAACGTTCTCACC MALAT1 TTCCTTAGGGCA 1 false
+ACCAAACGTTCTCACC MALAT1 GGGATTTTACTA 1 false
+ACCAAACGTTCTCACC MAP1A CGTTCTAGGAAA 1 false
+ACCAAACGTTCTCACC MAP1B CATGGCAATACG 1 false
+ACCAAACGTTCTCACC MAP4 GCCTGTTAGCAT 1 false
+ACCAAACGTTCTCACC MAP6 ACACACATCTCT 1 false
+ACCAAACGTTCTCACC MDM4 TGTACCACTGAT 1 false
+ACCAAACGTTCTCACC MEG3 GGAATGGAAGTA 1 false
+ACCAAACGTTCTCACC MEG3 TTCTTCGCATTG 1 false
+ACCAAACGTTCTCACC MIS18BP1 ACTATCAGCTGT 1 false
+ACCAAACGTTCTCACC MOG TATAACATTTGC 1 false
+ACCAAACGTTCTCACC MPHOSPH8 GGTTCCATAGCC 1 false
+ACCAAACGTTCTCACC MT-ATP6 AAAAGGCACGGC 1 false
+ACCAAACGTTCTCACC MT-ATP6 TTATTACATAAC 1 false
+ACCAAACGTTCTCACC MT-ATP6 TCTCTCCTCTAT 1 false
+ACCAAACGTTCTCACC MT-ND5 GTTCATAAGCTG 1 false
+ACCAAACGTTCTCACC MTX2 TAGCGATTTAAT 1 false
+ACCAAACGTTCTCACC MYO5A CCGTCCCGCAGG 1 false
+ACCAAACGTTCTCACC MYO5C GACAATAGCCTC 1 false
+ACCAAACGTTCTCACC NAB1 CTGGTTTTGTTG 1 false
+ACCAAACGTTCTCACC NBAS TTCCCGCAAGCT 1 false
+ACCAAACGTTCTCACC NDFIP1 CTGCCATTATGG 1 false
+ACCAAACGTTCTCACC NPTN ATGGATTATAGG 1 false
+ACCAAACGTTCTCACC NR3C1 GTGTTGTTAATG 1 false
+ACCAAACGTTCTCACC ORAOV1 GAGTTCCTCGAA 1 false
+ACCAAACGTTCTCACC PFN2 AACTACTACCTA 1 false
+ACCAAACGTTCTCACC PGAP1 CAAGTATCCGGC 1 false
+ACCAAACGTTCTCACC PRKAR1B CTAAGATTGTTT 1 false
+ACCAAACGTTCTCACC PRKDC AGCACCTCAGCT 1 false
+ACCAAACGTTCTCACC PSIP1 TTTATGAAACGG 1 false
+ACCAAACGTTCTCACC PSMA1 TCCCGTTACACA 1 false
+ACCAAACGTTCTCACC PTPN18 AAATTCTAATAC 1 false
+ACCAAACGTTCTCACC RAP1GAP2 ATGCTCGTCCAG 1 false
+ACCAAACGTTCTCACC RIMS1 TCTTGGTACAAA 1 false
+ACCAAACGTTCTCACC RN7SK TCATTGCTAATT 1 false
+ACCAAACGTTCTCACC RPL10A ACGAGTAACATC 1 false
+ACCAAACGTTCTCACC RPL30 CTGGAAGCGTAA 1 false
+ACCAAACGTTCTCACC RPL34 TGCACCACAAGG 1 false
+ACCAAACGTTCTCACC RPLP1 AGAGCTTATCTA 1 false
+ACCAAACGTTCTCACC RYR2 CAGCAGTCCGTA 1 false
+ACCAAACGTTCTCACC SAMD8 TCCGCCATATTA 1 false
+ACCAAACGTTCTCACC SDF4 TTGAGTGTAAGT 1 false
+ACCAAACGTTCTCACC SH3PXD2A GATTTGGGGCGT 1 false
+ACCAAACGTTCTCACC SLC17A7 AATTGATTAAGT 1 false
+ACCAAACGTTCTCACC SLC26A10 TAGCGTCTTTAG 1 false
+ACCAAACGTTCTCACC SLC8A1 CCGCATTCCAGG 1 false
+ACCAAACGTTCTCACC SLC8A1-AS1 CCTTACAGGCAA 1 false
+ACCAAACGTTCTCACC SLF1 TAATCGTTCGGC 1 false
+ACCAAACGTTCTCACC SMC1A CTCATGATTGTT 1 false
+ACCAAACGTTCTCACC SMURF1 GGCTCCATTGAC 1 false
+ACCAAACGTTCTCACC SNRNP70 CGAACTTGCTGC 1 false
+ACCAAACGTTCTCACC SPATA18 CATCAACATCGG 1 false
+ACCAAACGTTCTCACC SPTBN1 TCTTATGTCTCG 1 false
+ACCAAACGTTCTCACC SRSF11 TCCAGGCTGCAT 2 false
+ACCAAACGTTCTCACC SSTR2 AATTGTATCTTA 1 false
+ACCAAACGTTCTCACC STRN TGATAGTCCCCG 1 false
+ACCAAACGTTCTCACC SV2B TACAAAAATGTT 1 false
+ACCAAACGTTCTCACC SYNE1 GATTTCTAAGTC 1 false
+ACCAAACGTTCTCACC TMED4 ACTTCGTGTTCA 1 false
+ACCAAACGTTCTCACC TMEM117 GGTATGCGTGTA 1 false
+ACCAAACGTTCTCACC TMEM246 GACATAGCTAAA 1 false
+ACCAAACGTTCTCACC TMEM87A TGCCAGCTGACT 1 false
+ACCAAACGTTCTCACC TMOD2 AATTAAGAGTTA 1 false
+ACCAAACGTTCTCACC TOMM7 GAATACTCTGGT 1 false
+ACCAAACGTTCTCACC TRIM27 CCAAGTCCGGAT 1 false
+ACCAAACGTTCTCACC TRIM44 GATCTGCCCGAC 1 false
+ACCAAACGTTCTCACC TRIM59 TATATATGCGAT 1 false
+ACCAAACGTTCTCACC TTC28 ATTCTAGACCCT 1 false
+ACCAAACGTTCTCACC TTC9B GCCTCCCTTAGT 1 false
+ACCAAACGTTCTCACC TVP23A TTCTCTTTTTCC 1 false
+ACCAAACGTTCTCACC UNC119 GCCGGAGTTTGC 1 false
+ACCAAACGTTCTCACC UQCRH TCACTCATATGT 1 false
+ACCAAACGTTCTCACC USMG5 ATTTAGTAAATG 2 false
+ACCAAACGTTCTCACC USP25 AGGACCCGATCA 1 false
+ACCAAACGTTCTCACC USP25 TGCAGTCCCCGG 1 false
+ACCAAACGTTCTCACC UTP6 CCCAGGCAGCTA 1 false
+ACCAAACGTTCTCACC VGLL4 CCGACGGACACA 1 false
+ACCAAACGTTCTCACC YBEY TTGCTGGATTTA 1 false
+ACCAAACGTTCTCACC YEATS2 CTCAACATCATG 1 false
+ACCAAACGTTCTCACC YWHAH GATATTGGGTCA 1 false
+ACCAAACGTTCTCACC ZC2HC1A AGATGGAAATCC 1 false
+ACCAAACGTTCTCACC ZFP28 CCTTTTACTGTA 1 false
+ACCAAACGTTCTCACC ZFYVE16 ATGCAACTTCGG 2 false
+ACCAAACGTTCTCACC ZNF516 GTGATTGTGCGC 1 false
+ACCAAACGTTCTCACC ZNF528 CATCCGGACTTA 1 false
+ACCAAACGTTCTCACC ZRANB3 CTGACTGGATTC 1 false
View it on GitLab: https://salsa.debian.org/med-team/drop-seq/-/compare/a1aaded58271ad0f43a0f76fc67a04382ffd1992...8887ed0e7c42eded3e10cd109c76d320c8434fc8
--
View it on GitLab: https://salsa.debian.org/med-team/drop-seq/-/compare/a1aaded58271ad0f43a0f76fc67a04382ffd1992...8887ed0e7c42eded3e10cd109c76d320c8434fc8
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20230625/397b531a/attachment-0001.htm>
More information about the debian-med-commit
mailing list