[med-svn] [adapterremoval] 01/01: Try more adaptations to run the benchmarking suite

Andreas Tille tille at debian.org
Wed Aug 17 08:05:39 UTC 2016


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository adapterremoval.

commit 4d9f5f4a403c0081b5694adc96cd0c71ceff0a75
Author: Andreas Tille <tille at debian.org>
Date:   Wed Aug 17 09:59:35 2016 +0200

    Try more adaptations to run the benchmarking suite
---
 debian/patches/adapt-benchmarking.patch            | 81 ++++++++++++++++------
 .../delete_adapterremoval1x_completely.patch       | 80 +++++++++++++++++++++
 debian/patches/reduce_benchmark_length.patch       |  4 +-
 debian/patches/series                              |  2 +-
 4 files changed, 142 insertions(+), 25 deletions(-)

diff --git a/debian/patches/adapt-benchmarking.patch b/debian/patches/adapt-benchmarking.patch
index 56e69ff..0b8562b 100644
--- a/debian/patches/adapt-benchmarking.patch
+++ b/debian/patches/adapt-benchmarking.patch
@@ -1,6 +1,16 @@
 --- a/benchmark/benchmark.sh
 +++ b/benchmark/benchmark.sh
-@@ -46,11 +46,13 @@ MAX_THREADS=4
+@@ -34,7 +34,8 @@ NUM_REPLICATES=10
+ # Read lengths to examine
+ # Lengths > 100 use an interpolated profile, and should therefore not be used
+ # to estimate anything other than runtime (see 'simulate_reads').
+-READ_LENGTHS=(100 200)
++# READ_LENGTHS=(100 200) # 200 is accessing specific profiles which seems not to work with pIRS 2.0 syntax
++READ_LENGTHS=(100)
+ 
+ # Insert sizes of reads to simulate for adapter ID
+ ADAPTER_ID_INSERT_SIZES=($(seq 250 5 350))
+@@ -46,11 +47,13 @@ MAX_THREADS=4
  # Number of read (pairs) to simulate using pIRS for each replicate
  SIMULATED_NREADS=1000000
  
@@ -19,7 +29,7 @@
  
  
  ###############################################################################
-@@ -84,63 +86,79 @@ function check_for_jar()
+@@ -84,63 +87,79 @@ function check_for_jar()
      fi
  }
  
@@ -122,7 +132,7 @@
  check_for_executable "fastq-mcf" ${EXEC_FASTQ_MCF}
  # https://code.google.com/p/ea-utils/
  
-@@ -149,7 +167,7 @@ EXEC_TIME=/usr/bin/time
+@@ -149,7 +168,7 @@ EXEC_TIME=/usr/bin/time
  check_for_executable "GNU time" ${EXEC_TIME}
  # Needed for time / RAM usage
  
@@ -131,7 +141,7 @@
  check_for_executable "Java JRE" ${EXEC_JAVA}
  # Needed for Trimmomatic
  
-@@ -240,14 +258,16 @@ function fetch_reference()
+@@ -240,14 +259,16 @@ function fetch_reference()
      echo "------------------------------------------------------------" > /dev/stderr
      echo "Fetching reference sequence ..."  > /dev/stderr
  
@@ -151,16 +161,40 @@
      fi
  
  
-@@ -286,7 +306,7 @@ function simulate_reads()
+@@ -268,7 +289,8 @@ function simulate_reads()
+         if [ "${readlen}" -gt 100 ];
+         then
+             # Use fake profiles, built using scripts/extend_profile.py
+-            PROFILE_CLI="-b profiles/phixv2.InDel.matrix -s profiles/humNew.PE100.matrix.gz"
++            # PROFILE_CLI="-b profiles/phixv2.InDel.matrix -s profiles/humNew.PE100.matrix.gz"
++            PROFILE_CLI="--indel-profile=profiles/phixv2.InDel.matrix --base-calling-profile=profiles/humNew.PE100.matrix.gz"
+         else
+             PROFILE_CLI=
+         fi
+@@ -285,15 +307,19 @@ function simulate_reads()
+                 rm -rf "${DST:?}/"
                  mkdir -p "${DST}/"
  
-                 # -c 0 = uncompressed output
+-                # -c 0 = uncompressed output
 -                if ! ${EXEC_PIRS} simulate ${PROFILE_CLI} -x "${SIMULATED_NREADS}" -l "${readlen}" -i "${REFSEQ}" -c 0 -m "${INSERT_MEAN}" -v "${INSERT_SD}" -Q 33 -o "${DST}/reads" \
-+                if ! ${EXEC_PIRS} simulate ${PROFILE_CLI} -x "${SIMULATED_NREADS}" -l "${readlen}" "${REFSEQ}" -c gzip -m "${INSERT_MEAN}" -v "${INSERT_SD}" -Q 33 -o "${DST}/reads" \
++                # -c text = uncompressed output
++                set -x
++                if ! ${EXEC_PIRS} simulate ${PROFILE_CLI} -x "${SIMULATED_NREADS}" -l "${readlen}" "${REFSEQ}" -c text -m "${INSERT_MEAN}" -v "${INSERT_SD}" -Q 33 -o "${DST}/reads" \
                      > "${DST}/reads.stdout" 2> "${DST}/reads.stderr";
                  then
                      echo "Error simulated reads ..." > /dev/stderr
-@@ -813,14 +833,16 @@ function benchmark_se()
+                     exit 1
+                 fi
++                set +x
+ 
+-                gunzip "${DST}/reads_${readlen}_${INSERT_MEAN}.read.info.gz"
++                if [ -e "${DST}/reads_${readlen}_${INSERT_MEAN}.read.info.gz" ] ; then
++                    gunzip "${DST}/reads_${readlen}_${INSERT_MEAN}.read.info.gz"
++                fi
+                 ln -sf "$(basename "${DST}")/reads_${readlen}_${INSERT_MEAN}_1.fq" "${DST}_1.fq"
+                 ln -sf "$(basename "${DST}")/reads_${readlen}_${INSERT_MEAN}_2.fq" "${DST}_2.fq"
+                 ln -sf "$(basename "${DST}")/reads_${readlen}_${INSERT_MEAN}.read.info" "${DST}.read.info"
+@@ -813,14 +839,16 @@ function benchmark_se()
                  SIMULATED_MATE1="${SIMULATED_PREFIX}_${run_n}_${readlen}_1.fq"
                  SIMULATED_MATE2="${SIMULATED_PREFIX}_${run_n}_${readlen}_2.fq"
                  SIMULATED_INFO="${SIMULATED_PREFIX}_${run_n}_${readlen}.read.info"
@@ -173,7 +207,7 @@
 -                    "${SIMULATED_INFO}" \
 -                    --file1 "${SIMULATED_MATE1}" \
 -                    --mm 3
-+                if [ "${EXEC_ADAPTERREMOVAL1x}" != "" } ; then
++                if [  -z ${EXEC_ADAPTERREMOVAL1x+x} ] ; then
 +                    echo run_adapterremoval SE ${EXEC_ADAPTERREMOVAL1x} 1 \
 +                        "${RESULTS}/adapterremoval1x_mm3" \
 +                        "${SIMULATED_INFO}" \
@@ -183,7 +217,7 @@
  
                  # -mm 3 --minadapteroverlap 3 (test)
                  echo run_adapterremoval SE ${EXEC_ADAPTERREMOVAL2x} 1 \
-@@ -901,7 +923,7 @@ function benchmark_se()
+@@ -901,7 +929,7 @@ function benchmark_se()
          done
      done
  
@@ -192,7 +226,7 @@
  }
  
  
-@@ -921,17 +943,19 @@ function benchmark_pe()
+@@ -921,17 +949,19 @@ function benchmark_pe()
                  SIMULATED_MATE1="${SIMULATED_PREFIX}_${run_n}_${readlen}_1.fq"
                  SIMULATED_MATE2="${SIMULATED_PREFIX}_${run_n}_${readlen}_2.fq"
                  SIMULATED_INFO="${SIMULATED_PREFIX}_${run_n}_${readlen}.read.info"
@@ -208,7 +242,7 @@
 -                    --file1 "${SIMULATED_MATE1}" \
 -                    --file2 "${SIMULATED_MATE2}" \
 -                    --mm 3
-+                if [ "${EXEC_ADAPTERREMOVAL1x}" != "" ] ; then
++                if [  -z ${EXEC_ADAPTERREMOVAL1x+x} ] ; then
 +                    echo run_adapterremoval PE ${EXEC_ADAPTERREMOVAL1x} 1 \
 +                        "${RESULTS}/adapterremoval1x_mm3" \
 +                        "${SIMULATED_INFO}" \
@@ -219,7 +253,7 @@
  
                  for nthreads in $(seq 1 ${MAX_THREADS});
                  do
-@@ -997,7 +1021,7 @@ function benchmark_pe()
+@@ -997,7 +1027,7 @@ function benchmark_pe()
          done
      done
  
@@ -228,7 +262,7 @@
  }
  
  
-@@ -1017,17 +1041,19 @@ function benchmark_collapse()
+@@ -1017,17 +1047,19 @@ function benchmark_collapse()
                  SIMULATED_MATE1="${SIMULATED_PREFIX}_${run_n}_${readlen}_1.fq"
                  SIMULATED_MATE2="${SIMULATED_PREFIX}_${run_n}_${readlen}_2.fq"
                  SIMULATED_INFO="${SIMULATED_PREFIX}_${run_n}_${readlen}.read.info"
@@ -244,7 +278,7 @@
 -                    --file1 "${SIMULATED_MATE1}" \
 -                    --file2 "${SIMULATED_MATE2}" \
 -                    --mm 3 --collapse
-+                if [ "${EXEC_ADAPTERREMOVAL1x}" != "" ] ; then
++                if [  -z ${EXEC_ADAPTERREMOVAL1x+x} ] ; then
 +                    echo run_adapterremoval COLLAPSE ${EXEC_ADAPTERREMOVAL1x} 1 \
 +                        "${RESULTS}/adapterremoval1x_mm3" \
 +                        "${SIMULATED_INFO}" \
@@ -255,7 +289,7 @@
  
                  for nthreads in $(seq 1 ${MAX_THREADS});
                  do
-@@ -1063,7 +1089,7 @@ function benchmark_collapse()
+@@ -1063,7 +1095,7 @@ function benchmark_collapse()
          done
      done
  
@@ -264,7 +298,7 @@
  }
  
  
-@@ -1084,7 +1110,7 @@ function benchmark_mixed_se
+@@ -1084,7 +1116,7 @@ function benchmark_mixed_se
  	            SIMULATED_MATE1="${SIMULATED_MIXED_PREFIX}_${run_n}_${readlen}_1.fq"
  	            SIMULATED_MATE2="${SIMULATED_MIXED_PREFIX}_${run_n}_${readlen}_2.fq"
  	            SIMULATED_INFO="${SIMULATED_MIXED_PREFIX}_${run_n}_${readlen}.read.info"
@@ -273,7 +307,7 @@
  
  	            DEFAULT_ARGS=("${SIMULATED_INFO}" "${SIMULATED_MATE1}" "${SIMULATED_MATE2}")
  	            AR_PREFIX="${RESULTS}/adapterremoval2x"
-@@ -1139,7 +1165,7 @@ function benchmark_mixed_se
+@@ -1139,7 +1171,7 @@ function benchmark_mixed_se
  	    done
  	done
  
@@ -282,7 +316,7 @@
  }
  
  
-@@ -1159,7 +1185,7 @@ function benchmark_mixed_pe
+@@ -1159,7 +1191,7 @@ function benchmark_mixed_pe
  	            SIMULATED_MATE1="${SIMULATED_MIXED_PREFIX}_${run_n}_${readlen}_1.fq"
  	            SIMULATED_MATE2="${SIMULATED_MIXED_PREFIX}_${run_n}_${readlen}_2.fq"
  	            SIMULATED_INFO="${SIMULATED_MIXED_PREFIX}_${run_n}_${readlen}.read.info"
@@ -291,7 +325,7 @@
  
  	            DEFAULT_ARGS=("${SIMULATED_INFO}" "${SIMULATED_MATE1}" "${SIMULATED_MATE2}")
  
-@@ -1204,7 +1230,7 @@ function benchmark_mixed_pe
+@@ -1204,7 +1236,7 @@ function benchmark_mixed_pe
  	    done
  	done
  
@@ -300,7 +334,7 @@
  }
  
  
-@@ -1224,7 +1250,7 @@ function benchmark_adapter_id
+@@ -1224,7 +1256,7 @@ function benchmark_adapter_id
              SIMULATED_MATE1="${SIMULATED_ADAPTER_ID_PREFIX}_${run_n}_${readlen}_${INSERT_MEAN}_1.fq"
              SIMULATED_MATE2="${SIMULATED_ADAPTER_ID_PREFIX}_${run_n}_${readlen}_${INSERT_MEAN}_2.fq"
              SIMULATED_INFO="${SIMULATED_ADAPTER_ID_PREFIX}_${run_n}_${readlen}_${INSERT_MEAN}.read.info"
@@ -309,7 +343,7 @@
  
              run_minion "${RESULTS}/minion" \
                  "${SIMULATED_MATE1}" \
-@@ -1236,13 +1262,14 @@ function benchmark_adapter_id
+@@ -1236,15 +1268,17 @@ function benchmark_adapter_id
          done
      done
  
@@ -325,4 +359,7 @@
 +#mkdir -p results
  
  fetch_reference
++cp -a profiles $results
+ 
+ echo > /dev/stderr
  
diff --git a/debian/patches/delete_adapterremoval1x_completely.patch b/debian/patches/delete_adapterremoval1x_completely.patch
new file mode 100644
index 0000000..f030db7
--- /dev/null
+++ b/debian/patches/delete_adapterremoval1x_completely.patch
@@ -0,0 +1,80 @@
+--- a/benchmark/benchmark.sh
++++ b/benchmark/benchmark.sh
+@@ -843,15 +843,6 @@ function benchmark_se()
+                 SIMULATED_INFO="${SIMULATED_PREFIX}_${run_n}_${readlen}.read.info"
+                 RESULTS="$results/se/${readlen}_${run_n}"
+ 
+-                # -mm 3 corresponds to AR 2.x defaults
+-                if [  -z ${EXEC_ADAPTERREMOVAL1x+x} ] ; then
+-                    echo run_adapterremoval SE ${EXEC_ADAPTERREMOVAL1x} 1 \
+-                        "${RESULTS}/adapterremoval1x_mm3" \
+-                        "${SIMULATED_INFO}" \
+-                        --file1 "${SIMULATED_MATE1}" \
+-                        --mm 3
+-                fi
+-
+                 # -mm 3 --minadapteroverlap 3 (test)
+                 echo run_adapterremoval SE ${EXEC_ADAPTERREMOVAL2x} 1 \
+                     "${RESULTS}/adapterremoval2x_min3_mm3" \
+@@ -912,16 +903,18 @@ function benchmark_se()
+                     -c "adapters/adapter_1.txt" \
+                     -q 0
+ 
+-                echo run_leeHom SE \
+-                    "${RESULTS}/leeHom" \
+-                    "${SIMULATED_INFO}" \
+-                    -fq1 "${SIMULATED_MATE1}"
++                if [ -z ${EXEC_LEEHOM+x} ]; then  
++                    echo run_leeHom SE \
++                        "${RESULTS}/leeHom" \
++                        "${SIMULATED_INFO}" \
++                        -fq1 "${SIMULATED_MATE1}"
+ 
+-                echo run_leeHom SE \
+-                    "${RESULTS}/leeHom_ancient" \
+-                    "${SIMULATED_INFO}" \
+-                    -fq1 "${SIMULATED_MATE1}" \
+-                    --ancientdna
++                    echo run_leeHom SE \
++                        "${RESULTS}/leeHom_ancient" \
++                        "${SIMULATED_INFO}" \
++                        -fq1 "${SIMULATED_MATE1}" \
++                        --ancientdna
++                fi
+ 
+                 echo run_fastq_mcf_se "${RESULTS}/fastq_mcf" \
+                     "${SIMULATED_INFO}" \
+@@ -955,16 +948,6 @@ function benchmark_pe()
+ 
+                 DEFAULT_ARGS=("${SIMULATED_INFO}" "${SIMULATED_MATE1}" "${SIMULATED_MATE2}")
+ 
+-                # -mm 3 corresponds to AR 2.x defaults
+-                if [  -z ${EXEC_ADAPTERREMOVAL1x+x} ] ; then
+-                    echo run_adapterremoval PE ${EXEC_ADAPTERREMOVAL1x} 1 \
+-                        "${RESULTS}/adapterremoval1x_mm3" \
+-                        "${SIMULATED_INFO}" \
+-                        --file1 "${SIMULATED_MATE1}" \
+-                        --file2 "${SIMULATED_MATE2}" \
+-                        --mm 3
+-                fi
+-
+                 for nthreads in $(seq 1 ${MAX_THREADS});
+                 do
+                     AR_PREFIX="${RESULTS}/adapterremoval2x"
+@@ -1053,16 +1036,6 @@ function benchmark_collapse()
+ 
+                 DEFAULT_ARGS=("${SIMULATED_INFO}" "${SIMULATED_MATE1}" "${SIMULATED_MATE2}")
+ 
+-                # -mm 3 corresponds to AR 2.x defaults
+-                if [  -z ${EXEC_ADAPTERREMOVAL1x+x} ] ; then
+-                    echo run_adapterremoval COLLAPSE ${EXEC_ADAPTERREMOVAL1x} 1 \
+-                        "${RESULTS}/adapterremoval1x_mm3" \
+-                        "${SIMULATED_INFO}" \
+-                        --file1 "${SIMULATED_MATE1}" \
+-                        --file2 "${SIMULATED_MATE2}" \
+-                        --mm 3 --collapse
+-                fi
+-
+                 for nthreads in $(seq 1 ${MAX_THREADS});
+                 do
+                     AR_PREFIX="${RESULTS}/adapterremoval2x"
diff --git a/debian/patches/reduce_benchmark_length.patch b/debian/patches/reduce_benchmark_length.patch
index 4b0c8f0..2f35199 100644
--- a/debian/patches/reduce_benchmark_length.patch
+++ b/debian/patches/reduce_benchmark_length.patch
@@ -1,6 +1,6 @@
 --- a/benchmark/benchmark.sh
 +++ b/benchmark/benchmark.sh
-@@ -44,7 +44,7 @@ ADAPTER_ID_INSERT_SIZES=($(seq 250 5 350
+@@ -45,7 +45,7 @@ ADAPTER_ID_INSERT_SIZES=($(seq 250 5 350
  MAX_THREADS=4
  
  # Number of read (pairs) to simulate using pIRS for each replicate
@@ -9,7 +9,7 @@
  
  results=$(mktemp -d /tmp/adapter-benchmark.XXXXXX)
  echo "*** Results will be found in $results ***"
-@@ -266,8 +266,10 @@ function fetch_reference()
+@@ -267,8 +267,10 @@ function fetch_reference()
          echo ""  > /dev/stderr
  #        wget -O $results/reference.fasta.gz http://hgdownload.soe.ucsc.edu/goldenPath/hg38/chromosomes/chr1.fa.gz
  #        gunzip $results/reference.fasta.gz
diff --git a/debian/patches/series b/debian/patches/series
index ec30ca0..bcaa682 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1,4 +1,4 @@
 hardening.patch
 adapt-benchmarking.patch
 reduce_benchmark_length.patch
-do_not_gunzip.patch
+delete_adapterremoval1x_completely.patch

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/adapterremoval.git



More information about the debian-med-commit mailing list