[med-svn] r16628 - trunk/community/infrastructure/getData/getData.conf.d

Steffen Möller moeller at moszumanska.debian.org
Wed Apr 9 10:46:09 UTC 2014


Author: moeller
Date: 2014-04-09 10:46:09 +0000 (Wed, 09 Apr 2014)
New Revision: 16628

Removed:
   trunk/community/infrastructure/getData/getData.conf.d/human.getData.mk
   trunk/community/infrastructure/getData/getData.conf.d/mouse.getData.mk
Modified:
   trunk/community/infrastructure/getData/getData.conf.d/Ensembl_genome.mk
   trunk/community/infrastructure/getData/getData.conf.d/human.getData
   trunk/community/infrastructure/getData/getData.conf.d/mouse.getData
Log:
Updated and extended download and processing of genomes

Human and mouse only - as a pilot.



Modified: trunk/community/infrastructure/getData/getData.conf.d/Ensembl_genome.mk
===================================================================
--- trunk/community/infrastructure/getData/getData.conf.d/Ensembl_genome.mk	2014-04-09 10:45:07 UTC (rev 16627)
+++ trunk/community/infrastructure/getData/getData.conf.d/Ensembl_genome.mk	2014-04-09 10:46:09 UTC (rev 16628)
@@ -1,9 +1,31 @@
 SHARED_WGET_OPTIONS=$(shell getData --getWgetOptions)
 
-MIRROR = ftp://ftp.ensembl.org/pub/current_fasta
+# Ensembl release to fetch; a value given on the make command line overrides it.
+ENSEMBLVERSION=75
+MIRROR = ftp://ftp.ensembl.org/pub/release-$(ENSEMBLVERSION)/fasta
+
+# These targets are commands, not files, so a file of the same name must not mask them.
+.PHONY: get unpack blast
 
+# Download the gzipped per-chromosome FASTA files of ORGANISM for the chosen release.
 get:
-	wget $(SHARED_WGET_OPTIONS) $(MIRROR)/$(ORGANISM_L)/dna/$(ORGANISM).$(BUILD).dna.chromosome.*.fa.gz
+	echo "I: Retrieving data for Ensembl version $(ENSEMBLVERSION) species $(ORGANISM_L)"
+	wget $(SHARED_WGET_OPTIONS) "$(MIRROR)/$(ORGANISM_L)/dna/$(ORGANISM).*.$(ENSEMBLVERSION).dna.chromosome.*.fa.gz"
 
+# Remove stale *.fa files from the current directory, then decompress every archive.
 unpack:
+	find . -maxdepth 1 -name "*.fa" -delete
 	for file in *chromosome.*.fa.gz ; do zcat $$file > `basename $$file .gz` ; done
+
+# Build a nucleotide BLAST database named NICKNAME from the unpacked *.fa files.
+# Indexing is best effort: without an indexer in /usr/bin the target warns and still succeeds.
+blast:
+	if [ -x /usr/bin/makeblastdb ]; then \
+		echo "I: Found BLAST+ (preferred) for indexing"; \
+		cat *.fa | makeblastdb -title $(NICKNAME) -dbtype nucl -out $(NICKNAME); \
+	elif [ -x /usr/bin/formatdb ]; then \
+		echo "I: Found legacy BLAST for indexing"; \
+		cat *.fa | formatdb -i /dev/stdin -t $(NICKNAME) -n $(NICKNAME) -p F ; \
+	else \
+		echo "W: Neither makeblastdb nor formatdb found in /usr/bin - skipping BLAST indexing" >&2; \
+	fi

Modified: trunk/community/infrastructure/getData/getData.conf.d/human.getData
===================================================================
--- trunk/community/infrastructure/getData/getData.conf.d/human.getData	2014-04-09 10:45:07 UTC (rev 16627)
+++ trunk/community/infrastructure/getData/getData.conf.d/human.getData	2014-04-09 10:46:09 UTC (rev 16628)
@@ -1,10 +1,24 @@
 print STDERR "Reading Homo sapiens configuration file\n" if $verbose;
 
-$toBeMirrored{"human.genome"}={
-  "name" => "hg19 – Genome Reference Consortium",
+$toBeMirrored{"human.hg18.ncbi36.genome"}={
+  "name" => "hg18/NCBI36 – Genome Reference Consortium from Ensembl",
   "tags" => ["human","genome"],
-  "source" => "make -f /etc/getData.conf.d/human.getData.mk get unpack",
-#  "post-download" => "make blast"
+  "source" => "make ORGANISM=Homo_sapiens ORGANISM_L=homo_sapiens ENSEMBLVERSION=54 NICKNAME=hg18 -f /etc/getData.conf.d/Ensembl_genome.mk get unpack",
+  # NICKNAME is repeated below because the blast target names its database after it.
+  "post-download" => "make NICKNAME=hg18 -f /etc/getData.conf.d/Ensembl_genome.mk blast",
+  "depends" => "make",
+  "recommends" => "ncbi-blast+",
+  "size" => "39G"
 };
 
+$toBeMirrored{"human.hg19.grch37.genome"}={
+  "name" => "hg19/GRCh37 – Genome Reference Consortium from Ensembl",
+  "tags" => ["human","genome"],
+  "source" => "make ORGANISM=Homo_sapiens ORGANISM_L=homo_sapiens ENSEMBLVERSION=75 NICKNAME=hg19 -f /etc/getData.conf.d/Ensembl_genome.mk get unpack",
+  "post-download" => "make NICKNAME=hg19 -f /etc/getData.conf.d/Ensembl_genome.mk blast",
+  "depends" => "make",
+  "recommends" => "ncbi-blast+",
+  "size" => "39G"
+};
+
 1;

Deleted: trunk/community/infrastructure/getData/getData.conf.d/human.getData.mk
===================================================================
--- trunk/community/infrastructure/getData/getData.conf.d/human.getData.mk	2014-04-09 10:45:07 UTC (rev 16627)
+++ trunk/community/infrastructure/getData/getData.conf.d/human.getData.mk	2014-04-09 10:46:09 UTC (rev 16628)
@@ -1,6 +0,0 @@
-ORGANISM   = Homo_sapiens
-ORGANISM_L = homo_sapiens
-BUILD      = GRCh37.56
-NICKNAME   = hg19
-
-include /etc/getData.conf.d/Ensembl_genome.mk

Modified: trunk/community/infrastructure/getData/getData.conf.d/mouse.getData
===================================================================
--- trunk/community/infrastructure/getData/getData.conf.d/mouse.getData	2014-04-09 10:45:07 UTC (rev 16627)
+++ trunk/community/infrastructure/getData/getData.conf.d/mouse.getData	2014-04-09 10:46:09 UTC (rev 16628)
@@ -1,10 +1,23 @@
 print STDERR "Reading Mus musculus configuration file\n" if $verbose;
 
-$toBeMirrored{"mouse.genome"}={
-  "name" => "mm9 – Mouse Genome Sequencing Consortium",
+$toBeMirrored{"mouse.mm9.ncbim37.genome"}={
+  "name" => "mm9 – NCBIM37 Mouse Genome Sequencing Consortium from Ensembl",
   "tags" => ["mouse","genome"],
-  "source" => "make -f /etc/getData.conf.d/mouse.getData.mk get unpack",
-#  "post-download" => "make blast"
+  "source" => "make ENSEMBLVERSION=67 ORGANISM=Mus_musculus ORGANISM_L=mus_musculus NICKNAME=mm9 -f /etc/getData.conf.d/Ensembl_genome.mk get unpack",
+  "post-download" => "make NICKNAME=mm9 -f /etc/getData.conf.d/Ensembl_genome.mk blast",
+  "depends" => "make",
+  "recommends" => "ncbi-blast+"
+  # NOTE(review): the human entries also declare "size"; the mouse download size is not known here - TODO measure and add.
 };
 
+$toBeMirrored{"mouse.mm10.grcm38.genome"}={
+  "name" => "mm10 – GRCm38 Mouse Genome Sequencing Consortium from Ensembl",
+  "tags" => ["mouse","genome"],
+  "source" => "make ENSEMBLVERSION=75 ORGANISM=Mus_musculus ORGANISM_L=mus_musculus NICKNAME=mm10 -f /etc/getData.conf.d/Ensembl_genome.mk get unpack",
+  "post-download" => "make NICKNAME=mm10 -f /etc/getData.conf.d/Ensembl_genome.mk blast",
+  "depends" => "make",
+  "recommends" => "ncbi-blast+"
+  # NOTE(review): the human entries also declare "size"; the mouse download size is not known here - TODO measure and add.
+};
+
 1;

Deleted: trunk/community/infrastructure/getData/getData.conf.d/mouse.getData.mk
===================================================================
--- trunk/community/infrastructure/getData/getData.conf.d/mouse.getData.mk	2014-04-09 10:45:07 UTC (rev 16627)
+++ trunk/community/infrastructure/getData/getData.conf.d/mouse.getData.mk	2014-04-09 10:46:09 UTC (rev 16628)
@@ -1,6 +0,0 @@
-ORGANISM   = Mus_musculus
-ORGANISM_L = mus_musculus
-BUILD      = NCBIM37.55
-NICKNAME   = mm9
-
-include /etc/getData.conf.d/Ensembl_genome.mk




More information about the debian-med-commit mailing list