[med-svn] [Git][med-team/pique][upstream] New upstream version 1.1

Étienne Mollier (@emollier) gitlab at salsa.debian.org
Thu May 1 08:34:13 BST 2025



Étienne Mollier pushed to branch upstream at Debian Med / pique


Commits:
93240e98 by Étienne Mollier at 2025-05-01T08:41:44+02:00
New upstream version 1.1
- - - - -


8 changed files:

- Makefile
- README.md
- bin/GWAS_manhattanplots
- bin/pique-input
- doc/pique_manual.odt
- test/Makefile
- + test/RiceDiversity_44K_Genotypes_PLINK.tar.xz
- + test/plot_files.txt


Changes:

=====================================
Makefile
=====================================
@@ -1,4 +1,4 @@
-#@(#)Makefile  2017-01-13  A.J.Travis and A.Douglas
+#@(#)Makefile  2021-05-22  A.J.Travis and A.Douglas
 
 #
 # pique: parallel identification of QTL's using EMMAX
@@ -9,9 +9,9 @@
 # installation directory
 DIR = /usr/local/pique
 
-EMMAX = /usr/local/bin/emmax
+EMMAX = /usr/bin/emmax
 PLINK = /usr/bin/p-link
-EIGENSTRAT = /usr/local/EIGENSOFT/bin/eigenstrat
+EIGENSTRAT = /usr/lib/eigensoft/smartpca
 R = /usr/bin/R
 FORECAST = /usr/lib/R/site-library/forecast
 PARALLEL = /usr/share/perl5/Parallel
@@ -37,9 +37,8 @@ install: $(TARGETS)
 #pique-run: bin/pique-run
 
 # install "EMMAX"
-$(EMMAX): emmax-beta-07Mar2010.tar.gz
-	tar xvf $<
-	install -C -o root -g root emmax-beta-07Mar2010/emmax* /usr/local/bin/
+$(EMMAX):
+	apt install emmax
 
 emmax-beta-07Mar2010.tar.gz:
 	wget http://genetics.cs.ucla.edu/emmax/$@
@@ -52,13 +51,15 @@ plink_linux_x86_64.zip:
 	wget https://www.cog-genomics.org/static/bin/plink161202/$@
 
 # install "EIGENSOFT"
-$(EIGENSTRAT): EIG5.0.2.tar.gz
-	tar xvf $<
-	install -d /usr/local/EIGENSOFT/bin
-	install -C -o root -g root EIG5.0.2/bin/* /usr/local/EIGENSOFT/bin
-
-EIG5.0.2.tar.gz:
-	wget http://cdn1.sph.harvard.edu/wp-content/uploads/sites/181/2014/05/$@
+#$(EIGENSTRAT): EIG5.0.2.tar.gz
+#	tar xvf $<
+#	install -d /usr/local/EIGENSOFT/bin
+#	install -C -o root -g root EIG5.0.2/bin/* /usr/local/EIGENSOFT/bin
+#
+#EIG5.0.2.tar.gz:
+#	wget http://cdn1.sph.harvard.edu/wp-content/uploads/sites/181/2014/05/$@
+$(EIGENSTRAT):
+	apt install eigensoft
 
 # install R
 $(R):


=====================================
README.md
=====================================
@@ -1,2 +1,4 @@
 # PIQUE
 Parallel Identification of QTL's Using EMMAX
+
+[Tony Travis](https://github.com/tony-travis) (Minke Informatics Limited) & [Alex Douglas](https://github.com/alexd106) (University of Aberdeen)


=====================================
bin/GWAS_manhattanplots
=====================================
@@ -1,5 +1,5 @@
 #!/usr/bin/env Rscript
-#' @(#)GWAS_manhattanplots  2017-10-05  A.Douglas and A.J.Travis
+#' @(#)GWAS_manhattanplots  2021-01-25  A.Douglas and A.J.Travis
 
 suppressMessages(library(getopt))
 suppressMessages(library(optparse))
@@ -9,19 +9,20 @@ suppressMessages(library(optparse))
 # command-line arguments
 opt1 <- make_option(c("-b", "--bh"), action = "store", default = 0)
 opt2 <- make_option(c("-c", "--chromosome"), action = "store", default = "1:23")
-opt3 <- make_option(c("-i", "--input"), action = "store", default = NULL)
-opt4 <- make_option(c("-o", "--output"), action = "store", default = "manhattan")
-opt5 <- make_option(c("-q", "--qqplot"), action = "store_true", default = FALSE)
-opt6 <- make_option(c("-v", "--verbose"), action = "store_true", default = FALSE)
-opt7 <- make_option(c("-y", "--ymax"), action = "store", default = "max")
+opt3 <- make_option(c("-g", "--guide"), action = "store", default = 0)
+opt4 <- make_option(c("-i", "--input"), action = "store", default = NULL)
+opt5 <- make_option(c("-o", "--output"), action = "store", default = "manhattan")
+opt6 <- make_option(c("-q", "--qqplot"), action = "store_true", default = FALSE)
+opt7 <- make_option(c("-v", "--verbose"), action = "store_true", default = FALSE)
+opt8 <- make_option(c("-y", "--ymax"), action = "store", default = "max")
 
 #' debug in RStudio
-#' args <- c('-b', '0.1', '-q', '-y', '10', '-i', 'files.txt')
+#' args <- c('-b', '0.1', '4', '-q', '-y', '10', '-i', 'files.txt')
 #' opt <- parse_args(OptionParser(option_list = list(opt1, opt2, opt3, opt4, opt5, opt6, 
-#'   opt7)), args = args, positional_arguments = TRUE)
+#'   opt7, opt8)), args = args, positional_arguments = TRUE)
 
 opt <- parse_args(OptionParser(option_list = list(opt1, opt2, opt3, opt4, opt5, opt6, 
-  opt7)), positional_arguments = TRUE)
+  opt7, opt8)), positional_arguments = TRUE)
 
 # limit number of chromosomes
 chr.name <- parse(text = opt$options$chromosome)
@@ -38,6 +39,9 @@ if (chr.class == "numeric") {
 # PNG output file
 output.file <- paste(sep = "", opt$options$output, ".png")
 
+# Benjamini-Hochberg output file
+bh.file <- paste(sep = "", opt$options$output, ".bh")
+
 # check usage
 if (is.null(opt$options$input)) {
   nplots <- length(opt$args)
@@ -140,10 +144,13 @@ for (i in 1:nplots) {
   mt <- mt.rawp2adjp(snp$P, proc = "BH")
   ind <- mt$index
   snp <- cbind(snp, BH = mt$adjp[order(ind), 2])
+
+  # write BH adjusted P values to file
+  write.table(snp, file = bh.file, sep = "\t", row.names = FALSE, col.names = TRUE)
   
   # Manhattan plot
   manhattan(snp, markers = NULL, blocks = NULL, limitchromosomes = chr.limit, ymax = opt$options$ymax, 
-    suggestiveline = 4, genomewideline = FALSE, highlight = opt$options$bh, stack = stack)
+    suggestiveline = opt$options$guide, genomewideline = FALSE, highlight = opt$options$bh, stack = stack)
   
   # sub-plot title (left-justified)
   if (!is.null(opt$options$input)) {


=====================================
bin/pique-input
=====================================
@@ -1,5 +1,5 @@
 #!/usr/bin/perl -w
-#@(#)pique-input.pl  2017-07-24  A.J.Travis and A.Douglas
+#@(#)pique-input.pl  2024-04-18  A.J.Travis and A.Douglas
 
 #
 # PIQUE - Parallel Identification Of QTL's using EMMAX
@@ -55,7 +55,7 @@ my $opt_k = "IBS";
 my $opt_c;
 
 # number of Eigenvectors to include
-my $opt_e = 5;
+my $opt_e;
 
 # MAF: Minor Allele Frequency
 my $opt_m;
@@ -209,12 +209,12 @@ my @gid_list;
         }
     }
 
-    # remove temporary files
-    tidy_up();
-
     # write end-time to log file
     system("date >> $logfile");
 
+    # remove temporary files
+    tidy_up();
+
     exit 0;
 }
 
@@ -249,7 +249,7 @@ sub options {
     ) or usage();
 
     # input and output prefixes are mandatory
-    if ( !defined $opt_i && defined $opt_o ) {
+    if ( !( defined $opt_i && defined $opt_o ) ) {
         usage();
     }
 
@@ -267,8 +267,8 @@ sub options {
 
 sub programs {
 
-    # "plink" is called "p-link" on Ubuntu/Debian systems
-    $PLINK = must_have("p-link");
+    # warning: "plink" is putty-tools:/usr/bin/plink on Ubuntu/Debian systems
+    $PLINK = must_have("plink1.9");
 
     #    $PLINK_OPT = "--chr-set 95 --allow-extra-chr --missing-genotype 0";
     $PLINK_OPT = "--missing-genotype 0";
@@ -277,7 +277,7 @@ sub programs {
     if ( defined $opt_c ) {
 
         # look for "EIGENSOFT" smartpca
-        $EIGENSTRAT = must_have("smartpca");
+        $EIGENSTRAT = must_have("/usr/bin/smartpca");
     }
 
     # look for "emmax-kin"
@@ -339,6 +339,7 @@ sub files {
     $rmap_file = "$rc_prefix.map";
     $tped_file = "$opt_i.tped";
     $tfam_file = "$opt_i.tfam";
+
     if ( defined $opt_p ) {
         $pheno_file = $opt_p;
     }
@@ -385,7 +386,7 @@ sub recode {
     # convert .vcf to .ped + .map
     if ( $type eq "vcf" ) {
         must_exist("$input.vcf");
-        plink("--recode --vcf $input.vcf --out $input");
+        plink("--vcf $input.vcf --recode ped --out $input");
     }
 
     # convert .tped + .tfam to .ped + .map
@@ -398,7 +399,7 @@ sub recode {
     # recode .ped + .map
     must_exist("$input.ped");
     must_exist("$input.map");
-    plink("--recode12 --file $opt_i --out $output");
+    plink("--recode 12 --file $opt_i --out $output");
 
     # check output files
     must_exist("$output.ped");
@@ -766,7 +767,9 @@ sub create_covar {
     $cov_fh = must_write($cov_file);
 
     # run Eigenstrat "smartpca" program
-    smartpca( $ped, $map, $evec, $opt_o );
+    run(
+"$EIGENSTRAT -i $ped -a $map -b $ped -o $opt_o -p $opt_o -e $opt_o.eval -k $evec -l $opt_o-smartpca.log"
+    );
 
     # open "smartpca" output file
     $evec_fh = must_read("$opt_o.evec");
@@ -807,48 +810,6 @@ sub create_covar {
 
 #/////////////////////////////////////////////////////////////////////////////
 
-=head2 smartpca
- 
- Parameters  : ped = PLINK format .ped file
-               map  = PLINK format .map file
-               evec = number of eigenvectors
-               out = output prefix
- Returns     : null
- Description : run smartpca
- 
-=cut
-
-sub smartpca {
-    my ( $ped, $map, $evec, $out ) = @_;
-
-    my $par_file;    # "smartpca" parameter file name
-    my $par_fh;      # "smartpca" parameter file handle
-
-    $par_file = "$out.par";
-    $par_fh   = must_write($par_file);
-
-    # create parameter file
-    print $par_fh "genotypename: $ped\n";
-    print $par_fh "snpname: $map\n";
-    print $par_fh "indivname: $ped\n";
-    print $par_fh "evecoutname: $out.evec\n";
-    print $par_fh "evaloutname: $out.eval\n";
-    print $par_fh "altnormstyle: NO\n";
-    print $par_fh "numoutevec: $evec\n";
-    must_close($par_fh);
-
-    # run smartpca
-    run("smartpca -p $par_file > $out-smartpca.log");
-
-    # tidy up
-    if ( !$opt_d ) {
-        must_unlink($par_file);
-    }
-    return;
-}
-
-#/////////////////////////////////////////////////////////////////////////////
-
 =head2 exclude
  
  Parameters  : $file = name of input file
@@ -958,7 +919,7 @@ sub transpose {
     if ($opt_v) {
         print "Transpose $prefix...\n";
     }
-    plink("--recode --transpose --file $prefix --out $prefix");
+    plink("--recode transpose --file $prefix --out $prefix");
     return;
 }
 
@@ -1088,16 +1049,14 @@ sub output {
  Description : tidy up files unless debugging
  External    : $opt_i = input prefix
              : $opt_o = output prefix
- Files       : $rped_file = recoded .ped file
-             : $rmap_file = recoded map file
-             : "$opt_i_recode12.nosex"
+ Files       : "$opt_i_recode12.nosex"
              : "$opt_i_recode12.log"
-             : "$opt_o.log"
+             : "$opt_o-pique-input.log"
 
 =cut
 
 sub tidy_up {
-    my @hit_list = <*.nosex *.log>;
+    my @hit_list = <$opt_i*.nosex $opt_i*.log $opt_o*.log>;
 
     if ( defined $opt_v ) {
         print "removing temp files...\n";


=====================================
doc/pique_manual.odt
=====================================
Binary files a/doc/pique_manual.odt and b/doc/pique_manual.odt differ


=====================================
test/Makefile
=====================================
@@ -1,15 +1,15 @@
-#@(#)Makefile  2017-10-05  A.J.Travis and A.Douglas
+#@(#)Makefile  2025-04-30  A.J.Travis and A.Douglas, last modified by Étienne Mollier
 
 #
 # PIQUE - Parallel Identification Of QTL's using EMMAX
 #
 
-WGET = wget --no-check-certificate 
-PLINK = p-link
+PLINK = plink1.9
 PIQUE-INPUT = pique-input
 PIQUE-RUN = pique-run
 PHENO_GROUP = rice_phenotype_group.txt
 PHENO = rice_phenotype.txt
+PLOT_FILES = plot_files.txt
 IPREFIX = sativas413
 OPREFIX = sativas_GWAS
 TPREFIX = sativas_GWAS_trans
@@ -20,22 +20,35 @@ OPT = -d -v
 TEST := $(dir $(realpath $(firstword $(MAKEFILE_LIST))))
 PATH := $(realpath $(TEST)/../bin):$(PATH)
 
-all:  input run
+help:
+	@echo "Targets:"
+	@echo "\tinput"
+	@echo "\trun"
+	@echo "\ttrans"
+	@echo "\tgroup"
+	@echo "\ttest_vcf"
+	@echo "\tld"
+	@echo "\tblocks"
+	@echo "\tplot"
+	@echo "\tall"
 
-debug:
-	which GWAS_manhattanplots
+all:  input run trans group test_vcf ld blocks plot
 
 input: $(IPREFIX).ped
 	$(PIQUE-INPUT) $(OPT) -i $(IPREFIX) -o $(OPREFIX) -p $(PHENO) -k -c -e 5
+	touch input
 
-run: $(OPREFIX)
+run: input
 	$(PIQUE-RUN) $(OPT) -i $(OPREFIX) -k IBS -c $(OPREFIX).covar -n $(THREADS)
+	touch run
 
 trans: $(IPREFIX).ped
+	rm -rf $(TPREFIX)
 	$(PIQUE-INPUT) $(OPT) -i $(IPREFIX) -o $(TPREFIX) -p $(PHENO) -k -c -e 5
 	$(PIQUE-RUN) $(OPT) -i $(TPREFIX) -k IBS -c $(TPREFIX).covar -n $(THREADS) -t BC
 
 group: $(IPREFIX).ped
+	rm -rf $(GPREFIX)
 	$(PIQUE-INPUT) $(OPT) -i $(IPREFIX) -o $(GPREFIX) -p $(PHENO_GROUP) -k -c -e 5
 	$(PIQUE-RUN) $(OPT) -i $(GPREFIX) -k IBS -c $(GPREFIX).covar -n $(THREADS)
 	for group in $(GROUPS); do \
@@ -43,40 +56,37 @@ group: $(IPREFIX).ped
 	    $(PIQUE-RUN) $(OPT) -i $$prefix -k IBS -n $(THREADS); \
 	done
 
-$(IPREFIX).ped:
-	$(WGET) http://ricediversity.org/data/sets/44kgwas/RiceDiversity.44K.MSU6.Genotypes_PLINK.zip
-	unzip RiceDiversity.44K.MSU6.Genotypes_PLINK.zip
+$(IPREFIX).ped $(IPREFIX).map:
+	tar xf RiceDiversity_44K_Genotypes_PLINK.tar.xz
 	mv -i ./RiceDiversity_44K_Genotypes_PLINK/sativas* .
 	rmdir RiceDiversity_44K_Genotypes_PLINK
-	rm RiceDiversity.44K.MSU6.Genotypes_PLINK.zip
 
-test: test.vcf
-	$(PIQUE-INPUT) $(OPT) -i $@ -f vcf -o $@_vcf -p $(PHENO) -k -c -e 5
-	$(PIQUE-RUN) -i $@_vcf -k IBS -c $@_vcf.covar -t $(THREADS)
+test_vcf: test_vcf.vcf
+	rm -rf $@_GWAS
+	$(PIQUE-INPUT) $(OPT) -i $@ -f vcf -o $@_GWAS -p $(PHENO) -k -c -e 5
+	$(PIQUE-RUN) -i $@_GWAS -k IBS -c $@_GWAS.covar -n $(THREADS)
 
-test.vcf: $(IPREFIX).ped $(IPREFIX).map
-	$(PLINK) --file $(IPREFIX) --recode-vcf --out $(basename $@)
+test_vcf.vcf: $(IPREFIX).ped $(IPREFIX).map
+	$(PLINK) --file $(IPREFIX) --recode vcf --out $(basename $@)
 
-ld:
+ld: input
 	$(PLINK) --file $(IPREFIX)_recode12 --r --out $(OPREFIX)_r
 	$(PLINK) --file $(IPREFIX)_recode12 --r2 --out $(OPREFIX)_r2
 
 blocks:
 	$(PLINK) --file $(IPREFIX)_recode12 --blocks no-pheno-req --out $(OPREFIX)_blocks
 
-diff:
-	-diff -x '.??*' -x Makefile -x '*.log' -rq ../ref .
-
-plot: files.txt
-	GWAS_manhattanplots -b 0.1 -q -y 12 -i files.txt
+plot: $(PLOT_FILES) run
+	GWAS_manhattanplots -b 0.1 -q -y 12 -i $<
 
 clean:
-	rm -f RiceDiversity.44K.MSU6.Genotypes_PLINK.zip
 	rm -rf RiceDiversity_44K_Genotypes_PLINK sativas413*
 	rm -rf __MACOSX
-	rm -rf $(OPREFIX)* $(GPREFIX)*
+	rm -rf $(OPREFIX)* $(TPREFIX)* $(GPREFIX)*
+	rm -rf test_vcf*
+	rm -f input run
+	rm -f *.bh *.png
 	rm -f .pversion *.log
 
 clobber: clean
-	rm -f RiceDiversity.44K.MSU6.Genotypes_PLINK.zip
 	rm -f $(IPREFIX)_recode12.*


=====================================
test/RiceDiversity_44K_Genotypes_PLINK.tar.xz
=====================================
Binary files /dev/null and b/test/RiceDiversity_44K_Genotypes_PLINK.tar.xz differ


=====================================
test/plot_files.txt
=====================================
@@ -0,0 +1,4 @@
+sativas_GWAS/F_grain_Mo/F_grain_Mo_sativas_GWAS.manh	Grain Mo
+sativas_GWAS/F_grain_Zn/F_grain_Zn_sativas_GWAS.manh	Grain Zn
+sativas_GWAS/A_FT/A_FT_sativas_GWAS.manh	Aberdeen Flowering Time
+sativas_GWAS/A_PH/A_PH_sativas_GWAS.manh	Aberdeen Plant Height



View it on GitLab: https://salsa.debian.org/med-team/pique/-/commit/93240e98b3dbc8f581b21049e87a25112a8ab62e

-- 
View it on GitLab: https://salsa.debian.org/med-team/pique/-/commit/93240e98b3dbc8f581b21049e87a25112a8ab62e
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20250501/755d352e/attachment-0001.htm>


More information about the debian-med-commit mailing list