[med-svn] [Git][med-team/pique][upstream] New upstream version 1.1
Étienne Mollier (@emollier)
gitlab at salsa.debian.org
Thu May 1 08:34:13 BST 2025
Étienne Mollier pushed to branch upstream at Debian Med / pique
Commits:
93240e98 by Étienne Mollier at 2025-05-01T08:41:44+02:00
New upstream version 1.1
- - - - -
8 changed files:
- Makefile
- README.md
- bin/GWAS_manhattanplots
- bin/pique-input
- doc/pique_manual.odt
- test/Makefile
- + test/RiceDiversity_44K_Genotypes_PLINK.tar.xz
- + test/plot_files.txt
Changes:
=====================================
Makefile
=====================================
@@ -1,4 +1,4 @@
-#@(#)Makefile 2017-01-13 A.J.Travis and A.Douglas
+#@(#)Makefile 2021-05-22 A.J.Travis and A.Douglas
#
# pique: parallel identification of QTL's using EMMAX
@@ -9,9 +9,9 @@
# installation directory
DIR = /usr/local/pique
-EMMAX = /usr/local/bin/emmax
+EMMAX = /usr/bin/emmax
PLINK = /usr/bin/p-link
-EIGENSTRAT = /usr/local/EIGENSOFT/bin/eigenstrat
+EIGENSTRAT = /usr/lib/eigensoft/smartpca
R = /usr/bin/R
FORECAST = /usr/lib/R/site-library/forecast
PARALLEL = /usr/share/perl5/Parallel
@@ -37,9 +37,8 @@ install: $(TARGETS)
#pique-run: bin/pique-run
# install "EMMAX"
-$(EMMAX): emmax-beta-07Mar2010.tar.gz
- tar xvf $<
- install -C -o root -g root emmax-beta-07Mar2010/emmax* /usr/local/bin/
+$(EMMAX):
+ apt install emmax
emmax-beta-07Mar2010.tar.gz:
wget http://genetics.cs.ucla.edu/emmax/$@
@@ -52,13 +51,15 @@ plink_linux_x86_64.zip:
wget https://www.cog-genomics.org/static/bin/plink161202/$@
# install "EIGENSOFT"
-$(EIGENSTRAT): EIG5.0.2.tar.gz
- tar xvf $<
- install -d /usr/local/EIGENSOFT/bin
- install -C -o root -g root EIG5.0.2/bin/* /usr/local/EIGENSOFT/bin
-
-EIG5.0.2.tar.gz:
- wget http://cdn1.sph.harvard.edu/wp-content/uploads/sites/181/2014/05/$@
+#$(EIGENSTRAT): EIG5.0.2.tar.gz
+# tar xvf $<
+# install -d /usr/local/EIGENSOFT/bin
+# install -C -o root -g root EIG5.0.2/bin/* /usr/local/EIGENSOFT/bin
+#
+#EIG5.0.2.tar.gz:
+# wget http://cdn1.sph.harvard.edu/wp-content/uploads/sites/181/2014/05/$@
+$(EIGENSTRAT):
+ apt install eigensoft
# install R
$(R):
=====================================
README.md
=====================================
@@ -1,2 +1,4 @@
# PIQUE
Parallel Identification of QTL's Using EMMAX
+
+[Tony Travis](https://github.com/tony-travis) (Minke Informatics Limited) & [Alex Douglas](https://github.com/alexd106) (University of Aberdeen)
=====================================
bin/GWAS_manhattanplots
=====================================
@@ -1,5 +1,5 @@
#!/usr/bin/env Rscript
-#' @(#)GWAS_manhattanplots 2017-10-05 A.Douglas and A.J.Travis
+#' @(#)GWAS_manhattanplots 2021-01-25 A.Douglas and A.J.Travis
suppressMessages(library(getopt))
suppressMessages(library(optparse))
@@ -9,19 +9,20 @@ suppressMessages(library(optparse))
# command-line arguments
opt1 <- make_option(c("-b", "--bh"), action = "store", default = 0)
opt2 <- make_option(c("-c", "--chromosome"), action = "store", default = "1:23")
-opt3 <- make_option(c("-i", "--input"), action = "store", default = NULL)
-opt4 <- make_option(c("-o", "--output"), action = "store", default = "manhattan")
-opt5 <- make_option(c("-q", "--qqplot"), action = "store_true", default = FALSE)
-opt6 <- make_option(c("-v", "--verbose"), action = "store_true", default = FALSE)
-opt7 <- make_option(c("-y", "--ymax"), action = "store", default = "max")
+opt3 <- make_option(c("-g", "--guide"), action = "store", default = 0)
+opt4 <- make_option(c("-i", "--input"), action = "store", default = NULL)
+opt5 <- make_option(c("-o", "--output"), action = "store", default = "manhattan")
+opt6 <- make_option(c("-q", "--qqplot"), action = "store_true", default = FALSE)
+opt7 <- make_option(c("-v", "--verbose"), action = "store_true", default = FALSE)
+opt8 <- make_option(c("-y", "--ymax"), action = "store", default = "max")
#' debug in RStudio
-#' args <- c('-b', '0.1', '-q', '-y', '10', '-i', 'files.txt')
+#' args <- c('-b', '0.1', '4', '-q', '-y', '10', '-i', 'files.txt')
#' opt <- parse_args(OptionParser(option_list = list(opt1, opt2, opt3, opt4, opt5, opt6,
-#' opt7)), args = args, positional_arguments = TRUE)
+#' opt7, opt8)), args = args, positional_arguments = TRUE)
opt <- parse_args(OptionParser(option_list = list(opt1, opt2, opt3, opt4, opt5, opt6,
- opt7)), positional_arguments = TRUE)
+ opt7, opt8)), positional_arguments = TRUE)
# limit number of chromosomes
chr.name <- parse(text = opt$options$chromosome)
@@ -38,6 +39,9 @@ if (chr.class == "numeric") {
# PNG output file
output.file <- paste(sep = "", opt$options$output, ".png")
+# Benjamini-Hochberg output file
+bh.file <- paste(sep = "", opt$options$output, ".bh")
+
# check usage
if (is.null(opt$options$input)) {
nplots <- length(opt$args)
@@ -140,10 +144,13 @@ for (i in 1:nplots) {
mt <- mt.rawp2adjp(snp$P, proc = "BH")
ind <- mt$index
snp <- cbind(snp, BH = mt$adjp[order(ind), 2])
+
+ # write BH adjusted P values to file
+ write.table(snp, file = bh.file, sep = "\t", row.names = FALSE, col.names = TRUE)
# Manhattan plot
manhattan(snp, markers = NULL, blocks = NULL, limitchromosomes = chr.limit, ymax = opt$options$ymax,
- suggestiveline = 4, genomewideline = FALSE, highlight = opt$options$bh, stack = stack)
+ suggestiveline = opt$options$guide, genomewideline = FALSE, highlight = opt$options$bh, stack = stack)
# sub-plot title (left-justified)
if (!is.null(opt$options$input)) {
=====================================
bin/pique-input
=====================================
@@ -1,5 +1,5 @@
#!/usr/bin/perl -w
-#@(#)pique-input.pl 2017-07-24 A.J.Travis and A.Douglas
+#@(#)pique-input.pl 2024-04-18 A.J.Travis and A.Douglas
#
# PIQUE - Parallel Identification Of QTL's using EMMAX
@@ -55,7 +55,7 @@ my $opt_k = "IBS";
my $opt_c;
# number of Eigenvectors to include
-my $opt_e = 5;
+my $opt_e;
# MAF: Minor Allele Frequency
my $opt_m;
@@ -209,12 +209,12 @@ my @gid_list;
}
}
- # remove temporary files
- tidy_up();
-
# write end-time to log file
system("date >> $logfile");
+ # remove temporary files
+ tidy_up();
+
exit 0;
}
@@ -249,7 +249,7 @@ sub options {
) or usage();
# input and output prefixes are mandatory
- if ( !defined $opt_i && defined $opt_o ) {
+ if ( !( defined $opt_i && defined $opt_o ) ) {
usage();
}
@@ -267,8 +267,8 @@ sub options {
sub programs {
- # "plink" is called "p-link" on Ubuntu/Debian systems
- $PLINK = must_have("p-link");
+ # warning: "plink" is putty-tools:/usr/bin/plink on Ubuntu/Debian systems
+ $PLINK = must_have("plink1.9");
# $PLINK_OPT = "--chr-set 95 --allow-extra-chr --missing-genotype 0";
$PLINK_OPT = "--missing-genotype 0";
@@ -277,7 +277,7 @@ sub programs {
if ( defined $opt_c ) {
# look for "EIGENSOFT" smartpca
- $EIGENSTRAT = must_have("smartpca");
+ $EIGENSTRAT = must_have("/usr/bin/smartpca");
}
# look for "emmax-kin"
@@ -339,6 +339,7 @@ sub files {
$rmap_file = "$rc_prefix.map";
$tped_file = "$opt_i.tped";
$tfam_file = "$opt_i.tfam";
+
if ( defined $opt_p ) {
$pheno_file = $opt_p;
}
@@ -385,7 +386,7 @@ sub recode {
# convert .vcf to .ped + .map
if ( $type eq "vcf" ) {
must_exist("$input.vcf");
- plink("--recode --vcf $input.vcf --out $input");
+ plink("--vcf $input.vcf --recode ped --out $input");
}
# convert .tped + .tfam to .ped + .map
@@ -398,7 +399,7 @@ sub recode {
# recode .ped + .map
must_exist("$input.ped");
must_exist("$input.map");
- plink("--recode12 --file $opt_i --out $output");
+ plink("--recode 12 --file $opt_i --out $output");
# check output files
must_exist("$output.ped");
@@ -766,7 +767,9 @@ sub create_covar {
$cov_fh = must_write($cov_file);
# run Eigenstrat "smartpca" program
- smartpca( $ped, $map, $evec, $opt_o );
+ run(
+"$EIGENSTRAT -i $ped -a $map -b $ped -o $opt_o -p $opt_o -e $opt_o.eval -k $evec -l $opt_o-smartpca.log"
+ );
# open "smartpca" output file
$evec_fh = must_read("$opt_o.evec");
@@ -807,48 +810,6 @@ sub create_covar {
#/////////////////////////////////////////////////////////////////////////////
-=head2 smartpca
-
- Parameters : ped = PLINK format .ped file
- map = PLINK format .map file
- evec = number of eigenvectors
- out = output prefix
- Returns : null
- Description : run smartpca
-
-=cut
-
-sub smartpca {
- my ( $ped, $map, $evec, $out ) = @_;
-
- my $par_file; # "smartpca" parameter file name
- my $par_fh; # "smartpca" parameter file handle
-
- $par_file = "$out.par";
- $par_fh = must_write($par_file);
-
- # create parameter file
- print $par_fh "genotypename: $ped\n";
- print $par_fh "snpname: $map\n";
- print $par_fh "indivname: $ped\n";
- print $par_fh "evecoutname: $out.evec\n";
- print $par_fh "evaloutname: $out.eval\n";
- print $par_fh "altnormstyle: NO\n";
- print $par_fh "numoutevec: $evec\n";
- must_close($par_fh);
-
- # run smartpca
- run("smartpca -p $par_file > $out-smartpca.log");
-
- # tidy up
- if ( !$opt_d ) {
- must_unlink($par_file);
- }
- return;
-}
-
-#/////////////////////////////////////////////////////////////////////////////
-
=head2 exclude
Parameters : $file = name of input file
@@ -958,7 +919,7 @@ sub transpose {
if ($opt_v) {
print "Transpose $prefix...\n";
}
- plink("--recode --transpose --file $prefix --out $prefix");
+ plink("--recode transpose --file $prefix --out $prefix");
return;
}
@@ -1088,16 +1049,14 @@ sub output {
Description : tidy up files unless debugging
External : $opt_i = input prefix
: $opt_o = output prefix
- Files : $rped_file = recoded .ped file
- : $rmap_file = recoded map file
- : "$opt_i_recode12.nosex"
+ Files : "$opt_i_recode12.nosex"
: "$opt_i_recode12.log"
- : "$opt_o.log"
+ : "$opt_o-pique-input.log"
=cut
sub tidy_up {
- my @hit_list = <*.nosex *.log>;
+ my @hit_list = <$opt_i*.nosex $opt_i*.log $opt_o*.log>;
if ( defined $opt_v ) {
print "removing temp files...\n";
=====================================
doc/pique_manual.odt
=====================================
Binary files a/doc/pique_manual.odt and b/doc/pique_manual.odt differ
=====================================
test/Makefile
=====================================
@@ -1,15 +1,15 @@
-#@(#)Makefile 2017-10-05 A.J.Travis and A.Douglas
+#@(#)Makefile 2025-04-30 A.J.Travis and A.Douglas, last modified by Étienne Mollier
#
# PIQUE - Parallel Identification Of QTL's using EMMAX
#
-WGET = wget --no-check-certificate
-PLINK = p-link
+PLINK = plink1.9
PIQUE-INPUT = pique-input
PIQUE-RUN = pique-run
PHENO_GROUP = rice_phenotype_group.txt
PHENO = rice_phenotype.txt
+PLOT_FILES = plot_files.txt
IPREFIX = sativas413
OPREFIX = sativas_GWAS
TPREFIX = sativas_GWAS_trans
@@ -20,22 +20,35 @@ OPT = -d -v
TEST := $(dir $(realpath $(firstword $(MAKEFILE_LIST))))
PATH := $(realpath $(TEST)/../bin):$(PATH)
-all: input run
+help:
+ @echo "Targets:"
+ @echo "\tinput"
+ @echo "\trun"
+ @echo "\ttrans"
+ @echo "\tgroup"
+ @echo "\ttest_vcf"
+ @echo "\tld"
+ @echo "\tblocks"
+ @echo "\tplot"
+ @echo "\tall"
-debug:
- which GWAS_manhattanplots
+all: input run trans group test_vcf ld blocks plot
input: $(IPREFIX).ped
$(PIQUE-INPUT) $(OPT) -i $(IPREFIX) -o $(OPREFIX) -p $(PHENO) -k -c -e 5
+ touch input
-run: $(OPREFIX)
+run: input
$(PIQUE-RUN) $(OPT) -i $(OPREFIX) -k IBS -c $(OPREFIX).covar -n $(THREADS)
+ touch run
trans: $(IPREFIX).ped
+ rm -rf $(TPREFIX)
$(PIQUE-INPUT) $(OPT) -i $(IPREFIX) -o $(TPREFIX) -p $(PHENO) -k -c -e 5
$(PIQUE-RUN) $(OPT) -i $(TPREFIX) -k IBS -c $(TPREFIX).covar -n $(THREADS) -t BC
group: $(IPREFIX).ped
+ rm -rf $(GPREFIX)
$(PIQUE-INPUT) $(OPT) -i $(IPREFIX) -o $(GPREFIX) -p $(PHENO_GROUP) -k -c -e 5
$(PIQUE-RUN) $(OPT) -i $(GPREFIX) -k IBS -c $(GPREFIX).covar -n $(THREADS)
for group in $(GROUPS); do \
@@ -43,40 +56,37 @@ group: $(IPREFIX).ped
$(PIQUE-RUN) $(OPT) -i $$prefix -k IBS -n $(THREADS); \
done
-$(IPREFIX).ped:
- $(WGET) http://ricediversity.org/data/sets/44kgwas/RiceDiversity.44K.MSU6.Genotypes_PLINK.zip
- unzip RiceDiversity.44K.MSU6.Genotypes_PLINK.zip
+$(IPREFIX).ped $(IPREFIX).map:
+ tar xf RiceDiversity_44K_Genotypes_PLINK.tar.xz
mv -i ./RiceDiversity_44K_Genotypes_PLINK/sativas* .
rmdir RiceDiversity_44K_Genotypes_PLINK
- rm RiceDiversity.44K.MSU6.Genotypes_PLINK.zip
-test: test.vcf
- $(PIQUE-INPUT) $(OPT) -i $@ -f vcf -o $@_vcf -p $(PHENO) -k -c -e 5
- $(PIQUE-RUN) -i $@_vcf -k IBS -c $@_vcf.covar -t $(THREADS)
+test_vcf: test_vcf.vcf
+ rm -rf $@_GWAS
+ $(PIQUE-INPUT) $(OPT) -i $@ -f vcf -o $@_GWAS -p $(PHENO) -k -c -e 5
+ $(PIQUE-RUN) -i $@_GWAS -k IBS -c $@_GWAS.covar -n $(THREADS)
-test.vcf: $(IPREFIX).ped $(IPREFIX).map
- $(PLINK) --file $(IPREFIX) --recode-vcf --out $(basename $@)
+test_vcf.vcf: $(IPREFIX).ped $(IPREFIX).map
+ $(PLINK) --file $(IPREFIX) --recode vcf --out $(basename $@)
-ld:
+ld: input
$(PLINK) --file $(IPREFIX)_recode12 --r --out $(OPREFIX)_r
$(PLINK) --file $(IPREFIX)_recode12 --r2 --out $(OPREFIX)_r2
blocks:
$(PLINK) --file $(IPREFIX)_recode12 --blocks no-pheno-req --out $(OPREFIX)_blocks
-diff:
- -diff -x '.??*' -x Makefile -x '*.log' -rq ../ref .
-
-plot: files.txt
- GWAS_manhattanplots -b 0.1 -q -y 12 -i files.txt
+plot: $(PLOT_FILES) run
+ GWAS_manhattanplots -b 0.1 -q -y 12 -i $<
clean:
- rm -f RiceDiversity.44K.MSU6.Genotypes_PLINK.zip
rm -rf RiceDiversity_44K_Genotypes_PLINK sativas413*
rm -rf __MACOSX
- rm -rf $(OPREFIX)* $(GPREFIX)*
+ rm -rf $(OPREFIX)* $(TPREFIX)* $(GPREFIX)*
+ rm -rf test_vcf*
+ rm -f input run
+ rm -f *.bh *.png
rm -f .pversion *.log
clobber: clean
- rm -f RiceDiversity.44K.MSU6.Genotypes_PLINK.zip
rm -f $(IPREFIX)_recode12.*
=====================================
test/RiceDiversity_44K_Genotypes_PLINK.tar.xz
=====================================
Binary files /dev/null and b/test/RiceDiversity_44K_Genotypes_PLINK.tar.xz differ
=====================================
test/plot_files.txt
=====================================
@@ -0,0 +1,4 @@
+sativas_GWAS/F_grain_Mo/F_grain_Mo_sativas_GWAS.manh Grain Mo
+sativas_GWAS/F_grain_Zn/F_grain_Zn_sativas_GWAS.manh Grain Zn
+sativas_GWAS/A_FT/A_FT_sativas_GWAS.manh Aberdeen Flowering Time
+sativas_GWAS/A_PH/A_PH_sativas_GWAS.manh Aberdeen Plant Height
View it on GitLab: https://salsa.debian.org/med-team/pique/-/commit/93240e98b3dbc8f581b21049e87a25112a8ab62e
--
View it on GitLab: https://salsa.debian.org/med-team/pique/-/commit/93240e98b3dbc8f581b21049e87a25112a8ab62e
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20250501/755d352e/attachment-0001.htm>
More information about the debian-med-commit
mailing list