[med-svn] [gwama] 01/01: Add upstream examples

Dylan Aïssi bob.dybian-guest at moszumanska.debian.org
Tue Dec 16 07:15:14 UTC 2014


This is an automated email from the git hooks/post-receive script.

bob.dybian-guest pushed a commit to branch master
in repository gwama.

commit 4c8cd11637ae6753a362e73e678787a9a1a00ec9
Author: Dylan Aïssi <bob.dybian at gmail.com>
Date:   Tue Dec 16 07:50:40 2014 +0100

    Add upstream examples
---
 debian/TODO.Debian                                 |  5 --
 debian/copyright                                   |  4 +
 debian/examples                                    |  8 ++
 debian/rules                                       |  4 +
 debian/upstream.docs/Perl_scripts/PLINK2GWAMA.pl   | 81 +++++++++++++++++++
 .../Perl_scripts/SNPTEST2.5_2_GWAMA.pl             |  1 +
 debian/upstream.docs/Perl_scripts/SNPTEST2GWAMA.pl | 91 ++++++++++++++++++++++
 .../upstream.docs/Perl_scripts/SNPTEST2_2_GWAMA.pl | 91 ++++++++++++++++++++++
 debian/upstream.docs/R_scripts/MANH.R              | 68 ++++++++++++++++
 debian/upstream.docs/R_scripts/QQ.R                | 34 ++++++++
 debian/upstream.docs/log2.txt                      | 65 ++++++++++++++++
 debian/upstream.docs/samples/gwama.in              |  3 +
 debian/upstream.docs/samples/pop1.txt              | 12 +++
 debian/upstream.docs/samples/pop2.txt              | 13 ++++
 debian/upstream.docs/samples/pop3.txt              | 12 +++
 15 files changed, 487 insertions(+), 5 deletions(-)

diff --git a/debian/TODO.Debian b/debian/TODO.Debian
deleted file mode 100644
index fc049ab..0000000
--- a/debian/TODO.Debian
+++ /dev/null
@@ -1,5 +0,0 @@
-# Add addtional files from http://www.well.ox.ac.uk/gwama/download.shtml
-# -- Changelog - http://www.well.ox.ac.uk/gwama/log2.txt
-# -- Samples files - http://www.well.ox.ac.uk/gwama/samples.zip
-# -- R scritps - http://www.well.ox.ac.uk/gwama/*.R
-# -- Perl scritps - http://www.well.ox.ac.uk/gwama/*.pl
diff --git a/debian/copyright b/debian/copyright
index 7f89f43..93e68e8 100644
--- a/debian/copyright
+++ b/debian/copyright
@@ -24,6 +24,10 @@ Files: debian/*
 Copyright: 2014 Dylan Aïssi <bob.dybian at gmail.com>
 License: BSD-3-clause
 
+Files: debian/upstream.docs/*
+Copyright: 2009-2010 Reedik Mägi, Andrew P Morris, Joshua C Randall, Matthias Wuttke.
+License: BSD-3-clause
+
 License: BSD-3-clause
  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are
diff --git a/debian/examples b/debian/examples
new file mode 100644
index 0000000..049b5c6
--- /dev/null
+++ b/debian/examples
@@ -0,0 +1,8 @@
+# Upstream R scripts for Manhattan and QQ Plots
+debian/upstream.docs/R_scripts
+
+# Upstream Perl scripts for reformatting SNPTEST and PLINK output to GWAMA input format
+debian/upstream.docs/Perl_scripts
+
+# Upstream sample files
+debian/upstream.docs/samples
diff --git a/debian/rules b/debian/rules
index 3db83ea..420ea92 100755
--- a/debian/rules
+++ b/debian/rules
@@ -12,6 +12,10 @@ override_dh_auto_build:
 	# Don't override the upstream FLAG -O3
 	make CXXFLAGS="$(CXXFLAGS)"
 
+override_dh_installchangelogs:
+	# Install upstream log2.txt file as changelog
+	dh_installchangelogs debian/upstream.docs/log2.txt
+
 get-orig-source:
 	uscan --verbose --force-download --repack --compress xz
 
diff --git a/debian/upstream.docs/Perl_scripts/PLINK2GWAMA.pl b/debian/upstream.docs/Perl_scripts/PLINK2GWAMA.pl
new file mode 100644
index 0000000..63a2d77
--- /dev/null
+++ b/debian/upstream.docs/Perl_scripts/PLINK2GWAMA.pl
@@ -0,0 +1,81 @@
+# Script for creating GWAMA input file from PLINK association results file.
+# The allele frequency file must also be used for generating GWAMA file.
+# Use the script: perl PLINK2GWAMA.pl <PLINK assoc file> <PLINK frq file> <output GWAMA file>
+# NB! If PLINK association file contains data of covariate effects or multiple models then please remove unnecessary rows prior to using this script
+# Rows are matched by SNP name; association rows whose SNP is absent from the frq file are skipped.
+$inputassoc = $ARGV[0];
+$inputfrq = $ARGV[1];
+if($inputfrq !~ /frq$/){die "Please enter PLINK frq file. File extension must be frq"};
+$outputfile = $ARGV[2];
+open F1, "<", $inputassoc or die "Cannot file PLINK assoc file. This must be first command line argument!\n";	# 3-arg open: the name is never parsed as a mode or pipe
+open F2, "<", $inputfrq or die "Cannot file PLINK frq file. This must be second command line argument!\n";
+if ($outputfile eq ""){die "Please enter the outputfile name as third command line argument!\n";}
+open O, ">", $outputfile or die "Cannot open $outputfile for writing. Please check folder's access rights and disk quota!\n";
+while(<F2>)	# pass 1: index the frq file by SNP name
+{
+	chomp;
+	@data = split(/\s+/);	# a line that starts with blanks yields an empty field 0
+	if ($i>0)	# $i is still unset (numerically 0) on the first line, so the header is skipped
+	{
+		$snp_ref{$data[2]}=$i;	# SNP name -> row number (always >0)
+		$snp_ea[$i] = $data[3];	# written to the EA column
+		$snp_nea[$i] = $data[4];	# written to the NEA column
+		$snp_eaf[$i] = $data[5];	# written to the EAF column
+		$snp_n[$i] = $data[6]/2;	# N = field 6 halved (presumably a chromosome count - confirm against the frq format)
+	}
+	$i++;
+}
+$i=0;
+
+while(<F1>)	# pass 2: convert the association file row by row
+{
+	chomp;
+	@data = split(/\s+/);
+	if ($i==0) 		# header line
+	{
+		$locSNP=$locBETA=$locSE=$locOR=$locCIL=$locCIU=-1;	# -1 = column not present
+		for ($j=0;$j<scalar(@data);$j++)
+		{
+			if ($data[$j] eq "SNP"){$locSNP=$j;}
+			if ($data[$j] eq "BETA"){$locBETA=$j;}
+			if ($data[$j] eq "SE"){$locSE=$j;}
+			if ($data[$j] eq "OR"){$locOR=$j;}
+			if ($data[$j] eq "L95"){$locCIL=$j;}
+			if ($data[$j] eq "U95"){$locCIU=$j;}
+		}
+
+		if ($locOR>-1)	# an OR column selects the odds-ratio output layout
+		{
+			print "Using OR with CI output.\n";
+			print O "MARKER\tEA\tNEA\tOR\tOR_95L\tOR_95U\tN\tEAF\tSTRAND\n";
+		}
+		else
+		{
+			print "Using BETA with SE output.\n";
+			print O "MARKER\tEA\tNEA\tBETA\tSE\tN\tEAF\tSTRAND\n";
+		}
+
+
+	}
+	if ($i>0) #snp line
+	{
+		$marker = ($locSNP>-1) ? $data[$locSNP] : $data[2];	# use the SNP column found in the header; fall back to field 2 when no SNP header exists
+		$loc = $snp_ref{$marker};
+		if ($loc>0)	# unknown markers give an undefined $loc and are skipped
+		{
+			$ea = $snp_ea[$loc];
+			$nea = $snp_nea[$loc];
+			if ($locBETA>-1){$beta = $data[$locBETA];}
+			if ($locSE>-1){$se = $data[$locSE];}
+			if ($locOR>-1){$or = $data[$locOR];}
+			if ($locCIL>-1){$or_95l = $data[$locCIL];}
+			if ($locCIU>-1){$or_95u = $data[$locCIU];}
+			$n = $snp_n[$loc];
+			$eaf = $snp_eaf[$loc];
+			$strand = "+";	# every marker is written as positive strand
+			if ($locOR>-1){print O "$marker\t$ea\t$nea\t$or\t$or_95l\t$or_95u\t$n\t$eaf\t$strand\n";}
+			else {print O "$marker\t$ea\t$nea\t$beta\t$se\t$n\t$eaf\t$strand\n";}
+		}
+	}
+	$i++;
+}
diff --git a/debian/upstream.docs/Perl_scripts/SNPTEST2.5_2_GWAMA.pl b/debian/upstream.docs/Perl_scripts/SNPTEST2.5_2_GWAMA.pl
new file mode 100644
index 0000000..f612f53
--- /dev/null
+++ b/debian/upstream.docs/Perl_scripts/SNPTEST2.5_2_GWAMA.pl
@@ -0,0 +1 @@
+# Script for creating GWAMA input file from SNPTEST2 association results file.
# Use the script: "perl SNPTEST2.5_2_GWAMA.pl <SNPTEST output file> <output GWAMA file> SE" for quantitative trait analysis
# Use the script: "perl SNPTEST2.5_2_GWAMA.pl <SNPTEST output file> <output GWAMA file> OR" for dichotomous analysis
# NB! Script uses BETA and SE values which are in last columns of file. If multiple analyse models were used then please edit this script
# Optional KEY=VALUE cut-offs (N, MAC, MAF, PROPERINFO) may follow the three mandatory arguments; rows with NA BETA or SE are skipped.
# Thanks to Dr. Matthias Wuttke for script update

$inputfile = $ARGV[0];
$outputfile = $ARGV[1];
$scheme = uc($ARGV[2]);	# "OR" selects odds-ratio output; anything else gives BETA/SE
$cMAF=$cMAC=$cN=$cPROPER=0;	# cut-offs default to 0
for ($i=3; $i<scalar(@ARGV);$i++)	# parse the optional KEY=VALUE cut-offs
{
	@arg = split(/=/, $ARGV[$i]);
	if (uc($arg[0]) eq "N" && $arg[1]>0){print "N cut-off $arg[1]\n"; $cN=$arg[1];}
	if (uc($arg[0]) eq "MAC" && $arg[1]>0){print "MAC cut-off $arg[1]\n"; $cMAC=$arg[1];}
	if (uc($arg[0]) eq "MAF" && $arg[1]>0){print "MAF cut-off $arg[1]\n"; $cMAF=$arg[1];}
	if (uc($arg[0]) eq "PROPERINFO" && $arg[1]>0){print "PROPERINFO cut-off $arg[1]\n"; $cPROPER=$arg[1];}
}
if ($ARGV[0] eq "" || $ARGV[0] eq "-h" || $ARGV[0] eq "--help"){printhelp();exit;}
open F, "<", $inputfile or die "Cannot file SNPTEST file. This must be first command line argument!\n";	# 3-arg open: the name is never parsed as a mode or pipe
if ($outputfile eq ""){die "Please enter the outputfile name as second command line argument!\n";}
open O, ">", $outputfile or die "Cannot open $outputfile for writing. Please check folder's access rights and disk quota!\n";
if ($scheme eq "OR")
{
	print "Using OR with CI output.\n";
	print O "MARKER\tEA\tNEA\tOR\tOR_95L\tOR_95U\tN\tEAF\tSTRAND\tIMPUTED\n";
}
else
{
	print "Using BETA with SE output.\n";
	print O "MARKER\tEA\tNEA\tBETA\tSE\tN\tEAF\tSTRAND\tIMPUTED\n";
}
$i=0;
LINE: while(<F>)
{
	chomp;
	next LINE if /^#/;	# comment lines are skipped without counting, so the first other line is the header
	@data = split(/\s/);	# splits on every single whitespace character
	if ($i==0)	#header line
	{
		$locAA=$locAB=$locBB=0;	# genotype-count columns default to field 0 when not found
		for ($j=0;$j<scalar(@data); $j++)
		{
			if ($data[$j] eq "all_AA"){$locAA=$j;}
			if ($data[$j] eq "all_AB"){$locAB=$j;}
			if ($data[$j] eq "all_BB"){$locBB=$j;}
		}
	}
	else		#snp line
	{
		$marker = $data[1];
		$ea = $data[5];
		$nea = $data[4];
		$beta = $data[scalar(@data)-3];	# third field from the end
		$se = $data[scalar(@data)-2];	# second field from the end
		$proper = $data[scalar(@data)-4];	# fourth field from the end, compared with the PROPERINFO cut-off
		$or = exp($beta);
		$or_95l = exp($beta - 1.96* $se);	# 95% confidence limits from beta +/- 1.96*se
		$or_95u = exp($beta + 1.96* $se);
		$n = $data[$locAA]+$data[$locAB]+$data[$locBB];	# sample size = sum of the three genotype counts
		if (($data[$locAA]+$data[$locAB]+$data[$locBB])>0){$eaf = ((2*$data[$locBB])+$data[$locAB])/(2*($data[$locAA]+$data[$locAB]+$data[$locBB]));}
		else {$eaf =0;}	# avoid dividing by zero when there are no genotype counts
		if ($eaf>0.5){$maf = 1-$eaf;}
		else {$maf=$eaf;}
		$strand = "+";	# every marker is written as positive strand
		if ($data[0] eq "---"){$imp=1;}else{$imp=0;}	# "---" in the first field marks the row as imputed
	
		if ($cMAF > $maf || $cMAC>$maf*$n || $cN>$n || $cPROPER>$proper || $beta eq "NA" || $se eq "NA")
		{
			# row fails a cut-off or has no estimate: write nothing
		}
		else
		{
			if ($scheme eq "OR"){print O "$marker\t$ea\t$nea\t$or\t$or_95l\t$or_95u\t$n\t$eaf\t$strand\t$imp\n";}
			else {print O "$marker\t$ea\t$nea\t$beta\t$se\t$n\t$eaf\t$strand\t$imp\n";}
		}
	}
	$i++;
}


# Print the usage text to standard output. Takes no arguments.
# The example cut-off is spelled PROPERINFO because that is the only key the
# argument parsing of this script compares against for that filter.
sub printhelp
{
	print "Script for creating GWAMA input file from SNPTEST association results file.\n";
	print "Quantitative analysis:\n\tperl SNPTEST2.5_2_GWAMA.pl <SNPTEST output file> <output GWAMA file> SE\n";
	print "Case-control analysis:\n\tperl SNPTEST2.5_2_GWAMA.pl <SNPTEST output file> <output GWAMA file> OR\n";
	print "NB! Script uses BETA and SE values which are in last columns of file. If multiple analyse models were used then please edit this script.\n";
	print "NB! Script expects that all markers are from positive strand. If not, Strand column must be modified with correct strand information.\n";
	print "Data can be filtered according to minimum number of samples (N), minor allele frequency (MAF), and minimum number of allele count (MAC = MAF*N)\n";
	print "All cut-offs must be entered after mandatory 3 command line options shown above.\n";
	print "Example: N=100 MAF=0.01 MAC=10 PROPERINFO=0.4, will remove markers with less than 100 individuals, MAF<1% and MAC<10 and properinfo<0.4\n";
	print "Don't leave any spaces into the equations.\n";
	print "Example command line:\n\tperl SNPTEST2.5_2_GWAMA.pl <SNPTEST output file> <output GWAMA file> SE MAF=0.01 MAC=10\n";
}
\ No newline at end of file
diff --git a/debian/upstream.docs/Perl_scripts/SNPTEST2GWAMA.pl b/debian/upstream.docs/Perl_scripts/SNPTEST2GWAMA.pl
new file mode 100644
index 0000000..c6d748b
--- /dev/null
+++ b/debian/upstream.docs/Perl_scripts/SNPTEST2GWAMA.pl
@@ -0,0 +1,91 @@
+# Script for creating GWAMA input file from SNPTEST association results file.
+# Use the script: "perl SNPTEST2GWAMA.pl <SNPTEST output file> <output GWAMA file> SE" for quantitative trait analysis
+# Use the script: "perl SNPTEST2GWAMA.pl <SNPTEST output file> <output GWAMA file> OR" for dichotomous analysis
+# NB! Script uses BETA and SE values which are in last columns of file. If multiple analyse models were used then please edit this script
+# Optional KEY=VALUE cut-offs (N, MAC, MAF, PROPERINFO) may follow the three mandatory arguments; rows with NA BETA or SE are skipped.
+$inputfile = $ARGV[0];
+$outputfile = $ARGV[1];
+$scheme = uc($ARGV[2]);	# "OR" selects odds-ratio output; anything else gives BETA/SE
+$cMAF=$cMAC=$cN=$cPROPER=0;	# cut-offs default to 0
+for ($i=3; $i<scalar(@ARGV);$i++)	# parse the optional KEY=VALUE cut-offs
+{
+	@arg = split(/=/, $ARGV[$i]);
+	if (uc($arg[0]) eq "N" && $arg[1]>0){print "N cut-off $arg[1]\n"; $cN=$arg[1];}
+	if (uc($arg[0]) eq "MAC" && $arg[1]>0){print "MAC cut-off $arg[1]\n"; $cMAC=$arg[1];}
+	if (uc($arg[0]) eq "MAF" && $arg[1]>0){print "MAF cut-off $arg[1]\n"; $cMAF=$arg[1];}
+	if (uc($arg[0]) eq "PROPERINFO" && $arg[1]>0){print "PROPERINFO cut-off $arg[1]\n"; $cPROPER=$arg[1];}
+}
+if ($ARGV[0] eq "" || $ARGV[0] eq "-h" || $ARGV[0] eq "--help"){printhelp();exit;}
+open F, "<", $inputfile or die "Cannot file SNPTEST file. This must be first command line argument!\n";	# 3-arg open: the name is never parsed as a mode or pipe
+if ($outputfile eq ""){die "Please enter the outputfile name as second command line argument!\n";}
+open O, ">", $outputfile or die "Cannot open $outputfile for writing. Please check folder's access rights and disk quota!\n";
+if ($scheme eq "OR")
+{
+	print "Using OR with CI output.\n";
+	print O "MARKER\tEA\tNEA\tOR\tOR_95L\tOR_95U\tN\tEAF\tSTRAND\tIMPUTED\n";
+}
+else
+{
+	print "Using BETA with SE output.\n";
+	print O "MARKER\tEA\tNEA\tBETA\tSE\tN\tEAF\tSTRAND\tIMPUTED\n";
+}
+$i=0;
+while(<F>)
+{
+	chomp;
+	@data = split(/\s/);	# splits on every single whitespace character
+	if ($i==0)	#header line
+	{
+		$locAA=$locAB=$locBB=0;	# genotype-count columns default to field 0 when not found
+		for ($j=0;$j<scalar(@data); $j++)
+		{
+			if ($data[$j] eq "all_AA"){$locAA=$j;}
+			if ($data[$j] eq "all_AB"){$locAB=$j;}
+			if ($data[$j] eq "all_BB"){$locBB=$j;}
+		}
+	}
+	else		#snp line
+	{
+		$marker = $data[1];
+		$ea = $data[4];
+		$nea = $data[3];
+		$beta = $data[scalar(@data)-2];	# second field from the end
+		$se = $data[scalar(@data)-1];	# last field
+		$proper = $data[scalar(@data)-3];	# third field from the end, compared with the PROPERINFO cut-off
+		$or = exp($beta);
+		$or_95l = exp($beta - 1.96* $se);	# 95% confidence limits from beta +/- 1.96*se
+		$or_95u = exp($beta + 1.96* $se);
+		$n = $data[$locAA]+$data[$locAB]+$data[$locBB];	# sample size = sum of the three genotype counts
+		if (($data[$locAA]+$data[$locAB]+$data[$locBB])>0){$eaf = ((2*$data[$locBB])+$data[$locAB])/(2*($data[$locAA]+$data[$locAB]+$data[$locBB]));}
+		else {$eaf =0;}	# avoid dividing by zero when there are no genotype counts
+		if ($eaf>0.5){$maf = 1-$eaf;}
+		else {$maf=$eaf;}
+		$strand = "+";	# every marker is written as positive strand
+		if ($data[0] eq "---"){$imp=1;}else{$imp=0;}	# "---" in the first field marks the row as imputed
+		# "NA" is numerically 0, so without the NA test an OR of exp(0)=1 with a zero-width interval would be written
+		if ($cMAF > $maf || $cMAC>$maf*$n || $cN>$n || $cPROPER>$proper || $beta eq "NA" || $se eq "NA")
+		{
+		}
+		else
+		{
+			if ($scheme eq "OR"){print O "$marker\t$ea\t$nea\t$or\t$or_95l\t$or_95u\t$n\t$eaf\t$strand\t$imp\n";}
+			else {print O "$marker\t$ea\t$nea\t$beta\t$se\t$n\t$eaf\t$strand\t$imp\n";}
+		}
+	}
+	$i++;
+}
+
+
+sub printhelp	# Print the usage text to standard output; takes no arguments. The cut-off key the script parses is PROPERINFO.
+{
+	print "Script for creating GWAMA input file from SNPTEST association results file.\n";
+	print "Quantitative analysis:\n\tperl SNPTEST2GWAMA.pl <SNPTEST output file> <output GWAMA file> SE\n";
+	print "Case-control analysis:\n\tperl SNPTEST2GWAMA.pl <SNPTEST output file> <output GWAMA file> OR\n";
+	print "NB! Script uses BETA and SE values which are in last columns of file. If multiple analyse models were used then please edit this script.\n";
+	print "NB! Script expects that all markers are from positive strand. If not, Strand column must be modified with correct strand information.\n";
+	print "Data can be filtered according to minimum number of samples (N), minor allele frequency (MAF), and minimum number of allele count (MAC = MAF*N)\n";
+	print "All cut-offs must be entered after mandatory 3 command line options shown above.\n";
+	print "Example: N=100 MAF=0.01 MAC=10 PROPERINFO=0.4, will remove markers with less than 100 individuals, MAF<1% and MAC<10 and properinfo<0.4\n";
+	print "Don't leave any spaces into the equations.\n";
+	print "Example command line:\n\tperl SNPTEST2GWAMA.pl <SNPTEST output file> <output GWAMA file> SE MAF=0.01 MAC=10\n";
+}
diff --git a/debian/upstream.docs/Perl_scripts/SNPTEST2_2_GWAMA.pl b/debian/upstream.docs/Perl_scripts/SNPTEST2_2_GWAMA.pl
new file mode 100644
index 0000000..a3e1bfd
--- /dev/null
+++ b/debian/upstream.docs/Perl_scripts/SNPTEST2_2_GWAMA.pl
@@ -0,0 +1,91 @@
+# Script for creating GWAMA input file from SNPTEST2 association results file.
+# Use the script: "perl SNPTEST2_2_GWAMA.pl <SNPTEST output file> <output GWAMA file> SE" for quantitative trait analysis
+# Use the script: "perl SNPTEST2_2_GWAMA.pl <SNPTEST output file> <output GWAMA file> OR" for dichotomous analysis
+# NB! Script uses BETA and SE values which are in last columns of file. If multiple analyse models were used then please edit this script
+# Optional KEY=VALUE cut-offs (N, MAC, MAF, PROPERINFO) may follow the three mandatory arguments; rows with NA BETA or SE are skipped.
+$inputfile = $ARGV[0];
+$outputfile = $ARGV[1];
+$scheme = uc($ARGV[2]);	# "OR" selects odds-ratio output; anything else gives BETA/SE
+$cMAF=$cMAC=$cN=$cPROPER=0;	# cut-offs default to 0
+for ($i=3; $i<scalar(@ARGV);$i++)	# parse the optional KEY=VALUE cut-offs
+{
+	@arg = split(/=/, $ARGV[$i]);
+	if (uc($arg[0]) eq "N" && $arg[1]>0){print "N cut-off $arg[1]\n"; $cN=$arg[1];}
+	if (uc($arg[0]) eq "MAC" && $arg[1]>0){print "MAC cut-off $arg[1]\n"; $cMAC=$arg[1];}
+	if (uc($arg[0]) eq "MAF" && $arg[1]>0){print "MAF cut-off $arg[1]\n"; $cMAF=$arg[1];}
+	if (uc($arg[0]) eq "PROPERINFO" && $arg[1]>0){print "PROPERINFO cut-off $arg[1]\n"; $cPROPER=$arg[1];}
+}
+if ($ARGV[0] eq "" || $ARGV[0] eq "-h" || $ARGV[0] eq "--help"){printhelp();exit;}
+open F, "<", $inputfile or die "Cannot file SNPTEST file. This must be first command line argument!\n";	# 3-arg open: the name is never parsed as a mode or pipe
+if ($outputfile eq ""){die "Please enter the outputfile name as second command line argument!\n";}
+open O, ">", $outputfile or die "Cannot open $outputfile for writing. Please check folder's access rights and disk quota!\n";
+if ($scheme eq "OR")
+{
+	print "Using OR with CI output.\n";
+	print O "MARKER\tEA\tNEA\tOR\tOR_95L\tOR_95U\tN\tEAF\tSTRAND\tIMPUTED\n";
+}
+else
+{
+	print "Using BETA with SE output.\n";
+	print O "MARKER\tEA\tNEA\tBETA\tSE\tN\tEAF\tSTRAND\tIMPUTED\n";
+}
+$i=0;
+while(<F>)
+{
+	chomp;
+	@data = split(/\s/);	# splits on every single whitespace character
+	if ($i==0)	#header line
+	{
+		$locAA=$locAB=$locBB=0;	# genotype-count columns default to field 0 when not found
+		for ($j=0;$j<scalar(@data); $j++)
+		{
+			if ($data[$j] eq "all_AA"){$locAA=$j;}
+			if ($data[$j] eq "all_AB"){$locAB=$j;}
+			if ($data[$j] eq "all_BB"){$locBB=$j;}
+		}
+	}
+	else		#snp line
+	{
+		$marker = $data[1];
+		$ea = $data[5];
+		$nea = $data[4];
+		$beta = $data[scalar(@data)-2];	# second field from the end
+		$se = $data[scalar(@data)-1];	# last field
+		$proper = $data[scalar(@data)-3];	# third field from the end, compared with the PROPERINFO cut-off
+		$or = exp($beta);
+		$or_95l = exp($beta - 1.96* $se);	# 95% confidence limits from beta +/- 1.96*se
+		$or_95u = exp($beta + 1.96* $se);
+		$n = $data[$locAA]+$data[$locAB]+$data[$locBB];	# sample size = sum of the three genotype counts
+		if (($data[$locAA]+$data[$locAB]+$data[$locBB])>0){$eaf = ((2*$data[$locBB])+$data[$locAB])/(2*($data[$locAA]+$data[$locAB]+$data[$locBB]));}
+		else {$eaf =0;}	# avoid dividing by zero when there are no genotype counts
+		if ($eaf>0.5){$maf = 1-$eaf;}
+		else {$maf=$eaf;}
+		$strand = "+";	# every marker is written as positive strand
+		if ($data[0] eq "---"){$imp=1;}else{$imp=0;}	# "---" in the first field marks the row as imputed
+		# "NA" is numerically 0, so without the NA test an OR of exp(0)=1 with a zero-width interval would be written
+		if ($cMAF > $maf || $cMAC>$maf*$n || $cN>$n || $cPROPER>$proper || $beta eq "NA" || $se eq "NA")
+		{
+		}
+		else
+		{
+			if ($scheme eq "OR"){print O "$marker\t$ea\t$nea\t$or\t$or_95l\t$or_95u\t$n\t$eaf\t$strand\t$imp\n";}
+			else {print O "$marker\t$ea\t$nea\t$beta\t$se\t$n\t$eaf\t$strand\t$imp\n";}
+		}
+	}
+	$i++;
+}
+
+
+sub printhelp	# Print the usage text to standard output; takes no arguments. The cut-off key the script parses is PROPERINFO.
+{
+	print "Script for creating GWAMA input file from SNPTEST association results file.\n";
+	print "Quantitative analysis:\n\tperl SNPTEST2_2_GWAMA.pl <SNPTEST output file> <output GWAMA file> SE\n";
+	print "Case-control analysis:\n\tperl SNPTEST2_2_GWAMA.pl <SNPTEST output file> <output GWAMA file> OR\n";
+	print "NB! Script uses BETA and SE values which are in last columns of file. If multiple analyse models were used then please edit this script.\n";
+	print "NB! Script expects that all markers are from positive strand. If not, Strand column must be modified with correct strand information.\n";
+	print "Data can be filtered according to minimum number of samples (N), minor allele frequency (MAF), and minimum number of allele count (MAC = MAF*N)\n";
+	print "All cut-offs must be entered after mandatory 3 command line options shown above.\n";
+	print "Example: N=100 MAF=0.01 MAC=10 PROPERINFO=0.4, will remove markers with less than 100 individuals, MAF<1% and MAC<10 and properinfo<0.4\n";
+	print "Don't leave any spaces into the equations.\n";
+	print "Example command line:\n\tperl SNPTEST2_2_GWAMA.pl <SNPTEST output file> <output GWAMA file> SE MAF=0.01 MAC=10\n";
+}
diff --git a/debian/upstream.docs/R_scripts/MANH.R b/debian/upstream.docs/R_scripts/MANH.R
new file mode 100644
index 0000000..e613bf8
--- /dev/null
+++ b/debian/upstream.docs/R_scripts/MANH.R
@@ -0,0 +1,68 @@
+#Manhattan plot script for GWAMA
+#Written by Joshua C Randall & Reedik Magi
+for (e in commandArgs(trailingOnly=TRUE))	# each argument is NAME=VALUE or a bare NAME flag
+{
+  ta = strsplit(e,"=",fixed=TRUE)
+  if(length(ta[[1]]) > 1)	# indexing past the end gives NA, never NULL, so test the length instead
+  {
+    assign(ta[[1]][1],ta[[1]][2])	# NAME=VALUE: variable NAME gets the string VALUE
+  } else {
+    assign(ta[[1]][1],TRUE)	# bare flag: variable NAME gets TRUE
+  }
+}
+if(!exists("input"))	# default input file when no input=... argument was given
+{
+  input <- paste("gwama.out")
+}
+if(!exists("out")) {	# default output name is derived from the input name
+  out <- paste(input,".manh.png",sep="")
+}
+data<-read.table(input,stringsAsFactors=FALSE,header=TRUE,sep = "\t",na.strings = "-9")	# "-9" entries are read as NA
+png(out,height=600,width=800)
+
+obspval <- (data$p.value)
+chr <- (data$chromosome)
+pos <- (data$position)
+obsmax <- trunc(max(-log10(obspval), na.rm=TRUE))+1	# na.rm: an NA p-value must not turn the y-axis limit into NA
+
+sort.ind <- order(chr, pos)	# sort all three vectors by chromosome, then position
+chr <- chr[sort.ind]
+pos <- pos[sort.ind]
+obspval <- obspval[sort.ind]
+
+x <- 1:22	# per-chromosome upper bound, later the cumulative right edge
+x2<- 1:22	# per-chromosome lower bound, later the offset added to positions
+
+for (i in 1:22)	# position range of each chromosome in units of 100, padded by 100000
+{
+	 curchr=which(chr==i)
+	 x[i] <- trunc((max(pos[curchr]))/100) +100000
+	 x2[i] <- trunc((min(pos[curchr]))/100) -100000
+}
+
+x[1]=x[1]-x2[1]
+x2[1]=0-x2[1]
+
+for (i in 2:22)	# accumulate offsets; x and x2 hold 22 entries, so the loop stops at 22
+{
+	x[i] <- x[i-1]-x2[i]+x[i]
+	x2[i] <- x[i-1]-x2[i]
+
+}
+locX = trunc(pos/100) + x2[chr]	# x coordinate = scaled position plus the chromosome's offset
+locY = -log10(obspval)
+col1=rgb(0,0,108,maxColorValue=255)
+col2=rgb(100,149,237,maxColorValue=255)
+col3=rgb(0,205,102,maxColorValue=255)
+col4 <- ifelse (chr%%2==0, col1, col2)	# alternate the colour between even and odd chromosomes
+curcol <- ifelse (obspval<5e-8, col3, col4)	# p-values below 5e-8 get the highlight colour
+plot(locX,locY,pch=20,col=curcol,axes=F,ylab="-log10 p-value",xlab="",bty="n",ylim=c(0,obsmax),cex=0.8)
+axis(2,las=1)
+for (i in 1:22)	# chromosome number centred under its block
+{
+	labpos = (x[i] + x2[i]) / 2
+	mtext(i,1,at=labpos,cex=0.8,line=0)
+}
+mtext("Chromosome",1,at=x[22]/2,cex=1,line=1)
+dev.off()
+
diff --git a/debian/upstream.docs/R_scripts/QQ.R b/debian/upstream.docs/R_scripts/QQ.R
new file mode 100644
index 0000000..30c164d
--- /dev/null
+++ b/debian/upstream.docs/R_scripts/QQ.R
@@ -0,0 +1,34 @@
+#Quantile-quantile plot script for GWAMA
+#Written by Joshua C Randall & Reedik Magi
+for (e in commandArgs(trailingOnly=TRUE))	# each argument is NAME=VALUE or a bare NAME flag
+{
+  ta = strsplit(e,"=",fixed=TRUE)
+  if(length(ta[[1]]) > 1)	# indexing past the end gives NA, never NULL, so test the length instead
+  {
+    assign(ta[[1]][1],ta[[1]][2])	# NAME=VALUE: variable NAME gets the string VALUE
+  } else {
+    assign(ta[[1]][1],TRUE)	# bare flag: variable NAME gets TRUE
+  }
+}
+
+if(!exists("input"))	# default input file when no input=... argument was given
+{
+  input <- paste("gwama.out")
+}
+
+if(!exists("out")) {	# default output name is derived from the input name
+  out <- paste(input,".qq.png",sep="")
+}
+data<-read.table(input,stringsAsFactors=FALSE,header=TRUE,sep = "\t",na.strings = "-9")	# "-9" entries are read as NA, as in MANH.R
+png(out,height=600,width=600)
+obspval <- sort(data$p.value)	# ascending order; sort() drops NA values
+logobspval <- -(log10(obspval))
+exppval <- c(1:length(obspval))
+logexppval <- -(log10( (exppval-0.5)/length(exppval)))	# expected quantiles (rank-0.5)/n on the -log10 scale
+obsmax <- trunc(max(logobspval))+1
+expmax <- trunc(max(logexppval))+1
+plot(c(0,expmax), c(0,expmax), col="gray", lwd=1, type="l", xlab="Expected -log10 P-value", ylab="Observed -log10 P-value", xlim=c(0,expmax), ylim=c(0,obsmax), las=1, xaxs="i", yaxs="i", bty="l")
+points(logexppval, logobspval, pch=23, cex=.4, bg="black")
+dev.off()
+
+
diff --git a/debian/upstream.docs/log2.txt b/debian/upstream.docs/log2.txt
new file mode 100644
index 0000000..843bc96
--- /dev/null
+++ b/debian/upstream.docs/log2.txt
@@ -0,0 +1,65 @@
+v2.1
+*Added option "--indel_alleles" for using longer allele names than single character. Please note that enabling that option will remove strand flipping.
+*Added "Analysis finished." line to log file and standard output.
+
+v2.0.7
+*Fixed a typo in printing effect directions for cohorts where p-value was 1
+
+v2.0.6
+*Fixed typos in manual and in command line options (thanks Josh!)
+
+v2.0.5
+*Repaired effect allele frequency discrepancy check. It will now print a warning and increase the strand problem count if the EAF difference is larger than 30%
+
+v2.0.4
+*Repaired an issue with IMPUTED column (segmentation fault if value not 1 or 0)
+
+v2.0.4
+*Removed a bug created by previous fix
+
+v2.0.3
+*Repaired issue with sex-specific results in case of missing eaf
+
+v2.0.2
+*Repaired issue with alternative column names - all names are converted to uppercase to remove case sensitivity
+
+v2.0.1
+*Added genomic control output file
+*Repaired header line in case of map option is used
+*If using OR and confidence intervals, the std.err calculation has changed a bit - in previous version both upper and lower confidence interval were used (and average was measured), in GWAMA v.2 only the lower one is used. This might cause a slight change in results due to rounding.
+
+v2
+*Added new command line options
+*Added sex-specific analysis framework
+*Fixed bug concerning numeric alleles and negative strands
+*Made program more memory efficient
+*Summary file info on screen while running analysis
+
+v1.4
+*Added random effect correction
+*Log and error file names now have changeable output file root ("--output" option)
+*PERL scripts for reformatting SNPTEST and PLINK association results to GWAMA format have been added to webpage
+*Added check for column count for each row of input data
+*Repaired directions column - it used to be mirrored
+
+v1.3.1.1
+*Added automatic strand flipping if marker is not A/T nor C/G
+*Repaired threshold use in effect directions
+
+v1.3.1
+*Changed file input format: added N column
+*Changed input format: added chromosome and position columns, N column
+*Added --map and -qt command line options
+*Added R scripts for plots
+*Added HapMap marker maps
+
+v1.3
+
+*Changed file input format: removed imputation accuracy, chromosome and position columns. 
+Added alternative input file format with odds ratio information.
+*Changed output format: removed chromosome and position columns. Added different 
+format options
+*Replaced single configuration file with command line options
+*Improved error trapping
+*Replaced chi-distribution median 0.456 with 0.4549364. Value received from R using 
+qchisq(0.5, df= 1)
diff --git a/debian/upstream.docs/samples/gwama.in b/debian/upstream.docs/samples/gwama.in
new file mode 100644
index 0000000..acaee76
--- /dev/null
+++ b/debian/upstream.docs/samples/gwama.in
@@ -0,0 +1,3 @@
+pop1.txt
+pop2.txt
+pop3.txt
diff --git a/debian/upstream.docs/samples/pop1.txt b/debian/upstream.docs/samples/pop1.txt
new file mode 100644
index 0000000..7b9d137
--- /dev/null
+++ b/debian/upstream.docs/samples/pop1.txt
@@ -0,0 +1,12 @@
+SNP	STRAND	IMPUTED	EFFECT_ALLELE	NON_EFFECT_ALLELE	BETA	SE
+rs12565286	+	0	G	C	-0.0223417	0.403115
+rs2977670	+	0	C	G	-0.0137211	0.406126
+rs12138618	+	0	G	A	-0.0763812	0.3712373
+rs3094315	+	0	G	A	0.0258089	0.101264
+rs3131968	+	0	G	A	-0.3734	0.1012374
+rs2519016	+	1	T	C	0.26655	0.347234
+rs12562034	+	1	G	A	0.00923484	0.279129
+rs2905035	+	1	G	A	-0.0435863	NA
+rs12124819	+	1	A	G	0.0262434	0.121293
+rs2980319	+	1	T	A	-0.0423165	0.1241691
+rs4040617	+	1	A	G	0.0230683	0.1012426
diff --git a/debian/upstream.docs/samples/pop2.txt b/debian/upstream.docs/samples/pop2.txt
new file mode 100644
index 0000000..1ff8605
--- /dev/null
+++ b/debian/upstream.docs/samples/pop2.txt
@@ -0,0 +1,13 @@
+SNP	STRAND	IMP	EFFECT_ALLELE	NON_EFFECT_ALLELE	BETA	SE
+rs12565286	+	0	G	C	0.01123417	0.4075
+rs11804171	+	0	T	A	0.011236	0.406971
+rs2977670	+	0	C	G	0.0112432	0.406756
+rs12138618	+	0	G	A	-0.5312453	0.374873
+rs3094315	+	0	G	A	0.02158089	0.102164
+rs3131968	+	1	G	A	-0.273734	0.10202
+rs2519016	+	1	T	C	0.2331655	0.377374
+rs12562034	+	1	G	A	0.009284	0.279969
+rs2905035	+	0	G	A	-0.0469863	0.101732
+rs12124819	+	0	A	G	0.0264434	0.12393
+rs2980319	+	0	T	A	-0.0469165	0.102291
+rs4040617	+	0	A	G	0.0470683	0.101526
diff --git a/debian/upstream.docs/samples/pop3.txt b/debian/upstream.docs/samples/pop3.txt
new file mode 100644
index 0000000..733d60f
--- /dev/null
+++ b/debian/upstream.docs/samples/pop3.txt
@@ -0,0 +1,12 @@
+SNP	STRAND	CHR	POS	IMP	EFFECT_ALLELE	NON_EFFECT_ALLELE	BETA	SE
+rs12565286	+	1	761153	0	G	C	-0.0134423	0.4075
+rs11804171	+	1	763682	0	T	A	-0.015524	0.406971
+rs2977670	+	1	763754	0	C	G	-0.0142455	0.406756
+rs12138618	+	1	790098	0	T	A	0.32344	0.374873
+rs3094315	+	1	792429	0	G	A	0.0258089	0.107864
+rs3131968	+	1	794055	0	G	A	-0.0475233	0.102074
+rs2519016	+	1	805811	0	T	C	0.2334523	0.377374
+rs12562034	+	1	808311	0	G	A	0.00923552	0.279969
+rs2905035	+	1	815522	0	G	A	-0.0412412	0.101732
+rs2980319	+	1	816985	0	T	A	-0.02134	0.101691
+rs4040617	+	1	819185	0	A	G	-0.04423	0.101526

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gwama.git



More information about the debian-med-commit mailing list