[med-svn] r1721 - in trunk/packages: . plink plink/branches plink/branches/upstream plink/branches/upstream/current
smoe-guest at alioth.debian.org
smoe-guest at alioth.debian.org
Wed Apr 9 09:42:38 UTC 2008
Author: smoe-guest
Date: 2008-04-09 09:42:37 +0000 (Wed, 09 Apr 2008)
New Revision: 1721
Added:
trunk/packages/plink/
trunk/packages/plink/branches/
trunk/packages/plink/branches/upstream/
trunk/packages/plink/branches/upstream/current/
trunk/packages/plink/branches/upstream/current/Makefile
trunk/packages/plink/branches/upstream/current/clumpld.cpp
trunk/packages/plink/branches/upstream/current/filters.cpp
trunk/packages/plink/branches/upstream/current/fisher.cpp
trunk/packages/plink/branches/upstream/current/nlist.cpp
trunk/packages/plink/branches/upstream/current/parse.cpp
trunk/packages/plink/branches/upstream/current/perm.cpp
trunk/packages/plink/branches/upstream/current/prephap.cpp
trunk/packages/plink/branches/upstream/current/proxy.cpp
Log:
[svn-inject] Installing original source of plink
Added: trunk/packages/plink/branches/upstream/current/Makefile
===================================================================
--- trunk/packages/plink/branches/upstream/current/Makefile (rev 0)
+++ trunk/packages/plink/branches/upstream/current/Makefile 2008-04-09 09:42:37 UTC (rev 1721)
@@ -0,0 +1,117 @@
+# -----------------------------------------------------------------# Makefile for PLINK
+#
+# Supported platforms
+# Unix / Linux LINUX
+# Windows WIN
+# Mac MAC
+# Solaris SOLARIS
+#
+# Compilation options
+# R plugins WITH_R_PLUGINS
+# Web-based version check WITH_WEBCHECK
+# Ensure 32-bit binary FORCE_32BIT
+#
+# ---------------------------------------------------------------------
+
+# Set this variable to either UNIX, MAC or WIN
+SYS = UNIX
+
+# Leave blank after "=" to disable; put "= 1" to enable
+WITH_R_PLUGINS = 1
+WITH_WEBCHECK = 1
+FORCE_32BIT =
+
+# Put C++ compiler here; Windows has it's own specific version
+CXX_UNIX = g++
+CXX_WIN = c:\bin\mingw\bin\mingw32-g++.exe
+
+# Any other compiler flags here ( -Wall, -g, etc)
+CXXFLAGS =
+
+# --------------------------------------------------------------------
+# Do not edit below this line
+# --------------------------------------------------------------------
+
+CXXFLAGS += -O3 -I.
+OUTPUT = plink
+
+# Some system specific flags
+
+ifeq ($(SYS),WIN)
+ CXXFLAGS += -DWIN -static
+ CXX = $(CXX_WIN)
+endif
+
+ifeq ($(SYS),UNIX)
+ CXXFLAGS += -DUNIX -static
+ CXX = $(CXX_UNIX)
+endif
+
+ifeq ($(SYS),MAC)
+ CXXFLAGS += -DUNIX -Dfopen64=fopen
+ CXX = $(CXX_UNIX)
+endif
+
+ifeq ($(SYS),SOLARIS)
+ CXX = $(CXX_UNIX)
+endif
+
+ifdef FORCE_32BIT
+ CXXFLAGS += -m32
+endif
+
+# Flags for web-based version check
+
+ifdef WITH_WEBCHECK
+ifeq ($(SYS),WIN)
+ LIB += -lwsock32
+endif
+else
+ CXXFLAGS += -DSKIP
+endif
+
+
+
+SRC = plink.cpp options.cpp input.cpp binput.cpp tinput.cpp genome.cpp \
+helper.cpp filters.cpp locus.cpp multi.cpp crandom.cpp cluster.cpp \
+mds.cpp output.cpp informative.cpp assoc.cpp epi.cpp prephap.cpp \
+phase.cpp trio.cpp tdt.cpp sharing.cpp genepi.cpp sets.cpp perm.cpp \
+mh.cpp genedrop.cpp gxe.cpp merge.cpp hotel.cpp multiple.cpp \
+haploCC.cpp haploTDT.cpp poo.cpp webcheck.cpp qfam.cpp linear.cpp \
+bmerge.cpp parse.cpp mishap.cpp legacy.cpp homozyg.cpp segment.cpp \
+model.cpp logistic.cpp glm.cpp dcdflib.cpp rarer.cpp dfam.cpp \
+fisher.cpp linput.cpp sockets.cpp lookup.cpp proxy.cpp pdriver.cpp \
+haploQTL.cpp haplohelper.cpp haplowindow.cpp genogroup.cpp \
+nonfounderphasing.cpp clumpld.cpp genoerr.cpp em.cpp impute.cpp \
+metaem.cpp profile.cpp nlist.cpp whap.cpp simul.cpp gvar.cpp cnv.cpp
+
+HDR = plink.h options.h helper.h crandom.h sets.h phase.h perm.h \
+model.h linear.h logistic.h dcdflib.h ipmpar.h cdflib.h fisher.h \
+sockets.h haplowindow.h genogroup.h clumpld.h nlist.h whap.h gvar.h
+
+ifdef WITH_R_PLUGINS
+CXXFLAGS += -DWITH_R_PLUGINS
+HDR += sisocks.h Rsrv.h Rconnection.h config.h
+SRC += r.cpp Rconnection.cpp
+LIB += -ldl -lcrypt
+endif
+
+OBJ = $(SRC:.cpp=.o)
+
+all : $(OUTPUT)
+
+$(OUTPUT) :
+ $(CXX) $(CXXFLAGS) -o $(OUTPUT) $(OBJ) $(LIB)
+
+$(OBJ) : $(HDR)
+
+.cpp.o :
+ $(CXX) $(CXXFLAGS) -c $*.cpp
+.SUFFIXES : .cpp .c .o $(SUFFIXES)
+
+$(OUTPUT) : $(OBJ)
+
+FORCE:
+
+clean:
+ rm -f *.o *~
Added: trunk/packages/plink/branches/upstream/current/clumpld.cpp
===================================================================
--- trunk/packages/plink/branches/upstream/current/clumpld.cpp (rev 0)
+++ trunk/packages/plink/branches/upstream/current/clumpld.cpp 2008-04-09 09:42:37 UTC (rev 1721)
@@ -0,0 +1,810 @@
+
+
+//////////////////////////////////////////////////////////////////
+// //
+// PLINK (c) 2005-2007 Shaun Purcell //
+// //
+// This file is distributed under the GNU General Public //
+// License, Version 2. Please see the file COPYING for more //
+// details //
+// //
+//////////////////////////////////////////////////////////////////
+
+
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <sstream>
+
+#include "clumpld.h"
+#include "phase.h"
+#include "options.h"
+#include "plink.h"
+
+
+//////////////////////////////////////////////////
+// Set user-defined parameters within constructor
+
+clump_LD::clump_LD(Plink * pp,
+ HaploPhase * hp_,
+ double sig,
+ double dist,
+ double secondp,
+ float r2c)
+{
+ P = pp;
+ hp = hp_;
+ pval_cutoff = sig;
+ ld_distance = dist;
+ second_pval_cutoff = secondp;
+ r2_cutoff = r2c;
+
+}
+
+
+/////////////
+// accessors
+
+void clump_LD::set_pval( double sig ){
+ pval_cutoff = sig;
+}
+
+void clump_LD::set_second_pval( double secondp ){
+ second_pval_cutoff = secondp;
+}
+
+void clump_LD::set_ld( double dist ){
+ ld_distance = dist;
+}
+
+void clump_LD::set_r2( double r2c ){
+ r2_cutoff = r2c;
+}
+
+
+//////////////////////////////////////////////////
+// read association file and pull out significant
+// results sort by pvalue
+
+vector<ResultTrio> clump_LD::read_assoc_file(string fileList)
+{
+
+ vector<ResultTrio> sp;
+
+ // We may be reading a single file, or more than one file
+ fileList = searchAndReplace(fileList,","," ");
+
+ // Tokenize
+ filename.clear();
+ string buf;
+ stringstream ss(fileList);
+ while (ss >> buf)
+ filename.push_back(buf);
+
+ // Read each file
+
+ for (int f=0; f<filename.size(); f++)
+ {
+
+ checkFileExists(filename[f]);
+ ifstream RESIN( filename[f].c_str(), ios::in );
+
+ int snp_column = -1;
+ int pval_column = -1;
+ vector<int> annot_field;
+
+ // Get header row
+
+ char cline[par::MAX_LINE_LENGTH];
+ RESIN.getline(cline,par::MAX_LINE_LENGTH,'\n');
+
+ string sline = cline;
+ if (sline=="")
+ error("Problem reading [ " + par::clumpld_results + " ]\n");
+
+ vector<string> tok_annot;
+
+ if ( par::clumpld_annot )
+ {
+ string afields = searchAndReplace(par::clumpld_annot_fields,","," ");
+ string buf;
+ stringstream ss(afields);
+ while (ss >> buf)
+ tok_annot.push_back(buf);
+ }
+
+ string buf;
+ stringstream ss(sline);
+ vector<string> tokens;
+ while (ss >> buf)
+ tokens.push_back(buf);
+
+ for (int i=0; i<tokens.size(); i++)
+ {
+ if ( tokens[i] == "SNP" )
+ snp_column = i;
+
+ if ( tokens[i] == par::clumpld_column )
+ pval_column = i;
+
+ if ( par::clumpld_annot )
+ for ( int f=0; f<tok_annot.size(); f++)
+ if ( tokens[i] == tok_annot[f] )
+ annot_field.push_back(i);
+ }
+
+
+ //////////////////////////////////////
+ // Did we find the necessary columns?
+
+ if ( snp_column < 0 || pval_column < 0 )
+ {
+ error("Could not find [" + par::clumpld_column
+ + "] as given in --clump-field\n");
+ }
+
+ P->printLOG("Reading results for clumping from [ "
+ + filename[f] + " ]\n");
+ P->printLOG("Extracting fields SNP and "
+ + par::clumpld_column + "\n");
+
+ while (!RESIN.eof())
+ {
+
+ char cline[par::MAX_LINE_LENGTH];
+ RESIN.getline(cline,par::MAX_LINE_LENGTH,'\n');
+
+ string sline = cline;
+ if (sline=="")
+ continue;
+
+ string buf;
+ stringstream ss(sline);
+ vector<string> tokens;
+ while (ss >> buf)
+ tokens.push_back(buf);
+
+ if ( tokens.size() <= snp_column ||
+ tokens.size() <= pval_column )
+ continue;
+
+ ResultTrio pt;
+
+ // Keep track of which file this is from
+ pt.f = f+1;
+
+ if ( ! from_string<double>( pt.p, tokens[pval_column] , std::dec))
+ continue;
+
+ pt.s = tokens[snp_column];
+
+ // Create clumped vector, based just on SNP name
+
+ if( pt.p <= pval_cutoff )
+ clumped.insert(make_pair(pt.s,false));
+
+ ClumpPair cp;
+ cp.snp = pt.s;
+ cp.f = f+1;
+
+ ClumpResults cr;
+ cr.p = pt.p;
+ cr.annot = "";
+ if ( par::clumpld_annot )
+ {
+ for (int f=0; f<annot_field.size(); f++)
+ {
+ cr.annot += tokens[annot_field[f]];
+ if ( f < annot_field.size() - 1 )
+ cr.annot += ", ";
+ }
+ }
+
+ assoc_results.insert( make_pair( cp, cr ));
+ sp.push_back(pt);
+
+ }
+
+ RESIN.close();
+
+ // Read in next results file
+ }
+
+ sort(sp.begin(), sp.end());
+ return sp;
+}
+
+
+///////////////////////////////
+// Perform clumping operation
+
+void clump_LD::clump()
+{
+
+ P->printLOG("\nWriting clumped results file to [ " +
+ par::output_file_name + ".clumped ]\n");
+
+ P->printLOG("Parameters for --clump:\n");
+ P->printLOG(" p-value threshold for index SNPs = "
+ + dbl2str(pval_cutoff) + "\n");
+
+ P->printLOG(" Physical (kb) threshold for clumping = "
+ + dbl2str(ld_distance/1000.0) + "\n");
+
+ P->printLOG(" LD (r-squared) threshold for clumping = "
+ + dbl2str(r2_cutoff) + "\n");
+
+ P->printLOG(" p-value threshold for clumped SNPs = "
+ + dbl2str(second_pval_cutoff) + "\n");
+
+
+ if ( par::clumpld_annot )
+ P->printLOG("Including annotation fields: " + par::clumpld_annot_fields +"\n");
+
+ vector<ResultTrio> sp = read_assoc_file( par::clumpld_results );
+
+ if ( par::clumpld_index1 )
+ P->printLOG("Indexing only on [ " + filename[0] + " ]\n");
+ else
+ P->printLOG("Indexing on all files\n");
+
+ if ( par::clumpld_only_show_replications )
+ P->printLOG("Only showing cross-file clumps\n");
+
+ int zero, one, two, three, four;
+ map<string,int> mlocus;
+ map<int2,double> grouped_snps;
+
+ ofstream CLMP;
+ CLMP.open( (par::output_file_name + ".clumped").c_str() , ios::out);
+ CLMP.precision(3);
+ CLMP.setf(ios::scientific);
+
+ string vmessage;
+
+ /////////////////
+ // Output header
+
+ if ( ! par::clumpld_verbose )
+ CLMP << setw(4) << "CHR" << " "
+ << setw(4) << "F" << " "
+ << setw(par::pp_maxsnp) << "SNP" << " "
+ << setw(10) << "BP" << " "
+ << setw(8) << "P" << " "
+ << setw(8) << "TOTAL" << " "
+ << setw(6) << "NSIG" << " "
+ << setw(6) << "S05" << " "
+ << setw(6) << "S01" << " "
+ << setw(6) << "S001" << " "
+ << setw(6) << "S0001" << " "
+ << setw(6) << "SP2" << "\n";
+
+
+ //////////////////
+ // Build SNP map
+
+ for (int l=0; l<P->nl_all; l++)
+ mlocus.insert(make_pair(P->locus[l]->name, l));
+
+ if (sp.size()==0)
+ {
+ P->printLOG("No significant results given --clump parameters\n");
+ return;
+ }
+
+
+
+ ////////////////////////////////////////////////
+ // Iterate through all association results by
+ // decreasing p-value until cutoff is reached
+
+ int clumpCount = 0;
+
+ for (int i = 0; i < sp.size(); i++)
+ {
+
+ zero=one=two=three=four=0;
+
+ ////////////////////////////////////
+ // End if p-value cutoff is reached
+
+ if( sp[i].p > pval_cutoff )
+ break;
+
+
+ //////////////////////////////////
+ // Skip already clumped this SNP
+
+ if( clumped[sp[i].s] )
+ continue;
+
+
+ /////////////////////////////////////////////
+ // Are we only indexing based on first file?
+
+ if ( par::clumpld_index1 && sp[i].f > 1 )
+ continue;
+
+
+ ///////////////////////////////////////////
+ // Make sure associated SNP is in SNP map
+
+ map<string,int>::iterator ilocus;
+ ilocus = mlocus.find(sp[i].s);
+ int l = -1;
+ if (ilocus != mlocus.end())
+ {
+ l = ilocus->second;
+ }
+ else
+ {
+
+ if ( !par::clumpld_verbose )
+ CLMP << setw(4) << "NA" << " "
+ << setw(4) << "NA" << " "
+ << setw(par::pp_maxsnp) << sp[i].s << " "
+ << setw(10) << "NA" << " "
+ << setw(10) << sp[i].p << " "
+ << setw(8) << "NA" << " "
+ << setw(6) << "NA" << " "
+ << setw(6) << "NA" << " "
+ << setw(6) << "NA" << " "
+ << setw(6) << "NA" << " "
+ << setw(6) << "NA" << " "
+ << setw(6) << "NA" << "\n";
+ else
+ vmessage += sp[i].s + " not found in dataset\n";
+
+ continue;
+ }
+
+
+ ///////////////////////////////
+ // Check all SNPs in LD range
+
+ // Set at reference SNP, l, and move out
+ // left and right (l1,l2)
+
+ int l1 = l;
+ int l2 = l;
+
+ // If multiple files, allow for self comparison also
+ if ( filename.size()>1 ) --l1;
+
+ set<string> willClump;
+
+ map<int,string> inPhaseAllele;
+
+ while(1)
+ {
+
+ bool failed1 = false, failed2 = false;
+
+ // Expand outwards to physical limits/SNP sets
+
+ // Moving right
+
+ if( l1 < P->locus.size()-1 )
+ {
+ // Advance a position
+ l1++;
+
+ double r2a = -1;
+
+ // Compute r-squared if this SNP is close
+ // enough to the index SNP, physically
+
+ if( P->locus[l1]->chr == P->locus[l]->chr &&
+ P->locus[l1]->bp - P->locus[l]->bp < ld_distance )
+ r2a = hp->rsq( l, l1 );
+ else
+ failed1 = true;
+
+
+ //////////////////////////////////
+ // Skip already clumped this SNP
+
+ if( par::clumpld_indep && clumped[ P->locus[l1]->name ] )
+ continue;
+
+ // If in LD with association result
+
+ if( r2a > r2_cutoff )
+ {
+
+ // Record which alleles are correlated
+
+ inPhaseAllele.insert(make_pair(l1, allelePairs(l,l1)));
+
+
+ // Record that this SNP has been clumped
+
+ willClump.insert(P->locus[l1]->name);
+
+ // Now look at the assocations (in the multiple files)
+ // with this SNP
+
+ for (int f=1; f<=filename.size(); f++)
+ {
+ ClumpPair result;
+ result.snp = P->locus[l1]->name;
+ result.f = f;
+
+ // Do not clump with self
+ if ( l == l1 && f == sp[i].f )
+ continue;
+
+ // Result not found
+ if ( assoc_results.find(result) == assoc_results.end() )
+ continue;
+
+ double pval = assoc_results[result].p;
+
+ if( pval < second_pval_cutoff )
+ {
+ int2 t(l1,f);
+ grouped_snps.insert(make_pair(t,r2a));
+ }
+
+ if( pval < .0001 )
+ four++;
+ else
+ if( pval < .001 )
+ three++;
+ else
+ if( pval < .01 )
+ two++;
+ else
+ if( pval < .05 )
+ one++;
+ else
+ zero++;
+ }
+ }
+ }
+ else
+ failed1 = true;
+
+
+
+ //////////////
+ // Move left
+
+ if( l2 > 0 )
+ {
+ l2--;
+
+ double r2b = -1;
+
+
+ if( P->locus[l2]->chr == P->locus[l]->chr &&
+ P->locus[l]->bp - P->locus[l2]->bp < ld_distance )
+ r2b = hp->rsq( l, l2 );
+ else
+ failed2 = true;
+
+
+ //////////////////////////////////
+ // Skip already clumped this SNP
+
+ if( par::clumpld_indep && clumped[ P->locus[l2]->name ] )
+ continue;
+
+
+ /////////////////////////////////////
+ // Does this SNP meet r-sq threshold?
+
+ if( r2b > r2_cutoff )
+ {
+
+ // Find the allele in phase with rare allele for l
+
+ inPhaseAllele.insert(make_pair(l2, allelePairs(l,l2)));
+
+
+ // Record that this SNP is clumped
+
+ willClump.insert(P->locus[l2]->name);
+
+ // Now look at associations
+
+ for (int f=1; f<=filename.size(); f++)
+ {
+ ClumpPair result;
+ result.snp = P->locus[l2]->name;
+ result.f = f;
+
+ // Result not found
+ if ( assoc_results.find(result) == assoc_results.end() )
+ continue;
+
+ double pval = assoc_results[result].p;
+
+ if( pval < second_pval_cutoff )
+ {
+ int2 t(l2,f);
+ grouped_snps.insert(make_pair(t,r2b));
+ }
+
+ if( pval < .0001 )
+ four++;
+ else
+ if( pval < .001 )
+ three++;
+ else
+ if( pval < .01 )
+ two++;
+ else
+ if( pval < .05 )
+ one++;
+ else
+ zero++;
+ }
+ }
+
+ }
+ else
+ failed2 = true;
+
+
+ // No point in looking further?
+
+ if( failed1 && failed2 )
+ break;
+
+ }
+
+
+ //////////////////////////////////////////////////////////
+ // Report results
+
+ // Are we only interested in cross-file clumpings?
+
+ if ( par::clumpld_only_show_replications )
+ {
+ bool seen_replication = false;
+ map<int2,double>::iterator gi = grouped_snps.begin();
+ int cnt=0;
+ int lastf;
+ while( gi != grouped_snps.end() )
+ {
+ if ( cnt > 0 )
+ {
+ if ( gi->first.p2 != lastf )
+ seen_replication = true;
+ }
+ lastf = gi->first.p2;
+ ++cnt;
+ ++gi;
+ }
+
+ // If no cross-file results, then do not report this
+ // clump -- clear all flags for it
+
+ if ( ! seen_replication )
+ {
+ grouped_snps.clear();
+ continue;
+ }
+ }
+
+
+ // Indicate that this SNP has now been clumped
+ set<string>::iterator si = willClump.begin();
+ while ( si != willClump.end() )
+ {
+ clumped[ *si ] = true;
+ ++si;
+ }
+
+
+
+ int total = zero+one+two+three+four;
+
+ if ( par::clumpld_verbose )
+ {
+ // Repeat header
+ CLMP << "\n"
+ << setw(4) << "CHR" << " "
+ << setw(4) << "F" << " "
+ << setw(par::pp_maxsnp) << "SNP" << " "
+ << setw(10) << "BP" << " "
+ << setw(10) << "P" << " "
+ << setw(8) << "TOTAL" << " "
+ << setw(6) << "NSIG" << " "
+ << setw(6) << "S05" << " "
+ << setw(6) << "S01" << " "
+ << setw(6) << "S001" << " "
+ << setw(6) << "S0001" << "\n";
+ }
+
+ ClumpPair cp;
+ cp.snp = P->locus[l]->name;
+ cp.f = sp[i].f;
+
+ CLMP << setw(4) << P->locus[l]->chr << " "
+ << setw(4) << cp.f << " "
+ << setw(par::pp_maxsnp) << P->locus[l]->name << " "
+ << setw(10) << P->locus[l]->bp << " "
+ << setw(10) << assoc_results[cp].p << " "
+ << setw(8) << total << " "
+ << setw(6) << zero << " "
+ << setw(6) << one << " "
+ << setw(6) << two << " "
+ << setw(6) << three << " "
+ << setw(6) << four << " ";
+
+ if ( par::clumpld_verbose )
+ {
+ // Now list one per line, sorted by distance
+ CLMP << "\n";
+ if ( grouped_snps.size() > 0 )
+ {
+
+ CLMP << "\n"
+ << setw(4) << " " << " "
+ << setw(4) << " " << " "
+ << setw(par::pp_maxsnp) << " " << " "
+ << setw(10) << "KB" << " "
+ << setw(8) << "RSQ" << " "
+ << setw(8) << "ALLELES" << " "
+ << setw(4) << "F" << " "
+ << setw(12) << "P" << " ";
+ if ( par::clumpld_annot )
+ CLMP << setw(12) << "ANNOT" << "\n";
+ else
+ CLMP << "\n";
+
+ CLMP << setw(4) << " (INDEX) "
+ << setw(par::pp_maxsnp) << P->locus[l]->name << " ";
+
+ CLMP << setw(10) << (double)(P->locus[l]->bp - P->locus[l]->bp)/1000.0 << " ";
+
+ CLMP << setw(8) << "1.000" << " "
+ << setw(8) << P->locus[l]->allele1 << " "
+ << setw(4) << sp[i].f << " "
+ << setw(12) << assoc_results[cp].p << " ";
+ if ( par::clumpld_annot )
+ CLMP << setw(12) << assoc_results[cp].annot << "\n";
+ else
+ CLMP << "\n";
+
+ CLMP << "\n";
+
+
+ //////////////////////////////////////////////////
+ // Track if SNP info already listed in this group
+
+ set<int> infoDisplayed;
+
+
+ map<int2,double>::iterator gi = grouped_snps.begin();
+
+ while( gi != grouped_snps.end() )
+ {
+
+ int l0 = gi->first.p1;
+ int f = gi->first.p2;
+
+ ClumpPair cp;
+ cp.snp = P->locus[l0]->name;
+ cp.f = f;
+
+
+ if ( assoc_results.find(cp) != assoc_results.end() )
+ {
+ if ( infoDisplayed.find(l0) == infoDisplayed.end() )
+ {
+ CLMP << setw(4) << " " << " "
+ << setw(4) << " " << " "
+ << setw(par::pp_maxsnp) << P->locus[l0]->name << " "
+ << setw(10) << (double)(P->locus[l0]->bp - P->locus[l]->bp)/1000.0 << " ";
+
+ CLMP.setf(ios::fixed);
+ CLMP << setw(8) << gi->second << " ";
+ CLMP.setf(ios::scientific);
+
+ CLMP << setw(8) << inPhaseAllele.find(l0)->second << " "
+ << setw(4) << f << " "
+ << setw(12) << assoc_results[cp].p << " ";
+ if ( par::clumpld_annot )
+ CLMP << setw(12) << assoc_results[cp].annot << "\n";
+ else
+ CLMP << "\n";
+
+ infoDisplayed.insert(l0);
+ }
+ else
+ {
+ CLMP << setw(4) << " " << " "
+ << setw(4) << " " << " "
+ << setw(par::pp_maxsnp) << " " << " "
+ << setw(10) << " " << " "
+ << setw(8) << " " << " "
+ << setw(8) << " " << " "
+ << setw(4) << f << " "
+ << setw(12) << assoc_results[cp].p << " ";
+ if ( par::clumpld_annot )
+ CLMP << setw(12) << assoc_results[cp].annot << "\n";
+ else
+ CLMP << "\n";
+
+ }
+ }
+
+ gi++;
+ }
+ }
+ }
+ else
+ {
+
+ // Just list of SNP names
+ if( grouped_snps.size() == 0 )
+ CLMP << "NONE";
+
+ map<int2,double>::iterator gi = grouped_snps.begin();
+ int j = 0;
+ while( gi != grouped_snps.end() )
+ {
+ CLMP << P->locus[ gi->first.p1 ]->name << "("
+ << gi->first.p2 << ")";
+ if( j < grouped_snps.size()-1)
+ CLMP << ",";
+ j++;
+ gi++;
+ }
+ }
+
+
+ if ( par::clumpld_verbose )
+ CLMP << "\n"
+ << "------------------------------------------------------------------\n";
+
+ CLMP << "\n";
+
+ if ( ! par::silent)
+ cout << "Formed " << ++clumpCount
+ << " clumps of the top "
+ << i << " SNPs, so far \r";
+
+ grouped_snps.clear();
+
+
+ // Next SNP
+ }
+
+
+ if ( ! par::silent)
+ cout << "\n";
+
+ CLMP << "\n" << vmessage << "\n";
+
+ CLMP.close();
+}
+
+
+
+string clump_LD::allelePairs(int l1, int l2)
+{
+
+ // The first SNP is always the index SNP
+ // i.e. as we call rsq(l,l1) and rsq(l,l2) above
+
+ // Identify common/common haplotype
+
+ int ch = 0;
+ for (int h=0; h<hp->nh; h++)
+ if ( hp->haplotypeName(h) == P->locus[l1]->allele2 + P->locus[l2]->allele2 )
+ ch = h;
+
+ // Is D positive or negative?
+ string s;
+
+ if ( hp->f[ch] > (1 - P->locus[l1]->freq)*(1 - P->locus[l2]->freq) )
+ s = P->locus[l1]->allele1 + P->locus[l2]->allele1 + "/" + P->locus[l1]->allele2 + P->locus[l2]->allele2;
+ else
+ s = P->locus[l1]->allele1 + P->locus[l2]->allele2 + "/" + P->locus[l1]->allele2 + P->locus[l2]->allele1;
+
+ return s;
+}
Added: trunk/packages/plink/branches/upstream/current/filters.cpp
===================================================================
--- trunk/packages/plink/branches/upstream/current/filters.cpp (rev 0)
+++ trunk/packages/plink/branches/upstream/current/filters.cpp 2008-04-09 09:42:37 UTC (rev 1721)
@@ -0,0 +1,1177 @@
+
+
+//////////////////////////////////////////////////////////////////
+// //
+// PLINK (c) 2005-2007 Shaun Purcell //
+// //
+// This file is distributed under the GNU General Public //
+// License, Version 2. Please see the file COPYING for more //
+// details //
+// //
+//////////////////////////////////////////////////////////////////
+
+
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <sstream>
+#include <vector>
+#include <map>
+#include <cmath>
+#include <iterator>
+#include "plink.h"
+#include "options.h"
+#include "helper.h"
+
+#define MISSING(i,l) ( SNP[l]->one[i] && ( ! SNP[l]->two[i] ) )
+
+void Plink::filterSNPs()
+{
+
+
+ //////////////////////////////////////////////////////
+ // This functions applies the following filters and
+ // functions:
+
+ // Counts of number of founders, nonfounders
+ // Per-individual genotyping rate
+ // Read in, or calculate, allele frequencies (and save these)
+ // Optionally write allele frequencies, then close
+ // Exclude SNPs with too many missing genotypes
+ // Identify/correct heterozygote haploid
+ // Identify SNPs with no founder genotypes
+ // Calculate/report/filter on HWE tests
+ // Calculate/report genotyping rate per SNP/per individual
+ // Filter on MAF
+ // Remove filtered-out SNPs
+
+
+ bool original_SNP_major = par::SNP_major;
+
+ if ( ! par::SNP_major )
+ Ind2SNP();
+
+
+ // Which SNPs to delete
+ vector<bool> del(locus.size(),false);
+
+ // Which individuals to delete
+ vector<bool> indel(sample.size(),false);
+
+
+ //////////////////////////////////////////
+ // Display number of founders/nonfounders
+
+ cnt_f=0;
+ vector<Individual*>::iterator person = sample.begin();
+ while ( person != sample.end() )
+ {
+ if ( (*person)->founder ) cnt_f++;
+ person++;
+ }
+ printLOG(int2str(cnt_f)+" founders and "+int2str(n-cnt_f)+
+ " non-founders found\n");
+
+ if (cnt_f<n) par::has_nonfounders = true;
+
+
+ ////////////////////////////////////////////
+ // If we have obligatory missing genotypes:
+ // ensure they really are missng
+
+ if ( par::oblig_missing )
+ {
+ set<int2>::iterator p = oblig_missing.begin();
+ while ( p != oblig_missing.end() )
+ {
+ int l = p->p1;
+ int k = p->p2;
+
+ for (int i=0; i<sample.size(); i++)
+ {
+ Individual * person = sample[i];
+ if ( person->sol == k )
+ {
+ SNP[l]->one[i] = true;
+ SNP[l]->two[i] = false;
+ }
+ }
+ ++p;
+ }
+ }
+
+
+ /////////////////////////////////////////////////
+ // Remove individuals with too many missing calls
+
+ double total_genotyping = 0;
+
+ if ( par::MAX_IND_MISSING < 1 )
+ {
+
+ int n_removed = 0;
+ int n_orig = n;
+
+ // Consider each individual
+ if ( ! par::oblig_missing )
+ {
+ for (int i=0;i<sample.size();i++)
+ {
+
+ // Sum missingness over all SNPs
+ int m=0; // Missing SNPs
+ int nsnps=0; // All non-obligatory missing SNPs
+
+ nsnps = locus.size();
+ for (int l=0; l<nsnps;l++)
+ if ( MISSING(i,l) ) m++;
+
+ // Too much missingness?
+ if ( (double)m/(double)nsnps > par::MAX_IND_MISSING )
+ {
+ indel[i] = true;
+ n_removed++;
+ }
+ } // next individual
+ }
+ else // ... allow oblig missing values
+ {
+ for (int i=0;i<sample.size();i++)
+ {
+
+ // Sum missingness over all SNPs
+ int m=0; // Missing SNPs
+ int nsnps=0; // All non-obligatory missing SNPs
+
+ for (int l=0; l<locus.size();l++)
+ {
+ if ( ! obligMissing(i,l) )
+ {
+ if ( MISSING(i,l) )
+ ++m;
+ ++nsnps;
+ }
+ }
+
+ // Too much missingness?
+ if ( (double)m/(double)nsnps > par::MAX_IND_MISSING )
+ {
+ indel[i] = true;
+ n_removed++;
+ }
+
+ } // next individual
+ } // end if oblig-missing section
+
+
+ ////////////////////////////////////////
+ // Save list of any removed individuals
+
+ if (n_removed>0)
+ {
+ string f = par::output_file_name + ".irem";
+ printLOG("Writing list of removed individuals to [ " + f + " ]\n");
+ ofstream REM;
+ REM.open(f.c_str(), ifstream::out);
+ for (int i=0;i<sample.size();i++)
+ if (indel[i])
+ REM << sample[i]->fid << "\t" << sample[i]->iid << "\n";
+ REM.close();
+
+
+ // And now remove these individuals, so that
+ // SNP-based statistics are calculated with
+ // these samples already excluded
+
+ n_removed = deleteIndividuals(indel);
+
+ }
+
+ printLOG(int2str(n_removed)+" of "+int2str(n_orig));
+ printLOG(" individuals removed for low genotyping ( MIND > ");
+ printLOG(dbl2str(par::MAX_IND_MISSING)+" )\n");
+
+
+ } // end of remove people conditional
+
+
+
+ /////////////////////////////////
+ // Calculate or read from file?
+
+ if (par::af_read)
+ {
+ checkFileExists(par::af_file);
+ printLOG( "Reading allele frequencies from [ " + par::af_file + " ] \n");
+
+ // Make hash of original SNP names
+ map<string,int> mlocus;
+ map<string,int>::iterator ilocus;
+
+ vector<Locus*>::iterator loc = locus.begin();
+ int l=0;
+ while ( loc != locus.end() )
+ {
+ mlocus.insert(make_pair( (*loc)->name,l));
+ loc++;
+ l++;
+ }
+
+
+ // Read allele frequencies
+ ifstream FRQ;
+ FRQ.open(par::af_file.c_str());
+ FRQ.clear();
+
+ string dum1, dum2, dum3, dum4, dum5, dum6;
+ string snpname;
+ double freq;
+ int nm;
+
+ loc = locus.begin();
+ while ( loc != locus.end() )
+ {
+ (*loc)->freq = -1;
+ (*loc)->nm = 0;
+ loc++;
+ }
+
+ // Skip header line
+ FRQ >> dum1 >> dum2 >> dum3 >> dum4 >> dum5 >> dum6;
+
+ while(!FRQ.eof())
+ {
+
+ char cline[256];
+ FRQ.getline(cline,256,'\n');
+ string sline = cline;
+ if (sline=="") continue;
+ stringstream ss(sline);
+ string buf;
+ vector<string> tokens;
+ while (ss >> buf)
+ tokens.push_back(buf);
+ if (tokens.size() == 0)
+ continue;
+ else if (tokens.size() != 6)
+ error("Problem with allele frequency line: 6 fields required:\n"
+ +sline+"\n");
+
+ ilocus = mlocus.find(tokens[1]);
+ if (ilocus != mlocus.end())
+ {
+ locus[ilocus->second]->freq = atof(tokens[4].c_str());
+ locus[ilocus->second]->nm = atoi(tokens[5].c_str());
+ }
+ }
+ FRQ.clear();
+ FRQ.close();
+
+ }
+
+
+ /////////////////////////////////
+ // Calculate allele frequencies
+
+ vector<string> hetlist(0);
+
+ vector<bool>::iterator d = del.begin();
+ vector<Locus*>::iterator loc = locus.begin();
+ vector<CSNP*>::iterator s = SNP.begin();
+ int l = 0; // Main locus counter
+
+ int exc_maf = 0;
+ int exc_miss = 0;
+
+ vector<Locus*> no_founders_found_list;
+
+ while ( loc != locus.end() )
+ {
+
+ if (!par::af_read)
+ {
+ (*loc)->freq = 0;
+ // count 1 per allele, for frequency
+ (*loc)->nm = 0;
+ }
+
+ // count 1 per genotype, for missingness
+ int geno_nm = 0;
+
+ // count 1 per non-obligatory missing genotype
+ // (or set to N individuals)
+ int geno_real = par::oblig_missing ? 0 : n ;
+
+ bool X = false;
+ bool haploid = false;
+
+ // Determine type of SNP
+ if (par::chr_sex[(*loc)->chr]) X=true;
+ else if (par::chr_haploid[(*loc)->chr]) haploid=true;
+
+
+ ///////////////////////////////
+ // Iterate over each individual
+
+ vector<bool>::iterator i1 = (*s)->one.begin();
+ vector<bool>::iterator i2 = (*s)->two.begin();
+ vector<Individual*>::iterator person = sample.begin();
+ int i = 0;
+
+ while ( person != sample.end() )
+ {
+
+ bool s1 = *i1;
+ bool s2 = *i2;
+
+ // Check female Y genotypes
+ if ( par::chr_Y[(*loc)->chr] && ! (*person)->sex )
+ {
+ // Set to missing, unless in a RECODE mode
+ if ( ! par::preserve_all_genotypes )
+ {
+ s1 = *i1 = true;
+ s2 = *i2 = false;
+ }
+ }
+
+ // For haploid heterozygosity check, also consider all individuals
+ if ( haploid || ( X && (*person)->sex ) )
+ {
+ if ( (!s1) && s2 )
+ {
+ hetlist.push_back( (*person)->fid + "\t" +
+ (*person)->iid + "\t" +
+ (*loc)->name );
+
+ // Set to missing, unless in a RECODE mode
+ if ( ! par::preserve_all_genotypes )
+ {
+ *i1 = true;
+ *i2 = false;
+ }
+ }
+ }
+
+
+
+ // For missing genotypes, consider all individuals
+ if ( ! ( s1 && (!s2) ) ) geno_nm++;
+
+ // But is this a real genotype in any case?
+ if ( par::oblig_missing && ! obligMissing(i,l) )
+ geno_real++;
+
+
+ // Do not recount alleles if we have read in allele frequencies
+ if (!par::af_read)
+ {
+ // For allele frequencies
+ // only consider founders
+ if ( par::summ_nonfounders || (*person)->founder )
+ {
+
+ if ( haploid || ( X && (*person)->sex ) )
+ {
+
+ //////////////////
+ // Haploid counts
+
+ // "1" allele count
+
+ if ( (!s1) && (!s2) ) // FF = hom(11)
+ {
+ (*loc)->freq++;
+ (*loc)->nm++;
+ }
+ else if ( s1 && s2 ) // TT = hom(22)
+ {
+ (*loc)->nm++;
+ }
+
+ }
+ else
+ {
+
+ //////////////////
+ // Autosomal count
+
+ // "1" allele count
+
+ if (!s1)
+ {
+ if (!s2) // 00 = hom(11)
+ {
+ (*loc)->freq+=2;
+ (*loc)->nm+=2;
+ }
+ else // 01 = het(12)
+ {
+ (*loc)->freq+=1;
+ (*loc)->nm+=2;
+ }
+ }
+ else if ( s2 ) // 11 = hom(22)
+ {
+ (*loc)->nm+=2;
+ }
+ }
+
+ }
+ }
+
+ // Next individual
+ person++;
+ i++;
+ i1++;
+ i2++;
+ }
+
+
+ ////////////////////////////////
+ // Calculate allele frequencies
+
+ if (!par::af_read)
+ {
+ if ( par::af_count) // Allele counts...
+ {
+ // Use freq to store count (keep as is)
+
+ // Use "bp" to store number of allele 2
+ (*loc)->bp = (long int)((*loc)->nm - (*loc)->freq);
+
+ // Use "pos" to store number missing genotypes
+ (*loc)->pos = geno_real - geno_nm;
+ }
+ else // ... or frequencies
+ {
+ if ((*loc)->nm>0)
+ (*loc)->freq /= (double)(*loc)->nm;
+ else
+ {
+ (*loc)->freq = 1;
+ // If we aren't getting rid of it anyway
+ if ( (double)geno_nm/(double)geno_real >= (1-par::MAX_GENO_MISSING))
+ no_founders_found_list.push_back(*loc);
+ }
+ }
+ }
+
+
+ //////////////////////////////////////////
+ // Record total proportion of missingness
+
+ double snp_genotyping = n>0 ? (double)geno_nm/(double)geno_real : 0;
+ total_genotyping += snp_genotyping;
+
+
+ /////////////////////////////////////////////////
+ // Exclude if SNP has too many missing genotypes
+
+ if ( snp_genotyping < (1-par::MAX_GENO_MISSING) )
+ {
+ *d = true;
+ exc_miss++;
+ }
+
+
+ ////////////////////////////////////////////////
+ // Make allele1 always the least common allele
+
+ if ( (!par::af_count) && (*loc)->freq > 0.5 )
+ {
+
+ // then we need to swap alleles
+
+ (*loc)->freq = 1 - (*loc)->freq;
+
+ string tmp = (*loc)->allele2;
+ (*loc)->allele2 = (*loc)->allele1;
+ (*loc)->allele1 = tmp;
+
+ vector<bool>::iterator i1 = (*s)->one.begin();
+ vector<bool>::iterator i2 = (*s)->two.begin();
+
+ while ( i1 != (*s)->one.end() )
+ {
+
+ if ( (*i1) == (*i2) )
+ {
+ *i1 = ! (*i1);
+ *i2 = ! (*i2);
+ }
+
+ i1++;
+ i2++;
+ }
+
+ }
+
+ // Next SNP
+ ++d;
+ ++loc;
+ ++l;
+ ++s;
+ }
+
+
+
+
+ /////////////////////////////////////////////////
+ // Save list of any heterozygous haploid alleles
+
+ if (hetlist.size()>0)
+ {
+ printLOG(int2str( hetlist.size())
+ + " heterozygous haploid genotypes called\n");
+ string f = par::output_file_name + ".hh";
+ printLOG("Writing list of heterozygous haploid genotypes to [ "
+ + f + " ]\n");
+ ofstream REM;
+ REM.open(f.c_str(), ifstream::out);
+ for (int i=0; i<hetlist.size(); i++)
+ REM << hetlist[i] << "\n";
+ REM.close();
+ }
+ hetlist.clear();
+
+
+
+ /////////////////////////////////////////////////
+ // Save list of SNPs with no founders observed
+
+ if (no_founders_found_list.size()>0)
+ {
+ printLOG(int2str( no_founders_found_list.size())
+ + " SNPs with no founder genotypes observed\n");
+ printLOG("Warning, MAF set to 0 for these SNPs (see --nonfounders)\n");
+ string f = par::output_file_name + ".nof";
+ printLOG( "Writing list of these SNPs to [ " + f + " ]\n");
+ ofstream NOF;
+ NOF.open(f.c_str(), ifstream::out);
+ for (int i=0; i<no_founders_found_list.size(); i++)
+ NOF << no_founders_found_list[i]->name << "\n";
+ NOF.close();
+ }
+ no_founders_found_list.clear();
+
+
+
+ //////////////////////////
+ // Write allele freq file
+
+ if (par::af_write)
+ {
+ if (par::include_cluster_from_file)
+ calcStratifiedAlleleFreqs();
+ else
+ {
+ ofstream FRQ;
+ string f = par::output_file_name + ".frq";
+ if (par::af_count) f += ".count";
+
+ if (par::summ_nonfounders)
+ printLOG("Writing allele frequencies (all individuals) to [ "
+ + f + " ] \n");
+ else
+ printLOG("Writing allele frequencies (founders-only) to [ "
+ + f + " ] \n");
+
+ if (par::af_count)
+ printLOG("Display counts rather than frequencies\n");
+
+ FRQ.open(f.c_str(), ifstream::out);
+
+ FRQ << setw(4) << "CHR" << " "
+ << setw(par::pp_maxsnp) << "SNP" << " "
+ << setw(4) << "A1" << " "
+ << setw(4) << "A2" << " ";
+ if (par::af_count)
+ FRQ << setw(6) << "C1" << " "
+ << setw(6) << "C2" << " "
+ << setw(6) << "G0" << "\n";
+ else
+ FRQ << setw(12) << "MAF" << " "
+ << setw(8) << "NCHROBS"
+ << "\n";
+
+ vector<Locus*>::iterator loc = locus.begin();
+ while (loc != locus.end() )
+ {
+ string a1 = (*loc)->allele1;
+ string a2 = (*loc)->allele2;
+ if (a1=="") a1="0";
+ if (a2=="") a2="0";
+ FRQ << setw(4) << (*loc)->chr << " "
+ << setw(par::pp_maxsnp) << (*loc)->name << " "
+ << setw(4) << a1 << " "
+ << setw(4) << a2 << " ";
+
+ if (par::af_count)
+ {
+ FRQ << setw(6) << int( (*loc)->freq ) << " "
+ << setw(6) << int( (*loc)->bp ) << " "
+ << setw(6) << int( (*loc)->pos ) << "\n";
+ }
+ else
+ {
+ if ( (*loc)->freq >= 0 )
+ FRQ << setw(12) << (*loc)->freq << " ";
+ else
+ FRQ << setw(12) << "NA" << " ";
+ FRQ << setw(8) << (*loc)->nm << "\n";
+ }
+ loc++;
+ }
+ FRQ.close();
+
+ }
+
+ // Close after we've done alle freqs,
+ shutdown();
+ }
+
+
+
+ /////////////////////////
+ // Write HWE statistics
+
+ if (par::HWD_test || par::HWD_report)
+ {
+
+ ofstream HWD;
+ if (par::HWD_report)
+ {
+
+ if (par::summ_nonfounders)
+ printLOG("Writing Hardy-Weinberg tests (all individuals) to [ " +
+ par::output_file_name + ".hwe ] \n");
+ else
+ printLOG("Writing Hardy-Weinberg tests (founders-only) to [ " +
+ par::output_file_name + ".hwe ] \n");
+
+ string f = par::output_file_name + ".hwe";
+ HWD.open(f.c_str(), ifstream::out);
+
+ HWD.precision(4);
+
+ HWD << setw(par::pp_maxsnp) << "SNP" << " "
+ << setw(8) << "TEST" << " "
+ << setw(20) << "GENO" << " "
+ << setw(8) << "O(HET)" << " "
+ << setw(8) << "E(HET)" << " "
+ << setw(12) << "P_HWD" << " "
+ << "\n";
+ }
+
+ int cnt=0, cnt_a=0, cnt_u=0;
+
+
+ ////////////////////////
+ // Consider each locus
+
+ vector<bool>::iterator d = del.begin();
+ vector<Locus*>::iterator loc = locus.begin();
+
+ for ( int l = 0 ; l < locus.size() ; l++ )
+ {
+
+ // Compute p-values for HWE test in cases, controls & all
+
+ // Only consider founders
+
+ int a11, a12, a22;
+ int u11, u12, u22;
+ int b11, b12, b22;
+
+ a11=a12=a22=0;
+ u11=u12=u22=0;
+ b11=b12=b22=0;
+
+ bool X = false, haploid = false;
+ if (par::chr_sex[(*loc)->chr]) X=true;
+ else if (par::chr_haploid[(*loc)->chr]) haploid=true;
+
+
+ ///////////////////////////////////////////////
+ // Iterate over each individual, founders only
+
+ for ( int i = 0 ; i < sample.size() ; i++ )
+ {
+
+ Individual * person = sample[i];
+
+ ///////////////////////////////////////////////
+ // Only consider founders, & diploid genotypes
+
+ if ( par::summ_nonfounders || person->founder )
+ if ( ! ( haploid || ( X && person->sex ) ) )
+ {
+
+ bool s1 = SNP[l]->one[i];
+ bool s2 = SNP[l]->two[i];
+
+ // Consider everybody, irrespective of phenotype
+ // (QT, C/C or missing)
+
+ if (!s1)
+ {
+ if (!s2) b11++; // 00 = hom(11)
+ else b12++; // 01 = het(12)
+ }
+ else if ( s2 ) b22++; // 11 = hom(22)
+
+
+ if (par::bt) // for binary trait, separately for cases/controls
+ {
+ if (person->phenotype == 1)
+ {
+
+ if (!s1)
+ {
+ if (!s2) u11++; // 00 = hom(11)
+ else u12++; // 01 = het(12)
+ }
+ else if ( s2 ) u22++; // 11 = hom(22)
+
+ }
+ else if (person->phenotype == 2)
+ {
+ if (!s1)
+ {
+ if (!s2) a11++; // 00 = hom(11)
+ else a12++; // 01 = het(12)
+ }
+ else if ( s2 ) a22++; // 11 = hom(22)
+ }
+
+ }
+ }
+
+
+ // Next individual
+ }
+
+
+ // Allele frequencies
+ double afreq = 0, ufreq = 0, freq = 0;
+
+ bool include_cases = true;
+ bool include_controls = true;
+ if (par::qt)
+ freq = ( b11 + (double)b12/2.0 ) / (double)( b11+b12+b22 );
+ else
+ {
+ afreq = ( a11 + (double)a12/2.0 ) / (double)( a11+a12+a22 );
+ ufreq = ( u11 + (double)u12/2.0 ) / (double)( u11+u12+u22 );
+ freq = ( b11 + (double)b12/2.0 ) / (double)( b11+b12+b22 );
+
+ if ( a11+a12+a22 == 0 ) include_cases = false;
+ if ( u11+u12+u22 == 0 ) include_controls = false;
+ }
+
+
+ if (par::qt)
+ {
+
+ double p;
+
+ if (par::HWD_standard)
+ {
+ double tot = b11 + b12 + b22;
+ double exp_11 = freq * freq * tot;
+ double exp_12 = 2 * freq * (1-freq) * tot;
+ double exp_22 = (1-freq) * (1-freq) * tot;
+
+ double chisq = ( (b11-exp_11)*(b11-exp_11) ) / exp_11
+ + ( (b12-exp_12)*(b12-exp_12) ) / exp_12
+ + ( (b22-exp_22)*(b22-exp_22) ) / exp_22 ;
+
+ p = chiprobP(chisq,1);
+ }
+ else
+ p = SNPHWE( b12, b11, b22 );
+
+
+ if (par::HWD_report)
+ {
+ HWD << setw(par::pp_maxsnp) << (*loc)->name << " "
+ << setw(8) << "ALL(QT)" << " "
+ << setw(20) << (int2str(b11)+
+ "/"+int2str(b12)+
+ "/"+int2str(b22)) << " "
+ << setw(8) << (double)b12/(double)(b11+b12+b22) << " "
+ << setw(8) << 2 * freq * (1-freq) << " "
+ << setw(12) << p << "\n";
+ }
+
+ if ( p <= par::HWD_limit )
+ {
+ cnt++;
+ *d = true;
+ }
+ }
+ else
+ {
+ // For case/control data
+
+ double p, p_a, p_u;
+
+ if (par::HWD_standard)
+ {
+ double exp_a11 = afreq * afreq * (a11+a12+a22);
+ double exp_a12 = 2 * afreq * (1-afreq) * (a11+a12+a22);
+ double exp_a22 = (1-afreq) * (1-afreq) * (a11+a12+a22);
+
+ double exp_u11 = ufreq * ufreq * (u11+u12+u22);
+ double exp_u12 = 2 * ufreq * (1-ufreq) * (u11+u12+u22);
+ double exp_u22 = (1-ufreq) * (1-ufreq) * (u11+u12+u22);
+
+ double exp_11 = freq * freq * (b11+b12+b22);
+ double exp_12 = 2 * freq * (1-freq) * (b11+b12+b22);
+ double exp_22 = (1-freq) * (1-freq) * (b11+b12+b22);
+
+ double chisq_a = ( (a11-exp_a11)*(a11-exp_a11) ) / exp_a11
+ + ( (a12-exp_a12)*(a12-exp_a12) ) / exp_a12
+ + ( (a22-exp_a22)*(a22-exp_a22) ) / exp_a22 ;
+
+ double chisq_u = ( (u11-exp_u11)*(u11-exp_u11) ) / exp_u11
+ + ( (u12-exp_u12)*(u12-exp_u12) ) / exp_u12
+ + ( (u22-exp_u22)*(u22-exp_u22) ) / exp_u22 ;
+
+ double chisq = ( (b11-exp_11)*(b11-exp_11) ) / exp_11
+ + ( (b12-exp_12)*(b12-exp_12) ) / exp_12
+ + ( (b22-exp_22)*(b22-exp_22) ) / exp_22 ;
+
+ p = chiprobP(chisq,1);
+ p_a = chiprobP(chisq_a,1);
+ p_u = chiprobP(chisq_u,1);
+ }
+ else
+ {
+ p = SNPHWE( b12, b11, b22 );
+ p_a = SNPHWE( a12, a11, a22 );
+ p_u = SNPHWE( u12, u11, u22 );
+ }
+
+ if (par::HWD_report)
+ {
+
+ HWD << setw(par::pp_maxsnp) << (*loc)->name << " "
+ << setw(8) << "ALL" << " "
+
+ << setw(20)
+ << int2str(b11)+"/"+int2str(b12)+"/"+int2str(b22) << " "
+ << setw(8) << (double)b12/(double)(b11+b12+b22) << " "
+ << setw(8) << 2 * freq * (1-freq) << " "
+ << setw(12) << p << "\n";
+
+
+ HWD << setw(par::pp_maxsnp) << (*loc)->name << " "
+ << setw(8) << "AFF" << " "
+ << setw(20)
+ << int2str(a11)+"/"+int2str(a12)+"/"+int2str(a22) << " "
+ << setw(8) << (double)a12/(double)(a11+a12+a22) << " "
+ << setw(8) << 2 * afreq * (1-afreq) << " ";
+
+
+ if (include_cases)
+ HWD << setw(12) << p_a << "\n";
+ else
+ HWD << setw(12) << "NA" << "\n";
+
+
+ HWD << setw(par::pp_maxsnp) << (*loc)->name << " "
+ << setw(8) << "UNAFF" << " "
+ << setw(20)
+ << int2str(u11)+"/"+int2str(u12)+"/"+int2str(u22) << " "
+ << setw(8) << (double)u12/(double)(u11+u12+u22) << " "
+ << setw(8) << 2 * ufreq * (1-ufreq) << " ";
+
+
+ if (include_controls)
+ HWD << setw(12) << p_u << "\n";
+ else
+ HWD << setw(12) << "NA" << "\n";
+
+ }
+
+ // Increase counts: in cases
+ if ( include_cases && p_a < par::HWD_limit && p>-1 ) cnt_a++;
+
+ // Controls (and, if possible, exclude on this value)
+ if ( include_controls &&
+ p_u < par::HWD_limit && p>-1 )
+ {
+ cnt_u++;
+
+ if ( ! par::HWD_filter_on_all )
+ {
+ *d = true;
+ cnt++;
+ }
+ }
+
+ // In total sample, and if needed, exclude here
+ if ( p < par::HWD_limit && p>-1 )
+ {
+ if ( par::HWD_filter_on_all || ! include_controls )
+ {
+ *d = true;
+ cnt++;
+ }
+ }
+
+
+
+
+ }
+
+
+ // next locus
+ ++loc;
+ ++d;
+ }
+
+
+ // Finish the report...
+ if (par::HWD_report)
+ HWD.close();
+
+ // ...or finish pruning
+
+ printLOG( int2str(cnt) +
+ " markers to be excluded based on HWE test ( p <= " +
+ dbl2str(par::HWD_limit) + " )\n");
+
+ if (par::bt)
+ {
+ printLOG("\t" + int2str(cnt_a) + " markers failed HWE test in cases\n");
+ printLOG("\t" + int2str(cnt_u) + " markers failed HWE test in controls\n");
+ }
+ }
+
+
+
+
+ ///////////////////////////////////////////////////
+ // Summary statistics for genotyping/missing rates
+
+ if (par::report_missing)
+ {
+
+
+ ///////////////////////////////////////////
+ // Report by genotyping rate by individual
+ // possibly allowing for obligatory missingness
+
+ printLOG( "Writing individual missingness information to [ " +
+ par::output_file_name + ".imiss ] \n");
+
+ ofstream MIS;
+ string f = par::output_file_name + ".imiss";
+ MIS.open(f.c_str(), ifstream::out);
+ MIS << setw(par::pp_maxfid) << "FID" << " "
+ << setw(par::pp_maxiid) << "IID" << " "
+ << setw(10) << "MISS_PHENO" << " "
+ << setw(8) << "N_MISS" << " ";
+ if ( par::oblig_missing )
+ MIS << setw(8) << "N_GENO" << " ";
+ MIS << setw(8) << "F_MISS" << "\n";
+
+ for (int i=0; i<n; i++)
+ {
+
+ MIS << setw(par::pp_maxfid) << sample[i]->fid << " "
+ << setw(par::pp_maxiid) << sample[i]->iid << " ";
+ if (sample[i]->missing) MIS << setw(10) << "Y" << " ";
+ else MIS << setw(10) << "N" << " " ;
+
+
+ // Sum missingness over all SNPs
+ int m=0; // Missing SNPs
+ int nsnps=0; // All non-obligatory missing SNPs
+
+ if ( ! par::oblig_missing )
+ {
+ nsnps = locus.size();
+ for (int l=0; l<nsnps;l++)
+ if ( MISSING(i,l) ) m++;
+
+ }
+ else // ... allow oblig missing values
+ {
+ for (int l=0; l<locus.size();l++)
+ {
+ if ( ! obligMissing(i,l) )
+ {
+ if ( MISSING(i,l) )
+ ++m;
+ ++nsnps;
+ }
+ }
+ }
+
+ MIS << setw(8) << m << " ";
+ if ( par::oblig_missing)
+ MIS << setw(8) << nsnps << " ";
+ MIS << setw(8) << (double)m/(double)nsnps << "\n";
+
+ }
+ MIS.close();
+
+
+
+ ///////////////////////////////////////////
+ // Report by genotyping rate by locus
+ // possibly allowing for sample strata
+ // possibly allowing for obligatory missingness
+
+ printLOG("Writing locus missingness information to [ " +
+ par::output_file_name +".lmiss ] \n");
+ f = par::output_file_name + ".lmiss";
+ MIS.open(f.c_str(), ifstream::out);
+ MIS.clear();
+
+ MIS << setw(4) << "CHR" << " "
+ << setw(par::pp_maxsnp) << "SNP" << " ";
+
+ if (par::include_cluster_from_file)
+ MIS << setw(10) << "CLST" << " ";
+
+ MIS << setw(8) << "N_MISS" << " ";
+
+ if ( par::oblig_missing )
+ MIS << setw(8) << "N_GENO" << " ";
+
+ if (par::include_cluster_from_file)
+ MIS << setw(8) << "N_CLST" << " ";
+
+ MIS << setw(8) << "F_MISS" << "\n";
+
+
+ for (int l=0; l<locus.size(); l++)
+ {
+
+ Locus * loc = locus[l];
+
+ // nk==1 for basic missingness (i.e. not stratified by
+ // cluster)
+
+ for (int k=0; k<nk; k++)
+ {
+
+ MIS << setw(4) << loc->chr << " "
+ << setw(par::pp_maxsnp) << loc->name << " ";
+
+ if (par::include_cluster_from_file)
+ MIS << setw(10) << kname[k] << " ";
+
+ int m=0; // Number of missing genotypes
+ int c=0; // Number of people in cluster
+ int nsnps=0; // Number of actual genotypes in cluster
+
+ for ( int i=0; i<sample.size(); i++)
+ {
+ if (par::include_cluster_from_file)
+ {
+ if ( sample[i]->sol == k )
+ {
+ if ( ( ! par::oblig_missing ) ||
+ ( ! obligMissing(i,l) ) )
+ {
+ if ( MISSING(i,l) ) ++m;
+ ++nsnps;
+ }
+ ++c;
+ }
+ }
+ else // ... ignore cluster strata
+ {
+ if ( ( ! par::oblig_missing ) ||
+ ( ! obligMissing(i,l) ) )
+ {
+ if ( MISSING(i,l) ) ++m;
+ ++nsnps;
+ }
+ }
+
+ // Next individual
+ }
+
+ MIS << setw(8) << m << " ";
+
+ if (par::include_cluster_from_file)
+ MIS << setw(8) << c << " ";
+
+ if (par::oblig_missing)
+ MIS << setw(8) << nsnps << " ";
+
+
+ MIS << setw(8) << (double)m / (double)nsnps << "\n";
+ }
+
+ // Next SNP
+ }
+
+ MIS.close();
+
+ }
+
+
+
+
+ /////////////////////////////////
+ // Remove rare SNPs
+
+
+ loc = locus.begin();
+ d = del.begin();
+
+ while ( loc != locus.end() )
+ {
+
+ // Note epsilon correction for MAF, due to floating point
+ // issues: only apply to the lower MAF range
+
+ if ( (*loc)->freq < 0 ||
+ (*loc)->freq + par::epsilon < par::min_af ||
+ (*loc)->freq > par::max_af )
+ {
+ *d = true;
+ exc_maf++;
+ }
+
+ d++;
+ loc++;
+ }
+
+
+
+ /////////////////////////////////////////
+ // Remove SNPs based on thresholds
+
+ if ( locus.size() > 0 )
+ printLOG("Total genotyping rate in remaining individuals is "
+ + dbl2str(total_genotyping/(double)locus.size())+"\n");
+ printLOG(int2str(exc_miss)+" SNPs failed missingness test ( GENO > "
+ +dbl2str(par::MAX_GENO_MISSING)+" )\n");
+ printLOG(int2str(exc_maf)+" SNPs failed frequency test ( MAF < "+dbl2str(par::min_af));
+ if (par::max_af < 0.5 ) printLOG(" or MAF > " + dbl2str(par::max_af));
+ printLOG(" )\n");
+
+
+ int tmp = deleteSNPs(del);
+
+
+ //////////////////////////////////////////
+ // Need to make back to individual major?
+
+ if ( ! original_SNP_major )
+ SNP2Ind();
+
+
+ return;
+
+}
+
Added: trunk/packages/plink/branches/upstream/current/fisher.cpp
===================================================================
--- trunk/packages/plink/branches/upstream/current/fisher.cpp (rev 0)
+++ trunk/packages/plink/branches/upstream/current/fisher.cpp 2008-04-09 09:42:37 UTC (rev 1721)
@@ -0,0 +1,2257 @@
+
+
+//////////////////////////////////////////////////////////////////
+// //
+// PLINK (c) 2005-2006 Shaun Purcell //
+// //
+// This file is distributed under the GNU General Public //
+// License, Version 2. Please see the file COPYING for more //
+// details //
+// //
+//////////////////////////////////////////////////////////////////
+
+
+#include "fisher.h"
+#include "plink.h"
+#include "helper.h"
+
+double fisher(table_t t)
+{
+
+ int nrow = t.size();
+ if ( nrow == 0 )
+ return -9;
+ int ncol = t[0].size();
+ if ( ncol == 0 )
+ return -9;
+ double table[200];
+
+ int c=0;
+
+ for (int j=0; j<ncol; j++)
+ for (int i=0; i<nrow; i++)
+ {
+ table[c++] = t[i][j];
+ }
+
+ double expect = -1.0;
+ double percnt = 100.0;
+ double emin = 0;
+ double pre = 0, prt = 0;
+ int ws = 300000;
+
+ fexact(&nrow, &ncol, table, &nrow, &expect, &percnt, &emin, &prt, &pre, &ws);
+
+ return pre;
+
+// // Test of routine:
+
+// double expected = -1.0;
+// percnt = 100.0;
+// emin = 0.0;
+// prt = 0.0;
+// double *expectedp = &expected;
+// double *percntp = %
+// double *eminp = &emin;
+// double *prtp = &prt;
+// double pvalue = 0.0;
+// double *pvaluep = &pvalue;
+// int workspace = 300000;
+// int *workspacep = &workspace;
+
+// nrow = 3;
+// ncol = 2;
+// int *nrowp = &nrow;
+// int *ncolp = &ncol;
+// double *tmp_table = new double[6];
+
+// // tmp_table[0] =10;
+// // tmp_table[1] =20;
+// // tmp_table[2] =30;
+
+// // tmp_table[3] =40;
+// // tmp_table[4] =10;
+// // tmp_table[5] =60;
+
+// tmp_table[0] =10;
+// tmp_table[1] =40;
+// tmp_table[2] =20;
+// tmp_table[3] =10;
+// tmp_table[4] =30;
+// tmp_table[5] =60;
+
+// fexact(nrowp, ncolp, tmp_table, nrowp, expectedp, percntp,eminp, prtp, pvaluep, workspacep);
+
+// cout << "exactP = " << pvalue<<"\n\n";
+
+// exit(0);
+
+// return pre;
+
+}
+
+
+/* fisher2.c: this is fexact.c, part of the R
+ (http://cran.r-project.org) distribution
+ (package ctest).
+ Minor edits by az-Uriarte (rdiaz at cnio.es). May 2003.
+*/
+
+
+/* [Notice from the original fexact.c]
+ Fisher's exact test for contingency tables -- usage see below
+
+ fexact.f -- translated by f2c (version 19971204).
+ Run through a slightly modified version of MM's f2c-clean.
+ Heavily hand-edited by KH and MM.
+*/
+
+#include <cstdlib>
+#include <cstdio>
+#include <cmath>
+
+#include <limits>
+#define SINT_MAX INT_MAX
+
+#undef min
+#undef max
+#define max(a, b)((a) < (b) ? (b) : (a))
+#define min(a, b)((a) > (b) ? (b) : (a))
+
+static void f2xact(int *nrow, int *ncol, double *table, int *ldtabl,
+ double *expect, double *percnt, double *emin, double
+ *prt, double *pre, double *fact, int *ico, int
+ *iro, int *kyy, int *idif, int *irn, int *key,
+ int *ldkey, int *ipoin, double *stp, int *ldstp,
+ int *ifrq, double *dlp, double *dsp, double *tm,
+ int *key2, int *iwk, double *rwk);
+static void f3xact(int *nrow, int *irow, int *ncol,int *icol,
+ double *dlp, int *mm, double *fact, int *ico, int
+ *iro, int *it, int *lb, int *nr, int *nt, int
+ *nu, int *itc, int *ist, double *stv, double *alen,
+ const double *tol);
+static void f4xact(int *nrow, int *irow, int *ncol, int *icol,
+ double *dsp, double *fact, int *icstk, int *ncstk,
+ int *lstk, int *mstk, int *nstk, int *nrstk, int
+ *irstk, double *ystk, const double *tol);
+static void f5xact(double *pastp, const double *tol, int *kval, int *key,
+ int *ldkey, int *ipoin, double *stp, int *ldstp,
+ int *ifrq, int *npoin, int *nr, int *nl, int
+ *ifreq, int *itop, int *ipsh);
+static void f6xact(int *nrow, int *irow, int *iflag, int *kyy,
+ int *key, int *ldkey, int *last, int *ipn);
+static void f7xact(int *nrow, int *imax, int *idif, int *k, int *ks,
+ int *iflag);
+static void f8xact(int *irow, int *is, int *i1, int *izero, int *knew);
+static double f9xact(int *n, int *mm, int *ir, double *fact);
+static void f10act(int *nrow, int *irow, int *ncol, int *icol,
+ double *val, int *xmin, double *fact, int *nd,
+ int *ne, int *m);
+static void f11act(int *irow, int *i1, int *i2, int *knew);
+static void prterr(int icode, char *mes);
+static int iwork(int iwkmax, int *iwkpt, int number, int itype);
+
+#ifdef USING_R
+# define isort(n, ix)R_isort(ix, *n)
+# include <Rmath.h>/* -> pgamma() */
+#else
+ static void isort(int *n, int *ix);
+ static double gammds(double *y, double *p, int *ifault);
+ static double alogam(double *x, int *ifault);
+#endif
+
+/* The only public function : */
+void
+fexact(int *nrow, int *ncol, double *table, int *ldtabl,
+ double *expect, double *percnt, double *emin, double *prt,
+ double *pre, int *workspace)
+{
+
+ /*
+ ALGORITHM 643, COLLECTED ALGORITHMS FROM ACM.
+ THIS WORK PUBLISHED IN TRANSACTIONS ON MATHEMATICAL SOFTWARE,
+ VOL. 19, NO. 4, DECEMBER, 1993, PP. 484-488.
+ -----------------------------------------------------------------------
+ Name: FEXACT
+ Purpose: Computes Fisher's exact test probabilities and a hybrid
+ approximation to Fisher exact test probabilities for a
+ contingency table using the network algorithm.
+ Usage: CALL FEXACT (NROW, NCOL, TABLE, LDTABL, EXPECT, PERCNT,
+ EMIN, PRT, PRE)
+ Arguments:
+ NROW - The number of rows in the table.(Input)
+ NCOL - The number of columns in the table.(Input)
+ TABLE - NROW by NCOL matrix containing the contingency
+ table.(Input)
+ LDTABL - Leading dimension of TABLE exactly as specified
+ in the dimension statement in the calling
+ program.(Input)
+
+ EXPECT - Expected value used in the hybrid algorithm for
+ deciding when to use asymptotic theory
+ probabilities.(Input)
+ If EXPECT <= 0.0 then asymptotic theory probabilities
+ are not used and Fisher exact test probabilities are
+ computed. Otherwise, if PERCNT or more of the cells in
+ the remaining table have estimated expected values of
+ EXPECT or more, with no remaining cell having expected
+ value less than EMIN, then asymptotic chi-squared
+ probabilities are used. See the algorithm section of the
+ manual document for details.
+ Use EXPECT = 5.0 to obtain the 'Cochran' condition.
+
+ PERCNT - Percentage of remaining cells that must have
+ estimated expected values greater than EXPECT
+ before asymptotic probabilities can be used.(Input)
+ See argument EXPECT for details.
+ Use PERCNT = 80.0 to obtain the 'Cochran' condition.
+
+ EMIN - Minimum cell estimated expected value allowed for
+ asymptotic chi-squared probabilities to be used.(Input)
+ See argument EXPECT for details.
+ Use EMIN = 1.0 to obtain the 'Cochran' condition.
+
+ PRT - Probability of the observed table for fixed
+ marginal totals.(Output)
+
+ PRE - Table p-value.(Output)
+ PRE is the probability of a more extreme table,
+ where `extreme' is in a probabilistic sense.
+ If EXPECT < 0 then the Fisher exact probability
+ is returned. Otherwise, an approximation to the
+ Fisher exact probability is computed based upon
+ asymptotic chi-squared probabilities for ``large''
+ table expected values. The user defines ``large''
+ through the arguments EXPECT, PERCNT, and EMIN.
+
+ Remarks:
+ 1. For many problems one megabyte or more of workspace can be
+ required.If the environment supports it, the user should begin
+ by increasing the workspace used to 200,000 units.
+ 2. In FEXACT, LDSTP = 30*LDKEY. The proportion of table space used
+ by STP may be changed by changing the line MULT = 30 below to
+ another value.
+ 3. FEXACT may be converted to single precision by setting IREAL = 3,
+ and converting all DOUBLE PRECISION specifications (except the
+ specifications for RWRK, IWRK, and DWRK) to REAL.This will
+ require changing the names and specifications of the intrinsic
+ functions ALOG, AMAX1, AMIN1, EXP, and REAL. In addition, the
+ machine specific constants will need to be changed, and the name
+ DWRK will need to be changed to RWRK in the call to F2XACT.
+ 4. Machine specific constants are specified and documented in F2XACT.
+ A missing value code is specified in both FEXACT and F2XACT.
+ 5. Although not a restriction, is is not generally practical to call
+ this routine with large tables which are not sparse and in
+ which the 'hybrid' algorithm has little effect. For example,
+ although it is feasible to compute exact probabilities for the
+ table
+ 1 8 5 4 4 2 2
+ 5 3 3 4 3 1 0
+ 10 1 4 0 0 0 0,
+ computing exact probabilities for a similar table which has been
+ enlarged by the addition of an extra row (or column) may not be
+ feasible.
+ -----------------------------------------------------------------------
+ */
+
+ /* CONSTANT Parameters : */
+
+ /* To increase the length of the table of paste path lengths relative
+ to the length of the hash table, increase MULT.
+ */
+ const int mult = 30;
+ /* AMISS is a missing value indicator which is returned when the
+ probability is not defined.
+ */
+ const double amiss = -12345.;
+ /*
+ Set IREAL = 4 for DOUBLE PRECISION
+ Set IREAL = 3 for SINGLE PRECISION
+ */
+#define i_real 4
+#define i_int 2
+
+ /* System generated locals */
+ int ikh;
+ /* Local variables */
+ int nco, nro, ntot, numb, iiwk, irwk;
+ int i, j, k, kk, ldkey, ldstp, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10;
+ int i3a, i3b, i3c, i9a, iwkmax, iwkpt;
+
+ /* Workspace Allocation (freed at end) */
+ double *equiv;
+ iwkmax = 2 * (int) (*workspace / 2);
+ // equiv = (double *) R_alloc(iwkmax / 2, sizeof(double));
+ equiv = (double *) calloc(iwkmax / 2, sizeof(double));
+
+ /* The check could never happen with Calloc!
+ equiv = Calloc(iwkmax / 2, double);
+ if (!equiv) {
+ prterr(0, "Can not allocate specified workspace");
+ } */
+
+#define dwrk (equiv)
+#define iwrk ((int *)equiv)
+#define rwrk ((float *)equiv)
+
+ /* Parameter adjustments */
+ table -= *ldtabl + 1;
+
+ /* Function Body */
+ iwkpt = 0;
+
+ if (*nrow > *ldtabl)
+ prterr(1, "NROW must be less than or equal to LDTABL.");
+
+ ntot = 0;
+ for (i = 1; i <= *nrow; ++i) {
+ for (j = 1; j <= *ncol; ++j) {
+ if (table[i + j * *ldtabl] < 0.)
+ prterr(2, "All elements of TABLE must be positive.");
+ ntot = (int) (ntot + table[i + j * *ldtabl]);
+ }
+ }
+ if (ntot == 0) {
+ prterr(3, "All elements of TABLE are zero.\n"
+ "PRT and PRE are set to missing values.");
+ *prt = amiss;
+ *pre = amiss;
+ goto L_End;
+ }
+
+ nco = max(*nrow, *ncol);
+ nro = *nrow + *ncol - nco;/* = min(*nrow, *ncol) */
+ k = *nrow + *ncol + 1;
+ kk = k * nco;
+
+ ikh = ntot + 1;
+ i1 = iwork(iwkmax, &iwkpt, ikh, i_real);
+ i2 = iwork(iwkmax, &iwkpt, nco, i_int);
+ i3 = iwork(iwkmax, &iwkpt, nco, i_int);
+ i3a = iwork(iwkmax, &iwkpt, nco, i_int);
+ i3b = iwork(iwkmax, &iwkpt, nro, i_int);
+ i3c = iwork(iwkmax, &iwkpt, nro, i_int);
+ ikh = max(k * 5 + (kk << 1), nco * 7 + 800);
+ iiwk= iwork(iwkmax, &iwkpt, ikh, i_int);
+ ikh = max(nco + 401, k);
+ irwk= iwork(iwkmax, &iwkpt, ikh, i_real);
+
+ /* NOTE:
+ What follows below splits the remaining amount iwkmax - iwkpt of
+ (int) workspace into hash tables as follows.
+ type size index
+ INT 2 * ldkey i4 i5 i11
+ REAL 2 * ldkey i8 i9 i10
+ REAL 2 * ldstp i6
+ INT 6 * ldstp i7
+ Hence, we need ldkey times
+ 3 * 2 + 3 * 2 * s + 2 * mult * s + 6 * mult
+ chunks of integer memory, where s = sizeof(REAL) / sizeof(INT).
+ If doubles are used and are twice as long as ints, this gives
+ 18 + 10 * mult
+ so that the value of ldkey can be obtained by dividing available
+ (int) workspace by this number.
+
+ In fact, because iwork() can actually s * n + s - 1 int chunks
+ when allocating a REAL, we use ldkey = available / numb - 1.
+
+ FIXME:
+ Can we always assume that sizeof(double) / sizeof(int) is 2?
+ */
+
+ if (i_real == 4) {/* Double precision reals */
+ numb = 18 + 10 * mult;
+ } else {/* Single precision reals */
+ numb = (mult << 3) + 12;
+ }
+ ldkey = (iwkmax - iwkpt) / numb - 1;
+ ldstp = mult * ldkey;
+ ikh = ldkey << 1;i4 = iwork(iwkmax, &iwkpt, ikh, i_int);
+ ikh = ldkey << 1;i5 = iwork(iwkmax, &iwkpt, ikh, i_int);
+ ikh = ldstp << 1;i6 = iwork(iwkmax, &iwkpt, ikh, i_real);
+ ikh = ldstp * 6;i7 = iwork(iwkmax, &iwkpt, ikh, i_int);
+ ikh = ldkey << 1;i8 = iwork(iwkmax, &iwkpt, ikh, i_real);
+ ikh = ldkey << 1;i9 = iwork(iwkmax, &iwkpt, ikh, i_real);
+ ikh = ldkey << 1;i9a = iwork(iwkmax, &iwkpt, ikh, i_real);
+ ikh = ldkey << 1;i10 = iwork(iwkmax, &iwkpt, ikh, i_int);
+
+ /* To convert to double precision, change RWRK to DWRK in the next CALL.
+ */
+ f2xact(nrow,
+ ncol,
+ &table[*ldtabl + 1],
+ ldtabl,
+ expect,
+ percnt,
+ emin,
+ prt,
+ pre,
+ dwrk + i1,
+ iwrk + i2,
+ iwrk + i3,
+ iwrk + i3a,
+ iwrk + i3b,
+ iwrk + i3c,
+ iwrk + i4,
+ &ldkey,
+ iwrk + i5,
+ dwrk + i6,
+ &ldstp,
+ iwrk + i7,
+ dwrk + i8,
+ dwrk + i9,
+ dwrk + i9a,
+ iwrk + i10,
+ iwrk + iiwk,
+ dwrk + irwk);
+
+L_End:
+ /* Free(equiv); */
+ free(equiv);
+ return;
+}
+
+#undef rwrk
+#undef iwrk
+#undef dwrk
+
+
+/*
+ -----------------------------------------------------------------------
+ Name:F2XACT
+ Purpose:Computes Fisher's exact test for a contingency table,
+ routine with workspace variables specified.
+ Usage:F2XACT (NROW, NCOL, TABLE, LDTABL, EXPECT, PERCNT,
+ EMIN, PRT, PRE, FACT, ICO, IRO, KYY, IDIF,
+ IRN, KEY, LDKEY, IPOIN, STP, LDSTP, IFRQ,
+ DLP, DSP, TM, KEY2, IWK, RWK)
+ -----------------------------------------------------------------------
+*/
+void
+f2xact(int *nrow, int *ncol, double *table, int *ldtabl,
+ double *expect, double *percnt, double *emin, double *prt,
+ double *pre, double *fact, int *ico, int *iro, int *kyy,
+ int *idif, int *irn, int *key, int *ldkey, int *ipoin,
+ double *stp, int *ldstp, int *ifrq, double *dlp, double *dsp,
+ double *tm, int *key2, int *iwk, double *rwk)
+{
+ /* IMAX is the largest representable int on the machine. */
+ const int imax = SINT_MAX;
+
+ /* AMISS is a missing value indicator which is returned when the
+ probability is not defined. */
+ const double amiss = -12345.;
+
+ /* TOL is chosen as the square root of the smallest relative spacing. */
+#ifndef Macintosh
+ const static double tol = 3.45254e-7;
+#else
+ static double tol = 3.45254e-7;
+#endif
+ /* EMX is a large positive value used in comparing expected values. */
+ const static double emx = 1e30;
+
+ /* Local variables {{any really need to be static ???}} */
+ static int kval, kmax, jkey, last, ipsh, itmp, itop, jstp, ntot,
+ jstp2, jstp3, jstp4, i, ii, j, k, n, iflag, ncell, ifreq, chisq,
+ ikkey, ikstp, ikstp2, k1, kb, kd, ks,
+ i31, i32, i33, i34, i35, i36, i37, i38, i39,
+ i41, i42, i43, i44, i45, i46, i47, i48, i310, i311,
+ nco, nrb, ipn, ipo, itp, nro, nro2;
+ static double dspt, dd, df,ddf, drn,dro, emn, obs, obs2, obs3,
+ pastp, pv, tmp;
+ double d1;
+#ifndef USING_R
+ double d2;
+ static int ifault;
+#endif
+ bool nr_gt_nc;
+
+ /* Parameter adjustments */
+ table -= *ldtabl + 1;
+ --ico;
+ --iro;
+ --kyy;
+ --idif;
+ --irn;
+ --key;
+ --ipoin;
+ --stp;
+ --ifrq;
+ --dlp;
+ --dsp;
+ --tm;
+ --key2;
+ --iwk;
+ --rwk;
+
+
+ /* Check table dimensions */
+ if (*nrow > *ldtabl)
+ prterr(1, "NROW must be less than or equal to LDTABL.");
+ if (*ncol <= 1)
+ prterr(4, "NCOL must be at least 2");
+
+ /* Initialize KEY array */
+ for (i = 1; i <= *ldkey << 1; ++i) {
+ key[i] = -9999;
+ key2[i] = -9999;
+ }
+ /* Initialize parameters */
+ *pre = 0.;
+ itop = 0;
+ if (*expect > 0.)
+ emn = *emin;
+ else
+ emn = emx;
+
+ nr_gt_nc = *nrow > *ncol;
+ /* nco := max(nrow, ncol) : */
+ if(nr_gt_nc)
+ nco = *nrow;
+ else
+ nco = *ncol;
+ /* Initialize pointers for workspace */
+ /* f3xact */
+ i31 = 1;
+ i32 = i31 + nco;
+ i33 = i32 + nco;
+ i34 = i33 + nco;
+ i35 = i34 + nco;
+ i36 = i35 + nco;
+ i37 = i36 + nco;
+ i38 = i37 + nco;
+ i39 = i38 + 400;
+ i310 = 1;
+ i311 = 401;
+ /* f4xact */
+ k = *nrow + *ncol + 1;
+ i41 = 1;
+ i42 = i41 + k;
+ i43 = i42 + k;
+ i44 = i43 + k;
+ i45 = i44 + k;
+ i46 = i45 + k;
+ i47 = i46 + k * nco;
+ i48 = 1;
+
+ /* Compute row marginals and total */
+ ntot = 0;
+ for (i = 1; i <= *nrow; ++i) {
+ iro[i] = 0;
+ for (j = 1; j <= *ncol; ++j) {
+ if (table[i + j * *ldtabl] < -1e-4)
+ prterr(2, "All elements of TABLE must be positive.");
+ iro[i] += (int) table[i + j * *ldtabl];
+ }
+ ntot += iro[i];
+ }
+
+ if (ntot == 0) {
+ prterr(3, "All elements of TABLE are zero.\n"
+ "PRT and PRE are set to missing values.");
+ *pre = *prt = amiss;
+ return;
+ }
+
+ /* Column marginals */
+ for (i = 1; i <= *ncol; ++i) {
+ ico[i] = 0;
+ for (j = 1; j <= *nrow; ++j)
+ ico[i] += (int) table[j + i * *ldtabl];
+ }
+
+ /* sort marginals */
+ isort(nrow, &iro[1]);
+ isort(ncol, &ico[1]);
+
+ /*Determine row and column marginals.
+ Define max(nrow,ncol) =: nco >= nro := min(nrow,ncol)
+ nco is defined above
+
+ Swap marginals if necessary toico[1:nco] & iro[1:nro]
+ */
+ if (nr_gt_nc) {
+ nro = *ncol;
+ /* Swap marginals */
+ for (i = 1; i <= nco; ++i) {
+ itmp = iro[i];
+ if (i <= nro)
+ iro[i] = ico[i];
+ ico[i] = itmp;
+ }
+ } else
+ nro = *nrow;
+
+
+ /* Get multiplers for stack */
+ kyy[1] = 1;
+ for (i = 2; i <= nro; ++i) {
+ /* Hash table multipliers */
+ if (iro[i - 1] + 1 <= imax / kyy[i - 1]) {
+ kyy[i] = kyy[i - 1] * (iro[i - 1] + 1);
+ j /= kyy[i - 1];
+ } else
+ goto L_ERR_5;
+ }
+ /* Maximum product */
+ if (iro[nro - 1] + 1 <= imax / kyy[nro - 1]) {
+ kmax = (iro[nro] + 1) * kyy[nro - 1];
+ } else {
+ L_ERR_5:
+ prterr(5, "The hash table key cannot be computed because "
+ "the largest key\n"
+ "is larger than the largest representable int.\n"
+ "The algorithm cannot proceed.\n"
+ "Reduce the workspace size, or use `exact = FALSE'.");
+ return;
+ }
+
+ /* Compute log factorials */
+ fact[0] = 0.;
+ fact[1] = 0.;
+ if(ntot >= 2) fact[2] = log(2.);
+ /* MM: old code assuming log() to be SLOW */
+ for (i = 3; i <= ntot; i += 2) {
+ fact[i] = fact[i - 1] + log((double) i);
+ j = i + 1;
+ if (j <= ntot)
+ fact[j] = fact[i] + fact[2] + fact[j / 2] - fact[j / 2 - 1];
+ }
+ /* Compute obs := observed path length */
+ obs = tol;
+ ntot = 0;
+ for (j = 1; j <= nco; ++j) {
+ dd = 0.;
+ for (i = 1; i <= nro; ++i) {
+ if (nr_gt_nc) {
+ dd += fact[(int) table[j + i * *ldtabl]];
+ ntot += (int) table[j + i * *ldtabl];
+ } else {
+ dd += fact[(int) table[i + j * *ldtabl]];
+ ntot += (int) table[i + j * *ldtabl];
+ }
+ }
+ obs += fact[ico[j]] - dd;
+ }
+ /* Denominator of observed table: DRO */
+ dro = f9xact(&nro, &ntot, &iro[1], fact);
+ *prt = exp(obs - dro);
+ /* Initialize pointers */
+ k = nco;
+ last = *ldkey + 1;
+ jkey = *ldkey + 1;
+ jstp = *ldstp + 1;
+ jstp2 = *ldstp * 3 + 1;
+ jstp3 = (*ldstp << 2) + 1;
+ jstp4 = *ldstp * 5 + 1;
+ ikkey = 0;
+ ikstp = 0;
+ ikstp2 = *ldstp << 1;
+ ipo = 1;
+ ipoin[1] = 1;
+ stp[1] = 0.;
+ ifrq[1] = 1;
+ ifrq[ikstp2 + 1] = -1;
+
+Outer_Loop:
+ kb = nco - k + 1;
+ ks = 0;
+ n = ico[kb];
+ kd = nro + 1;
+ kmax = nro;
+ /* IDIF is the difference in going to the daughter */
+ for (i = 1; i <= nro; ++i)
+ idif[i] = 0;
+
+ /* Generate the first daughter */
+ do {
+ --kd;
+ ntot = min(n, iro[kd]);
+ idif[kd] = ntot;
+ if (idif[kmax] == 0)
+ --kmax;
+ n -= ntot;
+ }
+ while (n > 0 && kd != 1);
+
+ if (n != 0) {
+ goto L310;
+ }
+
+ k1 = k - 1;
+ n = ico[kb];
+ ntot = 0;
+ for (i = kb + 1; i <= nco; ++i)
+ ntot += ico[i];
+
+
+L150:
+ /* Arc to daughter length=ICO(KB) */
+ for (i = 1; i <= nro; ++i)
+ irn[i] = iro[i] - idif[i];
+
+ /* Sort irn */
+ if (k1 > 1) {
+ if (nro == 2) {
+ if (irn[1] > irn[2]) {
+ ii = irn[1];
+ irn[1] = irn[2];
+ irn[2] = ii;
+ }
+ } else if (nro == 3) {
+ ii = irn[1];
+ if (ii > irn[3]) {
+ if (ii > irn[2]) {
+ if (irn[2] > irn[3]) {
+ irn[1] = irn[3];
+ irn[3] = ii;
+ } else {
+ irn[1] = irn[2];
+ irn[2] = irn[3];
+ irn[3] = ii;
+ }
+ } else {
+ irn[1] = irn[3];
+ irn[3] = irn[2];
+ irn[2] = ii;
+ }
+ } else if (ii > irn[2]) {
+ irn[1] = irn[2];
+ irn[2] = ii;
+ } else if (irn[2] > irn[3]) {
+ ii = irn[2];
+ irn[2] = irn[3];
+ irn[3] = ii;
+ }
+ } else {
+ for (j = 2; j <= nro; ++j) {
+ i = j - 1;
+ ii = irn[j];
+
+ while (ii < irn[i]) {
+ irn[i + 1] = irn[i];
+ --i;
+ if (i == 0)
+ break;
+ }
+ irn[i + 1] = ii;
+ }
+ }
+ /* Adjust start for zero */
+ for (i = 1; i <= nro; ++i) {
+ if (irn[i] != 0)
+ break;
+ }
+
+ nrb = i;
+ nro2 = nro - i + 1;
+ } else {
+ nrb = 1;
+ nro2 = nro;
+ }
+ /* Some table values */
+ ddf = f9xact(&nro, &n, &idif[1], fact);
+ drn = f9xact(&nro2, &ntot, &irn[nrb], fact) - dro + ddf;
+ /* Get hash value */
+ if (k1 > 1) {
+ kval = irn[1] + irn[2] * kyy[2];
+ for (i = 3; i <= nro; ++i) {
+ kval += irn[i] * kyy[i];
+ }
+ /* Get hash table entry */
+ i = kval % (*ldkey << 1) + 1;
+ /* Search for unused location */
+ for (itp = i; itp <= *ldkey << 1; ++itp) {
+ ii = key2[itp];
+ if (ii == kval) {
+ goto L240;
+ } else if (ii < 0) {
+ key2[itp] = kval;
+ dlp[itp] = 1.;
+ dsp[itp] = 1.;
+ goto L240;
+ }
+ }
+
+ for (itp = 1; itp <= i - 1; ++itp) {
+ ii = key2[itp];
+ if (ii == kval) {
+ goto L240;
+ } else if (ii < 0) {
+ key2[itp] = kval;
+ dlp[itp] = 1.;
+ goto L240;
+ }
+ }
+
+ /* KH
+ prterr(6, "LDKEY is too small.\n"
+ "It is not possible to give the value of LDKEY required,\n"
+ "but you could try doubling LDKEY (and possibly LDSTP).");
+ */
+ prterr(6, "LDKEY is too small for this problem.\n"
+ "Try increasing the size of the workspace.");
+ }
+
+L240:
+ ipsh = (1);
+ /* Recover pastp */
+ ipn = ipoin[ipo + ikkey];
+ pastp = stp[ipn + ikstp];
+ ifreq = ifrq[ipn + ikstp];
+ /* Compute shortest and longest path */
+ if (k1 > 1) {
+ obs2 = obs - fact[ico[kb + 1]] - fact[ico[kb + 2]] - ddf;
+ for (i = 3; i <= k1; ++i) {
+ obs2 -= fact[ico[kb + i]];
+ }
+ if (dlp[itp] > 0.) {
+ dspt = obs - obs2 - ddf;
+ /* Compute longest path */
+ dlp[itp] = 0.;
+ f3xact(&nro2, &irn[nrb], &k1, &ico[kb + 1], &dlp[itp],
+ &ntot, fact, &iwk[i31], &iwk[i32], &iwk[i33],
+ &iwk[i34], &iwk[i35], &iwk[i36], &iwk[i37],
+ &iwk[i38], &iwk[i39], &rwk[i310], &rwk[i311], &tol);
+ dlp[itp] = min(0., dlp[itp]);
+ /* Compute shortest path */
+ dsp[itp] = dspt;
+ f4xact(&nro2, &irn[nrb], &k1, &ico[kb + 1], &dsp[itp], fact,
+ &iwk[i47], &iwk[i41], &iwk[i42], &iwk[i43],
+ &iwk[i44], &iwk[i45], &iwk[i46], &rwk[i48], &tol);
+ dsp[itp] = min(0., dsp[itp] - dspt);
+ /* Use chi-squared approximation? */
+ if ((irn[nrb] * ico[kb + 1]) > ntot * emn) {
+ ncell = 0;
+ for (i = 0; i < nro2; ++i)
+ for (j = 1; j <= k1; ++j)
+ if (irn[nrb + i] * ico[kb + j] >= ntot * *expect)
+ ncell++;
+
+ if (ncell * 100 >= k1 * nro2 * *percnt) {
+ tmp = 0.;
+ for (i = 0; i < nro2; ++i)
+ tmp += (fact[irn[nrb + i]] -
+ fact[irn[nrb + i] - 1]);
+ tmp *= k1 - 1;
+ for (j = 1; j <= k1; ++j)
+ tmp += (nro2 - 1) * (fact[ico[kb + j]] -
+ fact[ico[kb + j] - 1]);
+ df = (double) ((nro2 - 1) * (k1 - 1));
+ tmp += df * 1.83787706640934548356065947281;
+ tmp -= (nro2 * k1 - 1) * (fact[ntot] - fact[ntot - 1]);
+ tm[itp] = (obs - dro) * -2. - tmp;
+ } else {
+ /* tm(itp) set to a flag value */
+ tm[itp] = -9876.;
+ }
+ } else {
+ tm[itp] = -9876.;
+ }
+ }
+ obs3 = obs2 - dlp[itp];
+ obs2 -= dsp[itp];
+ if (tm[itp] == -9876.) {
+ chisq = (0);
+ } else {
+ chisq = (1);
+ tmp = tm[itp];
+ }
+ } else {
+ obs2 = obs - drn - dro;
+ obs3 = obs2;
+ }
+
+L300:
+ /* Process node with new PASTP */
+ if (pastp <= obs3) {
+ /* Update pre */
+ *pre += (double) ifreq * exp(pastp + drn);
+ } else if (pastp < obs2) {
+ if (chisq) {
+ df = (double) ((nro2 - 1) * (k1 - 1));
+#ifdef USING_R
+ pv = pgamma(fmax2(0., tmp + (pastp + drn) * 2.) / 2.,
+ df / 2., /*scale = */ 1.,
+ /*lower_tail = */FALSE, /*log_p = */ FALSE);
+#else
+ d1 = max(0., tmp + (pastp + drn) * 2.) / 2.;
+ d2 = df / 2.;
+ pv = 1. - gammds(&d1, &d2, &ifault);
+#endif
+ *pre += (double) ifreq * exp(pastp + drn) * pv;
+ } else {
+ /* Put daughter on queue */
+ d1 = pastp + ddf;
+ f5xact(&d1, &tol, &kval, &key[jkey], ldkey, &ipoin[jkey],
+ &stp[jstp], ldstp, &ifrq[jstp], &ifrq[jstp2],
+ &ifrq[jstp3], &ifrq[jstp4], &ifreq, &itop, &ipsh);
+ ipsh = (0);
+ }
+ }
+ /* Get next PASTP on chain */
+ ipn = ifrq[ipn + ikstp2];
+ if (ipn > 0) {
+ pastp = stp[ipn + ikstp];
+ ifreq = ifrq[ipn + ikstp];
+ goto L300;
+ }
+ /* Generate a new daughter node */
+ f7xact(&kmax, &iro[1], &idif[1], &kd, &ks, &iflag);
+ if (iflag != 1) {
+ goto L150;
+ }
+
+L310:
+ /* Go get a new mother from stage K */
+ do {
+ iflag = 1;
+ f6xact(&nro, &iro[1], &iflag, &kyy[1], &key[ikkey + 1], ldkey,
+ &last, &ipo);
+ /* Update pointers */
+ if (iflag != 3)
+ goto Outer_Loop;
+ /* else iflag == 3 : no additional nodes to process */
+ --k;
+ itop = 0;
+ ikkey = jkey - 1;
+ ikstp = jstp - 1;
+ ikstp2 = jstp2 - 1;
+ jkey = *ldkey - jkey + 2;
+ jstp = *ldstp - jstp + 2;
+ jstp2 = (*ldstp << 1) + jstp;
+ for (i = 1; i <= *ldkey << 1; ++i)
+ key2[i] = -9999;
+
+ } while (k >= 2);
+}
+
+/*
+ -----------------------------------------------------------------------
+ Name: F3XACT
+ Purpose: Computes the shortest path length for a given table.
+ Usage: F3XACT (NROW, IROW, NCOL, ICOL, DLP, MM, FACT, ICO, IRO,
+ IT, LB, NR, NT, NU, ITC, IST, STV, ALEN, TOL)
+ Arguments:
+ NROW - The number of rows in the table.(Input)
+ IROW - Vector of length NROW containing the row sums
+ for the table.(Input)
+ NCOL - The number of columns in the table.(Input)
+ ICOL - Vector of length K containing the column sums
+ for the table.(Input)
+ DLP - The longest path for the table.(Output)
+ MM - The total count in the table.(Output)
+ FACT - Vector containing the logarithms of factorials.(Input)
+ ICO - Work vector of length MAX(NROW,NCOL).
+ IRO - Work vector of length MAX(NROW,NCOL).
+ IT - Work vector of length MAX(NROW,NCOL).
+ LB - Work vector of length MAX(NROW,NCOL).
+ NR - Work vector of length MAX(NROW,NCOL).
+ NT - Work vector of length MAX(NROW,NCOL).
+ NU - Work vector of length MAX(NROW,NCOL).
+ ITC - Work vector of length 400.
+ IST - Work vector of length 400.
+ STV - Work vector of length 400.
+ ALEN - Work vector of length MAX(NROW,NCOL).
+ TOL - Tolerance.(Input)
+ -----------------------------------------------------------------------
+*/
+
+void
+f3xact(int *nrow, int *irow, int *ncol, int *icol, double *dlp,
+ int *mm, double *fact, int *ico, int *iro, int *it,
+ int *lb, int *nr, int *nt, int *nu, int *itc, int *ist,
+ double *stv, double *alen, const double *tol)
+{
+ /* Initialized data */
+ static int ldst = 200;
+ static int nst = 0;
+ static int nitc = 0;
+
+ /* Local variables */
+ static int xmin;
+ static int i, k;
+ static double v;
+ static int n11, n12, ii, nn, ks, ic1, ic2, nc1, nn1;
+ static int nr1, nco;
+ static double val;
+ static int nct, ipn, irl, key, lev, itp, nro;
+ static double vmn;
+ static int nrt, kyy, nc1s;
+
+ /* Parameter adjustments */
+ --stv;
+ --ist;
+ --itc;
+ --nu;
+ --nt;
+ --nr;
+ --lb;
+ --it;
+ --iro;
+ --ico;
+ --icol;
+ --irow;
+
+ /* Function Body */
+ for (i = 0; i <= *ncol; ++i) {
+ alen[i] = 0.;
+ }
+ for (i = 1; i <= 400; ++i) {
+ ist[i] = -1;
+ }
+ /* nrow is 1 */
+ if (*nrow <= 1) {
+ if (*nrow > 0) {
+ *dlp -= fact[icol[1]];
+ for (i = 2; i <= *ncol; ++i) {
+ *dlp -= fact[icol[i]];
+ }
+ }
+ return;
+ }
+ /* ncol is 1 */
+ if (*ncol <= 1) {
+ if (*ncol > 0) {
+ *dlp = *dlp - fact[irow[1]] - fact[irow[2]];
+ for (i = 3; i <= *nrow; ++i) {
+ *dlp -= fact[irow[i]];
+ }
+ }
+ return;
+ }
+ /* 2 by 2 table */
+ if (*nrow * *ncol == 4) {
+ n11 = (irow[1] + 1) * (icol[1] + 1) / (*mm + 2);
+ n12 = irow[1] - n11;
+ *dlp = *dlp - fact[n11] - fact[n12] - fact[icol[1] - n11]
+ - fact[icol[2] - n12];
+ return;
+ }
+ /* Test for optimal table */
+ val = 0.;
+ xmin = (0);
+ if (irow[*nrow] <= irow[1] + *ncol) {
+ f10act(nrow, &irow[1], ncol, &icol[1], &val, &xmin, fact,
+ &lb[1], &nu[1], &nr[1]);
+ }
+ if (! xmin) {
+ if (icol[*ncol] <= icol[1] + *nrow) {
+ f10act(ncol, &icol[1], nrow, &irow[1], &val, &xmin, fact,
+ &lb[1], &nu[1], &nr[1]);
+ }
+ }
+
+ if (xmin) {
+ *dlp -= val;
+ return;
+ }
+ /* Setup for dynamic programming */
+ nn = *mm;
+ /* Minimize ncol */
+ if (*nrow >= *ncol) {
+ nro = *nrow;
+ nco = *ncol;
+ for (i = 1; i <= *nrow; ++i) {
+ iro[i] = irow[i];
+ }
+ ico[1] = icol[1];
+ nt[1] = nn - ico[1];
+ for (i = 2; i <= *ncol; ++i) {
+ ico[i] = icol[i];
+ nt[i] = nt[i - 1] - ico[i];
+ }
+ } else {
+ nro = *ncol;
+ nco = *nrow;
+ ico[1] = irow[1];
+ nt[1] = nn - ico[1];
+ for (i = 2; i <= *nrow; ++i) {
+ ico[i] = irow[i];
+ nt[i] = nt[i - 1] - ico[i];
+ }
+ for (i = 1; i <= *ncol; ++i)
+ iro[i] = icol[i];
+ }
+ /* Initialize pointers */
+ vmn = 1e10;
+ nc1s = nco - 1;
+ irl = 1;
+ ks = 0;
+ k = ldst;
+ kyy = ico[nco] + 1;
+
+ LnewNode: /* Setup to generate new node */
+
+ lev = 1;
+ nr1 = nro - 1;
+ nrt = iro[irl];
+ nct = ico[1];
+ lb[1] = (int) ((double) ((nrt + 1) * (nct + 1)) /
+ (double) (nn + nr1 * nc1s + 1) - *tol) - 1;
+ nu[1] = (int) ((double) ((nrt + nc1s) * (nct + nr1)) /
+ (double) (nn + nr1 + nc1s)) - lb[1] + 1;
+ nr[1] = nrt - lb[1];
+
+ LoopNode: /* Generate a node */
+ --nu[lev];
+ if (nu[lev] == 0) {
+ if (lev == 1)
+ goto L200;
+
+ --lev;
+ goto LoopNode;
+ }
+ ++lb[lev];
+ --nr[lev];
+L120:
+ alen[lev] = alen[lev - 1] + fact[lb[lev]];
+ if (lev < nc1s) {
+ nn1 = nt[lev];
+ nrt = nr[lev];
+ ++lev;
+ nc1 = nco - lev;
+ nct = ico[lev];
+ lb[lev] = (int) ((double) ((nrt + 1) * (nct + 1)) /
+ (double) (nn1 + nr1 * nc1 + 1) - *tol);
+ nu[lev] = (int) ((double) ((nrt + nc1) * (nct + nr1)) /
+ (double) (nn1 + nr1 + nc1) - lb[lev] + 1);
+ nr[lev] = nrt - lb[lev];
+ goto L120;
+ }
+ alen[nco] = alen[lev] + fact[nr[lev]];
+ lb[nco] = nr[lev];
+
+ v = val + alen[nco];
+ if (nro == 2) {
+ /* Only 1 row left */
+ v = v + fact[ico[1] - lb[1]] + fact[ico[2] - lb[2]];
+ for (i = 3; i <= nco; ++i) {
+ v += fact[ico[i] - lb[i]];
+ }
+ if (v < vmn) {
+ vmn = v;
+ }
+ } else if (nro == 3 && nco == 2) {
+ /* 3 rows and 2 columns */
+ nn1 = nn - iro[irl] + 2;
+ ic1 = ico[1] - lb[1];
+ ic2 = ico[2] - lb[2];
+ n11 = (iro[irl + 1] + 1) * (ic1 + 1) / nn1;
+ n12 = iro[irl + 1] - n11;
+ v = v + fact[n11] + fact[n12] + fact[ic1 - n11]
+ + fact[ic2 - n12];
+ if (v < vmn) {
+ vmn = v;
+ }
+ } else {
+ /* Column marginals are new node */
+ for (i = 1; i <= nco; ++i) {
+ it[i] = ico[i] - lb[i];
+ }
+ /* Sort column marginals */
+ if (nco == 2) {
+ if (it[1] > it[2]) {
+ ii = it[1];
+ it[1] = it[2];
+ it[2] = ii;
+ }
+ } else if (nco == 3) {
+ ii = it[1];
+ if (ii > it[3]) {
+ if (ii > it[2]) {
+ if (it[2] > it[3]) {
+ it[1] = it[3];
+ it[3] = ii;
+ } else {
+ it[1] = it[2];
+ it[2] = it[3];
+ it[3] = ii;
+ }
+ } else {
+ it[1] = it[3];
+ it[3] = it[2];
+ it[2] = ii;
+ }
+ } else if (ii > it[2]) {
+ it[1] = it[2];
+ it[2] = ii;
+ } else if (it[2] > it[3]) {
+ ii = it[2];
+ it[2] = it[3];
+ it[3] = ii;
+ }
+ } else {
+ isort(&nco, &it[1]);
+ }
+ /* Compute hash value */
+ key = it[1] * kyy + it[2];
+ for (i = 3; i <= nco; ++i) {
+ key = it[i] + key * kyy;
+ }
+ if(key < 0)
+ error("Bug in FEXACT: gave negative key \n");
+
+ /* Table index */
+ ipn = key % ldst + 1;
+
+ /* Find empty position */
+ for (itp = ipn, ii = ks + ipn; itp <= ldst; ++itp, ++ii) {
+ if (ist[ii] < 0) {
+ goto L180;
+ } else if (ist[ii] == key) {
+ goto L190;
+ }
+ }
+
+ for (itp = 1, ii = ks + 1; itp <= ipn - 1; ++itp, ++ii) {
+ if (ist[ii] < 0) {
+ goto L180;
+ } else if (ist[ii] == key) {
+ goto L190;
+ }
+ }
+
+ error("Stack length exceeded in f3xact");
+
+ L180: /* Push onto stack */
+ ist[ii] = key;
+ stv[ii] = v;
+ ++nst;
+ ii = nst + ks;
+ itc[ii] = itp;
+ goto LoopNode;
+
+ L190: /* Marginals already on stack */
+ stv[ii] = min(v, stv[ii]);
+ }
+ goto LoopNode;
+
+
+ L200: /* Pop item from stack */
+ if (nitc > 0) {
+ /* Stack index */
+ itp = itc[nitc + k] + k;
+ --nitc;
+ val = stv[itp];
+ key = ist[itp];
+ ist[itp] = -1;
+ /* Compute marginals */
+ for (i = nco; i >= 2; --i) {
+ ico[i] = key % kyy;
+ key /= kyy;
+ }
+ ico[1] = key;
+ /* Set up nt array */
+ nt[1] = nn - ico[1];
+ for (i = 2; i <= nco; ++i)
+ nt[i] = nt[i - 1] - ico[i];
+
+ /* Test for optimality (L90) */
+ xmin = (0);
+ if (iro[nro] <= iro[irl] + nco) {
+ f10act(&nro, &iro[irl], &nco, &ico[1], &val, &xmin, fact,
+ &lb[1], &nu[1], &nr[1]);
+ }
+ if (!xmin && ico[nco] <= ico[1] + nro)
+ f10act(&nco, &ico[1], &nro, &iro[irl], &val, &xmin, fact,
+ &lb[1], &nu[1], &nr[1]);
+ if (xmin) {
+ if (vmn > val)
+ vmn = val;
+ goto L200;
+ }
+ else goto LnewNode;
+
+ } else if (nro > 2 && nst > 0) {
+ /* Go to next level */
+ nitc = nst;
+ nst = 0;
+ k = ks;
+ ks = ldst - ks;
+ nn -= iro[irl];
+ ++irl;
+ --nro;
+ goto L200;
+ }
+
+ *dlp -= vmn;
+}
+
+/*
+ -----------------------------------------------------------------------
+ Name: F4XACT
+ Purpose: Computes the longest path length for a given table.
+ Usage: CALL F4XACT (NROW, IROW, NCOL, ICOL, DSP, FACT, ICSTK,
+ NCSTK, LSTK, MSTK, NSTK, NRSTK, IRSTK, YSTK,
+ TOL)
+ Arguments:
+ NROW - The number of rows in the table.(Input)
+ IROW - Vector of length NROW containing the row sums for the
+ table. (Input)
+ NCOL - The number of columns in the table. (Input)
+ ICOL - Vector of length K containing the column sums for the
+ table. (Input)
+ DSP - The shortest path for the table.(Output)
+ FACT - Vector containing the logarithms of factorials. (Input)
+ ICSTK - NCOL by NROW+NCOL+1 work array.
+ NCSTK - Work vector of length NROW+NCOL+1.
+ LSTK - Work vector of length NROW+NCOL+1.
+ MSTK - Work vector of length NROW+NCOL+1.
+ NSTK - Work vector of length NROW+NCOL+1.
+ NRSTK - Work vector of length NROW+NCOL+1.
+ IRSTK - NROW by MAX(NROW,NCOL) work array.
+ YSTK - Work vector of length NROW+NCOL+1.
+ TOL - Tolerance. (Input)
+ -----------------------------------------------------------------------
+*/
+
+void
+f4xact(int *nrow, int *irow, int *ncol, int *icol, double *dsp,
+ double *fact, int *icstk, int *ncstk, int *lstk, int *mstk,
+ int *nstk, int *nrstk, int *irstk, double *ystk, const double *tol)
+{
+ /* System generated locals */
+ int ikh;
+
+ /* Local variables */
+ int i, j, k, l, m, n, mn, ic1, ir1, ict, irt, istk, nco, nro;
+ double y, amx;
+
+ /* Parameter adjustments */
+ irstk -= *nrow + 1;
+ --irow;
+ icstk -= *ncol + 1;
+ --icol;
+ --ncstk;
+ --lstk;
+ --mstk;
+ --nstk;
+ --nrstk;
+ --ystk;
+
+ /* Function Body */
+ /* Take care of the easy cases first */
+ if (*nrow == 1) {
+ for (i = 1; i <= *ncol; ++i) {
+ *dsp -= fact[icol[i]];
+ }
+ return;
+ }
+ if (*ncol == 1) {
+ for (i = 1; i <= *nrow; ++i) {
+ *dsp -= fact[irow[i]];
+ }
+ return;
+ }
+ if (*nrow * *ncol == 4) {
+ if (irow[2] <= icol[2]) {
+ *dsp = *dsp - fact[irow[2]] - fact[icol[1]]
+ - fact[icol[2] - irow[2]];
+ } else {
+ *dsp = *dsp - fact[icol[2]] - fact[irow[1]]
+ - fact[irow[2] - icol[2]];
+ }
+ return;
+ }
+ /* initialization before loop */
+ for (i = 1; i <= *nrow; ++i) {
+ irstk[i + *nrow] = irow[*nrow - i + 1];
+ }
+ for (j = 1; j <= *ncol; ++j) {
+ icstk[j + *ncol] = icol[*ncol - j + 1];
+ }
+
+ nro = *nrow;
+ nco = *ncol;
+ nrstk[1] = nro;
+ ncstk[1] = nco;
+ ystk[1] = 0.;
+ y = 0.;
+ istk = 1;
+ l = 1;
+ amx = 0.;
+
+ /* First LOOP */
+ do {
+ ir1 = irstk[istk * *nrow + 1];
+ ic1 = icstk[istk * *ncol + 1];
+ if (ir1 > ic1) {
+ if (nro >= nco) {
+ m = nco - 1;n = 2;
+ } else {
+ m = nro;n = 1;
+ }
+ } else if (ir1 < ic1) {
+ if (nro <= nco) {
+ m = nro - 1;n = 1;
+ } else {
+ m = nco;n = 2;
+ }
+ } else {
+ if (nro <= nco) {
+ m = nro - 1;n = 1;
+ } else {
+ m = nco - 1;n = 2;
+ }
+ }
+
+ L60:
+ if (n == 1) {
+ i = l; j = 1;
+ } else {
+ i = 1; j = l;
+ }
+
+ irt = irstk[i + istk * *nrow];
+ ict = icstk[j + istk * *ncol];
+ mn = irt;
+ if (mn > ict) {
+ mn = ict;
+ }
+ y += fact[mn];
+ if (irt == ict) {
+ --nro;
+ --nco;
+ f11act(&irstk[istk * *nrow + 1], &i, &nro,
+ &irstk[(istk + 1) * *nrow + 1]);
+ f11act(&icstk[istk * *ncol + 1], &j, &nco,
+ &icstk[(istk + 1) * *ncol + 1]);
+ } else if (irt > ict) {
+ --nco;
+ f11act(&icstk[istk * *ncol + 1], &j, &nco,
+ &icstk[(istk + 1) * *ncol + 1]);
+ ikh = irt - ict;
+ f8xact(&irstk[istk * *nrow + 1], &ikh, &i,
+ &nro, &irstk[(istk + 1) * *nrow + 1]);
+ } else {
+ --nro;
+ f11act(&irstk[istk * *nrow + 1], &i, &nro,
+ &irstk[(istk + 1) * *nrow + 1]);
+ ikh = ict - irt;
+ f8xact(&icstk[istk * *ncol + 1], &ikh, &j,
+ &nco, &icstk[(istk + 1) * *ncol + 1]);
+ }
+
+ if (nro == 1) {
+ for (k = 1; k <= nco; ++k) {
+ y += fact[icstk[k + (istk + 1) * *ncol]];
+ }
+ break;
+ }
+ if (nco == 1) {
+ for (k = 1; k <= nro; ++k) {
+ y += fact[irstk[k + (istk + 1) * *nrow]];
+ }
+ break;
+ }
+
+ lstk[istk] = l;
+ mstk[istk] = m;
+ nstk[istk] = n;
+ ++istk;
+ nrstk[istk] = nro;
+ ncstk[istk] = nco;
+ ystk[istk] = y;
+ l = 1;
+ } while(1);/* end do */
+
+ /* L90:*/
+ if (y > amx) {
+ amx = y;
+ if (*dsp - amx <= *tol) {
+ *dsp = 0.;
+ return;
+ }
+ }
+
+L100:
+ --istk;
+ if (istk == 0) {
+ *dsp -= amx;
+ if (*dsp - amx <= *tol) {
+ *dsp = 0.;
+ }
+ return;
+ }
+ l = lstk[istk] + 1;
+
+ /* L110: */
+ for(;; ++l) {
+ if (l > mstk[istk])goto L100;
+
+ n = nstk[istk];
+ nro = nrstk[istk];
+ nco = ncstk[istk];
+ y = ystk[istk];
+ if (n == 1) {
+ if (irstk[l + istk * *nrow] <
+ irstk[l - 1 + istk * *nrow])goto L60;
+ }
+ else if (n == 2) {
+ if (icstk[l + istk * *ncol] <
+ icstk[l - 1 + istk * *ncol])goto L60;
+ }
+ }
+}
+
+/*
+ -----------------------------------------------------------------------
+ Name: F5XACT
+ Purpose: Put node on stack in network algorithm.
+ Usage: CALL F5XACT (PASTP, TOL, KVAL, KEY, LDKEY, IPOIN, STP,
+ LDSTP, IFRQ, NPOIN, NR, NL, IFREQ, ITOP,
+ IPSH)
+ Arguments:
+ PASTP - The past path length.(Input)
+ TOL - Tolerance for equivalence of past path lengths. (Input)
+ KVAL - Key value. (Input)
+ KEY - Vector of length LDKEY containing the key values.(in/out)
+ LDKEY - Length of vector KEY. (Input)
+ IPOIN - Vector of length LDKEY pointing to the
+ linked list of past path lengths. (in/out)
+ STP - Vector of length LSDTP containing the
+ linked lists of past path lengths. (in/out)
+ LDSTP - Length of vector STP. (Input)
+ IFRQ - Vector of length LDSTP containing the past path
+ frequencies. (in/out)
+ NPOIN - Vector of length LDSTP containing the pointers to
+ the next past path length. (in/out)
+ NR - Vector of length LDSTP containing the right object
+ pointers in the tree of past path lengths. (in/out)
+ NL - Vector of length LDSTP containing the left object
+ pointers in the tree of past path lengths. (in/out)
+ IFREQ - Frequency of the current path length. (Input)
+ ITOP - Pointer to the top of STP. (Input)
+ IPSH - Option parameter. (Input)
+ If IPSH is true, the past path length is found in the
+ table KEY. Otherwise the location of the past path
+ length is assumed known and to have been found in
+ a previous call. ==>>>>> USING "static" variables
+ -----------------------------------------------------------------------
+*/
+
+void
+f5xact(double *pastp, const double *tol, int *kval, int *key, int *ldkey,
+ int *ipoin, double *stp, int *ldstp, int *ifrq, int *npoin,
+ int *nr, int *nl, int *ifreq, int *itop, int *ipsh)
+{
+ /* Local variables */
+ static int itmp, ird, ipn, itp;
+ double test1, test2;
+
+ /* Parameter adjustments */
+ --nl;
+ --nr;
+ --npoin;
+ --ifrq;
+ --stp;
+ --ipoin;
+ --key;
+
+ /* Function Body */
+ if (*ipsh) {
+ /* Convert KVAL to int in range 1, ..., LDKEY. */
+ ird = *kval % *ldkey + 1;
+ /* Search for an unused location */
+ for (itp = ird; itp <= *ldkey; ++itp) {
+ if (key[itp] == *kval) {
+ goto L40;
+ }
+ if (key[itp] < 0) {
+ goto L30;
+ }
+ }
+ for (itp = 1; itp <= ird - 1; ++itp) {
+ if (key[itp] == *kval) {
+ goto L40;
+ }
+ if (key[itp] < 0) {
+ goto L30;
+ }
+ }
+ /* Return if KEY array is full */
+ /* KH
+ prterr(6, "LDKEY is too small for this problem.\n"
+ "It is not possible to estimate the value of LDKEY "
+ "required,\n"
+ "but twice the current value may be sufficient.");
+ */
+ prterr(6, "LDKEY is too small for this problem.\n"
+ "Try increasing the size of the workspace.");
+
+ /* Update KEY */
+L30:
+ key[itp] = *kval;
+ ++(*itop);
+ ipoin[itp] = *itop;
+ /* Return if STP array full */
+ if (*itop > *ldstp) {
+ /* KH
+ prterr(7, "LDSTP is too small for this problem.\n"
+ "It is not possible to estimate the value of LDSTP "
+ "required,\n"
+ "but twice the current value may be sufficient.");
+ */
+ prterr(7, "LDSTP is too small for this problem.\n"
+ "Try increasing the size of the workspace.");
+ }
+ /* Update STP, etc. */
+ npoin[*itop] = -1;
+ nr[*itop] = -1;
+ nl[*itop] = -1;
+ stp[*itop] = *pastp;
+ ifrq[*itop] = *ifreq;
+ return;
+ }
+
+ /* Find location, if any, of pastp */
+L40:
+ ipn = ipoin[itp];
+ test1 = *pastp - *tol;
+ test2 = *pastp + *tol;
+
+L50:
+ if (stp[ipn] < test1) {
+ ipn = nl[ipn];
+ if (ipn > 0) {
+ goto L50;
+ }
+ } else if (stp[ipn] > test2) {
+ ipn = nr[ipn];
+ if (ipn > 0) {
+ goto L50;
+ }
+ } else {
+ ifrq[ipn] += *ifreq;
+ return;
+ }
+ /* Return if STP array full */
+ ++(*itop);
+ if (*itop > *ldstp) {
+ /*
+ prterr(7, "LDSTP is too small for this problem.\n"
+ "It is not possible to estimate the value of LDSTP "
+ "required,\n"
+ "but twice the current value may be sufficient.");
+ */
+ prterr(7, "LDSTP is too small for this problem.\n"
+ "Try increasing the size of the workspace.");
+ return;
+ }
+ /* Find location to add value */
+ ipn = ipoin[itp];
+ itmp = ipn;
+
+L60:
+ if (stp[ipn] < test1) {
+ itmp = ipn;
+ ipn = nl[ipn];
+ if (ipn > 0) {
+ goto L60;
+ } else {
+ nl[itmp] = *itop;
+ }
+ } else if (stp[ipn] > test2) {
+ itmp = ipn;
+ ipn = nr[ipn];
+ if (ipn > 0) {
+ goto L60;
+ } else {
+ nr[itmp] = *itop;
+ }
+ }
+ /* Update STP, etc. */
+ npoin[*itop] = npoin[itmp];
+ npoin[itmp] = *itop;
+ stp[*itop] = *pastp;
+ ifrq[*itop] = *ifreq;
+ nl[*itop] = -1;
+ nr[*itop] = -1;
+}
+
+/*
+ -----------------------------------------------------------------------
+ Name: F6XACT
+ Purpose: Pop a node off the stack.
+ Usage: CALL F6XACT (NROW, IROW, IFLAG, KYY, KEY, LDKEY, LAST, IPN)
+ Arguments:
+ NROW - The number of rows in the table.(Input)
+ IROW - Vector of length nrow containing the row sums on
+ output.(Output)
+ IFLAG - Set to 3 if there are no additional nodes to process.
+ (Output)
+ KYY - Constant mutlipliers used in forming the hash
+ table key.(Input)
+ KEY - Vector of length LDKEY containing the hash table
+ keys.(In/out)
+ LDKEY - Length of vector KEY.(Input)
+ LAST - Index of the last key popped off the stack.(In/out)
+ IPN - Pointer to the linked list of past path lengths.(Output)
+ -----------------------------------------------------------------------
+*/
+void
+f6xact(int *nrow, int *irow, int *iflag, int *kyy, int *key, int
+ *ldkey, int *last, int *ipn)
+{
+ int kval, j;
+
+ /* Parameter adjustments */
+ --key;
+ --kyy;
+ --irow;
+
+ /* Function Body */
+L10:
+ ++(*last);
+ if (*last <= *ldkey) {
+ if (key[*last] < 0) {
+ goto L10;
+ }
+ /* Get KVAL from the stack */
+ kval = key[*last];
+ key[*last] = -9999;
+ for (j = *nrow; j >= 2; --j) {
+ irow[j] = kval / kyy[j];
+ kval -= irow[j] * kyy[j];
+ }
+ irow[1] = kval;
+ *ipn = *last;
+ } else {
+ *last = 0;
+ *iflag = 3;
+ }
+ return;
+}
+
+/*
+ -----------------------------------------------------------------------
+ Name: F7XACT
+ Purpose: Generate the new nodes for given marinal totals.
+ Usage: CALL F7XACT (NROW, IMAX, IDIF, K, KS, IFLAG)
+ Arguments:
+ NROW - The number of rows in the table.(Input)
+ IMAX - The row marginal totals.(Input)
+ IDIF - The column counts for the new column.(in/out)
+ K - Indicator for the row to decrement.(in/out)
+ KS - Indicator for the row to increment.(in/out)
+ IFLAG - Status indicator.(Output)
+ If IFLAG is zero, a new table was generated. For
+ IFLAG = 1, no additional tables could be generated.
+ -----------------------------------------------------------------------
+*/
+
+void
+f7xact(int *nrow, int *imax, int *idif, int *k, int *ks,
+ int *iflag)
+
+{
+ int i, m, k1, mm;
+
+ /* Parameter adjustments */
+ --idif;
+ --imax;
+
+ /* Function Body */
+ *iflag = 0;
+ /* Find node which can be incremented, ks */
+ if (*ks == 0)
+ do {
+ ++(*ks);
+ } while (idif[*ks] == imax[*ks]);
+
+ /* Find node to decrement (>ks) */
+ if (idif[*k] > 0 && *k > *ks) {
+ --idif[*k];
+ do {
+ --(*k);
+ } while(imax[*k] == 0);
+
+ m = *k;
+
+ /* Find node to increment (>=ks) */
+ while (idif[m] >= imax[m]) {
+ --m;
+ }
+ ++idif[m];
+ /* Change ks */
+ if (m == *ks) {
+ if (idif[m] == imax[m]) {
+ *ks = *k;
+ }
+ }
+ }
+ else {
+ Loop:
+ /* Check for finish */
+ for (k1 = *k + 1; k1 <= *nrow; ++k1) {
+ if (idif[k1] > 0) {
+ goto L70;
+ }
+ }
+ *iflag = 1;
+ return;
+
+ L70:
+ /* Reallocate counts */
+ mm = 1;
+ for (i = 1; i <= *k; ++i) {
+ mm += idif[i];
+ idif[i] = 0;
+ }
+ *k = k1;
+
+ do {
+ --(*k);
+ m = min(mm, imax[*k]);
+ idif[*k] = m;
+ mm -= m;
+ } while (mm > 0 && *k != 1);
+
+ /* Check that all counts reallocated */
+ if (mm > 0) {
+ if (k1 != *nrow) {
+ *k = k1;
+ goto Loop;
+ }
+ *iflag = 1;
+ return;
+ }
+ /* Get ks */
+ --idif[k1];
+ *ks = 0;
+ do {
+ ++(*ks);
+ if (*ks > *k) {
+ return;
+ }
+ } while (idif[*ks] >= imax[*ks]);
+ }
+}
+
+/*
+ -----------------------------------------------------------------------
+ Name: F8XACT
+ Purpose: Routine for reducing a vector when there is a zero
+ element.
+ Usage: CALL F8XACT (IROW, IS, I1, IZERO, NEW)
+ Arguments:
+ IROW - Vector containing the row counts.(Input)
+ IS - Indicator.(Input)
+ I1 - Indicator.(Input)
+ IZERO - Position of the zero.(Input)
+ NEW - Vector of new row counts.(Output)
+ -----------------------------------------------------------------------
+*/
+
+void
+f8xact(int *irow, int *is, int *i1, int *izero, int *knew)
+{
+ int i;
+
+ /* Parameter adjustments */
+ --knew;
+ --irow;
+
+ /* Function Body */
+ for (i = 1; i < *i1; ++i)
+ knew[i] = irow[i];
+
+ for (i = *i1; i <= *izero - 1; ++i) {
+ if (*is >= irow[i + 1])
+ break;
+ knew[i] = irow[i + 1];
+ }
+
+ knew[i] = *is;
+
+ for(;;) {
+ ++i;
+ if (i > *izero)
+ return;
+ knew[i] = irow[i];
+ }
+}
+
+/*
+ -----------------------------------------------------------------------
+ Name: F9XACT
+ Purpose: Computes the log of a multinomial coefficient.
+ Usage: F9XACT(N, MM, IR, FACT)
+ Arguments:
+ N - Length of IR.(Input)
+ MM - Number for factorial in numerator.(Input)
+ IR - Vector of length N containing the numbers for
+ the denominator of the factorial.(Input)
+ FACT - Table of log factorials.(Input)
+ F9XACT - The log of the multinomal coefficient.(Output)
+ -----------------------------------------------------------------------
+*/
+
+double
+f9xact(int *n, int *mm, int *ir, double *fact)
+{
+ double d;
+ int k;
+
+ d = fact[*mm];
+ for (k = 0; k < *n; k++)
+ d -= fact[ir[k]];
+ return d;
+}
+
+/*
+ -----------------------------------------------------------------------
+ Name: F10ACT
+ Purpose: Computes the shortest path length for special tables.
+ Usage: F10ACT (NROW, IROW, NCOL, ICOL, VAL, XMIN, FACT, ND, NE, M)
+ Arguments:
+ NROW - The number of rows in the table.(Input)
+ IROW - Vector of length NROW containing the row totals.(Input)
+ NCOL - The number of columns in the table. (Input)
+ ICO - Vector of length NCOL containing the column totals.(Input)
+ VAL - The shortest path. (Output)
+ XMIN - Set to true if shortest path obtained. (Output)
+ FACT - Vector containing the logarithms of factorials. (Input)
+ ND - Workspace vector of length NROW.(Input)
+ NE - Workspace vector of length NCOL.(Input)
+ M - Workspace vector of length NCOL.(Input)
+
+ Chapter: STAT/LIBRARY Categorical and Discrete Data Analysis
+ -----------------------------------------------------------------------
+*/
+
+void
+f10act(int *nrow, int *irow, int *ncol, int *icol, double *val,
+ int *xmin, double *fact, int *nd, int *ne, int *m)
+{
+ /* Local variables */
+ int i, is, ix, nrw1;
+
+ /* Parameter adjustments */
+ --m;
+ --ne;
+ --nd;
+ --icol;
+ --irow;
+
+ /* Function Body */
+ for (i = 1; i <= *nrow - 1; ++i)
+ nd[i] = 0;
+
+ is = icol[1] / *nrow;
+ ix = icol[1] - *nrow * is;
+ ne[1] = is;
+ m[1] = ix;
+ if (ix != 0)
+ ++nd[ix];
+
+ for (i = 2; i <= *ncol; ++i) {
+ ix = icol[i] / *nrow;
+ ne[i] = ix;
+ is += ix;
+ ix = icol[i] - *nrow * ix;
+ m[i] = ix;
+ if (ix != 0)
+ ++nd[ix];
+ }
+
+ for (i = *nrow - 2; i >= 1; --i)
+ nd[i] += nd[i + 1];
+
+ ix = 0;
+ nrw1 = *nrow + 1;
+ for (i = *nrow; i >= 2; --i) {
+ ix = ix + is + nd[nrw1 - i] - irow[i];
+ if (ix < 0)
+ return;
+ }
+
+ for (i = 1; i <= *ncol; ++i) {
+ ix = ne[i];
+ is = m[i];
+ *val = *val + is * fact[ix + 1] + (*nrow - is) * fact[ix];
+ }
+ *xmin = (1);
+
+ return;
+}
+
+/*
+ -----------------------------------------------------------------------
+ Name: F11ACT
+ Purpose: Routine for revising row totals.
+ Usage: CALL F11ACT (IROW, I1, I2, KNEW)
+ Arguments:
+ IROW - Vector containing the row totals.(Input)
+ I1 - Indicator.(Input)
+ I2 - Indicator. (Input)
+ KNEW - Vector containing the row totals.(Output)
+ -----------------------------------------------------------------------
+*/
+void
+f11act(int *irow, int *i1, int *i2, int *knew)
+{
+ int i;
+
+ /* Parameter adjustments */
+ --knew;
+ --irow;
+
+ for (i = 1; i <= (*i1 - 1); ++i)knew[i] = irow[i];
+ for (i = *i1; i <= *i2; ++i)knew[i] = irow[i + 1];
+
+ return;
+}
+
+void prterr(int icode, char *mes)
+{ error("FEXACT error "+int2str(icode)); }
+
+/*
+ -----------------------------------------------------------------------
+ Name: iwork
+ Purpose: Routine for allocating workspace.
+ Usage: iwork (iwkmax, iwkpt, number, itype)
+ Arguments:
+ iwkmax - Maximum (int) amount of workspace.(Input)
+ iwkpt - Amount of (int) workspace currently allocated.(in/out)
+ number - Number of elements of workspace desired.(Input)
+ itype - Workspace type.(Input)
+ ITYPE TYPE
+ 2 integer
+ 3 float
+ 4 double
+ iwork(): Index in rwrk, dwrk, or iwrk of the beginning of
+ the first free element in the workspace array.(Output)
+ -----------------------------------------------------------------------
+*/
+int
+iwork(int iwkmax, int *iwkpt, int number, int itype)
+{
+ int i;
+
+ i = *iwkpt;
+ if (itype == 2 || itype == 3)
+ *iwkpt += number;
+ else { /* double */
+ if (i % 2 != 0)
+ ++i;
+ *iwkpt += (number << 1);
+ i /= 2;
+ }
+ if (*iwkpt > iwkmax)
+ prterr(40, "Out of workspace.");
+
+ return i;
+}
+
+#ifndef USING_R
+
+void isort(int *n, int *ix)
+{
+ /*
+ -----------------------------------------------------------------------
+ Name: ISORT
+ Purpose: Shell sort for an int vector.
+ Usage: CALL ISORT (N, IX)
+ Arguments:
+ N - Lenth of vector IX.(Input)
+ IX - Vector to be sorted.(in/out)
+ -----------------------------------------------------------------------
+ */
+ static int ikey, i, j, m, il[10], kl, it, iu[10], ku;
+
+ /* Parameter adjustments */
+ --ix;
+
+ /* Function Body */
+ m = 1;
+ i = 1;
+ j = *n;
+
+L10:
+ if (i >= j) {
+ goto L40;
+ }
+ kl = i;
+ ku = j;
+ ikey = i;
+ ++j;
+ /* Find element in first half */
+L20:
+ ++i;
+ if (i < j) {
+ if (ix[ikey] > ix[i]) {
+ goto L20;
+ }
+ }
+ /* Find element in second half */
+L30:
+ --j;
+ if (ix[j] > ix[ikey]) {
+ goto L30;
+ }
+ /* Interchange */
+ if (i < j) {
+ it = ix[i];
+ ix[i] = ix[j];
+ ix[j] = it;
+ goto L20;
+ }
+ it = ix[ikey];
+ ix[ikey] = ix[j];
+ ix[j] = it;
+ /* Save upper and lower subscripts of the array yet to be sorted */
+ if (m < 11) {
+ if (j - kl < ku - j) {
+ il[m - 1] = j + 1;
+ iu[m - 1] = ku;
+ i = kl;
+ --j;
+ } else {
+ il[m - 1] = kl;
+ iu[m - 1] = j - 1;
+ i = j + 1;
+ j = ku;
+ }
+ ++m;
+ goto L10;
+ } else {
+ prterr(20, "This should never occur.");
+ }
+ /* Use another segment */
+L40:
+ --m;
+ if (m == 0) {
+ return;
+ }
+ i = il[m - 1];
+ j = iu[m - 1];
+ goto L10;
+}
+
+double gammds(double *y, double *p, int *ifault)
+{
+ /*
+ -----------------------------------------------------------------------
+ Name: GAMMDS
+ Purpose: Cumulative distribution for the gamma distribution.
+ Usage: PGAMMA (Q, ALPHA,IFAULT)
+ Arguments:
+ Q - Value at which the distribution is desired. (Input)
+ ALPHA - Parameter in the gamma distribution. (Input)
+ IFAULT - Error indicator.(Output)
+ IFAULT DEFINITION
+ 0 No error
+ 1 An argument is misspecified.
+ 2 A numerical error has occurred.
+ PGAMMA - The cdf for the gamma distribution with parameter alpha
+ evaluated at Q. (Output)
+ -----------------------------------------------------------------------
+
+ Algorithm AS 147 APPL. Statist. (1980) VOL. 29, P. 113
+
+ Computes the incomplete gamma integral for positive parameters Y, P
+ using and infinite series.
+ */
+
+ static double a, c, f, g;
+ static int ifail;
+
+ /* Checks for the admissibility of arguments and value of F */
+ *ifault = 1;
+ g = 0.;
+ if (*y <= 0. || *p <= 0.) {
+ return g;
+ }
+ *ifault = 2;
+
+ /*
+ ALOGAM is natural log of gamma function no need to test ifail as
+ an error is impossible
+ */
+
+ a = *p + 1.;
+ f = exp(*p * log(*y) - alogam(&a, &ifail) - *y);
+ if (f == 0.) {
+ return g;
+ }
+ *ifault = 0;
+
+ /* Series begins */
+ c = 1.;
+ g = 1.;
+ a = *p;
+L10:
+ a += 1.;
+ c = c * *y / a;
+ g += c;
+ if (c / g > 1e-6) {
+ goto L10;
+ }
+ g *= f;
+ return g;
+}
+
+/*
+ -----------------------------------------------------------------------
+ Name: ALOGAM
+ Purpose: Value of the log-gamma function.
+ Usage: ALOGAM (X, IFAULT)
+ Arguments:
+ X - Value at which the log-gamma function is to be evaluated.
+ (Input)
+ IFAULT - Error indicator. (Output)
+ IFAULT DEFINITION
+ 0 No error
+ 1 X < 0
+ ALGAMA - The value of the log-gamma function at XX. (Output)
+ -----------------------------------------------------------------------
+
+ Algorithm ACM 291, Comm. ACM. (1966) Vol. 9, P. 684
+
+ Evaluates natural logarithm of gamma(x) for X greater than zero.
+*/
+
+double alogam(double *x, int *ifault)
+{
+ /* Initialized data */
+
+ static double a1 = .918938533204673;
+ static double a2 = 5.95238095238e-4;
+ static double a3 = 7.93650793651e-4;
+ static double a4 = .002777777777778;
+ static double a5 = .083333333333333;
+
+ /* Local variables */
+ static double f, y, z;
+
+ *ifault = 1;
+ if (*x < 0.) {
+ return(0.);
+ }
+ *ifault = 0;
+ y = *x;
+ f = 0.;
+ if (y >= 7.) {
+ goto L30;
+ }
+ f = y;
+L10:
+ y += 1.;
+ if (y >= 7.) {
+ goto L20;
+ }
+ f *= y;
+ goto L10;
+L20:
+ f = -log(f);
+L30:
+ z = 1. / (y * y);
+ return(f + (y - .5) * log(y) - y + a1 +
+ (((-a2 * z + a3) * z - a4) * z + a5) / y);
+}
+
+#endif /* not USING_R */
+
Added: trunk/packages/plink/branches/upstream/current/nlist.cpp
===================================================================
--- trunk/packages/plink/branches/upstream/current/nlist.cpp (rev 0)
+++ trunk/packages/plink/branches/upstream/current/nlist.cpp 2008-04-09 09:42:37 UTC (rev 1721)
@@ -0,0 +1,258 @@
+
+
+//////////////////////////////////////////////////////////////////
+// //
+// PLINK (c) 2005-2007 Shaun Purcell //
+// //
+// This file is distributed under the GNU General Public //
+// License, Version 2. Please see the file COPYING for more //
+// details //
+// //
+//////////////////////////////////////////////////////////////////
+
+#include <string>
+#include <vector>
+
+#include "nlist.h"
+
+vector<int> NList::deparseNumberList(string input)
+{
+
+ // We have a string that should contain integers, possibily
+ // separated by "," and "-" characters
+
+ // Use 1-N coding up until the last moment, instead of
+ // standard 0..N-1 coding
+
+ firstWord = "1";
+ lastWord = int2str(maxcat);
+
+ vector<string> tok = tokenize(input);
+ vector<int> nums;
+ for (int i=0; i<tok.size(); i++)
+ {
+ int j;
+ if ( tok[i] == range_string )
+ nums.push_back(-1);
+ else
+ {
+ if ( ! from_string<int>( j, tok[i] , std::dec))
+ continue;
+ nums.push_back(j);
+ }
+ }
+ return expandNumberList(nums);
+}
+
+
+vector<int> NList::deparseStringList(string input, map<string,int> * mapping)
+{
+
+ // Assume map contains a 0..N-1 coding in the map
+
+ map<string,int>::iterator im = mapping->begin();
+ while ( im != mapping->end() )
+ {
+ int i = im->second;
+
+ if ( i == 0 )
+ firstWord = im->first;
+
+ if ( i == maxcat - 1 )
+ lastWord = im->first;
+
+ ++im;
+ }
+
+
+ ////////////////////////////////////////////////////////////////
+ // Convert string codes to numbers, then call deparseNumberList
+
+ // But incrememnt to 1..N coding here, i.e. so that it is
+ // similar process to the human-friendly 1..N coding for a
+ // numeric list
+
+ vector<string> tok = tokenize(input);
+ vector<int> nums;
+ for (int i=0; i<tok.size(); i++)
+ {
+ map<string,int>::iterator im = mapping->find(tok[i]);
+ if ( im != mapping->end() )
+ nums.push_back(im->second + 1);
+ else if ( tok[i] == range_string )
+ nums.push_back(-1);
+ else
+ error("Cannot find value: " + tok[i] + "\n");
+ }
+ return expandNumberList(nums);
+}
+
+
+
+vector<int> NList::expandNumberList(vector<int> & nlist)
+{
+
+ // Convert
+ vector<int> n;
+
+ //////////////////
+ // Check ranges
+
+ for (int i=0; i<nlist.size(); i++)
+ {
+ if ( nlist[i] == -1 )
+ {
+ if ( nlist[i-1] > nlist[i+1] )
+ {
+ int tmp = nlist[i+1];
+ nlist[i+1] = nlist[i-1];
+ nlist[i-1] = tmp;
+ }
+ }
+ }
+
+
+ //////////////////
+ // Expand ranges
+
+ for (int i=0; i<nlist.size(); i++)
+ {
+
+ if ( nlist[i]>0 )
+ {
+ // Only add valid codes
+ if ( nlist[i] <= maxcat )
+ n.push_back(nlist[i]);
+ }
+ else
+ {
+ int start = nlist[i-1]+1;
+ int end = nlist[i+1]-1;
+ if ( end > maxcat )
+ end = maxcat;
+
+ for (int j=start; j<=end; j++)
+ n.push_back(j);
+ }
+ }
+
+ // Sort and uniquify
+ stable_sort(n.begin(),n.end());
+ vector<int>::iterator ne = unique(n.begin(),n.end());
+ n.erase(ne, n.end());
+
+ // Shift to 0..N-1 coding
+ for (int i=0; i<n.size(); i++)
+ --n[i];
+
+
+ // Or is it a negative complement?
+ if ( negmode )
+ {
+
+ vector<bool> k(maxcat,false);
+ for (int i=0; i<n.size(); i++)
+ k[ n[i] ] = true;
+
+ vector<int> n2 = n;
+ n.clear();
+
+ for (int i=0; i<maxcat; i++)
+ {
+ if ( ! k[i] )
+ {
+ n.push_back(i);
+ }
+ }
+ }
+
+ return n;
+
+}
+
+
+vector<string> NList::tokenize(string s)
+{
+
+ vector<string> t;
+
+ string word = "";
+ bool range_set = false;
+ bool number_given = false;
+
+ for (int i=0; i<s.length(); i++)
+ {
+
+ // Skip spaces
+ if (s[i] == ' ')
+ continue;
+
+ if ( s[i] == ',' || s[i] == range_char )
+ {
+
+ // If we've built up a meaningful word,
+ // we can add it now
+
+ if ( word != "" )
+ {
+ t.push_back(word);
+ word = "";
+ number_given = true;
+ range_set = false;
+ }
+
+
+ // Start a range?
+ if ( s[i]== range_char )
+ {
+
+ // Can't do twice
+ if (range_set)
+ error("Invalid list: " + s + "\n");
+
+ range_set = true;
+
+ // But was a start point given? Make one
+ // if not
+
+ if ( ! number_given )
+ {
+ t.push_back("1");
+ word="";
+ }
+
+ number_given = false;
+
+ t.push_back(range_string);
+
+ }
+
+ // And if the "-" is also the last
+ // character, we need an end point
+
+ if ( s[i]== range_char && i == s.length()-1 )
+ t.push_back( lastWord );
+
+ // Here, fill in here
+ if ( s[i]==',')
+ {
+ number_given = false;
+ if ( range_set )
+ {
+ range_set = false;
+ t.push_back( lastWord );
+ }
+ }
+ }
+ else if ( i == s.length()-1 )
+ {
+ // Is this a last number?
+ t.push_back(word+s[i]);
+ number_given = true;
+ }
+ else
+ word += s[i];
+ }
+
+ return t;
+}
Added: trunk/packages/plink/branches/upstream/current/parse.cpp
===================================================================
--- trunk/packages/plink/branches/upstream/current/parse.cpp (rev 0)
+++ trunk/packages/plink/branches/upstream/current/parse.cpp 2008-04-09 09:42:37 UTC (rev 1721)
@@ -0,0 +1,3386 @@
+
+
+//////////////////////////////////////////////////////////////////
+// //
+// PLINK (c) 2005-2008 Shaun Purcell //
+// //
+// This file is distributed under the GNU General Public //
+// License, Version 2. Please see the file COPYING for more //
+// details //
+// //
+//////////////////////////////////////////////////////////////////
+
+
+#include <iostream>
+#include <string>
+#include <cmath>
+#include "options.h"
+#include "helper.h"
+
+
+void getOutputFilename(CArgs & a)
+{
+
+ ////////////////////////////////////////
+ // Insert commands from a file
+
+ if (a.find("--script"))
+ {
+ string f = a.value("--script");
+ a.fromScript(f);
+ }
+
+ if (a.find("--rerun"))
+ {
+ string f = a.value("--rerun");
+ a.fromPriorLog(f);
+ }
+
+ if (a.find("--out"))
+ {
+ par::output_file_name = a.value("--out");
+ }
+
+ if (a.find("--gplink"))
+ {
+ if ( par::cli )
+ error("Cannot specify --gplink and --interactive");
+ par::silent = true;
+ par::gplink = true;
+ }
+
+ if (a.find("--silent"))
+ {
+ if ( par::cli )
+ error("Cannot specify --silent and --interactive");
+
+ par::silent = true;
+ }
+
+ if (a.find("--interactive"))
+ {
+ if ( a.find("--script") )
+ error("Cannot specify --script and --interactive");
+ par::cli = true;
+ }
+}
+
+void setOptions(CArgs & a)
+{
+
+
+ /////////////////////////////////////
+ // Web-based functions
+
+ if (a.find("--noweb")) par::web_check = false;
+
+ if (a.find("--lookup"))
+ {
+ par::lookup = true;
+ par::lookup_single_snp = true;
+ par::lookup_snp = a.value("--lookup");
+ }
+
+ if (a.find("--lookup-list"))
+ {
+ par::lookup = true;
+ par::lookup_to_file = true;
+ par::lookup_single_snp = false;
+ par::lookup_snp = a.value("--lookup-list");
+ }
+
+ if (a.find("--lookup-save"))
+ par::lookup_to_file = true;
+
+ if (a.find("--lookup-gene"))
+ {
+ par::lookup = true;
+ par::lookup_to_file = true;
+ par::lookup_gene = true;
+ par::lookup_gene_name = a.value("--lookup-gene");
+ }
+
+ if (a.find("--R"))
+ {
+ par::run_R_script = true;
+ par::R_script = a.value("--R");
+ }
+
+ if (a.find("--R-debug"))
+ par::run_R_write_script = true;
+
+ if (a.find("--R-chisq"))
+ par::run_R_chisq = true;
+
+ if (a.find("--R-z"))
+ par::run_R_z = true;
+
+ if (a.find("--R-nsnps"))
+ par::run_R_nsnps = a.value_int("--R-nsnps");
+
+ if (a.find("--seed"))
+ par::random_seed = a.value_lui("--seed");
+
+ ////////////////////////////////////////
+ // A data-generation option?
+
+ bool makedata = false;
+
+ if (a.find("--recode"))
+ {
+ makedata = true;
+ par::recode = true;
+ // and unless otherwise specified, set GENO = 1 and MAF = 0
+ if (!a.find("--maf")) par::min_af = 0.0;
+ if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
+ if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
+ }
+
+ if (a.find("--recode12"))
+ {
+ makedata = true;
+ par::recode_12 = true;
+ // and unless otherwise specified, set GENO = 1 and MAF = 0
+ if (!a.find("--maf")) par::min_af = 0.0;
+ if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
+ if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
+ }
+
+ if (a.find("--recodeHV"))
+ {
+ makedata = true;
+ par::recode_HV = true;
+ // and unless otherwise specified, set GENO = 1 and MAF = 0
+ if (!a.find("--maf")) par::min_af = 0.0;
+ if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
+ if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
+ }
+
+ if (a.find("--recode-whap"))
+ {
+ makedata = true;
+ par::recode_whap = true;
+ // and unless otherwise specified, set GENO = 1 and MAF = 0
+ if (!a.find("--maf")) par::min_af = 0.0;
+ if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
+ if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
+ }
+
+ if (a.find("--recode-fastphase"))
+ {
+ makedata = true;
+ par::recode_fastphase = true;
+ // and unless otherwise specified, set GENO = 1 and MAF = 0
+ if (!a.find("--maf")) par::min_af = 0.0;
+ if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
+ if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
+ }
+
+ if (a.find("--recode-structure"))
+ {
+ makedata = true;
+ par::recode_structure = true;
+ // and unless otherwise specified, set GENO = 1 and MAF = 0
+ if (!a.find("--maf")) par::min_af = 0.0;
+ if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
+ if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
+ }
+
+ if (a.find("--recode-bimbam"))
+ {
+ makedata = true;
+ par::recode_bimbam = true;
+ // and unless otherwise specified, set GENO = 1 and MAF = 0
+ if (!a.find("--maf")) par::min_af = 0.0;
+ if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
+ if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
+ }
+
+ if (a.find("--recodeAD"))
+ {
+ makedata = true;
+ par::recode_AD = true;
+ // and unless otherwise specified, set GENO = 1 and MAF = 0
+ if (!a.find("--maf")) par::min_af = 0.0;
+ if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
+ if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
+ }
+
+
+ if (a.find("--recodeA"))
+ {
+ makedata = true;
+ par::recode_AD = true;
+ par::recode_AD_Aonly = true;
+ // and unless otherwise specified, set GENO = 1 and MAF = 0
+ if (!a.find("--maf")) par::min_af = 0.0;
+ if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
+ if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
+ }
+
+ if (a.find("--recode-allele"))
+ {
+ if ( ! par::recode_AD )
+ error("Must specify --recodeA or --recodeAD with --recode-allele");
+ par::recode_allele_coding = true;
+ par::recode_allele_coding_file = a.value("--recode-allele");
+ }
+
+ if (a.find("--make-bed"))
+ {
+ makedata = true;
+ par::write_bitfile = true;
+
+ // and unless otherwise specified, set GENO = 1 and MAF = 0
+ if (!a.find("--maf")) par::min_af = 0.0;
+ if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
+ if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
+ }
+
+
+ //////////////////////////////////////////////
+ // Modifiers of main data generation commands
+
+ if (a.find("--alleleACGT")) par::recode_ACGT = true;
+
+ if (a.find("--allele1234")) par::recode_1234 = true;
+
+ if (a.find("--transpose"))
+ {
+ if ( ! ( a.find("--recode") || a.find("--recode12") ) )
+ error("--transpose requires --recode or --recode12");
+ par::recode_transpose = true;
+ }
+
+ if (a.find("--tab"))
+ {
+ if ( ! makedata )
+ error("You can only specify --tab with a --recode* option");
+ par::recode_delimit = "\t";
+ }
+
+
+ ////////////////////////////////////////////////////
+ // Convenience functions and obligatory missingness
+
+ // Zero out genotypes for a particular SNP / cluster
+
+ if ( a.find("--zero-cluster") )
+ {
+ if ( ! a.find("--within") )
+ error("You must specify --within with --zero-cluster");
+ par::zero_cluster = true;
+ par::zero_cluster_filename = a.value("--zero-cluster");
+ }
+
+ // Perform genotyping rate calculations, but allow for
+ // obligatory missingness
+
+ if ( a.find("--oblig-missing") )
+ {
+ if ( ! a.find("--oblig-clusters") )
+ error("You must specify --oblig-clusters with --oblig-missing");
+ par::oblig_missing = true;
+ par::oblig_missing_filename = a.value("--oblig-missing");
+ }
+
+ if ( a.find("--oblig-clusters") )
+ {
+ if ( ! a.find("--oblig-missing") )
+ error("You must specify --oblig-missing with --oblig-clusters");
+ par::oblig_clusters_filename = a.value("--oblig-clusters");
+ }
+
+ // Multiple category phenotype
+
+ if ( a.find("--loop-assoc") )
+ {
+ if ( a.find("--within") )
+ error("You cannot specify --within with --loop-assoc");
+
+ par::all_pheno = true;
+ par::assoc_test = true;
+ par::loop_over = true;
+ par::include_cluster_from_file = true;
+ par::include_cluster_filename = a.value("--loop-assoc");
+ }
+
+
+
+ ////////////////////////////////////////
+ // TODO list
+
+ if (a.find("--todo"))
+ {
+ cout << "TODO list\n"
+ << " * Make --min/--max thresholds apply w/ read-segment\n"
+ << " * Odds ratio for DFAM\n"
+ << " * X chromosome phasing\n"
+ << " * Check Hotelling's T2 imputation of missing genotypes\n"
+ << " * Add improved familial phasing to haplotype tests\n"
+ << " * Add OR and CI to haplotype tests\n"
+ << " * Add CMH and/or permutation to haplotype tests\n"
+ << " \n"
+ << " * Include weighted association test, for haplotypes (effN)\n"
+ << " * Phasing option to merge with current data\n"
+ << " \n"
+ << " * Check new X models\n"
+ << " * Check new MM tests\n"
+ << " \n";
+ exit(0);
+ }
+
+
+ ///////////////////////////////////////
+ // Output options
+
+ if (a.find("--flag")) par::flag = true;
+
+ if (a.find("--verbose")) par::verbose = true;
+
+ if (a.find("--pedigree")) par::dumpped = true;
+
+ if (a.find("--debug")) par::debug = true;
+
+
+ ///////////////////////////////////////
+ // IBD analyses, output options
+
+ if (a.find("--multi")) par::multi_output = true;
+
+ if (a.find("--gmulti")) par::gmulti_output = true;
+
+ if (a.find("--summarise-ibd")) par::summary_ibd_output = true;
+
+ if (a.find("--genome"))
+ {
+ // By default, include everybody
+ if (!a.find("--min"))
+ {
+ par::MIN_PIHAT = 0;
+ par::pihat_filter = false;
+ }
+
+ par::genome_output = true;
+ }
+
+ if (a.find("--impossible"))
+ {
+ if (!a.find("--genome"))
+ error("Can only specify --impossible with --genome");
+ par::show_impossible_IBD = false;
+ }
+
+ if (a.find("--nudge"))
+ {
+ if (!a.find("--genome"))
+ error("Can only specify --nudge with --genome");
+ if (a.find("--impossible"))
+ error("Cannot specify --impossible and --nudge together");
+ par::nudge = true;
+ }
+
+ if (a.find("--unbounded"))
+ {
+ par::bound = false;
+ par::MIN_PIHAT = -1;
+ par::MAX_PIHAT = 1;
+ }
+
+ if (a.find("--genome-lists"))
+ {
+ if ( ! ( a.find("--genome") || a.find("--segment") ) )
+ error("Must specify --genome or --segment with --genome-lists");
+ par::genome_2sets = true;
+ vector<string> s = a.value("--genome-lists",2);
+ par::genome_setlist1 = s[0];
+ par::genome_setlist2 = s[1];
+ }
+
+ if (a.find("--segment-within"))
+ {
+ if ( !a.find("--within"))
+ error("You need to specify a --within {clusterfile}");
+ par::IBD_within = true;
+ }
+
+ if (a.find("--genome-test"))
+ {
+ par::genome_test = true;
+ par::genome_test_threshold = a.value_double("--genome-test");
+ par::plink = true;
+ par::permute = true;
+ par::adaptive_perm = false;
+ par::replicates = 100000;
+ }
+
+ if (a.find("--ibs-test"))
+ {
+ par::ibs_test = true;
+ par::permute = true;
+ par::adaptive_perm = false;
+ par::replicates = 100000;
+ }
+
+ if (a.find("--ibs-test2"))
+ {
+ par::ibs_test = true;
+ par::ibs_test_method2 = true;
+ par::permute = true;
+ par::adaptive_perm = false;
+ par::replicates = 100000;
+ }
+
+
+ if (a.find("--segment-match-snp"))
+ par::genome_test_min_snp = a.value_int("--segment-match-snp");
+
+
+
+
+ ///////////////////////////////////////////
+ // WGAS main options: summary stats and QC
+
+
+ if (a.find("--missing")) { par::report_missing = true; }
+
+ if (a.find("--mendel")) { par::MENDEL_report = true; }
+
+ if (a.find("--hardy")) par::HWD_report = true;
+
+ if (a.find("--hardy2")) par::HWD_report = par::HWD_standard = true;
+
+ if (a.find("--het")) par::inbreeding = true;
+
+ if (a.find("--check-sex"))
+ par::check_sex = true;
+
+ if (a.find("--impute-sex"))
+ par::check_sex = par::impute_sex = true;
+
+ if (a.find("--test-missing")) { par::test_missing = true; }
+
+ if (a.find("--test-mishap")) { par::mishap_test = true; }
+
+ if (a.find("--mishap-window"))
+ {
+ par::mishap_test = true;
+ par::mishap_window = a.value_int("--mishap-window");
+ }
+
+ if (a.find("--score"))
+ {
+ par::score_risk = true;
+ par::score_risk_file = a.value("--score");
+ }
+
+
+
+ ////////////////////////////////////////////////////////////////
+ // Proxy association methods
+
+ if (a.find("--proxy-assoc"))
+ {
+ par::proxy_assoc = true;
+ par::proxy_CC = true;
+ par::proxy_assoc_snp = a.value("--proxy-assoc");
+ if ( par::proxy_assoc_snp == "all" || par::proxy_assoc_snp == "ALL" )
+ par::proxy_all = true;
+
+ if ( a.find("--proxy-glm") )
+ par::proxy_glm = true;
+ }
+
+
+ if (a.find("--proxy-drop"))
+ {
+ if ( a.find("--proxy-impute"))
+ error("Cannot have --proxy-drop and --proxy-impute (already implied)");
+ par::proxy_leave_out = true;
+ }
+
+ if (a.find("--proxy-exclude"))
+ {
+ par::proxy_exclude = true;
+ par::proxy_exclude_from_file = false;
+ par::proxy_exclude_list = a.value("--proxy-exclude");
+ }
+ else if (a.find("--proxy-exclude-list"))
+ {
+ par::proxy_exclude = true;
+ par::proxy_exclude_from_file = true;
+ par::proxy_exclude_list = a.value("--proxy-exclude-list");
+ }
+
+ if (a.find("--proxy-include-reference"))
+ par::proxy_include_reference = true;
+
+ if (a.find("--proxy-impute"))
+ {
+ par::proxy_assoc = true;
+ par::proxy_impute = true;
+ par::proxy_assoc_snp = a.value("--proxy-impute");
+ if ( par::proxy_assoc_snp == "all" || par::proxy_assoc_snp == "ALL" )
+ par::proxy_all = true;
+
+ if ( a.find("--proxy-dosage") )
+ par::proxy_record_dosage = true;
+
+ if ( a.find("--proxy-replace") )
+ par::proxy_impute_replace = true;
+
+// if ( a.find("--proxy-preserve") )
+// par::proxy_impute_preserve_genotyped = true;
+
+ if ( a.find("--proxy-genotypic-concordance") )
+ par::proxy_impute_genotypic_concordance = true;
+
+ }
+
+ if (a.find("--proxy-error")) par::proxy_error = true;
+
+
+ if (a.find("--proxy-impute-threshold"))
+ par::proxy_impute_threshold = a.value_double("--proxy-impute-threshold");
+
+ if (a.find("--proxy-tdt"))
+ {
+ par::proxy_assoc = true;
+ par::proxy_TDT = true;
+ par::proxy_assoc_snp = a.value("--proxy-tdt");
+ if ( par::proxy_assoc_snp == "all" || par::proxy_assoc_snp == "ALL" )
+ par::proxy_all = true;
+ }
+
+ if ( par::proxy_assoc )
+ {
+ // Reset default missing rate per individual
+ // per haplotype (more relevant if the window
+ // size is small (i.e. to goal is not to discard
+ // too many / any individuals, but rely on E-M
+ // phasing to reconstruct missing genotypes. Normally
+ // the default value here is 0.5; it can be modified as
+ // --hap-miss option is below this one
+
+ par::hap_missing_geno = 0.9;
+
+ }
+
+ if (a.find("--proxy-verbose"))
+ par::proxy_full_report = true;
+
+ if (a.find("--proxy-window"))
+ par::proxy_window = a.value_int("--proxy-window");
+
+ if (a.find("--proxy-flanking"))
+ {
+ if ( par::proxy_all )
+ error("Cannt specify --proxy-flanking with >1 reference SNP");
+
+ par::proxy_list = true;
+ par::proxy_list_file = a.value("--proxy-flanking");
+ }
+
+ if (a.find("--proxy-list"))
+ {
+ if ( a.find("--proxy-flanking"))
+ error("Cannt specify --proxy-flanking with >1 reference SNP");
+
+ par::proxy_all_list = true;
+ par::proxy_all_list_file = a.value("--proxy-list");
+ }
+
+ if (a.find("--proxy-kb"))
+ par::proxy_kb = a.value_double("--proxy-kb");
+
+ if (a.find("--proxy-sub-r2"))
+ par::proxy_r2 = a.value_double("--proxy-sub-r2");
+
+ if (a.find("--proxy-maf"))
+ par::proxy_maf = a.value_double("--proxy-maf");
+
+ if (a.find("--proxy-geno"))
+ par::proxy_geno = a.value_double("--proxy-geno");
+
+ if (a.find("--proxy-mhf"))
+ par::proxy_mhf = a.value_double("--proxy-mhf");
+
+ if (a.find("--proxy-sub-maxsnp"))
+ par::proxy_maxhap = a.value_int("--proxy-sub-maxsnp");
+
+ if (a.find("--proxy-no-r2-filter"))
+ par::proxy_r2_filter = false;
+
+ if (a.find("--proxy-r2"))
+ {
+ par::proxy_r2_filter = true;
+ vector<string> s = a.value("--proxy-r2",3);
+ par::proxy_r2_filter_A = getDouble(s[0], "--proxy-r2");
+ par::proxy_r2_filter_B = getDouble(s[1], "--proxy-r2");
+ par::proxy_r2_filter_C = getDouble(s[2], "--proxy-r2");
+ }
+
+ if (a.find("--proxy-maxsnp"))
+ par::proxy_snp_filter = a.value_int("--proxy-maxsnp");
+
+ if (a.find("--proxy-show-proxies"))
+ par::proxy_list_proxies = true;
+
+ if (a.find("--proxy-r2-reference-only"))
+ par::proxy_reference_only = true;
+
+
+ ////////////////////////////////////////
+ // Segmental options
+
+ // --segment-match-snp
+ // --homozyg
+ // --homozyg-window-kb
+ // --homozyg-window-snp
+ // --homozyg-window-het
+ // --homozyg-window-missing
+ // --homozyg-snp
+ // --homozyg-kb
+ // --homozyg-density
+ // --homozyg-gap
+ // --homozyg-group
+ // --homozyg-match
+ // --homozyg-het
+ // --homozyg-verbose
+
+ // --segment
+ // --segment-gap
+ // --segment-length
+ // --segment-thresholds
+ // --segment-minimal
+
+ // --segment-group
+ // --segment-spanning
+ // --segment-from
+ // --segment-to
+ // --segment-force
+ // --segment-match
+ // --segment-verbose
+
+ // --pool-size
+
+ // Read segments back in
+ // --read-segment
+ // --read-segment-minimal
+
+
+ //////////////////////////
+ // CNV/other segment types
+
+ if (a.find("--cnv-list"))
+ {
+ par::cnv_list = true;
+ par::cnv_listname = a.value("--cnv-list");
+
+ // Remove any missing individuals
+ if (!a.find("--cnv-missing-phenotypes"))
+ par::ignore_phenotypes = false;
+
+ if (a.find("--cnv-make-map"))
+ par::cnv_makemap = true;
+
+ if (a.find("--cnv-write"))
+ {
+ par::cnv_writelist = true;
+ if (a.find("--with-phenotype"))
+ par::dump_covar_with_phenotype = true;
+ }
+
+ if (a.find("--cnv-intersect"))
+ {
+ if ( a.find("--cnv-exclude") ||
+ a.find("--cnv-count"))
+ error("Cannot specify --cnv-count/intersect/exclude together");
+
+ par::cnv_intersect = true;
+ par::cnv_intersect_file = a.value("--cnv-intersect");
+ }
+
+ if (a.find("--cnv-exclude"))
+ {
+ if ( a.find("--cnv-count") ||
+ a.find("--cnv-intersect"))
+ error("Cannot specify --cnv-count/intersect/exclude together");
+
+ par::cnv_intersect = par::cnv_exclude = true;
+ par::cnv_intersect_file = a.value("--cnv-exclude");
+ }
+
+ if (a.find("--cnv-count"))
+ {
+ if ( a.find("--cnv-exclude") ||
+ a.find("--cnv-intersect"))
+ error("Cannot specify --cnv-count/intersect/exclude together");
+ par::cnv_intersect = par::cnv_count = true;
+ par::cnv_intersect_file = a.value("--cnv-count");
+ }
+
+ if (a.find("--cnv-freq-exclude-above"))
+ {
+ par::cnv_freq_include = true;
+ par::cnv_freq_include_below = true;
+ par::cnv_freq_include_cnt = a.value_int("--cnv-freq-exclude-above");
+ }
+
+ if (a.find("--cnv-freq-exclude-below"))
+ {
+ par::cnv_freq_include = true;
+ par::cnv_freq_include_below = false;
+ par::cnv_freq_include_cnt = a.value_int("--cnv-freq-exclude-below");
+ }
+
+ if (a.find("--cnv-unique"))
+ {
+ par::cnv_unique = true;
+ }
+
+ if (a.find("--cnv-report-regions") )
+ {
+ if ( ! ( a.find("--cnv-intersect") || a.find("--cnv-exclude")) )
+ error("Must specify either --cnv-intersect or --cnv-exclude with --cnv-report-regions");
+ par::cnv_intersect_writeback = true;
+ }
+
+ if (a.find("--cnv-overlap"))
+ {
+ par::cnv_defined_overlap = true;
+ par::cnv_overlap = a.value_double("--cnv-overlap");
+ }
+
+ if (a.find("--cnv-kb"))
+ par::cnv_min_kb = a.value_int("--cnv-kb");
+ if (a.find("--cnv-score"))
+ par::cnv_min_score = a.value_double("--cnv-score");
+ if (a.find("--cnv-sites"))
+ par::cnv_min_sites = a.value_int("--cnv-sites");
+
+ if (a.find("--cnv-max-kb"))
+ par::cnv_max_kb = a.value_int("--cnv-max-kb");
+ if (a.find("--cnv-max-score"))
+ par::cnv_max_score = a.value_double("--cnv-max-score");
+ if (a.find("--cnv-max-sites"))
+ par::cnv_max_sites = a.value_int("--cnv-max-sites");
+
+ if (a.find("--cnv-del"))
+ par::cnv_del_only = true;
+ if (a.find("--cnv-dup"))
+ par::cnv_dup_only = true;
+
+ if (a.find("--cnv-test-window"))
+ {
+ par::seg_test_window = true;
+ par::seg_test_window_bp = a.value_double("--cnv-test-window") * 1000;
+ }
+
+ if (a.find("--cnv-test-2sided"))
+ par::segment_test_1sided = false;
+
+ if (a.find("--cnv-indiv-perm"))
+ par::cnv_indiv_perm = true;
+
+ if (a.find("--cnv-drop-no-segment"))
+ par::cnv_drop_no_segment = true;
+
+ if (a.find("--merge") || a.find("--bmerge"))
+ error("Cannot specify --cnv-list and merge SNP data together");
+
+ if (a.find("--exclude") || a.find("--extract"))
+ error("Cannot specify --cnv-list and exclude/extract markers SNP data together");
+
+ if (a.find("--maf") || a.find("--geno") || a.find("--mind"))
+ error("Cannot specify --cnv-list and filter on SNP data together");
+
+ }
+
+ ///////////////////////
+ // Runs of homozygosity
+
+ if (a.find("--homozyg"))
+ {
+ par::homo_run = true;
+ }
+
+ if (a.find("--read-homozyg"))
+ {
+ par::homo_run = true;
+ par::read_segment_filename = a.value("--read-homozyg");
+ par::read_segment_file = true;
+ }
+
+ if (a.find("--homozyg-snp"))
+ {
+ par::homo_run = true;
+ par::homo_run_snps = true;
+ par::homo_run_length_snps = a.value_int("--homozyg-snp");
+ }
+
+ if (a.find("--homozyg-kb"))
+ {
+ par::homo_run = true;
+ par::homo_run_kb = true;
+ par::homo_run_length_kb = a.value_int("--homozyg-kb");
+ }
+
+ if (a.find("--homozyg-density"))
+ {
+ par::homo_run = true;
+ par::homo_run_density = a.value_double("--homozyg-density");
+ }
+
+ if (a.find("--homozyg-gap"))
+ {
+ par::homo_run = true;
+ par::homo_run_gap = a.value_int("--homozyg-gap");
+ }
+
+ if (a.find("--homozyg-window-snp"))
+ {
+ par::homo_run = true;
+ par::homo_windowSize = a.value_int("--homozyg-window-snp");
+ }
+
+ if (a.find("--homozyg-window-kb"))
+ {
+ par::homo_run = true;
+ par::homo_windowKB = a.value_int("--homozyg-window-kb");
+ }
+
+ if (a.find("--homozyg-window-het"))
+ {
+ par::homo_run = true;
+ par::homo_windowAllowedHet = a.value_int("--homozyg-window-het");
+ }
+
+ if (a.find("--homozyg-window-missing"))
+ {
+ par::homo_run = true;
+ par::homo_windowAllowedMissing = a.value_int("--homozyg-window-missing");
+ }
+
+ if (a.find("--homozyg-window-threshold"))
+ {
+ par::homo_run = true;
+ par::homo_threshold = a.value_double("--homozyg-window-threshold");
+ }
+
+ if (a.find("--homozyg-group"))
+ {
+ par::homo_summary_allelic_match = true;
+ par::fuzzy_homo = 0.95;
+ }
+
+ if (a.find("--homozyg-match"))
+ {
+ par::homo_summary_allelic_match = true;
+ par::fuzzy_homo = a.value_double("--homozyg-match");
+ }
+
+ if (a.find("--homozyg-het"))
+ {
+ if (! ( a.find("--homozyg-snp") ||
+ a.find("--homozyg-kb") ) )
+ error("Must specify --homozyg-snp or --homozyg-kb with --homozyg-het");
+ par::homo_run_het = a.value_int("--homozyg-het");
+ }
+
+ if (a.find("--homozyg-verbose"))
+ par::homozyg_verbose = true;
+
+ if (a.find("--consensus-match"))
+ {
+ par::homo_run_consensus_match = true;
+ }
+
+ if (a.find("--pool-size"))
+ {
+ par::pool_size_min = a.value_int("--pool-size");
+ }
+
+ if (a.find("--ibs")) par::ibs_run = true;
+
+ if (a.find("--ibs2")) par::ibs_run = par::ibs_2only = true;
+
+ if (a.find("--ibs-density"))
+ {
+ par::ibs_run = true;
+ par::ibs_run_density = a.value_double("--ibs-density");
+ }
+
+ if (a.find("--ibs-kb"))
+ {
+ par::ibs_run = true;
+ par::ibs_run_length_kb = a.value_int("--ibs-kb");
+ }
+
+ if (a.find("--ibs-snp"))
+ {
+ par::ibs_run = true;
+ par::ibs_run_length_snps = a.value_int("--ibs-snp");
+ }
+
+// static int ibs_inner_run_length_kb;
+
+// if (a.find("--ibs-join-snp"))
+// {
+// par::ibs_run = true;
+// par::ibs_join_snp = a.value_int("--ibs-join-snp");
+// }
+
+// if (a.find("--ibs-join-snp"))
+// {
+// par::ibs_run = true;
+// par::ibs_join_snp = a.value_int("--ibs-join-snp");
+// }
+
+// if (a.find("--ibs-join-kb"))
+// {
+// par::ibs_run = true;
+// par::ibs_join_kb = a.value_int("--ibs-join-kb") * 1000;
+// }
+
+
+ if (a.find("--ibs-gap"))
+ {
+ par::ibs_run = true;
+ par::ibs_inter_snp_distance = a.value_int("--ibs-gap") * 1000;
+ }
+
+ if (a.find("--ibs-miss"))
+ {
+ par::ibs_run = true;
+ par::ibs_run_missing = a.value_int("--ibs-miss");
+ }
+
+ if (a.find("--ibs-err"))
+ {
+ par::ibs_run = true;
+ par::ibs_run_0 = a.value_int("--ibs-err");
+ }
+
+ if (a.find("--miss-run-snps"))
+ {
+ par::miss_run = true;
+ par::miss_run_length = a.value_int("--miss-run-snps");
+ }
+
+ if (a.find("--miss-run"))
+ {
+ cerr << "\n*** WARNING -- use --miss-run-snps N option instead\n\n";
+ par::miss_run = true;
+ par::miss_run_length_kb = true;
+ par::miss_run_length = a.value_int("--miss-run");
+ }
+
+ if (a.find("--miss-run-level"))
+ {
+ par::miss_run = true;
+ par::miss_run_level = a.value_double("--miss-run-level");
+ }
+
+
+ ///////////////////////////
+ // Segmental sharing tests
+
+// if (a.find("--plink"))
+// {
+// par::plink = true;
+// par::nudge = true;
+// }
+
+ if (a.find("--segment"))
+ {
+ par::plink = true;
+ par::nudge = true;
+ par::segment_output = true;
+ }
+
+ if (a.find("--segment-ibs"))
+ {
+ par::segment_validate = true;
+ }
+
+ if (a.find("--segment-minimal"))
+ {
+ par::plink = true;
+ par::nudge = true;
+ par::segment_output = true;
+ par::segment_minimal = true;
+ }
+
+ if (a.find("--segment-test-individual"))
+ {
+ // Instead of pairwise Case-Case versus non-Case-Case,
+ // just count the number of segs that any one individual
+ // has, and compare count in cases to count in controls
+ par::segment_test_individual = true;
+ if (a.find("--specific"))
+ par::segment_test_specific_segs = true;
+ }
+
+ if (a.find("--segment-test-ignore-discordant"))
+ {
+ // Pairwise Case-Case versus Control-Control
+ par::segment_test_ignore_discordant = true;
+ }
+
+ if (a.find("--segment-test-fisher"))
+ {
+ par::segment_test_fisher = true;
+ }
+
+ if (a.find("--segment-test-2sided"))
+ par::segment_test_1sided = false;
+
+ if (a.find("--segment-group"))
+ {
+ // Use HBD segment match routine, but without
+ // allelic identity option
+ par::segment_overlap = true;
+ par::homo_summary_allelic_match = false;
+ par::fuzzy_homo = 0.95;
+ }
+
+ if (a.find("--segment-spanning"))
+ {
+
+// if (!a.find("--segment-group"))
+// error("Must specify --segment-group for --segment-spanning\n");
+
+ par::segment_overlap = true;
+ par::homo_summary_allelic_match = false;
+ par::fuzzy_homo = 0.95;
+ par::segment_m1 = par::segment_m2 = a.value("--segment-spanning");
+ }
+
+ if (a.find("--segment-from"))
+ {
+ if (!a.find("--segment-group"))
+ error("Must specify --segment-group for --segment-from\n");
+ par::segment_m1 = a.value("--segment-from");
+ }
+
+ if (a.find("--segment-to"))
+ {
+ if (!a.find("--segment-group"))
+ error("Must specify --segment-group for --segment-to\n");
+ par::segment_m2 = a.value("--segment-to");
+ }
+
+ if (a.find("--segment-force"))
+ {
+ if (! ( a.find("--segment-from") && a.find("--segment-to") ) )
+ error("Can only use --segment-force with --segment-from/to \n");
+ par::force_span = true;
+ }
+
+ if (a.find("--segment-match"))
+ {
+ // Use HBD segment match routine, but without
+ // allelic identity option
+ par::segment_overlap = true;
+ par::homo_summary_allelic_match = true;
+ par::fuzzy_homo = a.value_double("--segment-match");
+ }
+
+ if (a.find("--segment-verbose"))
+ par::segment_verbose = true;
+
+ if (a.find("--read-segment"))
+ {
+ if (par::segment_output)
+ error("Cannot specify both --segment and --read-segment\n");
+ par::read_segment_filename = a.value("--read-segment");
+ par::read_segment_file = true;
+ par::plink = true;
+ par::segment_output = true;
+ }
+
+ if (a.find("--read-segment-minimal"))
+ {
+ if (par::segment_output)
+ error("Cannot specify both --segment and --read-segment\n");
+ par::read_segment_filename = a.value("--read-segment-minimal");
+ par::read_segment_file = true;
+ par::plink = true;
+ par::segment_output = true;
+ par::segment_minimal = true;
+ }
+
+ if (a.find("--segment-gap"))
+ {
+ par::segment_output = true;
+ par::segment_inter_snp_distance = a.value_int("--segment-gap");
+ }
+
+ if (a.find("--segment-length"))
+ {
+ // Min length in kb, convert to bp
+ par::segment_length = a.value_int("--segment-length") * 1000;
+ }
+
+ if (a.find("--segment-snp"))
+ {
+ // Min length in SNPs
+ par::segment_snp = a.value_int("--segment-snp");
+ }
+
+ if (a.find("--segment-thresholds"))
+ {
+ par::segment_output = true;
+ vector<string> s = a.value("--segment-thresholds",2);
+ par::segment_threshold_start = getDouble(s[0], "--segment-thresholds");
+ par::segment_threshold_finish = getDouble(s[1], "--segment-thresholds");
+ }
+
+
+ //////////////////////////
+ // Misc, external functions
+
+ if (a.find("--rare-test"))
+ {
+ vector<string> s = a.value("--rare-test",2);
+ par::rarer_maf_threshold = getDouble(s[0], "--rare-test");
+ par::rarer_dist_threshold = getDouble(s[1], "--rare-test") * 1000;
+ par::rare_test = true;
+ }
+
+
+ //////////////////////////
+ // Association testing
+
+ if (a.find("--assoc")) par::assoc_test = true;
+
+ if (a.find("--qt-means"))
+ {
+ if ( ! a.find("--assoc"))
+ error("Can only specify --qt-means with --assoc for a QTL test");
+ par::qt_means = true;
+ }
+
+ if (a.find("--logistic")) par::assoc_test = par::assoc_glm = true;
+
+ if (a.find("--linear")) par::assoc_test = par::assoc_glm = true;
+
+ if (a.find("--no-snp")) par::assoc_glm_without_main_snp = true;
+
+ if (a.find("--vif")) par::vif_threshold = a.value_double("--vif");
+
+ if (a.find("--genotypic"))
+ {
+ if ( a.find("--hethom"))
+ par::twoDFmodel_hethom = true;
+
+ par::twoDFmodel = true;
+ }
+
+ if (a.find("--interaction")) par::simple_interaction = true;
+
+ if (a.find("--parameters"))
+ {
+ string ilist = a.value("--parameters");
+ par::parameter_list = parse2int(ilist);
+ par::glm_user_parameters = true;
+ }
+
+ if (a.find("--tests"))
+ {
+ string ilist = a.value("--tests");
+ par::test_list = parse2int(ilist);
+ par::glm_user_test = true;
+ }
+
+ if (a.find("--condition"))
+ {
+ if (! ( a.find("--linear") ||
+ a.find("--logistic") ||
+ a.find("--proxy-glm") ||
+ a.find("--chap")) )
+ error("Can only use --condition with --linear, --logistic, --proxy-glm or --chap");
+
+ par::conditioning_snp_single = true;
+ par::conditioning_snps = true;
+ par::conditioning_snp_name = a.value("--condition");
+ }
+
+ if (a.find("--condition-list"))
+ {
+ if ( ! ( a.find("--linear") || a.find("--logistic") || a.find("--chap")) )
+ error("Can only use --condition-list with --linear, --logistic or --chap");
+
+ par::conditioning_snps = true;
+ par::conditioning_snps_file = a.value("--condition-list");
+ }
+
+ if (a.find("--test-all")) par::test_full_model = true;
+
+ if (a.find("--sex"))
+ par::glm_sex_effect = true;
+
+ if (a.find("--no-x-sex"))
+ par::glm_no_auto_sex_effect = true;
+
+ if (a.find("--xchr-model"))
+ {
+ par::xchr_model = a.value_int("--xchr-model");
+ if (par::xchr_model < 0 || par::xchr_model >4)
+ error("--xchr-model must have a value between 1 and 4");
+ }
+
+ if (a.find("--dominant"))
+ {
+ if ( ! ( a.find("--linear") || a.find("--logistic")) )
+ error("Can only use --condition-list with --linear or --logistic");
+ if (a.find("--genotypic"))
+ error("Cannot specify --dominant and --genotypic together");
+ par::glm_dominant = true;
+
+ // Only consider autosomes
+ par::xchr_model = 0;
+ }
+
+ if (a.find("--recessive"))
+ {
+ if ( ! ( a.find("--linear") || a.find("--logistic")) )
+ error("Can only use --condition-list with --linear or --logistic");
+ if (a.find("--genotypic"))
+ error("Cannot specify --recessive and --genotypic together");
+ if (a.find("--dominant"))
+ error("Cannot specify --recessive and --dominant together");
+ par::glm_recessive = true;
+
+ // Only consider autosomes
+ par::xchr_model = 0;
+ }
+
+ if (a.find("--qfam"))
+ {
+
+ if (a.find("--within"))
+ error("Cannot specify --within and --qfam together");
+ if (a.find("--family"))
+ error("Cannot specify --family and --qfam together");
+ if (a.find("--genedrop"))
+ error("Cannot specify --genedrop and --qfam together");
+
+ par::QTDT_test = true;
+ par::assoc_test = true;
+ par::QFAM_within1 = true;
+ }
+
+ if (a.find("--qfam-total"))
+ {
+
+ if (a.find("--within"))
+ error("Cannot specify --within and --qfam-total together");
+ if (a.find("--family"))
+ error("Cannot specify --family and --qfam-total together");
+ if (a.find("--genedrop"))
+ error("Cannot specify --genedrop and --qfam-total together");
+
+ par::QTDT_test = true;
+ par::assoc_test = true;
+ par::QFAM_total = true;
+ }
+
+ if (a.find("--qfam-between"))
+ {
+
+ if (a.find("--within"))
+ error("Cannot specify --within and --qfam-between together");
+ if (a.find("--family"))
+ error("Cannot specify --family and --qfam-between together");
+ if (a.find("--genedrop"))
+ error("Cannot specify --genedrop and --qfam-between together");
+
+ par::QTDT_test = true;
+ par::assoc_test = true;
+ par::QFAM_between = true;
+ }
+
+ if (a.find("--qfam-parents"))
+ {
+
+ if (a.find("--within"))
+ error("Cannot specify --within and --qfam-parents together");
+ if (a.find("--family"))
+ error("Cannot specify --family and --qfam-parents together");
+ if (a.find("--genedrop"))
+ error("Cannot specify --genedrop and --qfam-parents together");
+
+ par::QTDT_test = true;
+ par::assoc_test = true;
+ par::QFAM_within2 = true;
+ }
+
+
+ if (a.find("--mh") || a.find("--mh1")) par::CMH_test_1 = par::assoc_test = true;
+ if (a.find("--mh-ord")) par::CMH_test_ORD = par::assoc_test = true;
+ if (a.find("--mh2")) par::CMH_test_2 = par::assoc_test = true;
+
+ if (a.find("--bd"))
+ {
+ par::CMH_test_1 = par::assoc_test = true;
+ par::breslowday = true;
+ }
+
+ if (a.find("--homog")) par::assoc_test = par::OR_homog_test = true;
+
+ if (a.find("--model") ||
+ a.find("--model-dom") ||
+ a.find("--model-rec") ||
+ a.find("--model-trend") ||
+ a.find("--model-gen") )
+ {
+ par::assoc_test = true;
+ par::full_model_assoc = true;
+
+ if (a.find("--model-gen")) par::model_perm_gen = true;
+ else if (a.find("--model-dom")) par::model_perm_dom = true;
+ else if (a.find("--model-rec")) par::model_perm_rec = true;
+ else if (a.find("--model-trend")) par::model_perm_trend = true;
+ else par::model_perm_best = true;
+
+ if ( (par::model_perm_gen || par::model_perm_best )
+ && a.find("--adjust") )
+ error("Cannot specific --model-gen or --model-best with --adjust\n"
+ "Add --model-dom, --model-rec or --model-trend instead");
+
+ }
+
+ if (a.find("--fisher"))
+ {
+ par::assoc_test = true;
+ par::fisher_test = true;
+ par::min_geno_cell = 0;
+ }
+
+ if (a.find("--cell"))
+ par::min_geno_cell = a.value_int("--cell");
+
+ if (a.find("--gxe"))
+ {
+ if (!a.find("--covar"))
+ error ("--covar {filename} must be specified with --gxe");
+ par::assoc_gxe = true;
+ }
+
+ if (a.find("--tdt"))
+ {
+ par::TDT_test = true;
+ par::perm_TDT_basic = true;
+ par::perm_TDT_parent = false;
+ }
+
+ if (a.find("--mating"))
+ {
+ par::TDT_test = true;
+ par::mating_tests = true;
+ }
+
+ if (a.find("--dfam"))
+ {
+ if ( a.find("--family") )
+ error("Cannot specify --family with --dfam\n");
+
+ par::TDT_test = true;
+ par::sibTDT_test = true;
+ }
+
+ if (a.find("--dfam-no-tdt"))
+ par::dfam_tdt = false;
+
+ if (a.find("--dfam-no-sibs"))
+ par::dfam_sibs = false;
+
+ if (a.find("--dfam-no-unrelateds"))
+ par::dfam_unrelateds = false;
+
+ if (a.find("--parentdt1"))
+ {
+ par::TDT_test = true;
+ par::perm_TDT_basic = false;
+ par::perm_TDT_parent = true;
+ }
+
+ if (a.find("--parentdt2"))
+ {
+ par::TDT_test = true;
+ par::perm_TDT_basic = false;
+ par::perm_TDT_parent = false;
+ }
+
+ if (a.find("--poo"))
+ {
+ if (!a.find("--tdt"))
+ error("Parent-of-origin analysis requires --tdt option");
+
+ if (a.find("--hap-tdt"))
+ error("Parent-of-origin analysis not yet implemented for haplotypic TDT");
+
+ par::TDT_test = true;
+ par::parent_of_origin = true;
+
+ // flavour of permutation?
+ if (a.find("--pat"))
+ {
+ par::perm_POO_poo = false;
+ par::perm_POO_pat = true;
+ }
+ else if (a.find("--mat"))
+ {
+ par::perm_POO_poo = false;
+ par::perm_POO_mat = true;
+ }
+ else if (a.find("--best"))
+ {
+ par::perm_POO_poo = false;
+ par::perm_POO_best = true;
+ }
+ }
+
+
+
+
+ if (a.find("--sharing")) par::ibs_sharing_test = true;
+
+ if (a.find("--boot")) par::boot = true;
+
+ if (a.find("--r"))
+ {
+ par::disp_r1 = true;
+ if ( ! a.find("--matrix") )
+ par::disp_r_window = true;
+ }
+
+ if (a.find("--r2"))
+ {
+ par::disp_r2 = true;
+ if ( ! a.find("--matrix") )
+ par::disp_r_window = true;
+ }
+
+ if (a.find("--ld"))
+ {
+ par::calc_SNPSNP_LD = true;
+ vector<string> s = a.value("--ld",2);
+ par::ld_SNP1 = s[0];
+ par::ld_SNP2 = s[1];
+ }
+
+ if (a.find("--ld-window"))
+ {
+ if (a.find("--matrix"))
+ error("Cannot specify --matrix and --ld-window together\n");
+ par::disp_r_window = true;
+ par::disp_r_window_snp = a.value_int("--ld-window");
+ }
+
+ if (a.find("--ld-window-kb"))
+ {
+ if (a.find("--matrix"))
+ error("Cannot specify --matrix and --ld-window together\n");
+ par::disp_r_window = true;
+ // Store in base-pair units
+ par::disp_r_window_kb = a.value_int("--ld-window-kb") * 1000;
+ }
+
+ if (a.find("--ld-window-r2"))
+ {
+ if (a.find("--matrix"))
+ error("Cannot specify --matrix and --ld-window together\n");
+ par::disp_r_window = true;
+ par::disp_r_window_r2 = a.value_double("--ld-window-r2");
+ }
+
+ if (a.find("--indep"))
+ {
+ if (makedata)
+ {
+ string msg = "Cannot specify --indep with --make-bed or --recode\n";
+ msg += " use --extract/--exclude with *.prune.in/*.prune.out files";
+ error(msg);
+ }
+
+ par::prune_ld = true;
+ vector<string> s = a.value("--indep",3);
+ par::prune_ld_win = getInt(s[0].c_str(),"--indep");
+ par::prune_ld_step = getInt(s[1].c_str(),"--indep");
+ par::prune_ld_vif = getDouble(s[2].c_str(),"--indep");
+
+ if (par::prune_ld_win<2)
+ error("Cannot have a window size < 2 for --indep {window} {step} {VIF}");
+
+ if (par::prune_ld_step<1)
+ error("Cannot have a window step < 1 for --indep {window} {step} {VIF}");
+
+ if (par::prune_ld_vif<1)
+ error("Cannot have a VIF threshold < 1 for --indep {window} {step} {VIF}");
+
+ }
+
+ if (a.find("--indep-pairwise"))
+ {
+ if (makedata)
+ {
+ string msg = "Cannot specify --indep with --make-bed or --recode\n";
+ msg += " use --extract/--exclude with *.prune.in/*.prune.out files";
+ error(msg);
+ }
+
+ par::prune_ld = true;
+ par::prune_ld_pairwise = true;
+ vector<string> s = a.value("--indep-pairwise",3);
+ par::prune_ld_win = getInt(s[0].c_str(),"--indep-pairwise");
+ par::prune_ld_step = getInt(s[1].c_str(),"--indep-pairwise");
+ par::prune_ld_r2 = getDouble(s[2].c_str(),"--indep-pairwise");
+
+ if (par::prune_ld_win<2)
+ error("Cannot have a window size < 2 for --indep-pairwise {window} {step} {R^2}");
+ if (par::prune_ld_step<1)
+ error("Cannot have a window step < 1 for --indep-pairwise {window} {step} {R^2}");
+ if (par::prune_ld_r2<0)
+ error("Cannot have an R^2 threshold < 0 for --indep-pairwise {window} {step} {R^2}");
+ if (par::prune_ld_r2>1)
+ error("Cannot have an R^2 threshold > 1 for --indep-pairwise {window} {step} {R^2}");
+
+ // Compare to correlation
+ par::prune_ld_r2 = sqrt(par::prune_ld_r2);
+
+ }
+
+ if (a.find("--T2") || a.find("--t2") )
+ {
+ if ( a.find("--set-test") || a.find("--assoc") )
+ error("Cannot specify --T2 and other association commands");
+ par::set_test = true;
+ par::hotel = true;
+ }
+
+ if (a.find("--set")) {
+ par::setfile = a.value("--set");
+
+ }
+
+ if (a.find("--set-test"))
+ {
+ par::set_test = true;
+ if ( (!a.find("--gene")) && ( a.find("--assoc") || a.find("--tdt") ) &&
+ ( !a.find("--mperm") ) )
+ error("Must use --mperm N with set association tests");
+
+ }
+
+ if (a.find("--set-score"))
+ {
+ par::set_test = true;
+ par::set_score = true;
+ par::set_score_p = a.value_double("--set-score");
+ par::set_max = par::set_min = 1;
+ if ( a.find("--set-min") || a.find("--set-max") )
+ error("Cannot specify --set-min or --set-max with --set-score");
+ }
+
+ if (a.find("--set-p2"))
+ par::set_p2 = true;
+
+ if (a.find("--set-min"))
+ {
+ if (!a.find("--set"))
+ error ("You need to specify --set also");
+ par::set_min = a.value_int("--set-min");
+ }
+
+ if (a.find("--set-max"))
+ {
+ if (!a.find("--set"))
+ error ("You need to specify --set also");
+ par::set_max = a.value_int("--set-max");
+ }
+
+ //////////////////////////
+ // Impute tagged SNPs
+
+ if (a.find("--hap-impute"))
+ {
+ par::impute_tags = true;
+ par::phase_snps = true;
+ // par::hap_missing_geno = 1;
+ }
+
+ if (a.find("--hap-impute-verbose"))
+ {
+ par::impute_tags = true;
+ par::phase_snps = true;
+
+ // par::hap_missing_geno = 1;
+ par::impute_verbose = true;
+ }
+
+ if (a.find("--hap"))
+ {
+ par::phase_snps = true;
+ par::tagfile = a.value("--hap");
+ }
+
+ if (a.find("--hap-window"))
+ {
+ par::phase_snps = true;
+ par::sliding_window = true;
+ par::phase_hap_all = true;
+ if (a.find("--hap"))
+ error("Cannot specify both --hap-window and --hap {file}\n");
+ par::sliding_window_size = a.value_int("--hap-window");
+
+ if (a.find("--hap-window-step"))
+ par::sliding_window_step = a.value_int("--hap-window-step");
+ }
+
+
+ // Using all SNPs, perform a basic sliding window analysis and look
+ // for haplotypically shared regions, i.e. given that we now have
+ // much more highly heterozygous markers
+
+ if (a.find("--homozyg-haplo-track"))
+ {
+ if ( ! a.find("--hap-window"))
+ error("The 'haplo-track' option requires --hap-window {s} to be specified");
+
+ par::segment_haplotrack = true;
+ vector<string> s = a.value("--homozyg-haplo-track",2);
+ par::segment_haplotrack_fid1 = s[0];
+ par::segment_haplotrack_iid1 = s[1];
+ par::segment_haplotrack_fid2 = par::segment_haplotrack_fid1;
+ par::segment_haplotrack_iid2 = par::segment_haplotrack_iid1;
+ }
+
+ if (a.find("--segment-haplo-track"))
+ {
+ if ( ! a.find("--hap-window"))
+ error("The 'haplo-track' option requires --hap-window {s} to be specified");
+
+ par::segment_haplotrack = true;
+ vector<string> s = a.value("--segment-haplo-track",4);
+ par::segment_haplotrack_fid1 = s[0];
+ par::segment_haplotrack_iid1 = s[1];
+ par::segment_haplotrack_fid2 = s[2];
+ par::segment_haplotrack_iid2 = s[3];
+ }
+
+
+
+ /////////////////////////////////
+ // EM Phasing options
+
+ if (a.find("--em-verbose"))
+ par::haplo_plem_verbose = true;
+
+ if (a.find("--em-follow"))
+ {
+ par::haplo_plem_follow = true;
+ vector<string> s = a.value("--em-follow",2);
+ par::haplo_plem_follow_fid = s[0];
+ par::haplo_plem_follow_iid = s[1];
+ }
+
+ if (a.find("--em-window"))
+ par::haplo_plem_window = a.value_int("--em-window");
+
+ if (a.find("--em-overlap"))
+ par::haplo_plem_overlap = a.value_int("--em-overlap");
+
+ if (a.find("--em-window-iter"))
+ par::haplo_plem_iter = a.value_int("--em-window-iter");
+
+ if (a.find("--em-window-prune-phase"))
+ par::haplo_plem_window_prune_phase =
+ a.value_double("--em-window-prune-phase");
+
+ if (a.find("--em-window-likelihood"))
+ par::haplo_plem_likelihood_iter = a.value_int("--em-window-likilood");
+
+ if (a.find("--em-window-tol"))
+ par::haplo_plem_window_tol = a.value_double("--em-window-tol");
+
+ if (a.find("--em-window-prune-haplotype"))
+ {
+ par::haplo_plem_zero_threshold = a.value_double("--em-window-prune-haplotype");
+ if ( par::haplo_plem_zero_threshold == 0 )
+ par::haplo_plem_nonzero_threshold = false;
+ else
+ par::haplo_plem_nonzero_threshold = true;
+ }
+
+
+
+ //////////////////////////
+ // Meta EM parameters
+
+ if (a.find("--em-meta-window"))
+ {
+ par::haplo_plem_meta_window = a.value_int("--em-meta-window");
+ if ( par::haplo_plem_meta_window < 2 )
+ error("--em-meta-window must be >1");
+ }
+
+ if (a.find("--em-meta-prune-haplotype"))
+ par::haplo_plem_meta_prune_haplotype =
+ a.value_double("--em-meta-prune-haplotype");
+
+ if (a.find("--em-meta-prune-phase"))
+ par::haplo_plem_meta_prune_phase =
+ a.value_double("--em-meta-prune-phase");
+
+ if (a.find("--em-meta-iter"))
+ par::haplo_plem_meta_iter = a.value_int("--em-meta-iter");
+
+ if (a.find("--em-meta-likilood"))
+ par::haplo_plem_meta_likelihood_iter = a.value_int("--em-meta-likilood");
+
+ if (a.find("--em-meta-tol"))
+ par::haplo_plem_meta_tol = a.value_double("--em-meta-tol");
+
+
+ /////////////////////////////
+ // Other haplotype options
+
+ if (a.find("--hap-all"))
+ {
+ if (!a.find("--hap"))
+ error("--hap-all modifies --hap, but you have not specified --hap");
+ par::phase_hap_all = true;
+ }
+
+ if (a.find("--whap"))
+ {
+ par::phase_snps = true;
+ par::tagfile = a.value("--whap");
+ par::weighted_mm = true;
+ }
+
+ if (a.find("--hap-pp")) { par::hap_post_prob = a.value_double("--hap-pp"); }
+ if (a.find("--hap-miss")) { par::hap_missing_geno = a.value_double("--hap-miss"); }
+
+ if (a.find("--hap-freq"))
+ {
+ if ( par::impute_tags )
+ error("Cannot specify --hap-impute and --hap-freq\n");
+ par::display_hap_freqs = true;
+ }
+
+ if (a.find("--hap-assoc"))
+ {
+ if ( par::impute_tags )
+ error("Cannot specify --hap-impute and --hap-assoc\n");
+ par::test_hap_CC = true;
+ }
+
+ if (a.find("--chap"))
+ {
+ if ( ! a.find("--hap-snps"))
+ error("--chap requires --hap-snps");
+
+ par::chap_test = true;
+
+ if ( a.find("--null-group") )
+ {
+ par::chap_specified_groups = true;
+ par::chap_model0 = a.value("--null-group");
+ }
+
+ if ( a.find("--alt-group") )
+ {
+ par::chap_specified_groups = true;
+ par::chap_model1 = a.value("--alt-group");
+ }
+
+ if ( a.find("--null-snp") )
+ {
+ if ( par::chap_specified_groups )
+ error("Cannot specify SNPs and groups for --chap tests");
+ par::chap_specified_snps = true;
+ par::chap_model0 = a.value("--null-snp");
+ }
+
+ if ( a.find("--alt-snp") )
+ {
+ if ( par::chap_specified_groups )
+ error("Cannot specify SNPs and groups for --chap tests");
+ par::chap_specified_snps = true;
+ par::chap_model1 = a.value("--alt-snp");
+ }
+
+ if ( a.find("--control") )
+ {
+ if ( par::chap_specified_groups ||
+ par::chap_specified_snps )
+ error("Cannot use --control and other --chap options");
+ par::chap_sole_variant = true;
+ par::chap_entity = a.value("--control");
+ }
+ else if ( a.find("--independent-effect") )
+ {
+ if ( par::chap_specified_groups ||
+ par::chap_specified_snps )
+ error("Cannot use --independent-effect and other --chap options");
+ par::chap_independent_effect = true;
+ par::chap_entity = a.value("--independent-effect");
+ }
+ else if ( a.find("--specific-haplotype") )
+ {
+ if ( par::chap_specified_groups ||
+ par::chap_specified_snps )
+ error("Cannot use --specific-haplotype and other --chap options");
+ par::chap_haplotype_specific = true;
+ par::chap_entity = a.value("--specific-haplotype");
+ }
+
+ if ( a.find("--test-snp") )
+ {
+ if ( ! ( a.find("--condition") || a.find("--condition-list") ) )
+ error("You must first specify conditioning SNPs");
+ par::chap_drop_snps = true;
+ par::chap_drop_snps_list = a.value("--test-snp");
+ }
+
+ if ( a.find("--each-vs-others") )
+ par::chap_add_grp_specifics = true;
+
+ if ( a.find("--each-versus-others") )
+ par::chap_add_grp_specifics = true;
+
+ }
+
+
+ if (a.find("--hap-snps"))
+ {
+ if ( a.find("--hap") || a.find("--hap-window"))
+ error("Cannot specify --hap-snps with --hap or --hap-window");
+ par::phase_snps = true;
+ par::hap_specific_snps = true;
+ par::hap_specific_snps_list = a.value("--hap-snps");
+ par::phase_hap_all = true;
+ }
+
+ if (a.find("--hap-tdt"))
+ {
+ if ( par::impute_tags )
+ error("Cannot specify --hap-impute and --hap-tdt\n");
+ par::test_hap_TDT = true;
+ }
+
+ if (a.find("--hap-phase"))
+ {
+ if ( par::impute_tags )
+ error("Cannot specify --hap-impute and --hap-phase\n");
+ par::display_phase_probs = true;
+ }
+
+ if (a.find("--hap-phase-wide"))
+ {
+ if ( par::impute_tags )
+ error("Cannot specify --hap-impute and --hap-phase-wide\n");
+ par::display_phase_probs = par::display_phase_probs_wide = true;
+ }
+
+ if (a.find("--hap-only")) { par::test_hap_only = true; }
+
+ if (a.find("--hap-max-phase"))
+ {
+ par::hap_max_nf_phases = a.value_int("--hap-max-phase");
+ if ( par::hap_max_nf_phases < 1 ) error("Invalid --hap-max-phase value (should be >0)");
+ }
+
+ if (a.find("--hap-min-phase-prob"))
+ {
+ par::hap_min_phase_prob = a.value_double("--hap-min-phase-prob");
+ if ( par::hap_min_phase_prob < 0 || par::hap_min_phase_prob > 1 )
+ error("Invalid --hap-min-phase-prob value, should be between 0 and 1");
+ }
+
+
+
+
+ //////////////////////////
+ // Epistasis
+
+ if (a.find("--epistasis"))
+ {
+ if (a.find("--set"))
+ par::set_test = true;
+ par::epistasis = true;
+ }
+
+ // Use odds-ratio test as default fast-epistasis method
+ if (a.find("--fast-epistasis"))
+ {
+ par::fast_epistasis = par::epistasis = true;
+ if ( a.find("--set"))
+ par::set_test = true;
+ }
+
+ if (a.find("--case-only"))
+ {
+ par::epistasis = true;
+ par::fast_epistasis = true;
+ par::epi_caseonly = true;
+ if (a.find("--epistasis"))
+ error("--case-only requires --fast-epistasis");
+ }
+
+ if (a.find("--gap"))
+ {
+ if (!a.find("--case-only")) error("--gap option only valid when --caseonly is in effect");
+ par::epi_caseonly_kb_gap = a.value_double("--gap");
+ }
+ if (a.find("--nop")) par::epi_quickscan = true;
+ if (a.find("--set-by-all")) par::set_by_set = false;
+ if (a.find("--epi1")) par::epi_alpha1 = a.value_double("--epi1");
+ if (par::epi_alpha1 > 1) par::epi_filter = false;
+ if (a.find("--epi2")) par::epi_alpha2 = a.value_double("--epi2");
+ if (a.find("--twolocus"))
+ {
+ par::list_twolocus = true;
+ vector<string> s = a.value("--twolocus",2);
+ par::twolocus_snp1 = s[0];
+ par::twolocus_snp2 = s[1];
+ }
+
+ if (a.find("--genepi"))
+ {
+ par::set_test = true;
+ par::epi_genebased = true;
+ }
+
+
+
+ ///////////////////////////////////////
+ // Gene X environment / heterogeneity
+
+
+
+
+ //////////////////////////////////////
+ // File output options
+
+ if (a.find("--freq"))
+ {
+ par::af_write = true;
+ // and unless otherwise specified, set GENO = 1 and MAF = 0
+// if (!a.find("--maf")) par::min_af = 0.0;
+// if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
+// if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
+
+ // display MAF counts instead of freqs?
+ if (a.find("--counts"))
+ {
+ if (a.find("--within"))
+ error("Cannot specify --counts and --within together\n");
+ par::af_count = true;
+ }
+ }
+
+
+
+ if (a.find("--nonfounders")) par::summ_nonfounders = true;
+
+ if (a.find("--make-founders")) par::make_founders = true;
+
+ if (a.find("--allow-no-sex")) par::ignore_missing_sex = true;
+
+ if (a.find("--read-freq"))
+ {
+ par::af_read = true;
+ par::af_file = a.value("--read-freq");
+ }
+
+ if (a.find("--read-genome"))
+ {
+ par::ibd_read = true;
+ par::ibd_file = a.value("--read-genome");
+ }
+
+ if (a.find("--list"))
+ {
+ par::list_by_allele = true;
+ // and unless otherwise specified, set GENO = 1 and MAF = 0
+ if (!a.find("--maf")) par::min_af = 0.0;
+ if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
+ if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
+ }
+
+ if (a.find("--report"))
+ {
+ par::indiv_report = true;
+ vector<string> s = a.value("--report",2);
+ par::indiv_report_fid = s[0];
+ par::indiv_report_iid = s[1];
+ if (!a.find("--maf")) par::min_af = 0.0;
+ if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
+ if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
+ }
+
+ if (a.find("--plist"))
+ {
+ par::plist = true;
+ // and unless otherwise specified, set GENO = 1 and MAF = 0
+ if (!a.find("--maf")) par::min_af = 0.0;
+ if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
+ if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
+
+ vector<string> s = a.value("--plist",4);
+ par::plist_fid1 = s[0];
+ par::plist_iid1 = s[1];
+ par::plist_fid2 = s[2];
+ par::plist_iid2 = s[3];
+
+ }
+
+
+ if (a.find("--fix-allele"))
+ {
+ if (! (a.find("--recodeAD") || a.find("--recodeA")) )
+ error("--fix-allele option only works with --recodeA/--recodeAD options");
+ else par::recode_AD_fixed = true;
+ }
+
+ if (a.find("--merge"))
+ {
+ if (a.find("--merge-list") || a.find("--bmerge") )
+ error("Can only specify --merge or --bmerge or --merge-list");
+
+ par::merge_data = true;
+ vector<string> s = a.value("--merge",2);
+ par::merge_pedfile = s[0];
+ par::merge_mapfile = s[1];
+ }
+
+ if (a.find("--bmerge"))
+ {
+ if (a.find("--merge-list") || a.find("--merge") )
+ error("Can only specify --bmerge or --merge or --merge-list");
+
+ par::merge_data = true;
+ par::merge_binary = true;
+
+ vector<string> s = a.value("--bmerge",3);
+ par::merge_bedfile = s[0];
+ par::merge_bimfile = s[1];
+ par::merge_famfile = s[2];
+
+ }
+
+ if (a.find("--merge-list"))
+ {
+ if (a.find("--merge") || a.find("--bmerge") )
+ error("Can only specify --merge or --bmerge or --merge-list");
+
+ par::merge_data = true;
+ par::merge_list = true;
+ par::merge_list_filename = a.value("--merge-list");
+ }
+
+ if (a.find("--merge-mode"))
+ {
+ if (! (a.find("--merge") || a.find("--merge-list") || a.find("--bmerge") ) )
+ error("Can only specify --merge-mode when --merge or --bmerge or --merge-list is used");
+ par::merge_mode = a.value_int("--merge-mode");
+ if (par::merge_mode < 1 || par::merge_mode > 7)
+ error("--merge-mode N, where N must be between 1 and 7");
+
+ if (par::merge_list && par::merge_mode >= 6)
+ error("Can not specify --merge-mode 6/7 (diff) and --merge-list");
+ }
+
+
+ if (a.find("--flip"))
+ {
+ par::flip_strand = true;
+ par::flip_file = a.value("--flip");
+ }
+
+ // Remove these individuals from a file
+ if (a.find("--remove"))
+ {
+ par::remove_indiv = true;
+ par::remove_indiv_list = a.value("--remove");
+ }
+
+ // Keep only these individuals from a file
+ if (a.find("--keep"))
+ {
+ par::keep_indiv = true;
+ par::keep_indiv_list = a.value("--keep");
+ }
+
+ // By default, remove then keep
+ if (a.find("--keep-before-remove"))
+ par::remove_before_keep = false;
+
+ // Extract only these SNPs from a file
+ if (a.find("--extract"))
+ {
+ par::extract_set = true;
+ par::extract_file = a.value("--extract");
+ }
+
+ // Exclude these SNPs from a file
+ if (a.find("--exclude"))
+ {
+ par::exclude_set = true;
+ par::exclude_file = a.value("--exclude");
+ }
+
+ // Select a set of SNPs based on different physical
+ // positions: this modifies the behavior of --extract
+ // and --exclude
+
+ if (a.find("--range"))
+ {
+ if ( ! ( a.find("--extract") || a.find("--exclude") ) )
+ error("Must specify --extract or --exclude with --range");
+ par::snp_range_list = true;
+ }
+
+ // By default, extract before exclude
+ if (a.find("--exclude-before-extract"))
+ par::extract_before_exclude = false;
+
+
+ // Extract SNPs in a certain GENE (specified in the SET file)
+ if (a.find("--gene"))
+ {
+ if (!a.find("--set"))
+ error("You must also specify --set {file} with --gene {gene}\n");
+ par::extract_set = true;
+ par::dump_gene = true;
+ par::dump_genename = a.value("--gene");
+ }
+
+
+ if (a.find("--ind-major"))
+ {
+ if (!a.find("--make-bed"))
+ error("You can only specify --ind-major when --make-bed is in effect");
+ par::out_SNP_major = false;
+ }
+
+
+ if (a.find("--include")) par::inc_write = true;
+
+ if (a.find("--read-include"))
+ {
+ par::inc_read = true;
+ par::inc_file = a.value("--read-include");
+ }
+
+
+ ///////////////////////////
+ // Plink phenotype definition
+// // Squared differences (quantitative trait)
+// if (a.find("--SD")) { par::SD = true; par::CP = false; }
+// // Cross-product (quantitative trait)
+// if (a.find("--CP")) { par::CP = true; par::SD = false; }
+// // Fix the prevalence of a binary trait
+// if (a.find("--prev")) {
+// par::fix_prev=true;
+// par::fixed_prev = a.value_double("--prev");
+// }
+// // Remove unaffected pairs from analysis
+// // i.e. so only discordant versus affected concordant
+// // i.e. need at least 1 affected "1A"
+// if (a.find("--1aff")) { par::remove_unaffected_pairs = true; }
+
+
+
+ ///////////////////////////
+ // Some basic filters
+
+ if (a.find("--prune"))
+ par::ignore_phenotypes = false;
+ if (a.find("--filter-cases"))
+ par::filter_cases = true;
+ if (a.find("--filter-controls"))
+ par::filter_controls = true;
+ if (a.find("--filter-females"))
+ par::filter_females = true;
+ if (a.find("--filter-males"))
+ par::filter_males = true;
+ if (a.find("--filter-founders"))
+ par::filter_founders = true;
+ if (a.find("--filter-nonfounders"))
+ par::filter_nonfounders = true;
+
+ if (a.find("--filter-cases") && a.find("--filter-controls"))
+ error("Cannot filter on both cases and controls");
+ if (a.find("--filter-males") && a.find("--filter-females"))
+ error("Cannot filter on both males and females");
+ if (a.find("--filter-founders") && a.find("--filter-nonfounders"))
+ error("Cannot filter on both founders and nonfounders");
+
+
+ /////////////////////////////////
+ // Basic input file processing
+
+ if (a.find("--dummy"))
+ {
+ vector<string> s = a.value("--dummy",2);
+ par::dummy = true;
+ par::dummy_nind = getInt(s[0].c_str(),"--dummy");
+ par::dummy_nsnp = getInt(s[1].c_str(),"--dummy");
+ }
+
+ if (a.find("--simulate"))
+ {
+ par::simul = true;
+ par::simul_file = a.value("--simulate");
+ }
+
+ if (a.find("--simulate-ncases"))
+ par::simul_ncases = a.value_int("--simulate-ncases");
+
+ if (a.find("--simulate-ncontrols"))
+ par::simul_ncontrols = a.value_int("--simulate-ncontrols");
+
+ if (a.find("--simulate-prevalence"))
+ par::simul_prevalence = a.value_double("--simulate-prevalence");
+
+ if (a.find("--file"))
+ {
+ if (a.find("--map") || a.find("--ped") )
+ error("Use either --file {root} OR --ped {name} --map {name}");
+ par::read_ped = true;
+ par::fileroot = a.value("--file");
+ par::pedfile = par::fileroot + ".ped";
+ par::mapfile = par::fileroot + ".map";
+ }
+
+
+ if (a.find("--tfile"))
+ {
+ if (a.find("--tfam") || a.find("--tped") )
+ error("Use either --tfile {root} OR --tped {name} --tfam {name}");
+
+ par::tfile_input = true;
+ par::fileroot = a.value("--tfile");
+ par::tpedfile = par::fileroot + ".tped";
+ par::tfamfile = par::fileroot + ".tfam";
+ }
+
+ if (a.find("--tped"))
+ {
+ par::tpedfile = a.value("--tped");
+ if (par::tpedfile == "-")
+ par::ped_from_stdin = true;
+ par::tfile_input = true;
+ }
+
+ if (a.find("--tfam"))
+ {
+ par::tfamfile = a.value("--tfam");
+ par::tfile_input = true;
+ }
+
+ // Long file format
+
+ if (a.find("--lfile"))
+ {
+ if (a.find("--fam") || a.find("--lgen") || a.find("--map") )
+ error("Use either --lfile {root} OR --lgen {name} --map (name) --fam {name}");
+
+ par::lfile_input = true;
+ par::fileroot = a.value("--lfile");
+ par::lpedfile = par::fileroot + ".lgen";
+ par::lfamfile = par::fileroot + ".fam";
+ par::mapfile = par::fileroot + ".map";
+ }
+
+ if (a.find("--lgen"))
+ {
+ par::lpedfile = a.value("--lgen");
+ if (par::lpedfile == "-")
+ par::ped_from_stdin = true;
+ par::lfile_input = true;
+ }
+
+
+ /////////////////////
+ // Generic variant
+
+ if (a.find("--gfile"))
+ {
+ // Analyse generic variants
+ par::gvar = true;
+ // Primarily load these; this will be changed
+ // if another load is not specified
+ par::load_gvar = false;
+ par::fileroot = a.value("--gfile");
+ par::gmapfile = par::fileroot + ".map";
+ par::gfamfile = par::fileroot + ".fam";
+
+ if (a.find("--gvar-verbose"))
+ par::gvar_verbose_association = true;
+
+ if (a.find("--gvar-all"))
+ par::gvar_include_all_variants = true;
+ }
+
+ // Text-file modifiers
+
+ if (a.find("--map3"))
+ par::map3 = true;
+
+ if (a.find("--no-sex"))
+ par::ped_skip_sex = true;
+
+ if (a.find("--no-parents"))
+ par::ped_skip_parents = true;
+
+ if (a.find("--no-fid"))
+ par::ped_skip_fid = true;
+
+ if (a.find("--no-pheno"))
+ par::ped_skip_pheno = true;
+
+ if (a.find("--liability"))
+ par::liability = true;
+
+ if (a.find("--bfile"))
+ {
+ if (a.find("--bim") || a.find("--bed") || a.find("--fam") )
+ error("Use either --bfile {root} OR --bed {name} --bim {name} --fam {name}");
+
+ par::read_bitfile = true;
+ par::fileroot = a.value("--bfile");
+ par::bitfilename = par::pedfile = par::fileroot + ".bed";
+ par::bitfilename_map = par::mapfile = par::fileroot + ".bim";
+ par::bitfilename_fam = par::fileroot + ".fam";
+ }
+
+ if (a.find("--bfile-faster"))
+ par::fast_binary = true;
+
+ if (a.find("--ped"))
+ {
+ par::read_ped = true;
+ par::pedfile = a.value("--ped");
+ if (par::pedfile == "-")
+ par::ped_from_stdin = true;
+ }
+
+ if (a.find("--map")) par::mapfile = a.value("--map");
+
+
+ if (a.find("--bed"))
+ {
+ par::read_bitfile = true;
+ par::bitfilename = par::pedfile = a.value("--bed");
+ }
+
+ if (a.find("--fam"))
+ {
+ par::read_bitfile = true;
+ par::bitfilename_fam = a.value("--fam");
+ }
+
+ if (a.find("--bim"))
+ {
+ par::read_bitfile = true;
+ par::bitfilename_map = par::mapfile = a.value("--bim");
+ }
+
+
+ // Single phenotype in file specified
+ if (a.find("--pheno"))
+ {
+ par::pheno_file = true;
+ par::pheno_filename = a.value("--pheno");
+ }
+
+ // Binary 0/1 coding instead of 1/2
+ if (a.find("--1"))
+ par::coding01 = true;
+
+
+ // Multiple phenotypes in a file specified
+ if (a.find("--mpheno"))
+ {
+ if (!a.find("--pheno"))
+ error("You need to specify --pheno {file} with --mpheno {N}");
+ par::mult_pheno = a.value_int("--mpheno");
+ }
+
+ // Select phenotype ny name
+ if (a.find("--pheno-name"))
+ {
+ if (!a.find("--pheno"))
+ error("You need to specify --pheno {file} with --pheno-name {name}");
+ if (a.find("--mpheno"))
+ error("You cannot specify --mpheno and --pheno-name together");
+
+ par::name_pheno = a.value("--pheno-name");
+
+ }
+
+ if (a.find("--values"))
+ {
+ par::number_list_string = a.value("--values");
+ }
+
+ if (a.find("--valueless"))
+ {
+ par::number_list_string = a.value("--valueless");
+ par::number_list_positive = false;
+ }
+
+ // Loop over all phenotypes
+ if (a.find("--all-pheno"))
+ {
+ if (!a.find("--pheno"))
+ error("You need to specify --pheno {file} with --all-pheno");
+
+ if (a.find("--mpheno"))
+ error("You cannot specify --mpheno {N} with --all-pheno");
+ par::mult_pheno = 1;
+ par::all_pheno = true;
+
+ }
+
+
+ // Single covariate in file specified
+ if (a.find("--covar"))
+ {
+ par::covar_file = true;
+ par::covar_filename = a.value("--covar");
+
+ // Multiple covariates in file specified, read all of them
+ if ( a.find("--linear") ||
+ a.find("--logistic") ||
+ a.find("--R") ||
+ a.find("--chap") ||
+ a.find("--proxy-glm") )
+ par::clist = true;
+
+ // Request to dump back out all covariates
+ if (makedata)
+ {
+ par::clist = true;
+ if (a.find("--dummy-coding"))
+ par::dump_covar_dummy_coding = true;
+ }
+
+ if (a.find("--with-phenotype"))
+ par::dump_covar_with_phenotype = true;
+
+ }
+
+
+ // Multiple covariates in file specified, select one
+ if (a.find("--mcovar"))
+ {
+ if (!a.find("--covar"))
+ error("You need to specify --covar {file} with --mcovar {N}");
+ par::mult_covar = a.value_int("--mcovar");
+ }
+
+ // Selection fields for covariates
+ if (a.find("--covar-number"))
+ {
+ par::clist_selection = par::clist_selection_number = true;
+ par::clist_selection_string = a.value("--covar-number");
+ }
+
+ if ( a.find("--covar-name") )
+ {
+ par::clist_selection = par::clist_selection_name = true;
+ par::clist_selection_string = a.value("--covar-name");
+ }
+
+ // Request to dump back out all covariates
+ if (a.find("--write-covar"))
+ {
+ if (makedata)
+ error("No need to specify --write-covar separately");
+ if (!a.find("--covar"))
+ error("You must specify a --covar {file} with --write-covar");
+
+ if (a.find("--with-phenotype"))
+ par::dump_covar_with_phenotype = true;
+
+ if (a.find("--dummy-coding"))
+ par::dump_covar_dummy_coding = true;
+
+ par::dump_covar = true;
+ par::clist = true;
+ }
+
+ if (a.find("--write-snplist"))
+ par::write_snplist = true;
+
+ if (a.find("--update-map"))
+ {
+ if (a.find("--update-cm"))
+ par::update_cm = true;
+ par::update_map = true;
+ par::update_mapfile = a.value("--update-map");
+ }
+
+ // Examine only a subset of the data?
+ if (a.find("--filter"))
+ {
+ par::filter_on_covar = true;
+
+ vector<string> s = a.value("--filter",2);
+
+ par::filter_filename = s[0];
+ par::filter_value = getInt(s[1].c_str(),"--filter");
+
+ }
+
+ if (a.find("--mfilter"))
+ {
+ if (!a.find("--filter"))
+ error("You can only specify --mfilter with --filter\n");
+ par::mult_filter = a.value_int("--mfilter");
+ }
+
+ // Different species other than human?
+ // i.e. alters chromosome definitions
+
+ if (a.find("--dog")) par::species_dog = true;
+ if (a.find("--cow")) par::species_cow = true;
+ if (a.find("--sheep")) par::species_sheep = true;
+ if (a.find("--horse")) par::species_horse = true;
+
+ //////////////////////////////////
+ // Multipoint and singlepoint
+
+ // if (a.find("--singlepoint")) par::singlepoint = true;
+
+ if (a.find("--fringe"))
+ {
+ par::singlepoint = false;
+ par::fringe = a.value_double("--fringe");
+ }
+
+ if (a.find("--grid"))
+ {
+ par::singlepoint = false;
+ par::grid = a.value_double("--grid");
+ par::inter_grid = 0;
+ }
+
+ if (a.find("--step"))
+ {
+ par::singlepoint = false;
+ par::inter_grid = a.value_int("--step");
+ }
+
+ if (a.find("--cm")) par::cm_map = true;
+
+ if (a.find("--ci"))
+ {
+ par::display_ci = true;
+ par::ci_level = a.value_double("--ci");
+ if ( par::ci_level < 0.01 || par::ci_level >= 1 )
+ error("CI level (--ci) must be between 0 and 1\n");
+ par::ci_zt = ltqnorm( 1 - (1 - par::ci_level) / 2 );
+ }
+
+ if (a.find("--pfilter"))
+ {
+ par::pfilter = true;
+ par::pfvalue = a.value_double("--pfilter");
+ }
+
+ if (a.find("--clump"))
+ {
+ par::clumpld = true;
+ par::clumpld_results = a.value("--clump");
+
+ if (a.find("--clump-field"))
+ par::clumpld_column = a.value("--clump-field");
+
+ if (a.find("--clump-verbose"))
+ par::clumpld_verbose = true;
+
+ if (a.find("--clump-p1"))
+ par::clumpld_p1 = a.value_double("--clump-p1");
+
+ if (a.find("--clump-p2"))
+ par::clumpld_p2 = a.value_double("--clump-p2");
+
+ if (a.find("--clump-r2"))
+ par::clumpld_r2 = a.value_double("--clump-r2");
+
+ if (a.find("--clump-kb"))
+ par::clumpld_kb = a.value_int("--clump-kb") * 1000;
+
+ if (a.find("--clump-index-first"))
+ par::clumpld_index1 = true;
+
+ if (a.find("--clump-replicate"))
+ par::clumpld_only_show_replications = true;
+
+ if (a.find("--clump-annotate"))
+ {
+ par::clumpld_annot = true;
+ par::clumpld_annot_fields = a.value("--clump-annotate");
+ }
+
+ if (a.find("--clump-allow-overlap"))
+ par::clumpld_indep = false;
+
+ }
+
+
+ if (a.find("--adjust"))
+ { par::multtest = true; }
+
+ if (a.find("--log10"))
+ { par::logscale = true; }
+
+ if (a.find("--qq-plot"))
+ { par::qq_plot = true; }
+
+ if (a.find("--lambda"))
+ {
+ par::fix_lambda = true;
+ par::lambda = a.value_double("--lambda");
+ if ( par::lambda < 1 )
+ par::lambda = 1;
+ }
+
+ if (a.find("--gc"))
+ {
+ if (!a.find("--adjust"))
+ error("Must specify --adjust to use --gc");
+ par::use_GC = true;
+ }
+
+ ///////////////////////
+ // Permutation options
+
+
+
+ // Use permutations (default is adaptive)
+ if (a.find("--perm"))
+ {
+ par::permute = true;
+ }
+
+ // Return counts not p-values (i.e. number of times exceeded)
+ if ( a.find("--perm-count") )
+ {
+ par::perm_count = true;
+ }
+
+ // Specify parameters for adaptive permutation
+ if (a.find("--aperm"))
+ {
+ if (a.find("--segment"))
+ error("--segment options requires --pperm option");
+
+ if (a.find("--set"))
+ error("Cannot use --aperm with --set option (use --mperm N instead)");
+
+ par::permute = true;
+ par::adaptive_perm = true;
+ vector<string> s = a.value("--aperm",6);
+
+ par::adaptive_min = getInt(s[0].c_str(),"--aperm");
+ par::adaptive_max = getInt(s[1].c_str(),"--aperm");
+ par::adaptive_alpha = getDouble(s[2].c_str(),"--aperm");
+ par::adaptive_ci = getDouble(s[3].c_str(),"--aperm");
+ par::adaptive_interval = getInt(s[4].c_str(),"--aperm");
+ par::adaptive_interval2 = getDouble(s[5].c_str(),"--aperm");
+ }
+
+
+ // Non-adaptive (maxT) permutations
+ if (a.find("--mperm"))
+ {
+ par::permute = true;
+ par::adaptive_perm = false;
+ par::replicates = a.value_int("--mperm");
+
+ if (a.find("--mperm-save"))
+ par::mperm_save_best = true;
+ else if (a.find("--mperm-save-all"))
+ par::mperm_save_all = true;
+
+
+ // But make special fix for QFAM tests
+ if ( par::QTDT_test )
+ {
+ par::adaptive_perm = true;
+ par::adaptive_min = par::replicates;
+ par::adaptive_max = par::replicates;
+ par::adaptive_alpha = 0;
+ par::adaptive_ci = 0;
+ par::adaptive_interval = par::replicates+1;
+ par::adaptive_interval2 = 0;
+ par::QFAM_adaptive = true;
+ }
+ }
+
+
+ if (a.find("--rank"))
+ {
+ if (! a.find("--mperm") )
+ error("--rank requires --mperm to be specified");
+ par::mperm_rank = true;
+ }
+
+ // PLINK permutations
+
+ if (a.find("--pperm"))
+ {
+ if (! ( a.find("--segment") || a.find("--read-segment") ) )
+ error("--pperm options requires --segment or --read-segment option");
+ par::permute = true;
+ par::adaptive_perm = false;
+ par::replicates = a.value_int("--pperm");
+ }
+
+
+ if (a.find("--p2"))
+ {
+ if ((!a.find("--perm"))
+ && (!a.find("--mperm"))
+ && (!a.find("--aperm")))
+ error("--p2 option also requires--perm, --aperm or --mperm");
+ else if (!a.find("--assoc"))
+ error("The --p2 option can only be specified with --assoc");
+ else par::assoc_test_alt_perm = true;
+ }
+
+
+ /////////////////////
+ // Gene-dropping
+
+ if (a.find("--genedrop"))
+ par::perm_genedrop = par::permute = true;
+
+ if (a.find("--swap-sibs"))
+ {
+ if (!a.find("--genedrop"))
+ error("--swap-sibs only makes sense when --genedrop specified");
+ par::perm_genedrop_sibships = true;
+ par::perm_genedrop_and_swap = true;
+ }
+
+ if (a.find("--swap-parents"))
+ {
+ if (!a.find("--genedrop"))
+ error("--swap-parents only makes sense when --genedrop specified");
+ par::perm_genedrop_parents = true;
+ par::perm_genedrop_and_swap = true;
+ }
+
+ if (a.find("--swap-unrel"))
+ {
+ if (!a.find("--genedrop"))
+ error("--swap-unrel only makes sense when --genedrop specified");
+
+ par::perm_genedrop_unrel = true;
+ par::perm_genedrop_and_swap = true;
+ }
+
+
+ ///////////////////////
+ // Misc. options
+
+ if (a.find("--missing-genotype"))
+ par::missing_genotype = a.value("--missing-genotype");
+
+ if (a.find("--missing-phenotype"))
+ par::missing_phenotype = a.value("--missing-phenotype");
+
+ if (a.find("--FIX")) {
+ par::FIXED = par::FIXED_p = true;
+ vector<string> p = a.value("--FIX",4);
+
+ par::FIX_IBD.z0 = getDouble(p[0].c_str(),"--FIX");
+ par::FIX_IBD.z1 = getDouble(p[1].c_str(),"--FIX");
+ par::FIX_IBD.z2 = getDouble(p[2].c_str(),"--FIX");
+ par::FIX_p = getDouble(p[3].c_str(),"--FIX");
+ cout << "Fixing Z0, Z1, Z2 and p to "
+ << par::FIX_IBD.z0 << " "
+ << par::FIX_IBD.z1 << " "
+ << par::FIX_IBD.z2 << " "
+ << par::FIX_p << "\n(p must refer to '1' allele in '1/2' genotype)\n";
+ }
+
+
+ if (a.find("--fix-ibd")) {
+ par::FIXED = true;
+ vector<string> p = a.value("--fix-ibd",3);
+
+ par::FIX_IBD.z0 = getDouble(p[0].c_str(),"--fix-ibd");
+ par::FIX_IBD.z1 = getDouble(p[1].c_str(),"--fix-ibd");
+ par::FIX_IBD.z2 = getDouble(p[2].c_str(),"--fib-ibd");
+
+ }
+
+
+ if (a.find("--batch"))
+ par::BATCH_SIZE = a.value_int("--batch");
+
+ if (a.find("--min"))
+ par::MIN_PIHAT = a.value_double("--min");
+
+ if (a.find("--max"))
+ par::MAX_PIHAT = a.value_double("--max");
+
+ if (a.find("--all-pairs"))
+ {
+ if (a.find("--min"))
+ error("Cannot specify --min and --all-pairs\n");
+ par::include_all_pairs = true;
+ }
+
+
+// if (a.find("--lock"))
+// { par::locked = true; }
+
+// if (a.find("--unlock"))
+// { par::locked = false; }
+
+
+ ///////////////////////////////
+ // Basic filters
+
+ if (a.find("--all"))
+ {
+ par::min_af = 0.0;
+ par::MAX_GENO_MISSING = 1;
+ par::MAX_IND_MISSING = 1;
+ }
+
+ if (a.find("--geno"))
+ par::MAX_GENO_MISSING = a.value_double("--geno");
+
+ if (a.find("--mind"))
+ par::MAX_IND_MISSING = a.value_double("--mind");
+
+ if (a.find("--maf"))
+ par::min_af = a.value_double("--maf");
+
+ if (a.find("--max-maf"))
+ {
+ par::max_af = a.value_double("--max-maf");
+ if (par::max_af < par::min_af)
+ error("Cannot set --max-maf less than --maf\n");
+ }
+
+ if (a.find("--mhf"))
+ par::min_hf = a.value_double("--mhf");
+
+ if (a.find("--max-mhf"))
+ {
+ par::max_hf = a.value_double("--max-mhf");
+ if (par::max_hf < par::min_hf)
+ error("Cannot set --max-mhf less than --mhf\n");
+ }
+
+ if (a.find("--hwe"))
+ {
+ par::HWD_test = true;
+ par::HWD_limit = a.value_double("--hwe");
+ }
+
+ if (a.find("--hwe2"))
+ {
+ par::HWD_test = true;
+ par::HWD_standard = true;
+ par::HWD_limit = a.value_double("--hwe2");
+ }
+
+ if (a.find("--hwe-all"))
+ {
+ par::HWD_filter_on_all = true;
+ }
+
+
+ if (a.find("--me"))
+ {
+ par::MENDEL_test = true;
+ vector<string> s = a.value("--me",2);
+ par::MENDEL_ind = getDouble(s[0],"--me");
+ par::MENDEL_snp = getDouble(s[1],"--me");
+ }
+
+
+
+ //////////////////////////////////
+ // IBS clustering
+
+ if (a.find("--cluster"))
+ {
+
+ par::cluster = true;
+
+ if (a.find("--within"))
+ par::force_initial_cluster = true;
+
+ }
+
+ if (a.find("--euclidean"))
+ {
+ if (!a.find("--cluster"))
+ error("Cannot specify --euclidean without --cluster");
+ par::cluster_euclidean = true;
+ }
+
+
+
+ if (a.find("--pick1"))
+ {
+ par::cluster_selcon = true;
+ par::cluster_selcon_file = a.value("--pick1");
+ }
+
+ if (a.find("--cluster-missing"))
+ {
+ par::cluster = true;
+ par::cluster_missing = true;
+ par::matrix = true;
+
+ if (!a.find("--maf")) par::min_af = 0.0;
+ if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
+ if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
+
+ par::merge_p = 0;
+ if (a.find("--ppc"))
+ error("Cannot specify --ppc with --cluster-missing");
+ }
+
+ if (a.find("--K"))
+ {
+ if (!a.find("--cluster"))
+ error("Must specify --cluster also if --K used");
+ par::max_cluster_N = a.value_int("--K");
+ }
+
+ if (a.find("--neighbour"))
+ {
+ vector<string> s = a.value("--neighbour",2);
+ par::min_neighbour = getInt(s[0].c_str(),"--neighbour");
+ par::max_neighbour = getInt(s[1].c_str(),"--neighbour");
+ par::outlier_detection = true;
+ }
+
+ if (a.find("--matrix")) par::matrix = true;
+ if (a.find("--distance-matrix")) { par::distance_matrix = par::matrix = true; }
+
+ if (a.find("--mds-plot"))
+ {
+ par::cluster_plot = true;
+ par::cluster_mds_dim = a.value_int("--mds-plot");
+ }
+
+ if (a.find("--mds-cluster"))
+ par::mds_by_individual = false;
+
+ if (a.find("--pmerge"))
+ error("--pmerge is depreciated: use --ppc instead\n");
+
+ if (a.find("--ppc"))
+ {
+ if (!par::cluster) error("--ppc options requires --cluster");
+ par::merge_p = a.value_double("--ppc");
+ }
+
+ if (a.find("--pibs-gap"))
+ error("--pibs-gap is depreciated: please use --ppc-gap\n");
+
+ if (a.find("--ppc-gap"))
+ {
+ par::ibstest_gap = 1000 * a.value_int("--ppc-gap");
+ }
+
+ if (a.find("--ibm"))
+ {
+ if (! a.find("--cluster"))
+ error("Can only use --ibm with --cluster\n");
+ par::cluster_ibm_constraint = true;
+ par::cluster_ibm_constraint_value = a.value_double("--ibm");
+ }
+
+ if (a.find("--mc"))
+ {
+ if (!par::cluster) error("--mc options requires --cluster");
+ par::max_cluster_size = a.value_int("--mc");
+ }
+
+ if (a.find("--cc"))
+ {
+ if (!par::cluster) error("--cc options requires --cluster");
+ par::cluster_on_phenotype = true;
+ }
+
+ if (a.find("--mcc"))
+ {
+ if (!par::cluster) error("--mcc options requires --cluster");
+ par::cluster_on_mcc = true;
+ vector<string> s = a.value("--mcc",2);
+ par::max_cluster_case = getInt(s[0].c_str(),"--mcc");
+ par::max_cluster_control = getInt(s[1].c_str(),"--mcc");
+ if (a.find("--mc") || a.find("-cc"))
+ error("Cannot specify --mc N and/or --cc as well as --mcc N1 N2\n");
+ }
+
+
+
+ /////////////////////////////////
+ // External criteria to match on
+
+ // Categorical binary traits,
+ // by default
+ // e.g. { A, A } is a match and so are pairable
+ // { A, B } is not
+ //
+ // if match-type file is also specifed, then matches
+ // can potentially be otherwise, e.g.
+ // { A, B } are pairable
+ // { A, A } are not
+
+ if (a.find("--match"))
+ {
+ par::bmatch = true;
+ par::bmatch_filename = a.value("--match");
+ }
+
+ if (a.find("--match-type"))
+ {
+ if (!a.find("--match"))
+ error("Must specify a --match {file} with the --match-type {file} option");
+ par::bmatch_usertype = true;
+ par::bmatch_direction_filename = a.value("--match-type");
+ }
+
+ // Quantitative trait match
+ // Based on difference exceeding a certain threshold
+ // e.g. (X-Y)>T => no match
+ // (X-Y)<=T => match
+
+ // T is specified by including an extra individual in the qmatch file
+ // with the Family ID and Individual ID "_T_"
+
+ if (a.find("--qmatch"))
+ {
+ if (!a.find("--qt"))
+ error("You need to specify a --qt file when using --qmatch");
+ par::qmatch_threshold_filename = a.value("--qt");
+ par::qmatch = true;
+ par::qmatch_filename = a.value("--qmatch");
+ }
+
+
+
+ //////////////////////////
+ // Permutation clustering
+
+ if (a.find("--family"))
+ {
+ par::sol_family = true;
+ par::permute_within_sol = true;
+ par::include_cluster = true;
+ }
+
+ if (a.find("--within"))
+ {
+ par::permute_within_sol = par::include_cluster = true;
+ par::include_cluster_from_file = true;
+ par::include_cluster_filename = a.value("--within");
+ }
+
+ if (a.find("--mwithin"))
+ {
+ if (!a.find("--within"))
+ error("You can only specify --mwithin with --within");
+ par::mult_clst = a.value_int("--mwithin");
+ }
+
+
+ //////////////////////////////////
+ // Specific scan region selected
+
+ if (!a.find("--from"))
+ {
+
+ // Specify a specific chromosome
+ string c="0"; // Default all chromosomes
+
+ if (a.find("--chr"))
+ c = a.value("--chr");
+
+ if (c=="X" || c=="x") par::run_chr = 23;
+ else if (c=="Y" || c=="y") par::run_chr = 24;
+ else par::run_chr = getInt(c,"--chr");
+ }
+
+ if (a.find("--from"))
+ {
+
+ if (!a.find("--to"))
+ error("Must also specify --to {marker} when using --from {marker}");
+
+ par::m1 = a.value("--from");
+ par::m2 = a.value("--to");
+ par::run_chr = -1;
+ }
+
+ if (a.find("--snp"))
+ {
+ par::m1 = a.value("--snp");
+ par::m2 = a.value("--snp");
+ par::run_chr = -1;
+ }
+
+ if (a.find("--snps"))
+ {
+ par::extract_set = true;
+ par::snp_include_from_cl = true;
+ par::snp_include_range = a.value("--snps");
+
+ if ( a.find("--snp") || a.find("--window") || a.find("--extract") || a.find("--exclude") )
+ error("Cannot specify multiple SNP-selection options with --snps");
+
+ }
+
+ if ( a.find("--d") )
+ {
+ par::range_delimiter = a.value("--d");
+ if ( par::range_delimiter.length() > 1 )
+ error("Range delimiter can only be 1 character");
+ if ( par::range_delimiter == "," )
+ error("Cannot set range delimiter to comma");
+ }
+
+ if (a.find("--window"))
+ {
+ if (!a.find("--snp")) error("Must specify --snp with --window");
+ par::window = a.value_double("--window");
+ }
+
+ if (a.find("--from-bp"))
+ {
+ if (!a.find("--to-bp")) error("Must specify --to-bp with --from-bp");
+ par::from_window = a.value_int("--from-bp");
+ par::position_window = true;
+ }
+
+ if (a.find("--from-kb"))
+ {
+ if (!a.find("--to-kb")) error("Must specify --to-kb with --from-kb");
+ par::from_window = int(a.value_double("--from-kb") * 1000);
+ par::position_window = true;
+ }
+
+ if (a.find("--from-mb"))
+ {
+ if (!a.find("--to-mb")) error("Must specify --to-mb with --from-mb");
+ double v = a.value_double("--from-mb");
+ if (v>1000) error("Too large a value for --from-mb");
+ par::from_window = int(v * 1000 * 1000);
+ par::position_window = true;
+ }
+
+
+ if (a.find("--to-bp"))
+ {
+ if (!a.find("--from-bp")) error("Must specify --from-bp with --to-bp");
+ par::to_window = a.value_int("--to-bp");
+ par::position_window = true;
+ }
+
+ if (a.find("--to-kb"))
+ {
+ if (!a.find("--from-kb")) error("Must specify --from-kb with --to-kb");
+ par::to_window = int(a.value_double("--to-kb") * 1000);
+ par::position_window = true;
+ }
+
+ if (a.find("--to-mb"))
+ {
+ if (!a.find("--from-mb")) error("Must specify --from-mb with --to-mb");
+ double v = a.value_double("--to-mb");
+ if (v>1000) error("Too large a value for --to-mb");
+ par::to_window = int(v * 1000 * 1000);
+ par::position_window = true;
+ }
+
+ if (par::position_window)
+ if (!a.find("--chr"))
+ error("You must specify which chromosome (--chr N) also");
+
+
+
+ ////////////////////////////////////////////
+ // General warnings
+
+ if ( a.find("--assoc") && a.find("--covar") )
+ error("Cannot specify --covar with --assoc");
+
+ if ( a.find("--model") && a.find("--covar") )
+ error("Cannot specify --covar with --model");
+
+ if ( a.find("--assoc") && a.find("--linear") )
+ error("Cannot specify --assoc with --linear");
+
+ if ( a.find("--assoc") && a.find("--logistic") )
+ error("Cannot specify --assoc with --logistic");
+
+ if ( a.find("--model") && a.find("--linear") )
+ error("Cannot specify --model with --linear");
+
+ if ( a.find("--model") && a.find("--logistic") )
+ error("Cannot specify --model with --logistic");
+
+ if ( a.find("--model") && a.find("--assoc") )
+ error("Cannot specify --model with --assoc");
+
+
+ /////////////////////////////////////////////
+ // Help -- display all options
+
+
+ if (a.find("--help") || a.find("-h"))
+ {
+
+ cout << "\n"
+ << "Please visit the PLINK website for a complete list of options\n"
+ << "\n"
+ << "A few common options are listed here:\n"
+ << "\n";
+
+ cout << "plink --file {fileroot} Specify .ped and .map files \n"
+ << " --bfile {fileroot} Specify .bed, .fam and .map \n"
+ << "\n"
+ << " --out {fileroot} Specify output root filename\n"
+ << "\n"
+ << " --missing-genotype {0} Missing genotype code \n"
+ << " --missing-phenotype {-9} Missing phenotype code \n"
+ << "\n"
+ << " --pheno {phenofile} Specify .phe file \n"
+ << " --within {file} Specify cluster file \n"
+ << " --cov {covarfile} Specify .cov file \n"
+ << "\n"
+ << " --extract {snplist} Extract list of SNPs \n"
+ << " --exclude {snplist} Exclude list of SNPs \n"
+ << " --remove {indlist} Remove these individuals \n"
+ << " --keep {indlist} Keep these individuals \n"
+ << "\n"
+ << " --make-bed Make .bed, .fam and .bim \n"
+ << " --recode Output new PED and MAP files\n"
+ << " --recode12 As above, with 1/2 alleles \n"
+ << " --recodeAD As above, but: 1/0/-1, 0/1/0\n"
+ << " --recodeA As above, but: 1/0/-1 only \n"
+ << "\n"
+ << " --snp {marker} Specify this single SNP \n"
+ << " --snps {marker list} Specify list,range of SNPs \n"
+ << " --window {kb} Select +/- kb around --snp \n"
+ << " --chr {N} Analyse chromosome \n"
+ << " --from-kb {KB} Start scan here (kilobase) \n"
+ << " --to-kb {KB} End scan here \n"
+ << "\n"
+ << " --all Set filters to include all \n"
+ << " --maf {0.01} Minor allele frequency \n"
+ << " --geno {0.1} Maximum per-SNP missing \n"
+ << " --mind {0.1} Maximum per-person missing \n"
+ << "\n"
+ << " --freq Output allele frequencies \n"
+ << " --hardy Hardy-Weinberg tests \n"
+ << " --missing Genotyping rate information \n"
+ << " --het Individual inbreeding \n"
+ << " --genome Genome-wide IBS/IBD \n"
+ << " --cluster Perform IBS clustering \n"
+ << "\n"
+ << " --assoc Case/control, QT association\n"
+ << " --model Full-model C/C association \n"
+ << " --tdt Family-based TDT association\n"
+ << " --linear Linear regression model \n"
+ << " --logistic Logistic regression model \n"
+ << "\n"
+ << " --perm Apaptive permutations \n"
+ << " --mperm {1000} max(T) permutations \n"
+ << "\n"
+ << " --hap {tagfilename} Multimarker predictor list \n"
+ << " --hap-window {N} Phase sliding window \n"
+ << " --hap-snps {snp list} Phase this set of SNPs \n"
+ << " --hap-assoc Haplotype-based association \n"
+ << " --hap-tdt Haplotype-based TDT \n"
+ << " --chap Conditional haplotype tests \n"
+ << " --hap-phase Report haplotype phases \n"
+ << " --hap-freq Report haplotype frequencies\n"
+ << "\n";
+
+ cout << "\nPlease visit the PLINK website for a complete list of options\n\n";
+
+ shutdown();
+ }
+
+
+ // By default, most tests are SNP major
+
+ par::SNP_major = true;
+
+ // Exceptions are:
+ // TDT ( family structure confuses things)
+ // Whole genome / IBS clustering
+ // PLINK
+
+ if (par::TDT_test ||
+ par::MENDEL_test ||
+ par::MENDEL_report ||
+ par::genome_output ||
+ par::cluster ||
+ par::plink )
+ par::SNP_major = false;
+
+ if (a.find("--ind"))
+ par::SNP_major = false;
+
+
+ // If recoding data, the default will be not to set heterozygous
+ // haploid genotypes to missing. Likewise for Mendel errors. Merge
+ // operations will also specify a recode/make-bed, so they are also
+ // captured here. The one special case where we want to allow to
+ // preserve males hets on the X is the --check-sex
+
+
+ if ( par::check_sex )
+ par::preserve_all_genotypes = true;
+
+ if ( par::write_bitfile ||
+ par::recode ||
+ par::recode_HV ||
+ par::recode_whap ||
+ par::recode_12 ||
+ par::recode_AD )
+ {
+
+ // Unless flag given, these options will not replace haploid
+ // heterozygotes with a missing genotype
+
+ if ( a.find("--set-hh-missing") )
+ par::preserve_all_genotypes = false;
+ else
+ par::preserve_all_genotypes = true;
+
+ if ( a.find("--set-me-missing") )
+ par::preserve_mendel_errors = false;
+ else
+ par::preserve_mendel_errors = true;
+
+ }
+
+
+}
+
Added: trunk/packages/plink/branches/upstream/current/perm.cpp
===================================================================
--- trunk/packages/plink/branches/upstream/current/perm.cpp (rev 0)
+++ trunk/packages/plink/branches/upstream/current/perm.cpp 2008-04-09 09:42:37 UTC (rev 1721)
@@ -0,0 +1,507 @@
+
+
+//////////////////////////////////////////////////////////////////
+// //
+// PLINK (c) 2005-2008 Shaun Purcell //
+// //
+// This file is distributed under the GNU General Public //
+// License, Version 2. Please see the file COPYING for more //
+// details //
+// //
+//////////////////////////////////////////////////////////////////
+
+
+#include "perm.h"
+#include "helper.h"
+#include <iostream>
+#include <cmath>
+
+Perm::Perm(Plink & pref) : P(pref)
+{
+
+ if (par::adaptive_perm)
+ {
+ adaptive = true;
+ replicates = par::adaptive_max;
+ }
+ else
+ {
+ adaptive = false;
+ replicates = par::replicates;
+ }
+
+ count = par::perm_count;
+
+ min = par::adaptive_min;
+
+ interval = par::adaptive_interval;
+ performed = 0;
+
+ dump_all = par::mperm_save_all;
+ dump_best = par::mperm_save_best;
+
+ if (dump_all)
+ {
+ PDUMP.open((par::output_file_name+".mperm.dump.all").c_str(),ios::out);
+ }
+ else if (dump_best)
+ {
+ PDUMP.open((par::output_file_name+".mperm.dump.best").c_str(),ios::out);
+ }
+}
+
+
+void Perm::setTests(int x)
+{
+
+ performed = 0;
+
+ t = x;
+ R.resize(t);
+
+ if (adaptive)
+ {
+ N.resize(t);
+ test.resize(t);
+ for (int i=0; i<t; i++)
+ {
+ test[i] = true;
+ R[i] = N[i] = 0;
+ }
+
+ // Given t tests, set the threshold to be
+ // p +/- Phi^{-1} (1 - \gamma/2t ) sqrt( p(1-p)/N )
+ zt = ltqnorm( 1 - par::adaptive_ci / ( 2 * t ) ) ;
+
+ }
+ else
+ {
+ maxR.resize(t);
+ for (int i=0; i<t; i++)
+ R[i] = maxR[i] = 0;
+ }
+
+ // For gene-dropping, set up some family-information
+ if (par::perm_genedrop)
+ preGeneDrop();
+
+}
+
+void Perm::originalOrder()
+{
+ for (int i=0; i<P.n; i++)
+ P.sample[i]->pperson = P.sample[i];
+}
+
+
+bool Perm::finished()
+{
+ if (performed>=replicates) return true;
+ else return false;
+}
+
+
+void Perm::permuteInCluster()
+{
+
+ // Store remapped IDs
+ vector<vector< long int> > i(ns);
+
+ // Permute phenotypes, within cluster
+ for (int k=0; k<ns; k++)
+ {
+ vector<long int> p(s[k].size());
+ permute(p);
+ i[k]=p;
+ }
+
+ //////////////////////////
+ // Post-permutation:
+ // Iterate over clusters { s[][] }
+ // i[][] holds the permuted codes
+ // s[][] points to individuals (non-missing)
+
+ // Genotype = sample[s[j][k]];
+ // Matching phenotype = sample[s[j][(int)i[j][k]]];
+
+ // Create pheno[] with label-swapped codes
+ for (int j=0; j<s.size(); j++)
+ for (int k=0; k<s[j].size(); k++)
+ P.sample[s[j][k]]->pperson = P.sample[s[j][(int)i[j][k]]];
+
+}
+
+void Perm::setPermClusters(Plink & P)
+{
+
+ // Permute within clusters only
+ // (stored in sample[i]->sol)
+
+ // Get list of non-missing individuals, and number of solutions
+ // These are always numbered 0,1,2,3,..
+
+ // -1 indicates do not permute this individual
+ // 0..ns indicate cluster numbers
+
+ // Count the number of clusters: 'ns'
+
+ ns=-1;
+ for (int i=0; i<P.n; i++)
+ if ((!P.sample[i]->missing) && P.sample[i]->sol > ns)
+ ns=P.sample[i]->sol;
+ ns++;
+
+ // store set membership is 's'
+
+ s.resize(ns);
+ for (int i=0; i<P.n; i++)
+ if (!P.sample[i]->missing && P.sample[i]->sol>=0)
+ s[P.sample[i]->sol].push_back(i);
+
+ pheno.resize(P.n);
+
+ if (par::permute && ! par::QTDT_test )
+ P.printLOG("Set to permute within "+int2str(ns)+" cluster(s)\n");
+
+}
+
+void Perm::setOriginalRanking(vector_t & original)
+{
+ vector<Pair2> o;
+ for (int i=0; i<original.size(); i++)
+ {
+ Pair2 p;
+ p.p = original[i];
+ p.l = i;
+ o.push_back(p);
+ }
+
+ sort(o.begin(),o.end());
+
+ order.clear();
+ reorder.resize(original.size());
+ for(int i=0; i<original.size(); i++)
+ {
+ order.push_back(o[i].l);
+ reorder[o[i].l] = i;
+ }
+}
+
+
+
+bool Perm::update(vector<double> & result, vector<double> & original)
+{
+
+ // Increment number of permutations performed
+ performed++;
+
+ // Finished all perms?
+ bool done = false;
+
+ //////////////////////////////
+ // Update number of successes
+
+ if (!adaptive)
+ {
+ for (int l=0; l<t; l++)
+ if (result[l] >= original[l] || !realnum(original[l]) ) R[l]++;
+ }
+ else
+ {
+ for (int l=0; l<t; l++)
+ if (test[l])
+ {
+ if (result[l] >= original[l] || !realnum(original[l]) ) R[l]++;
+ N[l]++;
+ }
+ }
+
+
+ // Stopping rules for adaptive permutation?
+ int todo = 0;
+ if (adaptive &&
+ performed > min &&
+ performed % interval == 0)
+ {
+
+ // Update interval
+ interval = (int)(par::adaptive_interval + performed * par::adaptive_interval2);
+
+ // Consider each test
+ for (int l=0; l<t; l++)
+ {
+ if (test[l])
+ {
+
+ // Check for at least one success
+ if (R[l]>0)
+ {
+ double pv = (double)(R[l]+1)/(double)(performed+1);
+
+ double sd = sqrt( pv * (1-pv) / performed );
+ double lower = pv - zt * sd;
+ double upper = pv + zt * sd;
+ //double cv = sd/(performed*pv);
+ if (lower<0) lower = 0;
+ if (lower>1) upper = 0;
+
+ // Is lower bound greater than threshold, or
+ // upper bound smaller than threshold?
+ if (lower > par::adaptive_alpha || upper < par::adaptive_alpha )
+ {
+ N[l] = performed;
+ test[l] = false;
+ }
+
+ else
+ todo++;
+ }
+ else todo++;
+ }
+ }
+
+ if (!par::silent)
+ cout << "Adaptive permutation: "
+ << performed
+ << " of (max) "
+ << replicates
+ << " : "
+ << todo
+ << " SNPs left"
+ << " \r";
+
+ if (todo==0) done = true;
+ }
+
+
+ ///////////////////////////////////////////////////////////
+ // For non-adaptive permutation, keep track of the maximum
+
+ if (!adaptive)
+ {
+
+ if (dump_all)
+ {
+ if (performed==1)
+ {
+ PDUMP << 0 << " ";
+ for (int l=0; l<t; l++)
+ PDUMP << original[l] << " ";
+ PDUMP << "\n";
+ }
+ PDUMP << performed << " ";
+ }
+
+
+ if (dump_best)
+ {
+ if (performed==1)
+ {
+ PDUMP << 0 << " ";
+
+ double mx=0;
+ for (int l=0; l<t; l++)
+ if (original[l]>mx)
+ if ( realnum(mx))
+ mx=original[l];
+ PDUMP << mx << "\n";
+ }
+ PDUMP << performed << " ";
+ }
+
+
+ // Find maximum, or sort all results
+
+ double mx=0;
+
+ if ( par::mperm_rank )
+ {
+ // Ranked permutation
+ // populate mx vector
+
+ // Set any NA to 0
+ for (int l=0; l<t; l++)
+ if ( ! realnum(result[l]) )
+ result[l] = 0;
+
+ // Sort (ascending)
+ sort(result.begin(),result.end());
+
+ // Save max in any case
+ mx = result[result.size()-1];
+ }
+ else
+ {
+ // Standard max(T)
+ // populate single mx variable
+ for (int l=0; l<t; l++)
+ if (result[l]>mx)
+ if ( realnum(mx) )
+ mx=result[l];
+ }
+
+
+ if (dump_best)
+ PDUMP << mx << "\n";
+ else if (dump_all)
+ {
+ for (int l=0; l<t; l++)
+ PDUMP << result[l] << " ";
+ PDUMP << "\n";
+ }
+
+
+ // Max(T) permutation -- compare against best
+
+ if ( ! par::mperm_rank )
+ {
+ for (int l=0; l<t; l++)
+ if (mx >= original[l] || !realnum(original[l]) ) maxR[l]++;
+ }
+
+ // Rank(T) permutation -- compare against similar rank
+
+ else
+ {
+ for (int l=0; l<t; l++)
+ {
+ //cout << "P " << order[l] << " " << original[order[l]] << " , pr = " << result[l] << " ";
+
+ if (result[l] >= original[order[l]] || !realnum(original[order[l]]) )
+ maxR[order[l]]++;
+
+ }
+ }
+
+
+
+ if (!par::silent)
+ {
+ cout << "maxT permutation: "
+ << performed
+ << " of "
+ << par::replicates
+ << " \r";
+ cout.flush();
+ }
+ }
+
+ // Have we hit the maximum number of replicates?
+ if (performed>=replicates) done = true;
+
+ return done;
+}
+
+void Perm::nextSNP()
+{
+ // Reset Perm class for next SNP when in adaptive, SNP-by-SNP mode
+ performed = 0;
+ originalOrder();
+}
+
+bool Perm::updateSNP(double result, double original, int l)
+{
+
+ /////////////////////////////////////////
+ // Single SNP adaptive permutation update
+ /////////////////////////////////////////
+
+ // Increment number of permutations performed for this SNP
+ performed++;
+
+ // Finished all perms for this SNP?
+ bool done = false;
+
+ //////////////////////////////
+ // Update number of successes
+
+ if (test[l])
+ {
+ if (result >= original || !realnum(original) ) R[l]++;
+ N[l]++;
+ }
+
+
+ // Stopping rules for adaptive permutation?
+
+ if (adaptive &&
+ performed > min &&
+ performed % interval == 0)
+ {
+
+ // Update interval
+ interval = (int)(par::adaptive_interval + performed * par::adaptive_interval2);
+
+ // Consider this specific SNP
+ if (test[l])
+ {
+
+ // Check for at least one success
+ if (R[l]>0)
+ {
+
+ double pv = (double)(R[l]+1)/(double)(performed+1);
+
+ double sd = sqrt( pv * (1-pv) / performed );
+ double lower = pv - zt * sd;
+ double upper = pv + zt * sd;
+ //double cv = sd/(performed*pv);
+ if (lower<0) lower = 0;
+ if (lower>1) upper = 0;
+
+ // Is lower bound greater than threshold, or
+ // upper bound smaller than threshold?
+ if (lower > par::adaptive_alpha || upper < par::adaptive_alpha )
+ {
+ N[l] = performed;
+ test[l] = false;
+ done = true;
+ }
+ }
+ }
+
+ }
+
+ // Have we hit the maximum number of replicates?
+ if (performed>=replicates) done = true;
+
+ return done;
+}
+
+
+int Perm::rank(int l)
+{
+ if ( ! par::mperm_rank )
+ return 0;
+ else
+ return t - reorder[l];
+
+}
+
+double Perm::pvalue(int l)
+{
+ if (count)
+ return (double)R[l];
+ if (adaptive)
+ return (double)(R[l]+1) / (double)(N[l]+1);
+ else
+ return (double)(R[l]+1) / (double)(replicates+1);
+}
+
+
+double Perm::max_pvalue(int l)
+{
+ if (adaptive)
+ return -1;
+ else
+ return (double)(maxR[l]+1) / (double)(replicates+1);
+}
+
+int Perm::reps_done(int l)
+{
+ if (adaptive)
+ return N[l];
+ else
+ return replicates;
+}
+
Added: trunk/packages/plink/branches/upstream/current/prephap.cpp
===================================================================
--- trunk/packages/plink/branches/upstream/current/prephap.cpp (rev 0)
+++ trunk/packages/plink/branches/upstream/current/prephap.cpp 2008-04-09 09:42:37 UTC (rev 1721)
@@ -0,0 +1,656 @@
+
+
+//////////////////////////////////////////////////////////////////
+// //
+// PLINK (c) 2005-2007 Shaun Purcell //
+// //
+// This file is distributed under the GNU General Public //
+// License, Version 2. Please see the file COPYING for more //
+// details //
+// //
+//////////////////////////////////////////////////////////////////
+
+
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <sstream>
+#include <cmath>
+#include <vector>
+#include <map>
+
+#include "plink.h"
+#include "options.h"
+#include "phase.h"
+#include "helper.h"
+#include "nlist.h"
+
+extern ofstream LOG;
+
+using namespace std;
+
+// Format for haplotype file: format 1:
+// SNP_ID CHR CM BP A1 A2 HAP SNP1 SNP2 ...
+
+// i.e. length of Pred_allele indicate how many SNPs to expect:
+// rs10001 5 0 10203 A G TTC rs001 rs002 rs003
+
+// Alternatively: using wild cards: format 2:
+// will name haplotype "H1_TTC_", "H1_CTT_", "H2_AA_", etc
+// * rs001 rs002 rs003
+// * rs002 rs004
+
+// Alternatively: using wild cards: format 3:
+// will name haplotype "MYHAP1_1_TTC_", "MYHAP1_1_CTT_", "GENEB_2_AA_", etc
+// ** MYHAP1 rs001 rs002 rs003
+// ** GENEB rs002 rs004
+
+// Alternatively: using --whap weighted multimarker tests
+// rs10001 5 0 10203 A G rs001 rs002 rs003 / TTC / CCT 0.9 / TCT 0.1
+// i.e. "/" separator used, if number omitted, then assume PP=1
+
+void HaploPhase::readTagFile()
+{
+
+ P.printLOG("\n");
+
+ ///////////////////////////////////////////////
+ // Haplotype inference is a SNP-major function
+
+ if (!par::SNP_major) P.Ind2SNP();
+
+
+ ////////////////////////
+ // Lookup table for SNPs
+
+ map<string,int> mlocus;
+ map<string,int>::iterator ilocus;
+
+ for (int l=0;l<P.nl_all;l++)
+ mlocus.insert(make_pair(P.locus[l]->name,l));
+
+
+ ///////////////////
+ // Affection coding
+
+ if (par::bt) affCoding(P);
+
+
+ ////////////////////////////////
+ // Read list of tags/haplotypes
+
+ checkFileExists(par::tagfile);
+ ifstream TAG(par::tagfile.c_str(), ios::in);
+ TAG.clear();
+
+ string f2 = par::output_file_name + ".mishap";
+ ofstream MISHAP(f2.c_str(), ios::out);
+ bool all_okay = true;
+
+ // Count of new haplotypes we want to infer
+ int hc=1;
+
+ while(!TAG.eof())
+ {
+ char c[500000];
+ TAG.getline(c,500000,'\n');
+
+ string l = c;
+
+ // Catch blank lines or DOS carriage-returns
+ if (l=="" || l=="\r") continue;
+
+ // Tokenize line
+ string buf;
+ stringstream ss(l);
+ vector<string> tokens;
+ while (ss >> buf)
+ tokens.push_back(buf);
+
+ // whitepsace line?
+ if (tokens.size() == 0)
+ continue;
+
+ // tokens[0] predicted SNP rs#
+ // tokens[1] predicted SNP chromosome
+ // tokens[2] predicted SNP Morgan position
+ // tokens[3] predicted SNP base pair position
+ // tokens[4] predicted SNP allele 1
+ // tokens[5] predicted SNP allele 2
+ // tokens[6] tag allele (-> allele 1)
+ // tokens[7+] predictor rs#(s)
+
+
+ // If we see one or more wildcard specifications, then
+ // automatically set phase_all_haps to be true;
+
+ if ( tokens[0] == "*" )
+ {
+ // Wildcard format
+
+ if ( tokens.size() == 1 )
+ error("Problem with " +
+ par::tagfile + " line\n" + l
+ + "\n: must have atleast one SNP list\n");
+
+ par::phase_hap_all = true;
+ }
+ else if ( tokens[0] == "**")
+ {
+ // Wildcard2 format
+
+ if (tokens.size() == 2 )
+ error("Problem with "
+ + par::tagfile + " line\n" + l
+ + "\n: must have atleast one SNP list\n");
+
+ par::phase_hap_all = true;
+
+ }
+ else if ( ! par::weighted_mm )
+ {
+ // Standard format
+
+ if (tokens.size() < 8 )
+ {
+ string e = "Problem with " +
+ par::tagfile + " line\n"
+ + l +
+ "\n (expecting at least 8 items, or to start with */** wildcard)\n";
+ error(e);
+ }
+ }
+ else
+ {
+ // Weighted multi-marker format
+
+
+ if (tokens.size() < 9 )
+ {
+ string e = "Problem with " + par::tagfile + " line\n"
+ + l
+ + "\n (expecting at least 9 items for --whap format file)\n";
+ error(e);
+ }
+
+
+ }
+
+ if (tokens[0].substr(tokens[0].size()-1) == "_" )
+ error("Cannot use '_' in tag/haplotype name: reserved for wildcards\n");
+
+
+ int len; // length of haplotype
+ vector<int> locusList; // list of predictor #s
+
+
+ // Is this particular line a wildcard? okay?
+ bool wildcard = tokens[0] == "*" || tokens[0] == "**" ? true : false ;
+ string wildname = "H";
+
+ // Take the name from the second position? (** wildcard?)
+ if ( tokens[0] == "**" )
+ {
+ wildname = tokens[1]+"_";
+ tokens.erase(tokens.begin()+1);
+ }
+
+ bool okay = true;
+
+
+
+
+ /////////////////////////////////
+ // Fully-specified haplotype
+
+ if (!wildcard)
+ {
+
+ int offset = 7;
+ if ( par::weighted_mm ) offset = 6;
+
+ if ( par::weighted_mm )
+
+ {
+ // Find first "/" separator
+ int sep = 6;
+ while ( tokens[++sep] != "/" ) { }
+ len = sep - 6;
+ }
+ else
+ {
+ len = tokens[6].length();
+
+ if (len != tokens.size() - offset )
+ {
+ string e = "Problem with " +
+ par::tagfile + " line\n" + l + "\n";
+ error(e);
+ }
+ }
+
+
+ //////////////////////
+ // Lookup locus name
+
+ for (int i=0; i<len; i++)
+ {
+ ilocus = mlocus.find(tokens[i+offset]);
+ if (ilocus != mlocus.end())
+ {
+ locusList.push_back(ilocus->second);
+ }
+ else
+ {
+ MISHAP << "NOSNP\t" << tokens[0]
+ << "\t" << tokens[i+offset] << "\n";
+ okay = false;
+ }
+ }
+
+
+ /////////////////////////////////
+ // Check specified alleles exist
+
+ if (okay)
+ {
+
+ // Just one haplotype to check for non-weighted test version
+
+ if ( ! par::weighted_mm )
+ {
+ for (int s=0;s<len;s++)
+ if ( ! ( P.locus[locusList[s]]->allele1
+ == tokens[6].substr(s,1)
+ || P.locus[locusList[s]]->allele2
+ == tokens[6].substr(s,1) ) )
+ {
+ MISHAP << "NOALLELE\t" << tokens[0]
+ << "\t" << P.locus[locusList[s]]->name
+ << "\t" << tokens[6] << "\n";
+ okay = false;
+ }
+ }
+
+ // Otherwise, we must parse through and check each
+ else
+ {
+
+ int allele = 5 + len + 2;
+
+ for ( int i = allele ; i < tokens.size() ; i++ )
+ {
+ // Is this a haplotype?
+ if ( i == allele || tokens[i-1] == "/" )
+ {
+ for (int s=0;s<len;s++)
+ if ( ! ( P.locus[locusList[s]]->allele1
+ == tokens[i].substr(s,1)
+ || P.locus[locusList[s]]->allele2
+ == tokens[i].substr(s,1) ) )
+ {
+ MISHAP << "NOALLELE\t" << tokens[0]
+ << "\t" << P.locus[locusList[s]]->name
+ << "\t" << tokens[i] << "\n";
+ okay = false;
+ }
+ }
+ }
+ }
+
+ }
+
+
+ }
+ else
+ {
+ /////////////////////////////////
+ // Wildcard selection
+
+ len = tokens.size()-1;
+
+ // Lookup locus name
+ for (int i=0; i<len; i++)
+ {
+ ilocus = mlocus.find(tokens[i+1]);
+ if (ilocus != mlocus.end())
+ {
+ locusList.push_back(ilocus->second);
+ }
+ else
+ {
+ MISHAP << "NOSNP\t" << tokens[i+1] << "\n";
+ okay = false;
+ }
+ }
+ }
+
+
+
+ //////////////////////////////////////
+ // Should we try to add this haploype
+
+ if (!okay)
+ {
+ all_okay = false;
+ continue;
+ }
+
+
+ ////////////////////////////////////////////////////////
+ // Check that all predictors are on the same chromosome
+
+ if (!wildcard)
+ {
+ for (int ck=0; ck<len; ck++)
+ if (P.locus[locusList[ck]]->chr != atoi(tokens[1].c_str()))
+ {
+ MISHAP << "DIFF_CHR\t" << P.locus[locusList[ck]]->name
+ << "\t" << tokens[0] << "\n";
+ okay = false;
+ }
+ }
+ else
+ {
+ for (int ck=0; ck<len-1; ck++)
+ if (P.locus[locusList[ck]]->chr != P.locus[locusList[ck+1]]->chr)
+ {
+ MISHAP << "DIFF_CHR\t" << P.locus[locusList[ck]]->name
+ << "\t" << P.locus[locusList[ck+1]]->name << "\n";
+ okay = false;
+ }
+ }
+
+ if (!okay)
+ {
+ all_okay = false;
+ continue;
+ }
+
+
+ ///////////////////////////////////////
+ // Standard approach -- only one entry
+
+ if ( ( !wildcard) && (!par::weighted_mm) )
+ {
+
+ // Add numbers for predictors to list
+ new_pred_locus.push_back(locusList);
+
+ // Add which allele to look for (corresponding to allele1)
+ new_pred_allele.push_back(tokens[6]);
+
+ // Make new entry in MAP file
+ Locus * loc = new Locus;
+ loc->name = tokens[0];
+ loc->chr = getChromosomeCode( tokens[1] );
+ loc->pos = atof(tokens[2].c_str());
+ loc->bp = atoi(tokens[3].c_str());
+ loc->allele1 = tokens[4];
+ loc->allele2 = tokens[5];
+
+ // Add this new locus to the list
+ new_map.push_back(loc);
+ }
+ else if ( ( !wildcard ) && par::weighted_mm )
+ {
+
+ ///////////////////////////////////////
+ // Weighted MM test -- only one entry
+
+ // Add numbers for predictors to list
+ new_pred_locus.push_back(locusList);
+
+ // Add which allele(s) to look for (corresponding to allele 1)
+ // and then the corresponding weights
+
+ map<string,double> whap;
+
+ int index = 5 + len + 2;
+ while ( index < tokens.size() )
+ {
+ // Read haplotype, then optionally a weight
+
+ if ( tokens[index].size() != len )
+ error("Problem with " + par::tagfile + " line\n" + l + "\n");
+
+ // Read weight, or advance to next haplotype?
+ if ( index == tokens.size() - 1 )
+ {
+ whap.insert(make_pair( tokens[index] , 1 ) );
+ break;
+ }
+ else if ( tokens[index+1] == "/" )
+ {
+ whap.insert(make_pair( tokens[index] , 1 ) );
+ index += 2;
+ }
+ else
+ {
+ double w = atof(tokens[index+1].c_str());
+ if ( ( !realnum(w) ) ||
+ w < 0 || w > 1 )
+ error("Problem with specified weight in line:\n"+l+"\n");
+ whap.insert(make_pair( tokens[index] , w ) );
+ index += 3;
+ }
+ }
+
+
+ new_pred_weighted_allele.push_back(whap);
+
+ // Make new entry in MAP file
+ Locus * loc = new Locus;
+ loc->name = tokens[0];
+ loc->chr = getChromosomeCode( tokens[1] );
+ loc->pos = atof(tokens[2].c_str());
+ loc->bp = atoi(tokens[3].c_str());
+ loc->allele1 = tokens[4];
+ loc->allele2 = tokens[5];
+
+ // Add this new locus to the list
+ new_map.push_back(loc);
+
+
+
+ }
+ else
+ {
+
+ ////////////////////////////////////////////
+ // Wildcard approach -- just a single entry
+
+ // Add numbers for predictors to list
+ new_pred_locus.push_back(locusList);
+
+ // Put in a dummy allele code (we ignore this...)
+ string hstr="";
+ for (int s=0;s<S.size();s++)
+ hstr += P.locus[locusList[s]]->allele1;
+
+ new_pred_allele.push_back(hstr);
+
+ // Make new entry in MAP file
+
+ Locus * loc = new Locus;
+ loc->name = wildname +int2str(hc)+"_DUMMY_"+hstr+"_";
+ loc->chr = P.locus[locusList[0]]->chr;
+ loc->pos = 0;
+ loc->bp = hc;
+ loc->allele1 = "1";
+ loc->allele2 = "2";
+
+ // Add this new locus to the list
+ new_map.push_back(loc);
+
+
+ }
+
+ // Increment new haplotype count
+ hc++;
+
+ // Read next in TAG file
+
+ }
+
+ TAG.close();
+ MISHAP.close();
+
+ if (!all_okay)
+ P.printLOG("Warning: misspecified haplotypes found: listed in [ " + f2 + " ]\n");
+
+
+ // End of reading haplotype list -- did we encounter any problems?
+
+ P.printLOG("Read " + int2str(new_map.size()) +
+ " haplotypes from [ " + par::tagfile + " ]\n");
+
+}
+
+
+
+void HaploPhase::makeSlidingWindow(int winsize, int winstep)
+{
+
+ P.printLOG("\n");
+
+ ///////////////////////////////////////////////
+ // Haplotype inference is a SNP-major function
+
+ if (!par::SNP_major)
+ P.Ind2SNP();
+
+
+ //////////////////////////////////
+ // Set up haplotype entries
+
+ int w=1;
+
+ int start = 0;
+
+ while ( 1 )
+ {
+
+ // Beyond last SNP?
+
+ if ( start >= P.nl_all )
+ break;
+
+ // Make a window, as large as possible
+ vector<int> snps;
+
+ // Make sure it is restricted to one chromosome
+ int chr = P.locus[start]->chr;
+
+ bool fail = false;
+ bool newChromosome = false;
+ int actualStop = start;
+
+ // Add SNPs to window
+
+ for (int s = start; s < start + winsize; s++)
+ {
+
+ // No more SNPs left
+ if ( s == P.nl_all )
+ {
+ fail = true;
+ break;
+ }
+
+ // Next chromosome?
+ if ( P.locus[s]->chr != chr )
+ {
+ newChromosome = true;
+ actualStop = s;
+ break;
+ }
+
+ snps.push_back(s);
+ actualStop = s;
+ }
+
+ // Have we come up to the end of this chromosome in an exact number?
+ if ( actualStop == P.nl_all-1 )
+ {
+ fail = true;
+ }
+ else if ( ( ! newChromosome ) && P.locus[actualStop+1]->chr != chr )
+ {
+ newChromosome = true;
+ actualStop++;
+ }
+
+
+ // Finished constructing this particular window:
+ // do we have anything to add?
+
+ if ( snps.size() == 0 )
+ {
+ if ( newChromosome )
+ start = actualStop;
+ else
+ start += winstep;
+ continue;
+ }
+
+ Locus * tmploc = new Locus;
+ tmploc->name = "WIN"+int2str(w++);
+ tmploc->chr = P.locus[start]->chr;
+ tmploc->pos = P.locus[start]->pos;
+ tmploc->bp = P.locus[start]->bp;
+ tmploc->allele1 = "1";
+ tmploc->allele2 = "2";
+ new_pred_locus.push_back(snps);
+
+ new_map.push_back(tmploc);
+ new_pred_allele.push_back("");
+
+
+ // Advance window
+
+ if ( fail )
+ break;
+
+ if ( newChromosome )
+ start = actualStop;
+ else
+ start += winstep;
+
+ }
+
+ P.printLOG("Created " + int2str(w-1) + " sliding windows\n");
+
+}
+
+
+void HaploPhase::setSpecificSNPs(string snps)
+{
+
+ map<string,int> mapping;
+ for (int l=0; l<P.nl_all;l++)
+ mapping.insert(make_pair(P.locus[l]->name,l));
+
+ NList nl(P.nl_all,true);
+ vector<int> snplist = nl.deparseStringList(snps,&mapping);
+
+ if (snplist.size() == 0 )
+ return;
+
+ int start = snplist[0];
+
+ new_pred_locus.push_back(snplist);
+
+ Locus * tmploc = new Locus;
+ tmploc->name = "WIN1";
+ tmploc->chr = P.locus[start]->chr;
+ tmploc->pos = P.locus[start]->pos;
+ tmploc->bp = P.locus[start]->bp;
+ tmploc->allele1 = "1";
+ tmploc->allele2 = "2";
+ new_map.push_back(tmploc);
+
+ new_pred_allele.push_back("");
+
+ return;
+}
Added: trunk/packages/plink/branches/upstream/current/proxy.cpp
===================================================================
--- trunk/packages/plink/branches/upstream/current/proxy.cpp (rev 0)
+++ trunk/packages/plink/branches/upstream/current/proxy.cpp 2008-04-09 09:42:37 UTC (rev 1721)
@@ -0,0 +1,1593 @@
+
+
+//////////////////////////////////////////////////////////////////
+// //
+// PLINK (c) 2005-2008 Shaun Purcell //
+// //
+// This file is distributed under the GNU General Public //
+// License, Version 2. Please see the file COPYING for more //
+// details //
+// //
+//////////////////////////////////////////////////////////////////
+
+
+#include <iostream>
+#include <iomanip>
+#include <string>
+#include <set>
+#include <cmath>
+#include "options.h"
+#include "helper.h"
+#include "plink.h"
+#include "phase.h"
+#include "model.h"
+#include "linear.h"
+
+//////////////////////////////////////////
+// Helper classes to store and sort proxies
+
+class ProxyResult {
+public:
+
+ string name;
+ double f;
+ double r2;
+ double odds;
+ double chisq;
+ double pvalue;
+
+ ProxyResult(string n, double frq, double r, double o, double c, double p)
+ : name(n), f(frq), r2(r), odds(o), chisq(c), pvalue(p) { }
+
+ bool operator< (const ProxyResult & b) const
+ {
+ return ( pvalue == b.pvalue ?
+ name < b.name :
+ pvalue < b.pvalue );
+ }
+};
+
+
+class LDPair {
+public:
+ int s1;
+ int s2;
+ double ld;
+ bool operator< (const LDPair & b) const
+ {
+ return ld > b.ld;
+ }
+
+
+};
+
+
+///////////////////////////////////////////////
+// Helper function to find n of m combinations
+
+void combinations_recursive(const vector<int> &elems,
+ unsigned long req_len,
+ vector<unsigned long> &pos,
+ unsigned long depth,
+ unsigned long margin,
+ vector<vector<int> > & collection)
+{
+
+ if (depth >= req_len) {
+ vector<int> t;
+ for (unsigned long ii = 0; ii < pos.size(); ++ii)
+ t.push_back( elems[pos[ii]] );
+ collection.push_back(t);
+ return;
+ }
+
+ if ((elems.size() - margin) < (req_len - depth))
+ return;
+
+ for (unsigned long ii = margin; ii < elems.size(); ++ii) {
+ pos[depth] = ii;
+ combinations_recursive(elems, req_len, pos, depth + 1, ii + 1, collection);
+ }
+ return;
+}
+
+
+///////////////////////////////////////////////
+// For locus l, main proxy association function
+
+void Plink::performProxyTests(int l)
+{
+
+ // Consider a particular SNP, l, and
+ // a) form haplotypes surrounding SNPs
+ // b) calculate association with phenotype for these SNPs
+ // c) calculate r^2 (haplotypic) with allele and haplotypes
+
+ // Form haplotypes based on the surrounding SNPs
+ // Form phenotype based on the patterns of missingness for test SNP
+ // Is there any association?
+
+ bool old_silent = par::silent;
+
+
+ ////////////////////////////////////////////////////////////////
+ // If we are in 'impute' mode: this is to evaluate imputation, in a
+ // 'leave-one-out' manner; here we assume the reference panel is
+ // coded as 'missing phenotype' and the rest of the sample is coded
+ // as non-missing phenotype; so we must first blank out (but later
+ // replace) any genotype data for these individuals
+
+ vector<bool> tmp1, tmp2;
+
+ if ( par::proxy_impute || par::proxy_leave_out )
+ {
+
+ // Pretend that we do not have these genotypes, except for the
+ // reference panel (i.e. individuals with a missing phenotype)
+
+ for ( int i = 0 ; i < n ; i++ )
+ if ( ! sample[i]->missing )
+ {
+ tmp1.push_back( SNP[l]->one[i] );
+ tmp2.push_back( SNP[l]->two[i] );
+
+ SNP[l]->one[i] = true;
+ SNP[l]->two[i] = false;
+ }
+ }
+
+
+ ///////////////////////
+ // Form test haplotypes
+
+ CSNP * s = SNP[l];
+
+ vector<int> proxyHaplotypePlusSNP;
+
+
+ // Add reference SNP
+
+ proxyHaplotypePlusSNP.push_back(l);
+
+
+ // Either a fixed maximum number of SNPs left and right (allowing
+ // for different filters, and chromosome ends) or read from a file
+ // (these SNPs must be on same chromosome)
+
+ if ( par::proxy_list )
+ {
+ checkFileExists( par::proxy_list_file );
+ printLOG("Reading proxy list from [ "
+ + par::proxy_list_file + " ]\n");
+ ifstream PL( par::proxy_list_file.c_str(), ios::in);
+
+ map<string,int> mlocus;
+ for (int j=0;j<nl_all;j++)
+ mlocus.insert(make_pair(locus[j]->name,j));
+
+ while ( ! PL.eof() )
+ {
+ string psnp;
+ PL >> psnp;
+
+ if ( psnp == "" )
+ continue;
+
+ map<string,int>::iterator m = mlocus.find( psnp );
+
+ if ( m == mlocus.end() )
+ continue;
+
+ // Add if okay w/ MAF and genotyping thresholds
+
+ int pn = m->second;
+
+ if ( pn != l &&
+ locus[pn]->chr == locus[l]->chr &&
+ locus[pn]->freq >= par::proxy_maf &&
+ abs(double((locus[l]->bp - locus[pn]->bp)/1000.0))
+ <= par::proxy_kb &&
+ locus[pn]->pos <= par::proxy_geno )
+ {
+ proxyHaplotypePlusSNP.push_back( pn );
+ }
+
+ }
+
+ PL.close();
+ }
+
+ else // ... use window approach
+ {
+
+ int i = l-1;
+ int added = 0;
+
+ while ( added < par::proxy_window )
+ {
+ if ( i >= 0 &&
+ locus[i]->chr == locus[l]->chr )
+ {
+
+ // Add MAF and genotyping thresholds here
+ if ( locus[i]->freq >= par::proxy_maf &&
+ abs(double((locus[l]->bp - locus[i]->bp)/1000.0))
+ <= par::proxy_kb &&
+ locus[i]->pos <= par::proxy_geno )
+ {
+ proxyHaplotypePlusSNP.push_back(i);
+ added++;
+ }
+
+ // Shift left
+ --i;
+ }
+ else
+ {
+ // Cannot add any more
+ added = par::proxy_window;
+ }
+ }
+
+ // Now move right
+ added = 0;
+ i = l+1;
+ while ( added < par::proxy_window )
+ {
+ if ( i < nl_all &&
+ locus[i]->chr == locus[l]->chr )
+ {
+
+ // Add MAF, kb and genotyping thresholds here
+ if ( locus[i]->freq >= par::proxy_maf &&
+ abs(double((locus[l]->bp - locus[i]->bp)/1000.0))
+ <= par::proxy_kb &&
+ locus[i]->pos <= par::proxy_geno )
+ {
+ proxyHaplotypePlusSNP.push_back(i);
+ added++;
+ }
+
+ //Shift right
+ ++i;
+ }
+ else
+ {
+ // Cannot add any more
+ added = par::proxy_window;
+ }
+
+ }
+ }
+
+
+ ///////////////////////////////////////////////////////////////////////////
+ //
+ // Optionally, filter list based on LD with reference and with eachother
+ //
+ ///////////////////////////////////////////////////////////////////////////
+
+
+ if ( par::proxy_r2_filter )
+ {
+
+ // Only use Reference Panel for these r-sq calculations at this stage;
+ // add flag to modify this
+
+ if ( par::proxy_reference_only )
+ haplo->reference_only = true;
+
+ set<int> added;
+
+ // Examine SNP number list: proxyHaplotypePlusSNP
+ // First entry is always the reference SNP
+
+ set<LDPair> proxies;
+
+ // Skip first entry
+ for (int i=1; i<proxyHaplotypePlusSNP.size(); i++)
+ {
+ LDPair p;
+ p.s1 = l;
+ p.s2 = proxyHaplotypePlusSNP[i];
+ p.ld = haplo->rsq(p.s1,p.s2);
+ //p.ld = haplo->dprime(p.s1,p.s2);
+
+ proxies.insert(p);
+ }
+
+ set<LDPair>::iterator pi = proxies.begin();
+
+ while ( pi != proxies.end() )
+ {
+
+ // Enough proxies already?
+
+ if ( added.size() >= par::proxy_snp_filter )
+ break;
+
+ if ( ( added.size() < 2 && pi->ld >= par::proxy_r2_filter_A ) // low filter
+ || pi->ld >= par::proxy_r2_filter_B ) // higher filter, once 2 proxies found
+ {
+
+ // But does this already correlate too strongly with an
+ // existing proxy?
+
+ set<int>::iterator si = added.begin();
+
+ bool okay = true;
+ while ( si != added.end() )
+ {
+ int2 snps;
+
+ if ( snps.p1 > snps.p2 )
+ {
+ snps.p1 = *si;
+ snps.p2 = pi->s2;
+ }
+ else
+ {
+ snps.p1 = pi->s2;
+ snps.p2 = *si;
+ }
+
+ double ld;
+
+ map<int2,double>::iterator f = proxyLD.find(snps);
+ if ( f != proxyLD.end() )
+ {
+ ld = f->second;
+ }
+ else
+ {
+ ld = haplo->rsq(snps.p1,snps.p2);
+ //ld = haplo->dprime(snps.p1,snps.p2);
+ proxyLD.insert(make_pair(snps,ld));
+ }
+
+ if ( ld > par::proxy_r2_filter_C )
+ {
+ okay = false;
+ break;
+ }
+ ++si;
+ }
+
+ if ( okay )
+ {
+ added.insert( pi->s2 );
+ }
+ }
+
+ // Consider next proxy SNP
+ ++pi;
+ }
+
+
+ //////////////////////////////////
+ // Update the fitlered proxy list
+
+ proxyHaplotypePlusSNP.clear();
+ proxyHaplotypePlusSNP.push_back(l);
+ set<int>::iterator si = added.begin();
+ while ( si != added.end() )
+ {
+ proxyHaplotypePlusSNP.push_back( *si );
+ ++si;
+
+ }
+
+
+ // And remember to reset
+ haplo->reference_only = false;
+ }
+
+
+ //////////////////////////////////////////////////////////////////
+ //
+ // Sort, and select reference SNP
+ //
+ //////////////////////////////////////////////////////////////////
+
+ sort( proxyHaplotypePlusSNP.begin(),
+ proxyHaplotypePlusSNP.end());
+
+ int cnt = proxyHaplotypePlusSNP.size();
+ int ref;
+
+ for (int i=0; i<cnt; i++)
+ if ( proxyHaplotypePlusSNP[i] == l )
+ {
+ ref = i;
+ break;
+ }
+
+
+ //////////////////////////////////////////////////////////////////////
+ //
+ // Do we actually have 1 or more proxy SNP selected? If not, leave now
+ // if in verbose mode
+ //
+ //////////////////////////////////////////////////////////////////////
+
+ if ( par::proxy_all && ( ! par::proxy_full_report ) )
+ {
+
+ if ( proxyHaplotypePlusSNP.size() == 1 )
+ {
+
+ if ( ! par::proxy_impute )
+ {
+ haplo->HTEST << setw( 4 ) << locus[l]->chr << " "
+ << setw(par::pp_maxsnp) << locus[l]->name << " "
+ << setw(12) << locus[l]->bp << " "
+ << setw(4) << locus[l]->allele1 << " "
+ << setw(4) << locus[l]->allele2 << " "
+ << setw(10) << locus[l]->pos << " "
+ << setw(4) << proxyHaplotypePlusSNP.size() - 1 << " "
+ << setw(8) << "NA" << " ";
+
+ // Display C/C or T/U for case/control and TDT, else F and BETA
+
+ if ( par::qt || par::proxy_glm )
+ haplo->HTEST << setw(8) << "NA" << " "
+ << setw(8) << "NA" << " ";
+ else
+ haplo->HTEST << setw(8) << "NA" << " "
+ << setw(8) << "NA" << " "
+ << setw(8) << "NA" << " ";
+
+ haplo->HTEST << setw(10) << "NA" << " ";
+
+ if ( par::proxy_list_proxies )
+ haplo->HTEST << "(NONE)";
+
+ haplo->HTEST << "\n";
+
+
+ // Finally, replace any genotypes made temporarily missing
+
+ if ( par::proxy_leave_out )
+ {
+ int cnt = 0;
+ for ( int i = 0 ; i < n ; i++ )
+ if ( ! sample[i]->missing )
+ {
+ SNP[l]->one[i] = tmp1[cnt];
+ SNP[l]->two[i] = tmp2[cnt++];
+ }
+
+ tmp1.clear();
+ tmp2.clear();
+ }
+
+ return;
+ }
+
+ }
+ }
+
+
+
+
+ ///////////////////////////////////////////////////////////////////////////////
+ //
+ // Phase haplotypes
+ //
+ ///////////////////////////////////////////////////////////////////////////////
+
+ haplo->reset();
+
+ haplo->new_pred_locus.resize(1);
+
+ haplo->new_map.resize(1);
+
+ haplo->new_pred_locus[0] = proxyHaplotypePlusSNP;
+
+ haplo->new_map[0] = locus[l];
+
+ par::silent = true;
+
+ haplo->phaseAllHaplotypes();
+
+ haplo->hname = locus[l]->name;
+
+ par::silent = old_silent;
+
+
+
+ ///////////////////////////////////////////////////////////////////////////////
+ //
+ // Per-genotype error screen
+ //
+ ///////////////////////////////////////////////////////////////////////////////
+
+ if ( par::proxy_error )
+ {
+
+ // Identify test SNP with respect to 0..cnt phased region
+ // (i.e. 'ref' and not 'l')
+
+ haplo->queryGenotype( ref );
+ return;
+ }
+
+
+ //////////////////////////////////////////////////////////////////////////////
+ //
+ // If we are in 'impute' mode: do not perform association tests, but
+ // just compare imputed genotypes to actual (left-out) genotypes,
+ // and report on this.
+ //
+ //////////////////////////////////////////////////////////////////////////////
+
+ if ( par::proxy_leave_out )
+ {
+ int cnt = 0;
+ for ( int i = 0 ; i < n ; i++ )
+ if ( ! sample[i]->missing )
+ {
+ SNP[l]->one[i] = tmp1[cnt];
+ SNP[l]->two[i] = tmp2[cnt++];
+ }
+
+ tmp1.clear();
+ tmp2.clear();
+ }
+ else if ( par::proxy_impute )
+ {
+
+ ////////////////////////////////////////
+ // Replace genotypes, and record dosage
+
+ // For imputation quality score
+ boolvec_t m1(cnt,false);
+ m1[ref] = true;
+ boolvec_t a1(cnt,false);
+ map<int,int> tests = haplo->makeTestSet(m1,a1);
+ set<int> hs;
+ map<int,int>::iterator i1 = tests.begin();
+ while ( i1 != tests.end() )
+ {
+ if ( i1->second == 0 )
+ hs.insert( i1->first);
+ ++i1;
+ }
+
+ haplo->calculateEmpiricalVariance(hs);
+
+
+ int con[4][4];
+ for (int j=0;j<4;j++)
+ for (int k=0; k<4; k++)
+ con[j][k] = 0;
+
+
+ if ( par::proxy_record_dosage )
+ OUTFILE << locus[l]->name << "\t"
+ << locus[l]->allele1 << "\t"
+ << locus[l]->allele2 << "\t"
+ << haplo->ratio << "\t";
+
+ int cnt = 0;
+ for ( int i = 0 ; i < n ; i++ )
+ if ( ! sample[i]->missing )
+ {
+
+ // Actual genotypes
+
+ bool a1 = tmp1[cnt];
+ bool a2 = tmp2[cnt++];
+
+ int og;
+
+ if ( a1 )
+ {
+ if ( a2 )
+ og = 2;
+ else
+ og = 3;
+ }
+ else
+ {
+ if ( a2 )
+ og = 1;
+ else
+ og = 0;
+ }
+
+ // Imputed genotypes
+
+ vector_t g = haplo->imputeGenotype(i,ref);
+
+ // Call imputed
+
+ bool i1, i2;
+
+ if ( g[0] > par::proxy_impute_threshold )
+ {
+ i1 = i2 = false;
+ con[og][0]++;
+ }
+ else if ( g[1] > par::proxy_impute_threshold )
+ {
+ i1 = false;
+ i2 = true;
+ con[og][1]++;
+ }
+ else if ( g[2] > par::proxy_impute_threshold )
+ {
+ i1 = i2 = true;
+ con[og][2]++;
+ }
+ else
+ {
+ i1 = true; i2 = false;
+ con[og][3]++;
+ }
+
+ if ( par::proxy_full_report )
+ haplo->HTEST << locus[l]->name << "\t"
+ << sample[i]->fid << " "
+ << sample[i]->iid << "\t"
+ << a1<<a2 << " "
+ << i1<<i2 << " "
+ << g[0] << " "
+ << g[1] << " "
+ << g[2] << "\n";
+
+ if ( par::proxy_record_dosage )
+ OUTFILE << g[0] + 0.5 * g[1] << "\t";
+
+
+ ////////////////////////////////////////////////
+ // Replace missing slots with imputed genotypes
+
+ // Note: because genotyping rate is pre-calculated, this
+ // will not be a problem in practice (as long as panel is
+ // sufficiently large) -- i.e. in smaller cases, we need
+ // to worry about whether this imputed SNP now might be
+ // used as a proxy for another SNP -- wouldn't be terrible,
+ // but probably better to try to use real SNPs as
+ // proxies whenever possible.
+
+// if ( ! par::proxy_impute_preserve_genotyped )
+// {
+ if ( par::proxy_impute_replace || ( a1 && ! a2 ) )
+ {
+ SNP[l]->one[i] = i1;
+ SNP[l]->two[i] = i2;
+ }
+ else
+ {
+ SNP[l]->one[i] = a1;
+ SNP[l]->two[i] = a2;
+ }
+// }
+ }
+
+
+ if ( par::proxy_record_dosage )
+ OUTFILE << "\n";
+
+ // Report matrix of concordance
+
+ int total = 0;
+ for (int j=0;j<4;j++)
+ for (int k=0;k<4;k++)
+ total += con[j][k];
+
+ int concordant = con[0][0] + con[1][1] + con[2][2];
+
+ int both_geno = 0;
+ for (int j=0;j<3;j++)
+ for (int k=0;k<3;k++)
+ both_geno += con[j][k];
+
+ int observed_geno = total;
+ for (int j=0; j<4; j++)
+ observed_geno -= con[3][j];
+
+ int imputed_geno = total;
+ for (int j=0; j<4; j++)
+ imputed_geno -= con[j][3];
+
+ double rate = both_geno == 0 ?
+ -1 :
+ (double)concordant/(double)both_geno ;
+
+ double rate_obs = (double)observed_geno/(double)total;
+ double rate_imp = (double)imputed_geno/(double)total;
+ double rate_ovr = (double)both_geno/(double)total;
+
+ if ( par::proxy_full_report )
+ {
+ haplo->HTEST << "\nImputation matrix "
+ << "(rows observed, columns imputed)\n\n";
+
+ for (int j=0;j<4;j++)
+ {
+
+ if ( j == 0 )
+ haplo->HTEST << locus[l]->allele1 << "/"
+ << locus[l]->allele1 << "\t";
+ else if ( j == 1 )
+ haplo->HTEST << locus[l]->allele1 << "/"
+ << locus[l]->allele2 << "\t";
+ else if ( j == 2 )
+ haplo->HTEST << locus[l]->allele2 << "/"
+ << locus[l]->allele2 << "\t";
+ else
+ haplo->HTEST << par::missing_genotype << "/"
+ << par::missing_genotype << "\t";
+
+ for (int k=0; k<4; k++)
+ haplo->HTEST << con[j][k] << "\t";
+
+ haplo->HTEST << "\n";
+
+ }
+
+ haplo->HTEST << setw(4) << "CHR" << " "
+ << setw(par::pp_maxsnp) << "SNP" << " "
+ << setw(8) << "INFO" << " "
+ << setw(4) << "NPRX" << " "
+ << setw(8) << "TOTAL_N" << " "
+ << setw(8) << "OBSERVD" << " "
+ << setw(8) << "IMPUTED" << " "
+ << setw(8) << "OVERLAP" << " "
+ << setw(8) << "CONCORD" << " ";
+
+ if ( par::proxy_impute_genotypic_concordance )
+ haplo->HTEST << setw(8) << "F_AA" << " "
+ << setw(8) << "I_AA" << " "
+ << setw(8) << "C_AA" << " "
+ << setw(8) << "F_AB" << " "
+ << setw(8) << "I_AB" << " "
+ << setw(8) << "C_AB" << " "
+ << setw(8) << "F_BB" << " "
+ << setw(8) << "I_BB" << " "
+ << setw(8) << "C_BB" << " ";
+
+ if ( par::proxy_list_proxies )
+ haplo->HTEST << "SNPS";
+
+ haplo->HTEST << "\n";
+
+ }
+
+
+ haplo->HTEST << setw(4) << locus[l]->chr << " "
+ << setw(par::pp_maxsnp) << locus[l]->name << " "
+ << setw(4) << proxyHaplotypePlusSNP.size() - 1 << " "
+ << setw(8) << haplo->ratio << " "
+ << setw(8) << total << " "
+ << setw(8) << rate_obs << " "
+ << setw(8) << rate_imp << " "
+ << setw(8) << rate_ovr << " ";
+
+ if ( rate >= 0 )
+ haplo->HTEST << setw(8) << rate << " ";
+ else
+ haplo->HTEST << setw(8) << "NA" << " ";
+
+ if ( par::proxy_impute_genotypic_concordance )
+ {
+
+ int impAA = con[0][0]+con[0][1]+con[0][2];
+ int impAB = con[1][0]+con[1][1]+con[1][2];
+ int impBB = con[2][0]+con[2][1]+con[2][2];
+
+ double fAA = (double)(con[0][0]+con[1][0]+con[2][0]) / (double)observed_geno;
+ double fAB = (double)(con[0][1]+con[1][1]+con[2][1]) / (double)observed_geno;
+ double fBB = (double)(con[0][2]+con[1][2]+con[2][2]) / (double)observed_geno;
+
+ if ( observed_geno == 0 )
+ fAA = fAB = fBB = 0;
+
+ int obsAA = impAA+con[0][3];
+ int obsAB = impAB+con[1][3];
+ int obsBB = impBB+con[2][3];
+
+ haplo->HTEST << setw(8) << fAA << " ";
+
+ if ( obsAA == 0 )
+ haplo->HTEST << setw(8) << "NA" << " ";
+ else
+ haplo->HTEST << setw(8) << (double)(impAA)/(double)(impAA+con[0][3]) << " ";
+
+ if ( impAA==0 )
+ haplo->HTEST << setw(8) << "NA" << " ";
+ else
+ haplo->HTEST << setw(8) << (double)(con[0][0])/(double)impAA << " ";
+
+ haplo->HTEST << setw(8) << fAB << " ";
+ if ( obsAB == 0 )
+ haplo->HTEST << setw(8) << "NA" << " ";
+ else
+ haplo->HTEST << setw(8) << (double)(impAB)/(double)(impAB+con[1][3]) << " ";
+ if ( impAB==0 )
+ haplo->HTEST << setw(8) << "NA" << " ";
+ else
+ haplo->HTEST << setw(8) << (double)(con[1][1])/(double)impAB << " ";
+
+ haplo->HTEST << setw(8) << fBB << " ";
+ if ( obsBB == 0 )
+ haplo->HTEST << setw(8) << "NA" << " ";
+ else
+ haplo->HTEST << setw(8) << (double)(impBB)/(double)(impBB+con[2][3]) << " ";
+ if ( impBB==0 )
+ haplo->HTEST << setw(8) << "NA" << " ";
+ else
+ haplo->HTEST << setw(8) << (double)(con[2][2])/(double)impBB << " ";
+
+ }
+
+
+ if ( par::proxy_list_proxies )
+ {
+ bool printed = false;
+ for (int l0=0; l0< proxyHaplotypePlusSNP.size(); l0++)
+ {
+ if ( proxyHaplotypePlusSNP[ l0 ] != l )
+ {
+ if ( printed )
+ haplo->HTEST << "|";
+ haplo->HTEST << locus[ proxyHaplotypePlusSNP[ l0 ] ]->name;
+ printed = true;
+
+ }
+ }
+ }
+
+
+ haplo->HTEST << endl;
+
+ tmp1.clear();
+ tmp2.clear();
+
+ return;
+ }
+
+
+
+
+ ////////////////////////////////////////////////////////////////
+ // //
+ // Consider all subsets of subhaplotypes, if in verbose mode //
+ // //
+ ////////////////////////////////////////////////////////////////
+
+
+ if ( ! ( par::proxy_all || par::proxy_full_report ) )
+ printLOG("Estimated haplotype frequencies: now considering combinations...\n");
+
+
+
+
+ ////////////////////////////////////////////////////
+ // Display haplotype frequencies and individual r^2
+
+ if ( ( ! par::proxy_all ) || par::proxy_full_report )
+ haplo->HTEST << "\n"
+ << " *** Proxy haplotype association report for "
+ << haplo->hname << " *** \n\n";
+
+
+ ////////////////////////////////////////
+ // Report SNPs and single-SNP r-squared
+
+ if ( ( ! par::proxy_all) || par::proxy_full_report )
+ {
+ haplo->HTEST << setw(par::pp_maxsnp) << "SNP" << " "
+ << setw(8) << "MAF" << " "
+ << setw(8) << "GENO" << " "
+ << setw(8) << "KB" << " "
+ << setw(8) << "RSQ" << " ";
+ if ( par::qt )
+ haplo->HTEST << setw(8) << "BETA" << " "
+ << setw(8) << "STAT" << " "
+ << setw(8) << "P" << "\n";
+ else
+ haplo->HTEST << setw(8) << "OR" << " "
+ << setw(8) << "CHISQ" << " "
+ << setw(8) << "P" << "\n";
+
+
+ for ( int s = 0 ; s < cnt ; s++)
+ {
+ haplo->HTEST << setw(par::pp_maxsnp)
+ << locus[ haplo->new_pred_locus[0][s] ]->name << " "
+ << setw(8)
+ << locus[ haplo->new_pred_locus[0][s] ]->freq << " "
+ << setw(8)
+ << locus[ haplo->new_pred_locus[0][s] ]->pos << " "
+ << setw(8)
+ << (double)(locus[ haplo->new_pred_locus[0][s]]->bp -
+ locus[ haplo->new_pred_locus[0][ref]]->bp)/1000<<" ";
+
+ // R-squared
+
+ if ( s == ref )
+ haplo->HTEST << setw(8) << "*" << " ";
+ else
+ haplo->HTEST << setw(8) << haplo->rsq_internal(s,ref) << " ";
+
+ // Single SNP association
+
+ boolvec_t snpmask(cnt,false);
+ boolvec_t dummy_allele(cnt,true);
+ snpmask[s] = true;
+
+ haplo->testSet = haplo->makeTestSet(snpmask,dummy_allele);
+
+ if (par::proxy_CC)
+ {
+ // GLM or standard test?
+ if ( par::proxy_glm )
+ {
+ glmAssoc(false,*pperm);
+ }
+ else
+ {
+ if ( par::qt )
+ haplo->haplotypicQTL(haplo->testSet,2,false);
+ else
+ haplo->haplotypicCC(haplo->testSet,2,false);
+ }
+ }
+ else if (par::proxy_TDT)
+ {
+
+ haplo->subhaplotypes = true;
+ haplo->downcoding = haplo->testSet;
+
+ haplo->trans.clear();
+ haplo->untrans.clear();
+
+ haplo->trans.resize(2,0);
+ haplo->untrans.resize(2,0);
+ haplo->nt = 2;
+
+ // First rescore transmissions
+
+ for (int i=0; i<n; i++)
+ {
+ if ( (!sample[i]->founder) &&
+ haplo->include[i] )
+ {
+ haplo->transmissionCount(i,haplo->phasemap[i]);
+ }
+ }
+
+
+ // Then recount T:U based on revised transmission counts
+
+ haplo->haplotypicTDT(haplo->testSet,2,false);
+
+ haplo->subhaplotypes = false;
+ haplo->downcoding.clear();
+
+ }
+
+ // Recover main results from Model, if GLM used
+ if ( par::proxy_glm )
+ {
+ vector_t coef = model->getCoefs();
+ haplo->odds = par::bt ? exp(coef[1]) : coef[1];
+ haplo->result = model->isValid() ? model->getStatistic() : 0;
+ haplo->pvalue = par::bt ? chiprobP(haplo->result,1) : ((LinearModel*)model)->getPValue();
+ delete model;
+ }
+
+ haplo->HTEST << setw(8) << haplo->odds << " "
+ << setw(8) << haplo->result << " "
+ << setw(8) << haplo->pvalue << "\n";
+
+ }
+
+ haplo->HTEST << "\n\n";
+ }
+
+
+ ///////////////////////////////////////////////////
+ // Report haplotypes and frequencies, and also all
+ // haplotype-specific tests and omnibus test result
+
+ if ( ( ! par::proxy_all ) || par::proxy_full_report )
+ {
+ string str = "";
+ for ( int i = 0; i < cnt ; i++)
+ if ( i == ref )
+ str += "*";
+ else
+ str += ".";
+
+ haplo->HTEST << setw(14) << str << " "
+ << setw(10) << "FREQ" << " ";
+ if ( par::qt )
+ haplo->HTEST << setw(8) << "BETA" << " "
+ << setw(8) << "STAT" << " "
+ << setw(8) << "P" << "\n";
+ else
+ haplo->HTEST << setw(10) << "OR" << " "
+ << setw(10) << "CHISQ" << " "
+ << setw(10) << "P" << "\n";
+
+ for (int h=0; h< haplo->nh; h++)
+ {
+ if (haplo->f[h] >= par::proxy_mhf )
+ {
+ haplo->HTEST << setw(14) << haplo->haplotypeName(h) << " "
+ << setw(10) << haplo->f[h] << " ";
+
+
+ haplo->testSet.clear();
+
+ for (int h2=0; h2 < haplo->nh; h2++)
+ {
+ if ( haplo->f[h2] >= par::proxy_mhf)
+ {
+ if (h==h2)
+ {
+ haplo->testSet.insert(make_pair(h2,0));
+ }
+ else
+ {
+ haplo->testSet.insert(make_pair(h2,1));
+ }
+ }
+ }
+
+ if (par::proxy_CC)
+ {
+ // GLM or standard test?
+ if ( par::proxy_glm )
+ {
+ glmAssoc(false,*pperm);
+ }
+ else
+ {
+
+ if ( par::qt)
+ haplo->haplotypicQTL(haplo->testSet,2,false);
+ else
+ haplo->haplotypicCC(haplo->testSet,2,false);
+ }
+ }
+ else if (par::proxy_TDT)
+ {
+
+ haplo->subhaplotypes = true;
+ haplo->downcoding = haplo->testSet;
+
+ haplo->trans.clear();
+ haplo->untrans.clear();
+
+ haplo->trans.resize(2,0);
+ haplo->untrans.resize(2,0);
+ haplo->nt = 2;
+
+ // First rescore transmissions
+
+ for (int i=0; i<n; i++)
+ {
+ if ( (!sample[i]->founder) &&
+ haplo->include[i] )
+ {
+ haplo->transmissionCount(i,haplo->phasemap[i]);
+ }
+ }
+
+ // Then recount T:U based on revised transmission counts
+
+ haplo->haplotypicTDT(haplo->testSet,2,false);
+
+ haplo->subhaplotypes = false;
+ haplo->downcoding.clear();
+
+
+ }
+
+ // Recover main results from Model, if GLM used
+ if ( par::proxy_glm )
+ {
+ vector_t coef = model->getCoefs();
+ haplo->odds = par::bt ? exp(coef[1]) : coef[1];
+ haplo->result = model->isValid() ? model->getStatistic() : 0;
+ haplo->pvalue = par::bt ? chiprobP(haplo->result,1) : ((LinearModel*)model)->getPValue();
+ delete model;
+ }
+
+ haplo->HTEST << setw(10) << haplo->odds << " "
+ << setw(10) << haplo->result << " "
+ << setw(10) << haplo->pvalue << "\n";
+
+ }
+ }
+
+
+ haplo->HTEST << "\nHaplotype frequency estimation based on "
+ << haplo->validN;
+
+ if ( haplo->X )
+ {
+ int found_chr = 0;
+ for (int i=0; i<n; i++)
+ if ( sample[i]->founder )
+ {
+ if ( sample[i]->sex )
+ found_chr++;
+ else
+ found_chr+=2;
+ }
+ haplo->HTEST << " of " << found_chr << " founder chromosomes\n";
+ }
+ else if ( haplo->haploid )
+ haplo->HTEST << " of " << haplo->cnt_f << " founder chromosomes\n";
+ else
+ haplo->HTEST << " of " << haplo->cnt_f * 2 << " founder chromosomes\n";
+
+
+
+ ///////////////////////////////////
+ // Omnibus test: C/C only
+
+ if ( par::proxy_CC && ! par::qt )
+ {
+
+ map<int,int> tests;
+ int nch=0;
+ for (int h=0; h < haplo->nh; h++)
+ if ( haplo->f[h] >= par::proxy_mhf)
+ tests.insert(make_pair(h,nch++));
+
+ if (nch>2)
+ {
+ haplo->haplotypicCC(tests,nch,false);
+
+ haplo->HTEST << "Omnibus haplotype test statistic: "
+ << haplo->result << ", df = " << nch-1 << ", "
+ << "p = "
+ << chiprobP( haplo->result , nch-1 ) << "\n\n";
+ }
+
+ }
+ }
+
+
+
+ ///////////////////////////////////////////
+ // Create masks
+
+ // Reference SNP (fix here)
+ boolvec_t m1(cnt,false);
+ m1[ref] = true;
+ boolvec_t a1(cnt,true);
+
+ // Proxy haplotype (populated below)
+ boolvec_t m2(cnt,false);
+ boolvec_t a2(cnt);
+
+
+ ///////////////////////////////////////////
+ // Report actual SNP //
+ ///////////////////////////////////////////
+
+ haplo->testSet = haplo->makeTestSet(m1,a1);
+
+ if (par::proxy_CC)
+ {
+ // GLM or standard test?
+ if ( par::proxy_glm )
+ {
+ glmAssoc(false,*pperm);
+ }
+ else
+ {
+ if ( par::qt )
+ haplo->haplotypicQTL(haplo->testSet,2,false);
+ else
+ haplo->haplotypicCC(haplo->testSet,2,false);
+ }
+ }
+ else if (par::proxy_TDT)
+ {
+
+ haplo->subhaplotypes = true;
+ haplo->downcoding = haplo->testSet;
+
+ haplo->trans.clear();
+ haplo->untrans.clear();
+
+ haplo->trans.resize(2,0);
+ haplo->untrans.resize(2,0);
+ haplo->nt = 2;
+
+ // First rescore transmissions
+
+ for (int i=0; i<n; i++)
+ {
+ if ( (!sample[i]->founder) &&
+ haplo->include[i] )
+ {
+ haplo->transmissionCount(i,haplo->phasemap[i]);
+ }
+ }
+
+
+ // Then recount T:U based on revised transmission counts
+
+ haplo->haplotypicTDT(haplo->testSet,2,false);
+
+ haplo->subhaplotypes = false;
+ haplo->downcoding.clear();
+
+ // Also, calculate the information score
+ set<int> t1 = haplo->makeSetFromMap(haplo->testSet);
+ haplo->calculateEmpiricalVariance(t1);
+
+ }
+
+
+ // Recover main results from Model, if GLM used
+ if ( par::proxy_glm )
+ {
+ vector_t coef = model->getCoefs();
+ haplo->odds = par::bt ? exp(coef[1]) : coef[1];
+ haplo->result = model->isValid() ? model->getStatistic() : 0;
+ haplo->pvalue = par::bt ? chiprobP(haplo->result,1) : ((LinearModel*)model)->getPValue();
+ delete model;
+
+ // Also, calculate the information score
+ set<int> t1 = haplo->makeSetFromMap(haplo->testSet);
+ haplo->calculateEmpiricalVariance(t1);
+ }
+
+
+ //////////////////////////////////////////////////////////////////////
+ //
+ // Just report the single SNP result (based on haplotype test)?
+ //
+ //////////////////////////////////////////////////////////////////////
+
+ if ( par::proxy_all && ( ! par::proxy_full_report ) )
+ {
+
+
+
+ haplo->HTEST << setw( 4 ) << locus[l]->chr << " "
+ << setw(par::pp_maxsnp) << locus[l]->name << " "
+ << setw(12) << locus[l]->bp << " "
+ << setw(4) << locus[l]->allele1 << " "
+ << setw(4) << locus[l]->allele2 << " "
+ << setw(10) << locus[l]->pos << " "
+ << setw(4) << proxyHaplotypePlusSNP.size() - 1 << " "
+ << setw(8) << haplo->ratio << " ";
+
+ // Display C/C or T/U for case/control and TDT, else F and BETA
+
+ if ( haplo->pvalue < -1 )
+ {
+ // Not a valid test, e.g. monomorphic, and so p value is returned as -9
+ if ( par::qt || par::proxy_glm )
+ haplo->HTEST << setw(8) << "NA" << " "
+ << setw(8) << "NA" << " "
+ << setw(8) << "NA" << " ";
+ else
+ haplo->HTEST << setw(8) << "NA" << " "
+ << setw(8) << "NA" << " "
+ << setw(8) << "NA" << " "
+ << setw(8) << "NA" << " ";
+ }
+ else
+ {
+
+ if ( par::qt || par::proxy_glm )
+ {
+ // Get population haplotype frequency for imputed SNP
+ double f = haplo->freq(m1,a1);
+
+ haplo->HTEST << setw(8) << f << " "
+ << setw(8) << haplo->odds << " ";
+ }
+ else
+ haplo->HTEST << setw(8) << haplo->case_freq << " "
+ << setw(8) << haplo->control_freq << " "
+ << setw(8) << haplo->odds << " ";
+
+ haplo->HTEST << setw(10) << haplo->pvalue << " ";
+ }
+
+
+ if ( par::proxy_list_proxies )
+ {
+ bool printed = false;
+ for (int l0=0; l0< proxyHaplotypePlusSNP.size(); l0++)
+ {
+ if ( proxyHaplotypePlusSNP[ l0 ] != l )
+ {
+ if ( printed )
+ haplo->HTEST << "|";
+ haplo->HTEST << locus[ proxyHaplotypePlusSNP[ l0 ] ]->name;
+ printed = true;
+ }
+ }
+ }
+
+
+ haplo->HTEST << endl;
+
+ return;
+ }
+
+
+
+
+
+
+ ////////////////////////////////////////////////////////////////////////
+ //
+ // Rest of this function is for the extended report mode
+ //
+ ////////////////////////////////////////////////////////////////////////
+
+
+
+ ///////////////////////////////////////////
+ // Consider from cnt-1 to 1 SNP haplotypes
+
+ int search_cnt = par::proxy_include_reference ? cnt : cnt - 1;
+ int maxsnp = search_cnt < par::proxy_maxhap ? search_cnt : par::proxy_maxhap;
+
+ int num_subhaps_total = 0;
+ int num_subhaps_valid = 0;
+
+ set<ProxyResult> presults;
+
+ for (int i=1; i<=maxsnp; i++)
+ {
+
+ // For an i-SNP of cnt-SNP, consider all the permutations
+
+ vector<unsigned long> pos1(i);
+ vector<int> d(search_cnt);
+ int i2=0;
+ for (int z=0; z<cnt; z++)
+ if ( z != ref )
+ {
+ d[i2]=i2;
+ i2++;
+ }
+
+ vector<vector<int> > collection;
+ combinations_recursive(d,i,pos1,0,0,collection);
+
+ vector<int> mapback;
+ for (int s=0;s<cnt;s++)
+ if ( par::proxy_include_reference || s != ref )
+ mapback.push_back(s);
+
+ // Consider each combination of i of cnt-1 SNP haplotypes
+ // then consider each possible haplotype within that
+
+ for (int c1 = 0; c1 < collection.size() ; c1++ )
+ {
+
+ // For biallelic haplotypes (SNPs)
+ // skip the second allele
+
+ // Which SNPs do we want to look up?
+
+ vector<int> posit = collection[c1];
+
+ // Now consider all search_cnt SNP haplotypes
+ // i.e. excluding reference SNP
+
+ int hapcnt = (int)pow((double)2,i);
+ int h = 0;
+ while ( h < hapcnt )
+ {
+
+ // Skip redundant second allele of
+ // SNPs
+
+ if ( collection[c1].size() == 1 &&
+ h == 1 )
+ {
+ h++;
+ continue;
+ }
+
+
+ vector<bool> tmp;
+ unsigned int p=1;
+ for (int s=0;s<i;s++)
+ {
+ if ( h & p ) tmp.push_back(false);
+ else tmp.push_back(true);
+ p <<= 1;
+ }
+
+
+ ///////////////////////
+ // Set proxy haplotype
+
+ // Insert i-SNP haplotype back into
+ // full cnt-SNP space
+
+ fill(m2.begin(), m2.end(), false);
+
+ int t1=0;
+ for (int t2=0; t2<i; t2++)
+ {
+ m2[ mapback[ posit[t2] ] ] = true;
+ a2[ mapback[ posit[t2] ] ] = tmp[ t2 ];
+ }
+
+
+ ////////////////////////////////
+ // Check frequency of haplotype
+
+ double f = haplo->freq(m2,a2);
+
+
+ ////////////////////////////////////////////////////////////
+ // Calculate r^2 between haplotype and reference SNP allele
+
+ double r2 = haplo->rsq_internal(m1,a1,m2,a2);
+
+
+
+ ////////////////////////////////////////////
+ // Is this haplotype not worth considering?
+
+
+ ++num_subhaps_total;
+
+ if ( r2 < par::proxy_r2 ||
+ f < par::proxy_mhf )
+ {
+ h++;
+ continue;
+ }
+
+
+ ++num_subhaps_valid;
+
+
+ ////////////////////////////////////////////////////////////
+ // Calculte association between proxy haplotype and disease
+
+ // If only two haplotypes, report only 1
+ // (note may only be two *common* haplotypes, but
+ // in that case, we should report both
+
+ haplo->testSet = haplo->makeTestSet(m2,a2);
+
+ if (par::proxy_CC)
+ {
+ // GLM or standard test?
+ if ( par::proxy_glm )
+ {
+ glmAssoc(false,*pperm);
+ }
+ else
+ {
+ if ( par::qt )
+ haplo->haplotypicQTL(haplo->testSet,2,false);
+ else
+ haplo->haplotypicCC(haplo->testSet,2,false);
+ }
+ }
+ else if (par::proxy_TDT)
+ {
+
+ haplo->subhaplotypes = true;
+ haplo->downcoding = haplo->testSet;
+
+ haplo->trans.clear();
+ haplo->untrans.clear();
+
+ haplo->trans.resize(2,0);
+ haplo->untrans.resize(2,0);
+ haplo->nt = 2;
+
+ // First rescore transmissions
+
+ for (int i=0; i<n; i++)
+ {
+ if ( (!sample[i]->founder) &&
+ haplo->include[i] )
+ {
+ haplo->transmissionCount(i,haplo->phasemap[i]);
+ }
+ }
+
+ // Then recount T:U based on revised transmission counts
+
+ haplo->haplotypicTDT(haplo->testSet,2,false);
+
+ haplo->subhaplotypes = false;
+ haplo->downcoding.clear();
+ }
+
+ string str = par::proxy_include_reference ?
+ haplo->getSubHaplotypeName(m2,a2,-1) :
+ haplo->getSubHaplotypeName(m2,a2,ref);
+
+
+ // Recover main results from Model, if GLM used
+ if ( par::proxy_glm )
+ {
+ vector_t coef = model->getCoefs();
+ haplo->odds = par::bt ? exp(coef[1]) : coef[1];
+ haplo->result = model->isValid() ? model->getStatistic() : 0;
+ haplo->pvalue = par::bt ? chiprobP(haplo->result,1) : ((LinearModel*)model)->getPValue();
+ delete model;
+ }
+
+
+ //////////////////////
+ // Store this result
+
+ ProxyResult r(str,f,r2,
+ haplo->odds,
+ haplo->result,
+ haplo->pvalue);
+ presults.insert(r);
+
+ // Consider next haplotype
+
+ h++;
+
+ }
+ }
+ }
+
+
+ haplo->HTEST << "Of " << num_subhaps_total << " subhaplotypes considered, "
+ << num_subhaps_valid << " met proxy criteria\n\n";
+
+ // Report results
+
+ if ( presults.size() == 0 )
+ haplo->HTEST << "No proxies found above r-sq " << par::proxy_r2 << "\n";
+ else
+ {
+
+ haplo->HTEST << setw(14) << "HAP" << " "
+ << setw(10) << "FREQ" << " "
+ << setw(10) << "RSQ" << " ";
+ if ( par::qt )
+ haplo->HTEST << setw(8) << "BETA" << " "
+ << setw(8) << "STAT" << " "
+ << setw(8) << "P" << "\n";
+ else
+ haplo->HTEST << setw(10) << "OR" << " "
+ << setw(10) << "CHISQ" << " "
+ << setw(10) << "P" << "\n";
+
+
+ set<ProxyResult>::iterator i = presults.begin();
+
+ while ( i != presults.end() )
+ {
+ haplo->HTEST << setw(14) << i->name << " "
+ << setw(10) << i->f << " "
+ << setw(10) << i->r2 << " "
+ << setw(10) << i->odds << " "
+ << setw(10) << i->chisq << " "
+ << setw(10) << i->pvalue << "\n";
+ i++;
+ }
+ }
+
+ if ( par::proxy_full_report )
+ haplo->HTEST << "\n+--------------------------------------------------------------------+\n\n";
+
+ return;
+
+}
+
+
More information about the debian-med-commit
mailing list