[med-svn] r1726 - in trunk/packages/plink/trunk: . debian
smoe-guest at alioth.debian.org
smoe-guest at alioth.debian.org
Wed Apr 9 10:09:23 UTC 2008
Author: smoe-guest
Date: 2008-04-09 10:09:22 +0000 (Wed, 09 Apr 2008)
New Revision: 1726
Added:
trunk/packages/plink/trunk/debian/examples
trunk/packages/plink/trunk/debian/manpages
Removed:
trunk/packages/plink/trunk/clumpld.cpp
trunk/packages/plink/trunk/debian/manpage.sgml.ex
trunk/packages/plink/trunk/debian/manpage.xml.ex
trunk/packages/plink/trunk/debian/menu.ex
trunk/packages/plink/trunk/debian/plink-default.ex
trunk/packages/plink/trunk/debian/plink.doc-base.EX
trunk/packages/plink/trunk/filters.cpp
trunk/packages/plink/trunk/fisher.cpp
trunk/packages/plink/trunk/nlist.cpp
trunk/packages/plink/trunk/parse.cpp
trunk/packages/plink/trunk/perm.cpp
trunk/packages/plink/trunk/plink.log
trunk/packages/plink/trunk/prephap.cpp
trunk/packages/plink/trunk/proxy.cpp
Modified:
trunk/packages/plink/trunk/debian/README.Debian
trunk/packages/plink/trunk/debian/control
trunk/packages/plink/trunk/debian/rules
Log:
Series of cleanups.
Deleted: trunk/packages/plink/trunk/clumpld.cpp
===================================================================
--- trunk/packages/plink/trunk/clumpld.cpp 2008-04-09 09:43:11 UTC (rev 1725)
+++ trunk/packages/plink/trunk/clumpld.cpp 2008-04-09 10:09:22 UTC (rev 1726)
@@ -1,811 +0,0 @@
-
-
-//////////////////////////////////////////////////////////////////
-// //
-// PLINK (c) 2005-2007 Shaun Purcell //
-// //
-// This file is distributed under the GNU General Public //
-// License, Version 2. Please see the file COPYING for more //
-// details //
-// //
-//////////////////////////////////////////////////////////////////
-
-
-#include <algorithm>
-#include <iostream>
-#include <iomanip>
-#include <fstream>
-#include <sstream>
-
-#include "clumpld.h"
-#include "phase.h"
-#include "options.h"
-#include "plink.h"
-
-
-//////////////////////////////////////////////////
-// Set user-defined parameters within constructor
-
-clump_LD::clump_LD(Plink * pp,
- HaploPhase * hp_,
- double sig,
- double dist,
- double secondp,
- float r2c)
-{
- P = pp;
- hp = hp_;
- pval_cutoff = sig;
- ld_distance = dist;
- second_pval_cutoff = secondp;
- r2_cutoff = r2c;
-
-}
-
-
-/////////////
-// accessors
-
-void clump_LD::set_pval( double sig ){
- pval_cutoff = sig;
-}
-
-void clump_LD::set_second_pval( double secondp ){
- second_pval_cutoff = secondp;
-}
-
-void clump_LD::set_ld( double dist ){
- ld_distance = dist;
-}
-
-void clump_LD::set_r2( double r2c ){
- r2_cutoff = r2c;
-}
-
-
-//////////////////////////////////////////////////
-// read association file and pull out significant
-// results sort by pvalue
-
-vector<ResultTrio> clump_LD::read_assoc_file(string fileList)
-{
-
- vector<ResultTrio> sp;
-
- // We may be reading a single file, or more than one file
- fileList = searchAndReplace(fileList,","," ");
-
- // Tokenize
- filename.clear();
- string buf;
- stringstream ss(fileList);
- while (ss >> buf)
- filename.push_back(buf);
-
- // Read each file
-
- for (int f=0; f<filename.size(); f++)
- {
-
- checkFileExists(filename[f]);
- ifstream RESIN( filename[f].c_str(), ios::in );
-
- int snp_column = -1;
- int pval_column = -1;
- vector<int> annot_field;
-
- // Get header row
-
- char cline[par::MAX_LINE_LENGTH];
- RESIN.getline(cline,par::MAX_LINE_LENGTH,'\n');
-
- string sline = cline;
- if (sline=="")
- error("Problem reading [ " + par::clumpld_results + " ]\n");
-
- vector<string> tok_annot;
-
- if ( par::clumpld_annot )
- {
- string afields = searchAndReplace(par::clumpld_annot_fields,","," ");
- string buf;
- stringstream ss(afields);
- while (ss >> buf)
- tok_annot.push_back(buf);
- }
-
- string buf;
- stringstream ss(sline);
- vector<string> tokens;
- while (ss >> buf)
- tokens.push_back(buf);
-
- for (int i=0; i<tokens.size(); i++)
- {
- if ( tokens[i] == "SNP" )
- snp_column = i;
-
- if ( tokens[i] == par::clumpld_column )
- pval_column = i;
-
- if ( par::clumpld_annot )
- for ( int f=0; f<tok_annot.size(); f++)
- if ( tokens[i] == tok_annot[f] )
- annot_field.push_back(i);
- }
-
-
- //////////////////////////////////////
- // Did we find the necessary columns?
-
- if ( snp_column < 0 || pval_column < 0 )
- {
- error("Could not find [" + par::clumpld_column
- + "] as given in --clump-field\n");
- }
-
- P->printLOG("Reading results for clumping from [ "
- + filename[f] + " ]\n");
- P->printLOG("Extracting fields SNP and "
- + par::clumpld_column + "\n");
-
- while (!RESIN.eof())
- {
-
- char cline[par::MAX_LINE_LENGTH];
- RESIN.getline(cline,par::MAX_LINE_LENGTH,'\n');
-
- string sline = cline;
- if (sline=="")
- continue;
-
- string buf;
- stringstream ss(sline);
- vector<string> tokens;
- while (ss >> buf)
- tokens.push_back(buf);
-
- if ( tokens.size() <= snp_column ||
- tokens.size() <= pval_column )
- continue;
-
- ResultTrio pt;
-
- // Keep track of which file this is from
- pt.f = f+1;
-
- if ( ! from_string<double>( pt.p, tokens[pval_column] , std::dec))
- continue;
-
- pt.s = tokens[snp_column];
-
- // Create clumped vector, based just on SNP name
-
- if( pt.p <= pval_cutoff )
- clumped.insert(make_pair(pt.s,false));
-
- ClumpPair cp;
- cp.snp = pt.s;
- cp.f = f+1;
-
- ClumpResults cr;
- cr.p = pt.p;
- cr.annot = "";
- if ( par::clumpld_annot )
- {
- for (int f=0; f<annot_field.size(); f++)
- {
- cr.annot += tokens[annot_field[f]];
- if ( f < annot_field.size() - 1 )
- cr.annot += ", ";
- }
- }
-
- assoc_results.insert( make_pair( cp, cr ));
- sp.push_back(pt);
-
- }
-
- RESIN.close();
-
- // Read in next results file
- }
-
- sort(sp.begin(), sp.end());
- return sp;
-}
-
-
-///////////////////////////////
-// Perform clumping operation
-
-void clump_LD::clump()
-{
-
- P->printLOG("\nWriting clumped results file to [ " +
- par::output_file_name + ".clumped ]\n");
-
- P->printLOG("Parameters for --clump:\n");
- P->printLOG(" p-value threshold for index SNPs = "
- + dbl2str(pval_cutoff) + "\n");
-
- P->printLOG(" Physical (kb) threshold for clumping = "
- + dbl2str(ld_distance/1000.0) + "\n");
-
- P->printLOG(" LD (r-squared) threshold for clumping = "
- + dbl2str(r2_cutoff) + "\n");
-
- P->printLOG(" p-value threshold for clumped SNPs = "
- + dbl2str(second_pval_cutoff) + "\n");
-
-
- if ( par::clumpld_annot )
- P->printLOG("Including annotation fields: " + par::clumpld_annot_fields +"\n");
-
- vector<ResultTrio> sp = read_assoc_file( par::clumpld_results );
-
- if ( par::clumpld_index1 )
- P->printLOG("Indexing only on [ " + filename[0] + " ]\n");
- else
- P->printLOG("Indexing on all files\n");
-
- if ( par::clumpld_only_show_replications )
- P->printLOG("Only showing cross-file clumps\n");
-
- int zero, one, two, three, four;
- map<string,int> mlocus;
- map<int2,double> grouped_snps;
-
- ofstream CLMP;
- CLMP.open( (par::output_file_name + ".clumped").c_str() , ios::out);
- CLMP.precision(3);
- CLMP.setf(ios::scientific);
-
- string vmessage;
-
- /////////////////
- // Output header
-
- if ( ! par::clumpld_verbose )
- CLMP << setw(4) << "CHR" << " "
- << setw(4) << "F" << " "
- << setw(par::pp_maxsnp) << "SNP" << " "
- << setw(10) << "BP" << " "
- << setw(8) << "P" << " "
- << setw(8) << "TOTAL" << " "
- << setw(6) << "NSIG" << " "
- << setw(6) << "S05" << " "
- << setw(6) << "S01" << " "
- << setw(6) << "S001" << " "
- << setw(6) << "S0001" << " "
- << setw(6) << "SP2" << "\n";
-
-
- //////////////////
- // Build SNP map
-
- for (int l=0; l<P->nl_all; l++)
- mlocus.insert(make_pair(P->locus[l]->name, l));
-
- if (sp.size()==0)
- {
- P->printLOG("No significant results given --clump parameters\n");
- return;
- }
-
-
-
- ////////////////////////////////////////////////
- // Iterate through all association results by
- // decreasing p-value until cutoff is reached
-
- int clumpCount = 0;
-
- for (int i = 0; i < sp.size(); i++)
- {
-
- zero=one=two=three=four=0;
-
- ////////////////////////////////////
- // End if p-value cutoff is reached
-
- if( sp[i].p > pval_cutoff )
- break;
-
-
- //////////////////////////////////
- // Skip already clumped this SNP
-
- if( clumped[sp[i].s] )
- continue;
-
-
- /////////////////////////////////////////////
- // Are we only indexing based on first file?
-
- if ( par::clumpld_index1 && sp[i].f > 1 )
- continue;
-
-
- ///////////////////////////////////////////
- // Make sure associated SNP is in SNP map
-
- map<string,int>::iterator ilocus;
- ilocus = mlocus.find(sp[i].s);
- int l = -1;
- if (ilocus != mlocus.end())
- {
- l = ilocus->second;
- }
- else
- {
-
- if ( !par::clumpld_verbose )
- CLMP << setw(4) << "NA" << " "
- << setw(4) << "NA" << " "
- << setw(par::pp_maxsnp) << sp[i].s << " "
- << setw(10) << "NA" << " "
- << setw(10) << sp[i].p << " "
- << setw(8) << "NA" << " "
- << setw(6) << "NA" << " "
- << setw(6) << "NA" << " "
- << setw(6) << "NA" << " "
- << setw(6) << "NA" << " "
- << setw(6) << "NA" << " "
- << setw(6) << "NA" << "\n";
- else
- vmessage += sp[i].s + " not found in dataset\n";
-
- continue;
- }
-
-
- ///////////////////////////////
- // Check all SNPs in LD range
-
- // Set at reference SNP, l, and move out
- // left and right (l1,l2)
-
- int l1 = l;
- int l2 = l;
-
- // If multiple files, allow for self comparison also
- if ( filename.size()>1 ) --l1;
-
- set<string> willClump;
-
- map<int,string> inPhaseAllele;
-
- while(1)
- {
-
- bool failed1 = false, failed2 = false;
-
- // Expand outwards to physical limits/SNP sets
-
- // Moving right
-
- if( l1 < P->locus.size()-1 )
- {
- // Advance a position
- l1++;
-
- double r2a = -1;
-
- // Compute r-squared if this SNP is close
- // enough to the index SNP, physically
-
- if( P->locus[l1]->chr == P->locus[l]->chr &&
- P->locus[l1]->bp - P->locus[l]->bp < ld_distance )
- r2a = hp->rsq( l, l1 );
- else
- failed1 = true;
-
-
- //////////////////////////////////
- // Skip already clumped this SNP
-
- if( par::clumpld_indep && clumped[ P->locus[l1]->name ] )
- continue;
-
- // If in LD with association result
-
- if( r2a > r2_cutoff )
- {
-
- // Record which alleles are correlated
-
- inPhaseAllele.insert(make_pair(l1, allelePairs(l,l1)));
-
-
- // Record that this SNP has been clumped
-
- willClump.insert(P->locus[l1]->name);
-
- // Now look at the assocations (in the multiple files)
- // with this SNP
-
- for (int f=1; f<=filename.size(); f++)
- {
- ClumpPair result;
- result.snp = P->locus[l1]->name;
- result.f = f;
-
- // Do not clump with self
- if ( l == l1 && f == sp[i].f )
- continue;
-
- // Result not found
- if ( assoc_results.find(result) == assoc_results.end() )
- continue;
-
- double pval = assoc_results[result].p;
-
- if( pval < second_pval_cutoff )
- {
- int2 t(l1,f);
- grouped_snps.insert(make_pair(t,r2a));
- }
-
- if( pval < .0001 )
- four++;
- else
- if( pval < .001 )
- three++;
- else
- if( pval < .01 )
- two++;
- else
- if( pval < .05 )
- one++;
- else
- zero++;
- }
- }
- }
- else
- failed1 = true;
-
-
-
- //////////////
- // Move left
-
- if( l2 > 0 )
- {
- l2--;
-
- double r2b = -1;
-
-
- if( P->locus[l2]->chr == P->locus[l]->chr &&
- P->locus[l]->bp - P->locus[l2]->bp < ld_distance )
- r2b = hp->rsq( l, l2 );
- else
- failed2 = true;
-
-
- //////////////////////////////////
- // Skip already clumped this SNP
-
- if( par::clumpld_indep && clumped[ P->locus[l2]->name ] )
- continue;
-
-
- /////////////////////////////////////
- // Does this SNP meet r-sq threshold?
-
- if( r2b > r2_cutoff )
- {
-
- // Find the allele in phase with rare allele for l
-
- inPhaseAllele.insert(make_pair(l2, allelePairs(l,l2)));
-
-
- // Record that this SNP is clumped
-
- willClump.insert(P->locus[l2]->name);
-
- // Now look at associations
-
- for (int f=1; f<=filename.size(); f++)
- {
- ClumpPair result;
- result.snp = P->locus[l2]->name;
- result.f = f;
-
- // Result not found
- if ( assoc_results.find(result) == assoc_results.end() )
- continue;
-
- double pval = assoc_results[result].p;
-
- if( pval < second_pval_cutoff )
- {
- int2 t(l2,f);
- grouped_snps.insert(make_pair(t,r2b));
- }
-
- if( pval < .0001 )
- four++;
- else
- if( pval < .001 )
- three++;
- else
- if( pval < .01 )
- two++;
- else
- if( pval < .05 )
- one++;
- else
- zero++;
- }
- }
-
- }
- else
- failed2 = true;
-
-
- // No point in looking further?
-
- if( failed1 && failed2 )
- break;
-
- }
-
-
- //////////////////////////////////////////////////////////
- // Report results
-
- // Are we only interested in cross-file clumpings?
-
- if ( par::clumpld_only_show_replications )
- {
- bool seen_replication = false;
- map<int2,double>::iterator gi = grouped_snps.begin();
- int cnt=0;
- int lastf;
- while( gi != grouped_snps.end() )
- {
- if ( cnt > 0 )
- {
- if ( gi->first.p2 != lastf )
- seen_replication = true;
- }
- lastf = gi->first.p2;
- ++cnt;
- ++gi;
- }
-
- // If no cross-file results, then do not report this
- // clump -- clear all flags for it
-
- if ( ! seen_replication )
- {
- grouped_snps.clear();
- continue;
- }
- }
-
-
- // Indicate that this SNP has now been clumped
- set<string>::iterator si = willClump.begin();
- while ( si != willClump.end() )
- {
- clumped[ *si ] = true;
- ++si;
- }
-
-
-
- int total = zero+one+two+three+four;
-
- if ( par::clumpld_verbose )
- {
- // Repeat header
- CLMP << "\n"
- << setw(4) << "CHR" << " "
- << setw(4) << "F" << " "
- << setw(par::pp_maxsnp) << "SNP" << " "
- << setw(10) << "BP" << " "
- << setw(10) << "P" << " "
- << setw(8) << "TOTAL" << " "
- << setw(6) << "NSIG" << " "
- << setw(6) << "S05" << " "
- << setw(6) << "S01" << " "
- << setw(6) << "S001" << " "
- << setw(6) << "S0001" << "\n";
- }
-
- ClumpPair cp;
- cp.snp = P->locus[l]->name;
- cp.f = sp[i].f;
-
- CLMP << setw(4) << P->locus[l]->chr << " "
- << setw(4) << cp.f << " "
- << setw(par::pp_maxsnp) << P->locus[l]->name << " "
- << setw(10) << P->locus[l]->bp << " "
- << setw(10) << assoc_results[cp].p << " "
- << setw(8) << total << " "
- << setw(6) << zero << " "
- << setw(6) << one << " "
- << setw(6) << two << " "
- << setw(6) << three << " "
- << setw(6) << four << " ";
-
- if ( par::clumpld_verbose )
- {
- // Now list one per line, sorted by distance
- CLMP << "\n";
- if ( grouped_snps.size() > 0 )
- {
-
- CLMP << "\n"
- << setw(4) << " " << " "
- << setw(4) << " " << " "
- << setw(par::pp_maxsnp) << " " << " "
- << setw(10) << "KB" << " "
- << setw(8) << "RSQ" << " "
- << setw(8) << "ALLELES" << " "
- << setw(4) << "F" << " "
- << setw(12) << "P" << " ";
- if ( par::clumpld_annot )
- CLMP << setw(12) << "ANNOT" << "\n";
- else
- CLMP << "\n";
-
- CLMP << setw(4) << " (INDEX) "
- << setw(par::pp_maxsnp) << P->locus[l]->name << " ";
-
- CLMP << setw(10) << (double)(P->locus[l]->bp - P->locus[l]->bp)/1000.0 << " ";
-
- CLMP << setw(8) << "1.000" << " "
- << setw(8) << P->locus[l]->allele1 << " "
- << setw(4) << sp[i].f << " "
- << setw(12) << assoc_results[cp].p << " ";
- if ( par::clumpld_annot )
- CLMP << setw(12) << assoc_results[cp].annot << "\n";
- else
- CLMP << "\n";
-
- CLMP << "\n";
-
-
- //////////////////////////////////////////////////
- // Track if SNP info already listed in this group
-
- set<int> infoDisplayed;
-
-
- map<int2,double>::iterator gi = grouped_snps.begin();
-
- while( gi != grouped_snps.end() )
- {
-
- int l0 = gi->first.p1;
- int f = gi->first.p2;
-
- ClumpPair cp;
- cp.snp = P->locus[l0]->name;
- cp.f = f;
-
-
- if ( assoc_results.find(cp) != assoc_results.end() )
- {
- if ( infoDisplayed.find(l0) == infoDisplayed.end() )
- {
- CLMP << setw(4) << " " << " "
- << setw(4) << " " << " "
- << setw(par::pp_maxsnp) << P->locus[l0]->name << " "
- << setw(10) << (double)(P->locus[l0]->bp - P->locus[l]->bp)/1000.0 << " ";
-
- CLMP.setf(ios::fixed);
- CLMP << setw(8) << gi->second << " ";
- CLMP.setf(ios::scientific);
-
- CLMP << setw(8) << inPhaseAllele.find(l0)->second << " "
- << setw(4) << f << " "
- << setw(12) << assoc_results[cp].p << " ";
- if ( par::clumpld_annot )
- CLMP << setw(12) << assoc_results[cp].annot << "\n";
- else
- CLMP << "\n";
-
- infoDisplayed.insert(l0);
- }
- else
- {
- CLMP << setw(4) << " " << " "
- << setw(4) << " " << " "
- << setw(par::pp_maxsnp) << " " << " "
- << setw(10) << " " << " "
- << setw(8) << " " << " "
- << setw(8) << " " << " "
- << setw(4) << f << " "
- << setw(12) << assoc_results[cp].p << " ";
- if ( par::clumpld_annot )
- CLMP << setw(12) << assoc_results[cp].annot << "\n";
- else
- CLMP << "\n";
-
- }
- }
-
- gi++;
- }
- }
- }
- else
- {
-
- // Just list of SNP names
- if( grouped_snps.size() == 0 )
- CLMP << "NONE";
-
- map<int2,double>::iterator gi = grouped_snps.begin();
- int j = 0;
- while( gi != grouped_snps.end() )
- {
- CLMP << P->locus[ gi->first.p1 ]->name << "("
- << gi->first.p2 << ")";
- if( j < grouped_snps.size()-1)
- CLMP << ",";
- j++;
- gi++;
- }
- }
-
-
- if ( par::clumpld_verbose )
- CLMP << "\n"
- << "------------------------------------------------------------------\n";
-
- CLMP << "\n";
-
- if ( ! par::silent)
- cout << "Formed " << ++clumpCount
- << " clumps of the top "
- << i << " SNPs, so far \r";
-
- grouped_snps.clear();
-
-
- // Next SNP
- }
-
-
- if ( ! par::silent)
- cout << "\n";
-
- CLMP << "\n" << vmessage << "\n";
-
- CLMP.close();
-}
-
-
-
-string clump_LD::allelePairs(int l1, int l2)
-{
-
- // The first SNP is always the index SNP
- // i.e. as we call rsq(l,l1) and rsq(l,l2) above
-
- // Identify common/common haplotype
-
- int ch = 0;
- for (int h=0; h<hp->nh; h++)
- if ( hp->haplotypeName(h) == P->locus[l1]->allele2 + P->locus[l2]->allele2 )
- ch = h;
-
- // Is D positive or negative?
- string s;
-
- if ( hp->f[ch] > (1 - P->locus[l1]->freq)*(1 - P->locus[l2]->freq) )
- s = P->locus[l1]->allele1 + P->locus[l2]->allele1 + "/" + P->locus[l1]->allele2 + P->locus[l2]->allele2;
- else
- s = P->locus[l1]->allele1 + P->locus[l2]->allele2 + "/" + P->locus[l1]->allele2 + P->locus[l2]->allele1;
-
- return s;
-}
Modified: trunk/packages/plink/trunk/debian/README.Debian
===================================================================
--- trunk/packages/plink/trunk/debian/README.Debian 2008-04-09 09:43:11 UTC (rev 1725)
+++ trunk/packages/plink/trunk/debian/README.Debian 2008-04-09 10:09:22 UTC (rev 1726)
@@ -1,6 +1,11 @@
plink for Debian
----------------
-Some more thoughts should be donated to the R plugin.
+Some more thoughts should be donated to the R plugin. It is omitted
+in the current version.
+The dependency on g++-3.3 is disturbing. g++-3.4 probably also works.
+The applied patches allow for the compilation of the sources, but I did
+not manage to help the link error. Help is welcome.
+
-- Steffen Moeller <moeller at debian.org> Tue, 08 Apr 2008 17:49:32 +0200
Modified: trunk/packages/plink/trunk/debian/control
===================================================================
--- trunk/packages/plink/trunk/debian/control 2008-04-09 09:43:11 UTC (rev 1725)
+++ trunk/packages/plink/trunk/debian/control 2008-04-09 10:09:22 UTC (rev 1726)
@@ -2,7 +2,7 @@
Section: science
Priority: optional
Maintainer: Steffen Moeller <moeller at debian.org>
-Build-Depends: debhelper (>= 5)
+Build-Depends: debhelper (>= 5), quilt, g++-3.3
Standards-Version: 3.7.3
Homepage: http://pngu.mgh.harvard.edu/~purcell/plink/
@@ -16,4 +16,3 @@
It finds associations of single or pairs of DNA
variations with a phenotype and can retrieve
SNP annotation from an online source.
-
Added: trunk/packages/plink/trunk/debian/examples
===================================================================
--- trunk/packages/plink/trunk/debian/examples (rev 0)
+++ trunk/packages/plink/trunk/debian/examples 2008-04-09 10:09:22 UTC (rev 1726)
@@ -0,0 +1,2 @@
+test.ped
+test.map
Deleted: trunk/packages/plink/trunk/debian/manpage.sgml.ex
===================================================================
--- trunk/packages/plink/trunk/debian/manpage.sgml.ex 2008-04-09 09:43:11 UTC (rev 1725)
+++ trunk/packages/plink/trunk/debian/manpage.sgml.ex 2008-04-09 10:09:22 UTC (rev 1726)
@@ -1,156 +0,0 @@
-<!doctype refentry PUBLIC "-//OASIS//DTD DocBook V4.1//EN" [
-
-<!-- Process this file with docbook-to-man to generate an nroff manual
- page: `docbook-to-man manpage.sgml > manpage.1'. You may view
- the manual page with: `docbook-to-man manpage.sgml | nroff -man |
- less'. A typical entry in a Makefile or Makefile.am is:
-
-manpage.1: manpage.sgml
- docbook-to-man $< > $@
-
-
- The docbook-to-man binary is found in the docbook-to-man package.
- Please remember that if you create the nroff version in one of the
- debian/rules file targets (such as build), you will need to include
- docbook-to-man in your Build-Depends control field.
-
- -->
-
- <!-- Fill in your name for FIRSTNAME and SURNAME. -->
- <!ENTITY dhfirstname "<firstname>FIRSTNAME</firstname>">
- <!ENTITY dhsurname "<surname>SURNAME</surname>">
- <!-- Please adjust the date whenever revising the manpage. -->
- <!ENTITY dhdate "<date>April 8, 2008</date>">
- <!-- SECTION should be 1-8, maybe w/ subsection other parameters are
- allowed: see man(7), man(1). -->
- <!ENTITY dhsection "<manvolnum>SECTION</manvolnum>">
- <!ENTITY dhemail "<email>moeller at debian.org</email>">
- <!ENTITY dhusername "Steffen Moeller">
- <!ENTITY dhucpackage "<refentrytitle>PLINK</refentrytitle>">
- <!ENTITY dhpackage "plink">
-
- <!ENTITY debian "<productname>Debian</productname>">
- <!ENTITY gnu "<acronym>GNU</acronym>">
- <!ENTITY gpl "&gnu; <acronym>GPL</acronym>">
-]>
-
-<refentry>
- <refentryinfo>
- <address>
- &dhemail;
- </address>
- <author>
- &dhfirstname;
- &dhsurname;
- </author>
- <copyright>
- <year>2003</year>
- <holder>&dhusername;</holder>
- </copyright>
- &dhdate;
- </refentryinfo>
- <refmeta>
- &dhucpackage;
-
- &dhsection;
- </refmeta>
- <refnamediv>
- <refname>&dhpackage;</refname>
-
- <refpurpose>program to do something</refpurpose>
- </refnamediv>
- <refsynopsisdiv>
- <cmdsynopsis>
- <command>&dhpackage;</command>
-
- <arg><option>-e <replaceable>this</replaceable></option></arg>
-
- <arg><option>--example <replaceable>that</replaceable></option></arg>
- </cmdsynopsis>
- </refsynopsisdiv>
- <refsect1>
- <title>DESCRIPTION</title>
-
- <para>This manual page documents briefly the
- <command>&dhpackage;</command> and <command>bar</command>
- commands.</para>
-
- <para>This manual page was written for the &debian; distribution
- because the original program does not have a manual page.
- Instead, it has documentation in the &gnu;
- <application>Info</application> format; see below.</para>
-
- <para><command>&dhpackage;</command> is a program that...</para>
-
- </refsect1>
- <refsect1>
- <title>OPTIONS</title>
-
- <para>These programs follow the usual &gnu; command line syntax,
- with long options starting with two dashes (`-'). A summary of
- options is included below. For a complete description, see the
- <application>Info</application> files.</para>
-
- <variablelist>
- <varlistentry>
- <term><option>-h</option>
- <option>--help</option>
- </term>
- <listitem>
- <para>Show summary of options.</para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term><option>-v</option>
- <option>--version</option>
- </term>
- <listitem>
- <para>Show version of program.</para>
- </listitem>
- </varlistentry>
- </variablelist>
- </refsect1>
- <refsect1>
- <title>SEE ALSO</title>
-
- <para>bar (1), baz (1).</para>
-
- <para>The programs are documented fully by <citetitle>The Rise and
- Fall of a Fooish Bar</citetitle> available via the
- <application>Info</application> system.</para>
- </refsect1>
- <refsect1>
- <title>AUTHOR</title>
-
- <para>This manual page was written by &dhusername; &dhemail; for
- the &debian; system (but may be used by others). Permission is
- granted to copy, distribute and/or modify this document under
- the terms of the &gnu; General Public License, Version 2 any
- later version published by the Free Software Foundation.
- </para>
- <para>
- On Debian systems, the complete text of the GNU General Public
- License can be found in /usr/share/common-licenses/GPL.
- </para>
-
- </refsect1>
-</refentry>
-
-<!-- Keep this comment at the end of the file
-Local variables:
-mode: sgml
-sgml-omittag:t
-sgml-shorttag:t
-sgml-minimize-attributes:nil
-sgml-always-quote-attributes:t
-sgml-indent-step:2
-sgml-indent-data:t
-sgml-parent-document:nil
-sgml-default-dtd-file:nil
-sgml-exposed-tags:nil
-sgml-local-catalogs:nil
-sgml-local-ecat-files:nil
-End:
--->
-
-
Deleted: trunk/packages/plink/trunk/debian/manpage.xml.ex
===================================================================
--- trunk/packages/plink/trunk/debian/manpage.xml.ex 2008-04-09 09:43:11 UTC (rev 1725)
+++ trunk/packages/plink/trunk/debian/manpage.xml.ex 2008-04-09 10:09:22 UTC (rev 1726)
@@ -1,144 +0,0 @@
-<?xml version='1.0' encoding='ISO-8859-1'?>
-<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
-"http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" [
-
-<!--
-
-Process this file with an XSLT processor: `xsltproc \
--''-nonet /usr/share/sgml/docbook/stylesheet/xsl/nwalsh/\
-manpages/docbook.xsl manpage.dbk'. A manual page
-<package>.<section> will be generated. You may view the
-manual page with: nroff -man <package>.<section> | less'. A
-typical entry in a Makefile or Makefile.am is:
-
-DB2MAN=/usr/share/sgml/docbook/stylesheet/xsl/nwalsh/\
-manpages/docbook.xsl
-XP=xsltproc -''-nonet
-
-manpage.1: manpage.dbk
- $(XP) $(DB2MAN) $<
-
-The xsltproc binary is found in the xsltproc package. The
-XSL files are in docbook-xsl. Please remember that if you
-create the nroff version in one of the debian/rules file
-targets (such as build), you will need to include xsltproc
-and docbook-xsl in your Build-Depends control field.
-
--->
-
- <!-- Fill in your name for FIRSTNAME and SURNAME. -->
- <!ENTITY dhfirstname "<firstname>FIRSTNAME</firstname>">
- <!ENTITY dhsurname "<surname>SURNAME</surname>">
- <!-- Please adjust the date whenever revising the manpage. -->
- <!ENTITY dhdate "<date>April 8, 2008</date>">
- <!-- SECTION should be 1-8, maybe w/ subsection other parameters are
- allowed: see man(7), man(1). -->
- <!ENTITY dhsection "<manvolnum>SECTION</manvolnum>">
- <!ENTITY dhemail "<email>moeller at debian.org</email>">
- <!ENTITY dhusername "Steffen Moeller">
- <!ENTITY dhucpackage "<refentrytitle>PLINK</refentrytitle>">
- <!ENTITY dhpackage "plink">
-
- <!ENTITY debian "<productname>Debian</productname>">
- <!ENTITY gnu "<acronym>GNU</acronym>">
- <!ENTITY gpl "&gnu; <acronym>GPL</acronym>">
-]>
-
-<refentry>
- <refentryinfo>
- <address>
- &dhemail;
- </address>
- <copyright>
- <year>2007</year>
- <holder>&dhusername;</holder>
- </copyright>
- &dhdate;
- </refentryinfo>
- <refmeta>
- &dhucpackage;
-
- &dhsection;
- </refmeta>
- <refnamediv>
- <refname>&dhpackage;</refname>
-
- <refpurpose>program to do something</refpurpose>
- </refnamediv>
- <refsynopsisdiv>
- <cmdsynopsis>
- <command>&dhpackage;</command>
-
- <arg><option>-e <replaceable>this</replaceable></option></arg>
-
- <arg><option>--example <replaceable>that</replaceable></option></arg>
- </cmdsynopsis>
- </refsynopsisdiv>
- <refsect1>
- <title>DESCRIPTION</title>
-
- <para>This manual page documents briefly the
- <command>&dhpackage;</command> and <command>bar</command>
- commands.</para>
-
- <para>This manual page was written for the &debian; distribution
- because the original program does not have a manual page.
- Instead, it has documentation in the &gnu;
- <application>Info</application> format; see below.</para>
-
- <para><command>&dhpackage;</command> is a program that...</para>
-
- </refsect1>
- <refsect1>
- <title>OPTIONS</title>
-
- <para>These programs follow the usual &gnu; command line syntax,
- with long options starting with two dashes (`-'). A summary of
- options is included below. For a complete description, see the
- <application>Info</application> files.</para>
-
- <variablelist>
- <varlistentry>
- <term><option>-h</option>
- <option>--help</option>
- </term>
- <listitem>
- <para>Show summary of options.</para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term><option>-v</option>
- <option>--version</option>
- </term>
- <listitem>
- <para>Show version of program.</para>
- </listitem>
- </varlistentry>
- </variablelist>
- </refsect1>
- <refsect1>
- <title>SEE ALSO</title>
-
- <para>bar (1), baz (1).</para>
-
- <para>The programs are documented fully by <citetitle>The Rise and
- Fall of a Fooish Bar</citetitle> available via the
- <application>Info</application> system.</para>
- </refsect1>
- <refsect1>
- <title>AUTHOR</title>
-
- <para>This manual page was written by &dhusername; &dhemail; for
- the &debian; system (but may be used by others). Permission is
- granted to copy, distribute and/or modify this document under
- the terms of the &gnu; General Public License, Version 2 any
- later version published by the Free Software Foundation.
- </para>
- <para>
- On Debian systems, the complete text of the GNU General Public
- License can be found in /usr/share/common-licenses/GPL.
- </para>
-
- </refsect1>
-</refentry>
-
Added: trunk/packages/plink/trunk/debian/manpages
===================================================================
--- trunk/packages/plink/trunk/debian/manpages (rev 0)
+++ trunk/packages/plink/trunk/debian/manpages 2008-04-09 10:09:22 UTC (rev 1726)
@@ -0,0 +1 @@
+debian/plink.1
Deleted: trunk/packages/plink/trunk/debian/menu.ex
===================================================================
--- trunk/packages/plink/trunk/debian/menu.ex 2008-04-09 09:43:11 UTC (rev 1725)
+++ trunk/packages/plink/trunk/debian/menu.ex 2008-04-09 10:09:22 UTC (rev 1726)
@@ -1,2 +0,0 @@
-?package(plink):needs="X11|text|vc|wm" section="Applications/see-menu-manual"\
- title="plink" command="/usr/bin/plink"
Deleted: trunk/packages/plink/trunk/debian/plink-default.ex
===================================================================
--- trunk/packages/plink/trunk/debian/plink-default.ex 2008-04-09 09:43:11 UTC (rev 1725)
+++ trunk/packages/plink/trunk/debian/plink-default.ex 2008-04-09 10:09:22 UTC (rev 1726)
@@ -1,10 +0,0 @@
-# Defaults for plink initscript
-# sourced by /etc/init.d/plink
-# installed at /etc/default/plink by the maintainer scripts
-
-#
-# This is a POSIX shell fragment
-#
-
-# Additional options that are passed to the Daemon.
-DAEMON_OPTS=""
Deleted: trunk/packages/plink/trunk/debian/plink.doc-base.EX
===================================================================
--- trunk/packages/plink/trunk/debian/plink.doc-base.EX 2008-04-09 09:43:11 UTC (rev 1725)
+++ trunk/packages/plink/trunk/debian/plink.doc-base.EX 2008-04-09 10:09:22 UTC (rev 1726)
@@ -1,22 +0,0 @@
-Document: plink
-Title: Debian plink Manual
-Author: <insert document author here>
-Abstract: This manual describes what plink is
- and how it can be used to
- manage online manuals on Debian systems.
-Section: unknown
-
-Format: debiandoc-sgml
-Files: /usr/share/doc/plink/plink.sgml.gz
-
-Format: postscript
-Files: /usr/share/doc/plink/plink.ps.gz
-
-Format: text
-Files: /usr/share/doc/plink/plink.text.gz
-
-Format: HTML
-Index: /usr/share/doc/plink/html/index.html
-Files: /usr/share/doc/plink/html/*.html
-
-
Modified: trunk/packages/plink/trunk/debian/rules
===================================================================
--- trunk/packages/plink/trunk/debian/rules 2008-04-09 09:43:11 UTC (rev 1725)
+++ trunk/packages/plink/trunk/debian/rules 2008-04-09 10:09:22 UTC (rev 1726)
@@ -1,51 +1,8 @@
#!/usr/bin/make -f
# -*- makefile -*-
-build: build-stamp
+include /usr/share/cdbs/1/rules/debhelper.mk
+include /usr/share/cdbs/1/rules/patchsys-quilt.mk
-build-stamp:
- dh_testdir
- $(MAKE)
- #docbook-to-man debian/plink.sgml > plink.1
- touch $@
-
-clean:
- dh_testdir
- dh_testroot
- rm -f build-stamp configure-stamp
- $(MAKE) distclean
- dh_clean
-
-install: build
- dh_testdir
- dh_testroot
- dh_clean -k
- dh_installdirs
- #cp plink $(CURDIR)/debian/plink/usr/bin/
-
-
-# Build architecture-independent files here.
-binary-indep: build install
-# We have nothing to do by default.
-
-# Build architecture-dependent files here.
-binary-arch: build install
- dh_testdir
- dh_testroot
- dh_installchangelogs
- dh_installdocs
- dh_installexamples test.ped test.map
- dh_install
- dh_installman debian/plink.1
- dh_link
- dh_strip
- dh_compress
- dh_fixperms
- dh_installdeb
- dh_shlibdeps
- dh_gencontrol
- dh_md5sums
- dh_builddeb
-
binary: binary-indep binary-arch
.PHONY: build clean binary-indep binary-arch binary install configure
Deleted: trunk/packages/plink/trunk/filters.cpp
===================================================================
--- trunk/packages/plink/trunk/filters.cpp 2008-04-09 09:43:11 UTC (rev 1725)
+++ trunk/packages/plink/trunk/filters.cpp 2008-04-09 10:09:22 UTC (rev 1726)
@@ -1,1178 +0,0 @@
-
-
-//////////////////////////////////////////////////////////////////
-// //
-// PLINK (c) 2005-2007 Shaun Purcell //
-// //
-// This file is distributed under the GNU General Public //
-// License, Version 2. Please see the file COPYING for more //
-// details //
-// //
-//////////////////////////////////////////////////////////////////
-
-
-#include <iostream>
-#include <iomanip>
-#include <fstream>
-#include <sstream>
-#include <vector>
-#include <map>
-#include <cmath>
-#include <cstdlib>
-#include <iterator>
-#include "plink.h"
-#include "options.h"
-#include "helper.h"
-
-#define MISSING(i,l) ( SNP[l]->one[i] && ( ! SNP[l]->two[i] ) )
-
-void Plink::filterSNPs()
-{
-
-
- //////////////////////////////////////////////////////
- // This functions applies the following filters and
- // functions:
-
- // Counts of number of founders, nonfounders
- // Per-individual genotyping rate
- // Read in, or calculate, allele frequencies (and save these)
- // Optionally write allele frequencies, then close
- // Exclude SNPs with too many missing genotypes
- // Identify/correct heterozygote haploid
- // Identify SNPs with no founder genotypes
- // Calculate/report/filter on HWE tests
- // Calculate/report genotyping rate per SNP/per individual
- // Filter on MAF
- // Remove filtered-out SNPs
-
-
- bool original_SNP_major = par::SNP_major;
-
- if ( ! par::SNP_major )
- Ind2SNP();
-
-
- // Which SNPs to delete
- vector<bool> del(locus.size(),false);
-
- // Which individuals to delete
- vector<bool> indel(sample.size(),false);
-
-
- //////////////////////////////////////////
- // Display number of founders/nonfounders
-
- cnt_f=0;
- vector<Individual*>::iterator person = sample.begin();
- while ( person != sample.end() )
- {
- if ( (*person)->founder ) cnt_f++;
- person++;
- }
- printLOG(int2str(cnt_f)+" founders and "+int2str(n-cnt_f)+
- " non-founders found\n");
-
- if (cnt_f<n) par::has_nonfounders = true;
-
-
- ////////////////////////////////////////////
- // If we have obligatory missing genotypes:
- // ensure they really are missng
-
- if ( par::oblig_missing )
- {
- set<int2>::iterator p = oblig_missing.begin();
- while ( p != oblig_missing.end() )
- {
- int l = p->p1;
- int k = p->p2;
-
- for (int i=0; i<sample.size(); i++)
- {
- Individual * person = sample[i];
- if ( person->sol == k )
- {
- SNP[l]->one[i] = true;
- SNP[l]->two[i] = false;
- }
- }
- ++p;
- }
- }
-
-
- /////////////////////////////////////////////////
- // Remove individuals with too many missing calls
-
- double total_genotyping = 0;
-
- if ( par::MAX_IND_MISSING < 1 )
- {
-
- int n_removed = 0;
- int n_orig = n;
-
- // Consider each individual
- if ( ! par::oblig_missing )
- {
- for (int i=0;i<sample.size();i++)
- {
-
- // Sum missingness over all SNPs
- int m=0; // Missing SNPs
- int nsnps=0; // All non-obligatory missing SNPs
-
- nsnps = locus.size();
- for (int l=0; l<nsnps;l++)
- if ( MISSING(i,l) ) m++;
-
- // Too much missingness?
- if ( (double)m/(double)nsnps > par::MAX_IND_MISSING )
- {
- indel[i] = true;
- n_removed++;
- }
- } // next individual
- }
- else // ... allow oblig missing values
- {
- for (int i=0;i<sample.size();i++)
- {
-
- // Sum missingness over all SNPs
- int m=0; // Missing SNPs
- int nsnps=0; // All non-obligatory missing SNPs
-
- for (int l=0; l<locus.size();l++)
- {
- if ( ! obligMissing(i,l) )
- {
- if ( MISSING(i,l) )
- ++m;
- ++nsnps;
- }
- }
-
- // Too much missingness?
- if ( (double)m/(double)nsnps > par::MAX_IND_MISSING )
- {
- indel[i] = true;
- n_removed++;
- }
-
- } // next individual
- } // end if oblig-missing section
-
-
- ////////////////////////////////////////
- // Save list of any removed individuals
-
- if (n_removed>0)
- {
- string f = par::output_file_name + ".irem";
- printLOG("Writing list of removed individuals to [ " + f + " ]\n");
- ofstream REM;
- REM.open(f.c_str(), ifstream::out);
- for (int i=0;i<sample.size();i++)
- if (indel[i])
- REM << sample[i]->fid << "\t" << sample[i]->iid << "\n";
- REM.close();
-
-
- // And now remove these individuals, so that
- // SNP-based statistics are calculated with
- // these samples already excluded
-
- n_removed = deleteIndividuals(indel);
-
- }
-
- printLOG(int2str(n_removed)+" of "+int2str(n_orig));
- printLOG(" individuals removed for low genotyping ( MIND > ");
- printLOG(dbl2str(par::MAX_IND_MISSING)+" )\n");
-
-
- } // end of remove people conditional
-
-
-
- /////////////////////////////////
- // Calculate or read from file?
-
- if (par::af_read)
- {
- checkFileExists(par::af_file);
- printLOG( "Reading allele frequencies from [ " + par::af_file + " ] \n");
-
- // Make hash of original SNP names
- map<string,int> mlocus;
- map<string,int>::iterator ilocus;
-
- vector<Locus*>::iterator loc = locus.begin();
- int l=0;
- while ( loc != locus.end() )
- {
- mlocus.insert(make_pair( (*loc)->name,l));
- loc++;
- l++;
- }
-
-
- // Read allele frequencies
- ifstream FRQ;
- FRQ.open(par::af_file.c_str());
- FRQ.clear();
-
- string dum1, dum2, dum3, dum4, dum5, dum6;
- string snpname;
- double freq;
- int nm;
-
- loc = locus.begin();
- while ( loc != locus.end() )
- {
- (*loc)->freq = -1;
- (*loc)->nm = 0;
- loc++;
- }
-
- // Skip header line
- FRQ >> dum1 >> dum2 >> dum3 >> dum4 >> dum5 >> dum6;
-
- while(!FRQ.eof())
- {
-
- char cline[256];
- FRQ.getline(cline,256,'\n');
- string sline = cline;
- if (sline=="") continue;
- stringstream ss(sline);
- string buf;
- vector<string> tokens;
- while (ss >> buf)
- tokens.push_back(buf);
- if (tokens.size() == 0)
- continue;
- else if (tokens.size() != 6)
- error("Problem with allele frequency line: 6 fields required:\n"
- +sline+"\n");
-
- ilocus = mlocus.find(tokens[1]);
- if (ilocus != mlocus.end())
- {
- locus[ilocus->second]->freq = atof(tokens[4].c_str());
- locus[ilocus->second]->nm = atoi(tokens[5].c_str());
- }
- }
- FRQ.clear();
- FRQ.close();
-
- }
-
-
- /////////////////////////////////
- // Calculate allele frequencies
-
- vector<string> hetlist(0);
-
- vector<bool>::iterator d = del.begin();
- vector<Locus*>::iterator loc = locus.begin();
- vector<CSNP*>::iterator s = SNP.begin();
- int l = 0; // Main locus counter
-
- int exc_maf = 0;
- int exc_miss = 0;
-
- vector<Locus*> no_founders_found_list;
-
- while ( loc != locus.end() )
- {
-
- if (!par::af_read)
- {
- (*loc)->freq = 0;
- // count 1 per allele, for frequency
- (*loc)->nm = 0;
- }
-
- // count 1 per genotype, for missingness
- int geno_nm = 0;
-
- // count 1 per non-obligatory missing genotype
- // (or set to N individuals)
- int geno_real = par::oblig_missing ? 0 : n ;
-
- bool X = false;
- bool haploid = false;
-
- // Determine type of SNP
- if (par::chr_sex[(*loc)->chr]) X=true;
- else if (par::chr_haploid[(*loc)->chr]) haploid=true;
-
-
- ///////////////////////////////
- // Iterate over each individual
-
- vector<bool>::iterator i1 = (*s)->one.begin();
- vector<bool>::iterator i2 = (*s)->two.begin();
- vector<Individual*>::iterator person = sample.begin();
- int i = 0;
-
- while ( person != sample.end() )
- {
-
- bool s1 = *i1;
- bool s2 = *i2;
-
- // Check female Y genotypes
- if ( par::chr_Y[(*loc)->chr] && ! (*person)->sex )
- {
- // Set to missing, unless in a RECODE mode
- if ( ! par::preserve_all_genotypes )
- {
- s1 = *i1 = true;
- s2 = *i2 = false;
- }
- }
-
- // For haploid heterozygosity check, also consider all individuals
- if ( haploid || ( X && (*person)->sex ) )
- {
- if ( (!s1) && s2 )
- {
- hetlist.push_back( (*person)->fid + "\t" +
- (*person)->iid + "\t" +
- (*loc)->name );
-
- // Set to missing, unless in a RECODE mode
- if ( ! par::preserve_all_genotypes )
- {
- *i1 = true;
- *i2 = false;
- }
- }
- }
-
-
-
- // For missing genotypes, consider all individuals
- if ( ! ( s1 && (!s2) ) ) geno_nm++;
-
- // But is this a real genotype in any case?
- if ( par::oblig_missing && ! obligMissing(i,l) )
- geno_real++;
-
-
- // Do not recount alleles if we have read in allele frequencies
- if (!par::af_read)
- {
- // For allele frequencies
- // only consider founders
- if ( par::summ_nonfounders || (*person)->founder )
- {
-
- if ( haploid || ( X && (*person)->sex ) )
- {
-
- //////////////////
- // Haploid counts
-
- // "1" allele count
-
- if ( (!s1) && (!s2) ) // FF = hom(11)
- {
- (*loc)->freq++;
- (*loc)->nm++;
- }
- else if ( s1 && s2 ) // TT = hom(22)
- {
- (*loc)->nm++;
- }
-
- }
- else
- {
-
- //////////////////
- // Autosomal count
-
- // "1" allele count
-
- if (!s1)
- {
- if (!s2) // 00 = hom(11)
- {
- (*loc)->freq+=2;
- (*loc)->nm+=2;
- }
- else // 01 = het(12)
- {
- (*loc)->freq+=1;
- (*loc)->nm+=2;
- }
- }
- else if ( s2 ) // 11 = hom(22)
- {
- (*loc)->nm+=2;
- }
- }
-
- }
- }
-
- // Next individual
- person++;
- i++;
- i1++;
- i2++;
- }
-
-
- ////////////////////////////////
- // Calculate allele frequencies
-
- if (!par::af_read)
- {
- if ( par::af_count) // Allele counts...
- {
- // Use freq to store count (keep as is)
-
- // Use "bp" to store number of allele 2
- (*loc)->bp = (long int)((*loc)->nm - (*loc)->freq);
-
- // Use "pos" to store number missing genotypes
- (*loc)->pos = geno_real - geno_nm;
- }
- else // ... or frequencies
- {
- if ((*loc)->nm>0)
- (*loc)->freq /= (double)(*loc)->nm;
- else
- {
- (*loc)->freq = 1;
- // If we aren't getting rid of it anyway
- if ( (double)geno_nm/(double)geno_real >= (1-par::MAX_GENO_MISSING))
- no_founders_found_list.push_back(*loc);
- }
- }
- }
-
-
- //////////////////////////////////////////
- // Record total proportion of missingness
-
- double snp_genotyping = n>0 ? (double)geno_nm/(double)geno_real : 0;
- total_genotyping += snp_genotyping;
-
-
- /////////////////////////////////////////////////
- // Exclude if SNP has too many missing genotypes
-
- if ( snp_genotyping < (1-par::MAX_GENO_MISSING) )
- {
- *d = true;
- exc_miss++;
- }
-
-
- ////////////////////////////////////////////////
- // Make allele1 always the least common allele
-
- if ( (!par::af_count) && (*loc)->freq > 0.5 )
- {
-
- // then we need to swap alleles
-
- (*loc)->freq = 1 - (*loc)->freq;
-
- string tmp = (*loc)->allele2;
- (*loc)->allele2 = (*loc)->allele1;
- (*loc)->allele1 = tmp;
-
- vector<bool>::iterator i1 = (*s)->one.begin();
- vector<bool>::iterator i2 = (*s)->two.begin();
-
- while ( i1 != (*s)->one.end() )
- {
-
- if ( (*i1) == (*i2) )
- {
- *i1 = ! (*i1);
- *i2 = ! (*i2);
- }
-
- i1++;
- i2++;
- }
-
- }
-
- // Next SNP
- ++d;
- ++loc;
- ++l;
- ++s;
- }
-
-
-
-
- /////////////////////////////////////////////////
- // Save list of any heterozygous haploid alleles
-
- if (hetlist.size()>0)
- {
- printLOG(int2str( hetlist.size())
- + " heterozygous haploid genotypes called\n");
- string f = par::output_file_name + ".hh";
- printLOG("Writing list of heterozygous haploid genotypes to [ "
- + f + " ]\n");
- ofstream REM;
- REM.open(f.c_str(), ifstream::out);
- for (int i=0; i<hetlist.size(); i++)
- REM << hetlist[i] << "\n";
- REM.close();
- }
- hetlist.clear();
-
-
-
- /////////////////////////////////////////////////
- // Save list of SNPs with no founders observed
-
- if (no_founders_found_list.size()>0)
- {
- printLOG(int2str( no_founders_found_list.size())
- + " SNPs with no founder genotypes observed\n");
- printLOG("Warning, MAF set to 0 for these SNPs (see --nonfounders)\n");
- string f = par::output_file_name + ".nof";
- printLOG( "Writing list of these SNPs to [ " + f + " ]\n");
- ofstream NOF;
- NOF.open(f.c_str(), ifstream::out);
- for (int i=0; i<no_founders_found_list.size(); i++)
- NOF << no_founders_found_list[i]->name << "\n";
- NOF.close();
- }
- no_founders_found_list.clear();
-
-
-
- //////////////////////////
- // Write allele freq file
-
- if (par::af_write)
- {
- if (par::include_cluster_from_file)
- calcStratifiedAlleleFreqs();
- else
- {
- ofstream FRQ;
- string f = par::output_file_name + ".frq";
- if (par::af_count) f += ".count";
-
- if (par::summ_nonfounders)
- printLOG("Writing allele frequencies (all individuals) to [ "
- + f + " ] \n");
- else
- printLOG("Writing allele frequencies (founders-only) to [ "
- + f + " ] \n");
-
- if (par::af_count)
- printLOG("Display counts rather than frequencies\n");
-
- FRQ.open(f.c_str(), ifstream::out);
-
- FRQ << setw(4) << "CHR" << " "
- << setw(par::pp_maxsnp) << "SNP" << " "
- << setw(4) << "A1" << " "
- << setw(4) << "A2" << " ";
- if (par::af_count)
- FRQ << setw(6) << "C1" << " "
- << setw(6) << "C2" << " "
- << setw(6) << "G0" << "\n";
- else
- FRQ << setw(12) << "MAF" << " "
- << setw(8) << "NCHROBS"
- << "\n";
-
- vector<Locus*>::iterator loc = locus.begin();
- while (loc != locus.end() )
- {
- string a1 = (*loc)->allele1;
- string a2 = (*loc)->allele2;
- if (a1=="") a1="0";
- if (a2=="") a2="0";
- FRQ << setw(4) << (*loc)->chr << " "
- << setw(par::pp_maxsnp) << (*loc)->name << " "
- << setw(4) << a1 << " "
- << setw(4) << a2 << " ";
-
- if (par::af_count)
- {
- FRQ << setw(6) << int( (*loc)->freq ) << " "
- << setw(6) << int( (*loc)->bp ) << " "
- << setw(6) << int( (*loc)->pos ) << "\n";
- }
- else
- {
- if ( (*loc)->freq >= 0 )
- FRQ << setw(12) << (*loc)->freq << " ";
- else
- FRQ << setw(12) << "NA" << " ";
- FRQ << setw(8) << (*loc)->nm << "\n";
- }
- loc++;
- }
- FRQ.close();
-
- }
-
- // Close after we've done alle freqs,
- shutdown();
- }
-
-
-
- /////////////////////////
- // Write HWE statistics
-
- if (par::HWD_test || par::HWD_report)
- {
-
- ofstream HWD;
- if (par::HWD_report)
- {
-
- if (par::summ_nonfounders)
- printLOG("Writing Hardy-Weinberg tests (all individuals) to [ " +
- par::output_file_name + ".hwe ] \n");
- else
- printLOG("Writing Hardy-Weinberg tests (founders-only) to [ " +
- par::output_file_name + ".hwe ] \n");
-
- string f = par::output_file_name + ".hwe";
- HWD.open(f.c_str(), ifstream::out);
-
- HWD.precision(4);
-
- HWD << setw(par::pp_maxsnp) << "SNP" << " "
- << setw(8) << "TEST" << " "
- << setw(20) << "GENO" << " "
- << setw(8) << "O(HET)" << " "
- << setw(8) << "E(HET)" << " "
- << setw(12) << "P_HWD" << " "
- << "\n";
- }
-
- int cnt=0, cnt_a=0, cnt_u=0;
-
-
- ////////////////////////
- // Consider each locus
-
- vector<bool>::iterator d = del.begin();
- vector<Locus*>::iterator loc = locus.begin();
-
- for ( int l = 0 ; l < locus.size() ; l++ )
- {
-
- // Compute p-values for HWE test in cases, controls & all
-
- // Only consider founders
-
- int a11, a12, a22;
- int u11, u12, u22;
- int b11, b12, b22;
-
- a11=a12=a22=0;
- u11=u12=u22=0;
- b11=b12=b22=0;
-
- bool X = false, haploid = false;
- if (par::chr_sex[(*loc)->chr]) X=true;
- else if (par::chr_haploid[(*loc)->chr]) haploid=true;
-
-
- ///////////////////////////////////////////////
- // Iterate over each individual, founders only
-
- for ( int i = 0 ; i < sample.size() ; i++ )
- {
-
- Individual * person = sample[i];
-
- ///////////////////////////////////////////////
- // Only consider founders, & diploid genotypes
-
- if ( par::summ_nonfounders || person->founder )
- if ( ! ( haploid || ( X && person->sex ) ) )
- {
-
- bool s1 = SNP[l]->one[i];
- bool s2 = SNP[l]->two[i];
-
- // Consider everybody, irrespective of phenotype
- // (QT, C/C or missing)
-
- if (!s1)
- {
- if (!s2) b11++; // 00 = hom(11)
- else b12++; // 01 = het(12)
- }
- else if ( s2 ) b22++; // 11 = hom(22)
-
-
- if (par::bt) // for binary trait, separately for cases/controls
- {
- if (person->phenotype == 1)
- {
-
- if (!s1)
- {
- if (!s2) u11++; // 00 = hom(11)
- else u12++; // 01 = het(12)
- }
- else if ( s2 ) u22++; // 11 = hom(22)
-
- }
- else if (person->phenotype == 2)
- {
- if (!s1)
- {
- if (!s2) a11++; // 00 = hom(11)
- else a12++; // 01 = het(12)
- }
- else if ( s2 ) a22++; // 11 = hom(22)
- }
-
- }
- }
-
-
- // Next individual
- }
-
-
- // Allele frequencies
- double afreq = 0, ufreq = 0, freq = 0;
-
- bool include_cases = true;
- bool include_controls = true;
- if (par::qt)
- freq = ( b11 + (double)b12/2.0 ) / (double)( b11+b12+b22 );
- else
- {
- afreq = ( a11 + (double)a12/2.0 ) / (double)( a11+a12+a22 );
- ufreq = ( u11 + (double)u12/2.0 ) / (double)( u11+u12+u22 );
- freq = ( b11 + (double)b12/2.0 ) / (double)( b11+b12+b22 );
-
- if ( a11+a12+a22 == 0 ) include_cases = false;
- if ( u11+u12+u22 == 0 ) include_controls = false;
- }
-
-
- if (par::qt)
- {
-
- double p;
-
- if (par::HWD_standard)
- {
- double tot = b11 + b12 + b22;
- double exp_11 = freq * freq * tot;
- double exp_12 = 2 * freq * (1-freq) * tot;
- double exp_22 = (1-freq) * (1-freq) * tot;
-
- double chisq = ( (b11-exp_11)*(b11-exp_11) ) / exp_11
- + ( (b12-exp_12)*(b12-exp_12) ) / exp_12
- + ( (b22-exp_22)*(b22-exp_22) ) / exp_22 ;
-
- p = chiprobP(chisq,1);
- }
- else
- p = SNPHWE( b12, b11, b22 );
-
-
- if (par::HWD_report)
- {
- HWD << setw(par::pp_maxsnp) << (*loc)->name << " "
- << setw(8) << "ALL(QT)" << " "
- << setw(20) << (int2str(b11)+
- "/"+int2str(b12)+
- "/"+int2str(b22)) << " "
- << setw(8) << (double)b12/(double)(b11+b12+b22) << " "
- << setw(8) << 2 * freq * (1-freq) << " "
- << setw(12) << p << "\n";
- }
-
- if ( p <= par::HWD_limit )
- {
- cnt++;
- *d = true;
- }
- }
- else
- {
- // For case/control data
-
- double p, p_a, p_u;
-
- if (par::HWD_standard)
- {
- double exp_a11 = afreq * afreq * (a11+a12+a22);
- double exp_a12 = 2 * afreq * (1-afreq) * (a11+a12+a22);
- double exp_a22 = (1-afreq) * (1-afreq) * (a11+a12+a22);
-
- double exp_u11 = ufreq * ufreq * (u11+u12+u22);
- double exp_u12 = 2 * ufreq * (1-ufreq) * (u11+u12+u22);
- double exp_u22 = (1-ufreq) * (1-ufreq) * (u11+u12+u22);
-
- double exp_11 = freq * freq * (b11+b12+b22);
- double exp_12 = 2 * freq * (1-freq) * (b11+b12+b22);
- double exp_22 = (1-freq) * (1-freq) * (b11+b12+b22);
-
- double chisq_a = ( (a11-exp_a11)*(a11-exp_a11) ) / exp_a11
- + ( (a12-exp_a12)*(a12-exp_a12) ) / exp_a12
- + ( (a22-exp_a22)*(a22-exp_a22) ) / exp_a22 ;
-
- double chisq_u = ( (u11-exp_u11)*(u11-exp_u11) ) / exp_u11
- + ( (u12-exp_u12)*(u12-exp_u12) ) / exp_u12
- + ( (u22-exp_u22)*(u22-exp_u22) ) / exp_u22 ;
-
- double chisq = ( (b11-exp_11)*(b11-exp_11) ) / exp_11
- + ( (b12-exp_12)*(b12-exp_12) ) / exp_12
- + ( (b22-exp_22)*(b22-exp_22) ) / exp_22 ;
-
- p = chiprobP(chisq,1);
- p_a = chiprobP(chisq_a,1);
- p_u = chiprobP(chisq_u,1);
- }
- else
- {
- p = SNPHWE( b12, b11, b22 );
- p_a = SNPHWE( a12, a11, a22 );
- p_u = SNPHWE( u12, u11, u22 );
- }
-
- if (par::HWD_report)
- {
-
- HWD << setw(par::pp_maxsnp) << (*loc)->name << " "
- << setw(8) << "ALL" << " "
-
- << setw(20)
- << int2str(b11)+"/"+int2str(b12)+"/"+int2str(b22) << " "
- << setw(8) << (double)b12/(double)(b11+b12+b22) << " "
- << setw(8) << 2 * freq * (1-freq) << " "
- << setw(12) << p << "\n";
-
-
- HWD << setw(par::pp_maxsnp) << (*loc)->name << " "
- << setw(8) << "AFF" << " "
- << setw(20)
- << int2str(a11)+"/"+int2str(a12)+"/"+int2str(a22) << " "
- << setw(8) << (double)a12/(double)(a11+a12+a22) << " "
- << setw(8) << 2 * afreq * (1-afreq) << " ";
-
-
- if (include_cases)
- HWD << setw(12) << p_a << "\n";
- else
- HWD << setw(12) << "NA" << "\n";
-
-
- HWD << setw(par::pp_maxsnp) << (*loc)->name << " "
- << setw(8) << "UNAFF" << " "
- << setw(20)
- << int2str(u11)+"/"+int2str(u12)+"/"+int2str(u22) << " "
- << setw(8) << (double)u12/(double)(u11+u12+u22) << " "
- << setw(8) << 2 * ufreq * (1-ufreq) << " ";
-
-
- if (include_controls)
- HWD << setw(12) << p_u << "\n";
- else
- HWD << setw(12) << "NA" << "\n";
-
- }
-
- // Increase counts: in cases
- if ( include_cases && p_a < par::HWD_limit && p>-1 ) cnt_a++;
-
- // Controls (and, if possible, exclude on this value)
- if ( include_controls &&
- p_u < par::HWD_limit && p>-1 )
- {
- cnt_u++;
-
- if ( ! par::HWD_filter_on_all )
- {
- *d = true;
- cnt++;
- }
- }
-
- // In total sample, and if needed, exclude here
- if ( p < par::HWD_limit && p>-1 )
- {
- if ( par::HWD_filter_on_all || ! include_controls )
- {
- *d = true;
- cnt++;
- }
- }
-
-
-
-
- }
-
-
- // next locus
- ++loc;
- ++d;
- }
-
-
- // Finish the report...
- if (par::HWD_report)
- HWD.close();
-
- // ...or finish pruning
-
- printLOG( int2str(cnt) +
- " markers to be excluded based on HWE test ( p <= " +
- dbl2str(par::HWD_limit) + " )\n");
-
- if (par::bt)
- {
- printLOG("\t" + int2str(cnt_a) + " markers failed HWE test in cases\n");
- printLOG("\t" + int2str(cnt_u) + " markers failed HWE test in controls\n");
- }
- }
-
-
-
-
- ///////////////////////////////////////////////////
- // Summary statistics for genotyping/missing rates
-
- if (par::report_missing)
- {
-
-
- ///////////////////////////////////////////
- // Report by genotyping rate by individual
- // possibly allowing for obligatory missingness
-
- printLOG( "Writing individual missingness information to [ " +
- par::output_file_name + ".imiss ] \n");
-
- ofstream MIS;
- string f = par::output_file_name + ".imiss";
- MIS.open(f.c_str(), ifstream::out);
- MIS << setw(par::pp_maxfid) << "FID" << " "
- << setw(par::pp_maxiid) << "IID" << " "
- << setw(10) << "MISS_PHENO" << " "
- << setw(8) << "N_MISS" << " ";
- if ( par::oblig_missing )
- MIS << setw(8) << "N_GENO" << " ";
- MIS << setw(8) << "F_MISS" << "\n";
-
- for (int i=0; i<n; i++)
- {
-
- MIS << setw(par::pp_maxfid) << sample[i]->fid << " "
- << setw(par::pp_maxiid) << sample[i]->iid << " ";
- if (sample[i]->missing) MIS << setw(10) << "Y" << " ";
- else MIS << setw(10) << "N" << " " ;
-
-
- // Sum missingness over all SNPs
- int m=0; // Missing SNPs
- int nsnps=0; // All non-obligatory missing SNPs
-
- if ( ! par::oblig_missing )
- {
- nsnps = locus.size();
- for (int l=0; l<nsnps;l++)
- if ( MISSING(i,l) ) m++;
-
- }
- else // ... allow oblig missing values
- {
- for (int l=0; l<locus.size();l++)
- {
- if ( ! obligMissing(i,l) )
- {
- if ( MISSING(i,l) )
- ++m;
- ++nsnps;
- }
- }
- }
-
- MIS << setw(8) << m << " ";
- if ( par::oblig_missing)
- MIS << setw(8) << nsnps << " ";
- MIS << setw(8) << (double)m/(double)nsnps << "\n";
-
- }
- MIS.close();
-
-
-
- ///////////////////////////////////////////
- // Report by genotyping rate by locus
- // possibly allowing for sample strata
- // possibly allowing for obligatory missingness
-
- printLOG("Writing locus missingness information to [ " +
- par::output_file_name +".lmiss ] \n");
- f = par::output_file_name + ".lmiss";
- MIS.open(f.c_str(), ifstream::out);
- MIS.clear();
-
- MIS << setw(4) << "CHR" << " "
- << setw(par::pp_maxsnp) << "SNP" << " ";
-
- if (par::include_cluster_from_file)
- MIS << setw(10) << "CLST" << " ";
-
- MIS << setw(8) << "N_MISS" << " ";
-
- if ( par::oblig_missing )
- MIS << setw(8) << "N_GENO" << " ";
-
- if (par::include_cluster_from_file)
- MIS << setw(8) << "N_CLST" << " ";
-
- MIS << setw(8) << "F_MISS" << "\n";
-
-
- for (int l=0; l<locus.size(); l++)
- {
-
- Locus * loc = locus[l];
-
- // nk==1 for basic missingness (i.e. not stratified by
- // cluster)
-
- for (int k=0; k<nk; k++)
- {
-
- MIS << setw(4) << loc->chr << " "
- << setw(par::pp_maxsnp) << loc->name << " ";
-
- if (par::include_cluster_from_file)
- MIS << setw(10) << kname[k] << " ";
-
- int m=0; // Number of missing genotypes
- int c=0; // Number of people in cluster
- int nsnps=0; // Number of actual genotypes in cluster
-
- for ( int i=0; i<sample.size(); i++)
- {
- if (par::include_cluster_from_file)
- {
- if ( sample[i]->sol == k )
- {
- if ( ( ! par::oblig_missing ) ||
- ( ! obligMissing(i,l) ) )
- {
- if ( MISSING(i,l) ) ++m;
- ++nsnps;
- }
- ++c;
- }
- }
- else // ... ignore cluster strata
- {
- if ( ( ! par::oblig_missing ) ||
- ( ! obligMissing(i,l) ) )
- {
- if ( MISSING(i,l) ) ++m;
- ++nsnps;
- }
- }
-
- // Next individual
- }
-
- MIS << setw(8) << m << " ";
-
- if (par::include_cluster_from_file)
- MIS << setw(8) << c << " ";
-
- if (par::oblig_missing)
- MIS << setw(8) << nsnps << " ";
-
-
- MIS << setw(8) << (double)m / (double)nsnps << "\n";
- }
-
- // Next SNP
- }
-
- MIS.close();
-
- }
-
-
-
-
- /////////////////////////////////
- // Remove rare SNPs
-
-
- loc = locus.begin();
- d = del.begin();
-
- while ( loc != locus.end() )
- {
-
- // Note epsilon correction for MAF, due to floating point
- // issues: only apply to the lower MAF range
-
- if ( (*loc)->freq < 0 ||
- (*loc)->freq + par::epsilon < par::min_af ||
- (*loc)->freq > par::max_af )
- {
- *d = true;
- exc_maf++;
- }
-
- d++;
- loc++;
- }
-
-
-
- /////////////////////////////////////////
- // Remove SNPs based on thresholds
-
- if ( locus.size() > 0 )
- printLOG("Total genotyping rate in remaining individuals is "
- + dbl2str(total_genotyping/(double)locus.size())+"\n");
- printLOG(int2str(exc_miss)+" SNPs failed missingness test ( GENO > "
- +dbl2str(par::MAX_GENO_MISSING)+" )\n");
- printLOG(int2str(exc_maf)+" SNPs failed frequency test ( MAF < "+dbl2str(par::min_af));
- if (par::max_af < 0.5 ) printLOG(" or MAF > " + dbl2str(par::max_af));
- printLOG(" )\n");
-
-
- int tmp = deleteSNPs(del);
-
-
- //////////////////////////////////////////
- // Need to make back to individual major?
-
- if ( ! original_SNP_major )
- SNP2Ind();
-
-
- return;
-
-}
-
Deleted: trunk/packages/plink/trunk/fisher.cpp
===================================================================
--- trunk/packages/plink/trunk/fisher.cpp 2008-04-09 09:43:11 UTC (rev 1725)
+++ trunk/packages/plink/trunk/fisher.cpp 2008-04-09 10:09:22 UTC (rev 1726)
@@ -1,2259 +0,0 @@
-
-
-//////////////////////////////////////////////////////////////////
-// //
-// PLINK (c) 2005-2006 Shaun Purcell //
-// //
-// This file is distributed under the GNU General Public //
-// License, Version 2. Please see the file COPYING for more //
-// details //
-// //
-//////////////////////////////////////////////////////////////////
-
-
-#include <climits>
-#include <string>
-#include "fisher.h"
-#include "plink.h"
-#include "helper.h"
-
-double fisher(table_t t)
-{
-
- int nrow = t.size();
- if ( nrow == 0 )
- return -9;
- int ncol = t[0].size();
- if ( ncol == 0 )
- return -9;
- double table[200];
-
- int c=0;
-
- for (int j=0; j<ncol; j++)
- for (int i=0; i<nrow; i++)
- {
- table[c++] = t[i][j];
- }
-
- double expect = -1.0;
- double percnt = 100.0;
- double emin = 0;
- double pre = 0, prt = 0;
- int ws = 300000;
-
- fexact(&nrow, &ncol, table, &nrow, &expect, &percnt, &emin, &prt, &pre, &ws);
-
- return pre;
-
-// // Test of routine:
-
-// double expected = -1.0;
-// percnt = 100.0;
-// emin = 0.0;
-// prt = 0.0;
-// double *expectedp = &expected;
-// double *percntp = %
-// double *eminp = &emin;
-// double *prtp = &prt;
-// double pvalue = 0.0;
-// double *pvaluep = &pvalue;
-// int workspace = 300000;
-// int *workspacep = &workspace;
-
-// nrow = 3;
-// ncol = 2;
-// int *nrowp = &nrow;
-// int *ncolp = &ncol;
-// double *tmp_table = new double[6];
-
-// // tmp_table[0] =10;
-// // tmp_table[1] =20;
-// // tmp_table[2] =30;
-
-// // tmp_table[3] =40;
-// // tmp_table[4] =10;
-// // tmp_table[5] =60;
-
-// tmp_table[0] =10;
-// tmp_table[1] =40;
-// tmp_table[2] =20;
-// tmp_table[3] =10;
-// tmp_table[4] =30;
-// tmp_table[5] =60;
-
-// fexact(nrowp, ncolp, tmp_table, nrowp, expectedp, percntp,eminp, prtp, pvaluep, workspacep);
-
-// cout << "exactP = " << pvalue<<"\n\n";
-
-// exit(0);
-
-// return pre;
-
-}
-
-
-/* fisher2.c: this is fexact.c, part of the R
- (http://cran.r-project.org) distribution
- (package ctest).
- Minor edits by az-Uriarte (rdiaz at cnio.es). May 2003.
-*/
-
-
-/* [Notice from the original fexact.c]
- Fisher's exact test for contingency tables -- usage see below
-
- fexact.f -- translated by f2c (version 19971204).
- Run through a slightly modified version of MM's f2c-clean.
- Heavily hand-edited by KH and MM.
-*/
-
-#include <cstdlib>
-#include <cstdio>
-#include <cmath>
-
-#include <limits>
-#define SINT_MAX INT_MAX
-
-#undef min
-#undef max
-#define max(a, b)((a) < (b) ? (b) : (a))
-#define min(a, b)((a) > (b) ? (b) : (a))
-
-static void f2xact(int *nrow, int *ncol, double *table, int *ldtabl,
- double *expect, double *percnt, double *emin, double
- *prt, double *pre, double *fact, int *ico, int
- *iro, int *kyy, int *idif, int *irn, int *key,
- int *ldkey, int *ipoin, double *stp, int *ldstp,
- int *ifrq, double *dlp, double *dsp, double *tm,
- int *key2, int *iwk, double *rwk);
-static void f3xact(int *nrow, int *irow, int *ncol,int *icol,
- double *dlp, int *mm, double *fact, int *ico, int
- *iro, int *it, int *lb, int *nr, int *nt, int
- *nu, int *itc, int *ist, double *stv, double *alen,
- const double *tol);
-static void f4xact(int *nrow, int *irow, int *ncol, int *icol,
- double *dsp, double *fact, int *icstk, int *ncstk,
- int *lstk, int *mstk, int *nstk, int *nrstk, int
- *irstk, double *ystk, const double *tol);
-static void f5xact(double *pastp, const double *tol, int *kval, int *key,
- int *ldkey, int *ipoin, double *stp, int *ldstp,
- int *ifrq, int *npoin, int *nr, int *nl, int
- *ifreq, int *itop, int *ipsh);
-static void f6xact(int *nrow, int *irow, int *iflag, int *kyy,
- int *key, int *ldkey, int *last, int *ipn);
-static void f7xact(int *nrow, int *imax, int *idif, int *k, int *ks,
- int *iflag);
-static void f8xact(int *irow, int *is, int *i1, int *izero, int *knew);
-static double f9xact(int *n, int *mm, int *ir, double *fact);
-static void f10act(int *nrow, int *irow, int *ncol, int *icol,
- double *val, int *xmin, double *fact, int *nd,
- int *ne, int *m);
-static void f11act(int *irow, int *i1, int *i2, int *knew);
-static void prterr(int icode, char *mes);
-static int iwork(int iwkmax, int *iwkpt, int number, int itype);
-
-#ifdef USING_R
-# define isort(n, ix)R_isort(ix, *n)
-# include <Rmath.h>/* -> pgamma() */
-#else
- static void isort(int *n, int *ix);
- static double gammds(double *y, double *p, int *ifault);
- static double alogam(double *x, int *ifault);
-#endif
-
-/* The only public function : */
-void
-fexact(int *nrow, int *ncol, double *table, int *ldtabl,
- double *expect, double *percnt, double *emin, double *prt,
- double *pre, int *workspace)
-{
-
- /*
- ALGORITHM 643, COLLECTED ALGORITHMS FROM ACM.
- THIS WORK PUBLISHED IN TRANSACTIONS ON MATHEMATICAL SOFTWARE,
- VOL. 19, NO. 4, DECEMBER, 1993, PP. 484-488.
- -----------------------------------------------------------------------
- Name: FEXACT
- Purpose: Computes Fisher's exact test probabilities and a hybrid
- approximation to Fisher exact test probabilities for a
- contingency table using the network algorithm.
- Usage: CALL FEXACT (NROW, NCOL, TABLE, LDTABL, EXPECT, PERCNT,
- EMIN, PRT, PRE)
- Arguments:
- NROW - The number of rows in the table.(Input)
- NCOL - The number of columns in the table.(Input)
- TABLE - NROW by NCOL matrix containing the contingency
- table.(Input)
- LDTABL - Leading dimension of TABLE exactly as specified
- in the dimension statement in the calling
- program.(Input)
-
- EXPECT - Expected value used in the hybrid algorithm for
- deciding when to use asymptotic theory
- probabilities.(Input)
- If EXPECT <= 0.0 then asymptotic theory probabilities
- are not used and Fisher exact test probabilities are
- computed. Otherwise, if PERCNT or more of the cells in
- the remaining table have estimated expected values of
- EXPECT or more, with no remaining cell having expected
- value less than EMIN, then asymptotic chi-squared
- probabilities are used. See the algorithm section of the
- manual document for details.
- Use EXPECT = 5.0 to obtain the 'Cochran' condition.
-
- PERCNT - Percentage of remaining cells that must have
- estimated expected values greater than EXPECT
- before asymptotic probabilities can be used.(Input)
- See argument EXPECT for details.
- Use PERCNT = 80.0 to obtain the 'Cochran' condition.
-
- EMIN - Minimum cell estimated expected value allowed for
- asymptotic chi-squared probabilities to be used.(Input)
- See argument EXPECT for details.
- Use EMIN = 1.0 to obtain the 'Cochran' condition.
-
- PRT - Probability of the observed table for fixed
- marginal totals.(Output)
-
- PRE - Table p-value.(Output)
- PRE is the probability of a more extreme table,
- where `extreme' is in a probabilistic sense.
- If EXPECT < 0 then the Fisher exact probability
- is returned. Otherwise, an approximation to the
- Fisher exact probability is computed based upon
- asymptotic chi-squared probabilities for ``large''
- table expected values. The user defines ``large''
- through the arguments EXPECT, PERCNT, and EMIN.
-
- Remarks:
- 1. For many problems one megabyte or more of workspace can be
- required.If the environment supports it, the user should begin
- by increasing the workspace used to 200,000 units.
- 2. In FEXACT, LDSTP = 30*LDKEY. The proportion of table space used
- by STP may be changed by changing the line MULT = 30 below to
- another value.
- 3. FEXACT may be converted to single precision by setting IREAL = 3,
- and converting all DOUBLE PRECISION specifications (except the
- specifications for RWRK, IWRK, and DWRK) to REAL.This will
- require changing the names and specifications of the intrinsic
- functions ALOG, AMAX1, AMIN1, EXP, and REAL. In addition, the
- machine specific constants will need to be changed, and the name
- DWRK will need to be changed to RWRK in the call to F2XACT.
- 4. Machine specific constants are specified and documented in F2XACT.
- A missing value code is specified in both FEXACT and F2XACT.
- 5. Although not a restriction, is is not generally practical to call
- this routine with large tables which are not sparse and in
- which the 'hybrid' algorithm has little effect. For example,
- although it is feasible to compute exact probabilities for the
- table
- 1 8 5 4 4 2 2
- 5 3 3 4 3 1 0
- 10 1 4 0 0 0 0,
- computing exact probabilities for a similar table which has been
- enlarged by the addition of an extra row (or column) may not be
- feasible.
- -----------------------------------------------------------------------
- */
-
- /* CONSTANT Parameters : */
-
- /* To increase the length of the table of paste path lengths relative
- to the length of the hash table, increase MULT.
- */
- const int mult = 30;
- /* AMISS is a missing value indicator which is returned when the
- probability is not defined.
- */
- const double amiss = -12345.;
- /*
- Set IREAL = 4 for DOUBLE PRECISION
- Set IREAL = 3 for SINGLE PRECISION
- */
-#define i_real 4
-#define i_int 2
-
- /* System generated locals */
- int ikh;
- /* Local variables */
- int nco, nro, ntot, numb, iiwk, irwk;
- int i, j, k, kk, ldkey, ldstp, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10;
- int i3a, i3b, i3c, i9a, iwkmax, iwkpt;
-
- /* Workspace Allocation (freed at end) */
- double *equiv;
- iwkmax = 2 * (int) (*workspace / 2);
- // equiv = (double *) R_alloc(iwkmax / 2, sizeof(double));
- equiv = (double *) calloc(iwkmax / 2, sizeof(double));
-
- /* The check could never happen with Calloc!
- equiv = Calloc(iwkmax / 2, double);
- if (!equiv) {
- prterr(0, "Can not allocate specified workspace");
- } */
-
-#define dwrk (equiv)
-#define iwrk ((int *)equiv)
-#define rwrk ((float *)equiv)
-
- /* Parameter adjustments */
- table -= *ldtabl + 1;
-
- /* Function Body */
- iwkpt = 0;
-
- if (*nrow > *ldtabl)
- prterr(1, "NROW must be less than or equal to LDTABL.");
-
- ntot = 0;
- for (i = 1; i <= *nrow; ++i) {
- for (j = 1; j <= *ncol; ++j) {
- if (table[i + j * *ldtabl] < 0.)
- prterr(2, "All elements of TABLE must be positive.");
- ntot = (int) (ntot + table[i + j * *ldtabl]);
- }
- }
- if (ntot == 0) {
- prterr(3, "All elements of TABLE are zero.\n"
- "PRT and PRE are set to missing values.");
- *prt = amiss;
- *pre = amiss;
- goto L_End;
- }
-
- nco = max(*nrow, *ncol);
- nro = *nrow + *ncol - nco;/* = min(*nrow, *ncol) */
- k = *nrow + *ncol + 1;
- kk = k * nco;
-
- ikh = ntot + 1;
- i1 = iwork(iwkmax, &iwkpt, ikh, i_real);
- i2 = iwork(iwkmax, &iwkpt, nco, i_int);
- i3 = iwork(iwkmax, &iwkpt, nco, i_int);
- i3a = iwork(iwkmax, &iwkpt, nco, i_int);
- i3b = iwork(iwkmax, &iwkpt, nro, i_int);
- i3c = iwork(iwkmax, &iwkpt, nro, i_int);
- ikh = max(k * 5 + (kk << 1), nco * 7 + 800);
- iiwk= iwork(iwkmax, &iwkpt, ikh, i_int);
- ikh = max(nco + 401, k);
- irwk= iwork(iwkmax, &iwkpt, ikh, i_real);
-
- /* NOTE:
- What follows below splits the remaining amount iwkmax - iwkpt of
- (int) workspace into hash tables as follows.
- type size index
- INT 2 * ldkey i4 i5 i11
- REAL 2 * ldkey i8 i9 i10
- REAL 2 * ldstp i6
- INT 6 * ldstp i7
- Hence, we need ldkey times
- 3 * 2 + 3 * 2 * s + 2 * mult * s + 6 * mult
- chunks of integer memory, where s = sizeof(REAL) / sizeof(INT).
- If doubles are used and are twice as long as ints, this gives
- 18 + 10 * mult
- so that the value of ldkey can be obtained by dividing available
- (int) workspace by this number.
-
- In fact, because iwork() can actually s * n + s - 1 int chunks
- when allocating a REAL, we use ldkey = available / numb - 1.
-
- FIXME:
- Can we always assume that sizeof(double) / sizeof(int) is 2?
- */
-
- if (i_real == 4) {/* Double precision reals */
- numb = 18 + 10 * mult;
- } else {/* Single precision reals */
- numb = (mult << 3) + 12;
- }
- ldkey = (iwkmax - iwkpt) / numb - 1;
- ldstp = mult * ldkey;
- ikh = ldkey << 1;i4 = iwork(iwkmax, &iwkpt, ikh, i_int);
- ikh = ldkey << 1;i5 = iwork(iwkmax, &iwkpt, ikh, i_int);
- ikh = ldstp << 1;i6 = iwork(iwkmax, &iwkpt, ikh, i_real);
- ikh = ldstp * 6;i7 = iwork(iwkmax, &iwkpt, ikh, i_int);
- ikh = ldkey << 1;i8 = iwork(iwkmax, &iwkpt, ikh, i_real);
- ikh = ldkey << 1;i9 = iwork(iwkmax, &iwkpt, ikh, i_real);
- ikh = ldkey << 1;i9a = iwork(iwkmax, &iwkpt, ikh, i_real);
- ikh = ldkey << 1;i10 = iwork(iwkmax, &iwkpt, ikh, i_int);
-
- /* To convert to double precision, change RWRK to DWRK in the next CALL.
- */
- f2xact(nrow,
- ncol,
- &table[*ldtabl + 1],
- ldtabl,
- expect,
- percnt,
- emin,
- prt,
- pre,
- dwrk + i1,
- iwrk + i2,
- iwrk + i3,
- iwrk + i3a,
- iwrk + i3b,
- iwrk + i3c,
- iwrk + i4,
- &ldkey,
- iwrk + i5,
- dwrk + i6,
- &ldstp,
- iwrk + i7,
- dwrk + i8,
- dwrk + i9,
- dwrk + i9a,
- iwrk + i10,
- iwrk + iiwk,
- dwrk + irwk);
-
-L_End:
- /* Free(equiv); */
- free(equiv);
- return;
-}
-
-#undef rwrk
-#undef iwrk
-#undef dwrk
-
-
-/*
- -----------------------------------------------------------------------
- Name:F2XACT
- Purpose:Computes Fisher's exact test for a contingency table,
- routine with workspace variables specified.
- Usage:F2XACT (NROW, NCOL, TABLE, LDTABL, EXPECT, PERCNT,
- EMIN, PRT, PRE, FACT, ICO, IRO, KYY, IDIF,
- IRN, KEY, LDKEY, IPOIN, STP, LDSTP, IFRQ,
- DLP, DSP, TM, KEY2, IWK, RWK)
- -----------------------------------------------------------------------
-*/
-void
-f2xact(int *nrow, int *ncol, double *table, int *ldtabl,
- double *expect, double *percnt, double *emin, double *prt,
- double *pre, double *fact, int *ico, int *iro, int *kyy,
- int *idif, int *irn, int *key, int *ldkey, int *ipoin,
- double *stp, int *ldstp, int *ifrq, double *dlp, double *dsp,
- double *tm, int *key2, int *iwk, double *rwk)
-{
- /* IMAX is the largest representable int on the machine. */
- const int imax = SINT_MAX;
-
- /* AMISS is a missing value indicator which is returned when the
- probability is not defined. */
- const double amiss = -12345.;
-
- /* TOL is chosen as the square root of the smallest relative spacing. */
-#ifndef Macintosh
- const static double tol = 3.45254e-7;
-#else
- static double tol = 3.45254e-7;
-#endif
- /* EMX is a large positive value used in comparing expected values. */
- const static double emx = 1e30;
-
- /* Local variables {{any really need to be static ???}} */
- static int kval, kmax, jkey, last, ipsh, itmp, itop, jstp, ntot,
- jstp2, jstp3, jstp4, i, ii, j, k, n, iflag, ncell, ifreq, chisq,
- ikkey, ikstp, ikstp2, k1, kb, kd, ks,
- i31, i32, i33, i34, i35, i36, i37, i38, i39,
- i41, i42, i43, i44, i45, i46, i47, i48, i310, i311,
- nco, nrb, ipn, ipo, itp, nro, nro2;
- static double dspt, dd, df,ddf, drn,dro, emn, obs, obs2, obs3,
- pastp, pv, tmp;
- double d1;
-#ifndef USING_R
- double d2;
- static int ifault;
-#endif
- bool nr_gt_nc;
-
- /* Parameter adjustments */
- table -= *ldtabl + 1;
- --ico;
- --iro;
- --kyy;
- --idif;
- --irn;
- --key;
- --ipoin;
- --stp;
- --ifrq;
- --dlp;
- --dsp;
- --tm;
- --key2;
- --iwk;
- --rwk;
-
-
- /* Check table dimensions */
- if (*nrow > *ldtabl)
- prterr(1, "NROW must be less than or equal to LDTABL.");
- if (*ncol <= 1)
- prterr(4, "NCOL must be at least 2");
-
- /* Initialize KEY array */
- for (i = 1; i <= *ldkey << 1; ++i) {
- key[i] = -9999;
- key2[i] = -9999;
- }
- /* Initialize parameters */
- *pre = 0.;
- itop = 0;
- if (*expect > 0.)
- emn = *emin;
- else
- emn = emx;
-
- nr_gt_nc = *nrow > *ncol;
- /* nco := max(nrow, ncol) : */
- if(nr_gt_nc)
- nco = *nrow;
- else
- nco = *ncol;
- /* Initialize pointers for workspace */
- /* f3xact */
- i31 = 1;
- i32 = i31 + nco;
- i33 = i32 + nco;
- i34 = i33 + nco;
- i35 = i34 + nco;
- i36 = i35 + nco;
- i37 = i36 + nco;
- i38 = i37 + nco;
- i39 = i38 + 400;
- i310 = 1;
- i311 = 401;
- /* f4xact */
- k = *nrow + *ncol + 1;
- i41 = 1;
- i42 = i41 + k;
- i43 = i42 + k;
- i44 = i43 + k;
- i45 = i44 + k;
- i46 = i45 + k;
- i47 = i46 + k * nco;
- i48 = 1;
-
- /* Compute row marginals and total */
- ntot = 0;
- for (i = 1; i <= *nrow; ++i) {
- iro[i] = 0;
- for (j = 1; j <= *ncol; ++j) {
- if (table[i + j * *ldtabl] < -1e-4)
- prterr(2, "All elements of TABLE must be positive.");
- iro[i] += (int) table[i + j * *ldtabl];
- }
- ntot += iro[i];
- }
-
- if (ntot == 0) {
- prterr(3, "All elements of TABLE are zero.\n"
- "PRT and PRE are set to missing values.");
- *pre = *prt = amiss;
- return;
- }
-
- /* Column marginals */
- for (i = 1; i <= *ncol; ++i) {
- ico[i] = 0;
- for (j = 1; j <= *nrow; ++j)
- ico[i] += (int) table[j + i * *ldtabl];
- }
-
- /* sort marginals */
- isort(nrow, &iro[1]);
- isort(ncol, &ico[1]);
-
- /*Determine row and column marginals.
- Define max(nrow,ncol) =: nco >= nro := min(nrow,ncol)
- nco is defined above
-
- Swap marginals if necessary toico[1:nco] & iro[1:nro]
- */
- if (nr_gt_nc) {
- nro = *ncol;
- /* Swap marginals */
- for (i = 1; i <= nco; ++i) {
- itmp = iro[i];
- if (i <= nro)
- iro[i] = ico[i];
- ico[i] = itmp;
- }
- } else
- nro = *nrow;
-
-
- /* Get multiplers for stack */
- kyy[1] = 1;
- for (i = 2; i <= nro; ++i) {
- /* Hash table multipliers */
- if (iro[i - 1] + 1 <= imax / kyy[i - 1]) {
- kyy[i] = kyy[i - 1] * (iro[i - 1] + 1);
- j /= kyy[i - 1];
- } else
- goto L_ERR_5;
- }
- /* Maximum product */
- if (iro[nro - 1] + 1 <= imax / kyy[nro - 1]) {
- kmax = (iro[nro] + 1) * kyy[nro - 1];
- } else {
- L_ERR_5:
- prterr(5, "The hash table key cannot be computed because "
- "the largest key\n"
- "is larger than the largest representable int.\n"
- "The algorithm cannot proceed.\n"
- "Reduce the workspace size, or use `exact = FALSE'.");
- return;
- }
-
- /* Compute log factorials */
- fact[0] = 0.;
- fact[1] = 0.;
- if(ntot >= 2) fact[2] = log(2.);
- /* MM: old code assuming log() to be SLOW */
- for (i = 3; i <= ntot; i += 2) {
- fact[i] = fact[i - 1] + log((double) i);
- j = i + 1;
- if (j <= ntot)
- fact[j] = fact[i] + fact[2] + fact[j / 2] - fact[j / 2 - 1];
- }
- /* Compute obs := observed path length */
- obs = tol;
- ntot = 0;
- for (j = 1; j <= nco; ++j) {
- dd = 0.;
- for (i = 1; i <= nro; ++i) {
- if (nr_gt_nc) {
- dd += fact[(int) table[j + i * *ldtabl]];
- ntot += (int) table[j + i * *ldtabl];
- } else {
- dd += fact[(int) table[i + j * *ldtabl]];
- ntot += (int) table[i + j * *ldtabl];
- }
- }
- obs += fact[ico[j]] - dd;
- }
- /* Denominator of observed table: DRO */
- dro = f9xact(&nro, &ntot, &iro[1], fact);
- *prt = exp(obs - dro);
- /* Initialize pointers */
- k = nco;
- last = *ldkey + 1;
- jkey = *ldkey + 1;
- jstp = *ldstp + 1;
- jstp2 = *ldstp * 3 + 1;
- jstp3 = (*ldstp << 2) + 1;
- jstp4 = *ldstp * 5 + 1;
- ikkey = 0;
- ikstp = 0;
- ikstp2 = *ldstp << 1;
- ipo = 1;
- ipoin[1] = 1;
- stp[1] = 0.;
- ifrq[1] = 1;
- ifrq[ikstp2 + 1] = -1;
-
-Outer_Loop:
- kb = nco - k + 1;
- ks = 0;
- n = ico[kb];
- kd = nro + 1;
- kmax = nro;
- /* IDIF is the difference in going to the daughter */
- for (i = 1; i <= nro; ++i)
- idif[i] = 0;
-
- /* Generate the first daughter */
- do {
- --kd;
- ntot = min(n, iro[kd]);
- idif[kd] = ntot;
- if (idif[kmax] == 0)
- --kmax;
- n -= ntot;
- }
- while (n > 0 && kd != 1);
-
- if (n != 0) {
- goto L310;
- }
-
- k1 = k - 1;
- n = ico[kb];
- ntot = 0;
- for (i = kb + 1; i <= nco; ++i)
- ntot += ico[i];
-
-
-L150:
- /* Arc to daughter length=ICO(KB) */
- for (i = 1; i <= nro; ++i)
- irn[i] = iro[i] - idif[i];
-
- /* Sort irn */
- if (k1 > 1) {
- if (nro == 2) {
- if (irn[1] > irn[2]) {
- ii = irn[1];
- irn[1] = irn[2];
- irn[2] = ii;
- }
- } else if (nro == 3) {
- ii = irn[1];
- if (ii > irn[3]) {
- if (ii > irn[2]) {
- if (irn[2] > irn[3]) {
- irn[1] = irn[3];
- irn[3] = ii;
- } else {
- irn[1] = irn[2];
- irn[2] = irn[3];
- irn[3] = ii;
- }
- } else {
- irn[1] = irn[3];
- irn[3] = irn[2];
- irn[2] = ii;
- }
- } else if (ii > irn[2]) {
- irn[1] = irn[2];
- irn[2] = ii;
- } else if (irn[2] > irn[3]) {
- ii = irn[2];
- irn[2] = irn[3];
- irn[3] = ii;
- }
- } else {
- for (j = 2; j <= nro; ++j) {
- i = j - 1;
- ii = irn[j];
-
- while (ii < irn[i]) {
- irn[i + 1] = irn[i];
- --i;
- if (i == 0)
- break;
- }
- irn[i + 1] = ii;
- }
- }
- /* Adjust start for zero */
- for (i = 1; i <= nro; ++i) {
- if (irn[i] != 0)
- break;
- }
-
- nrb = i;
- nro2 = nro - i + 1;
- } else {
- nrb = 1;
- nro2 = nro;
- }
- /* Some table values */
- ddf = f9xact(&nro, &n, &idif[1], fact);
- drn = f9xact(&nro2, &ntot, &irn[nrb], fact) - dro + ddf;
- /* Get hash value */
- if (k1 > 1) {
- kval = irn[1] + irn[2] * kyy[2];
- for (i = 3; i <= nro; ++i) {
- kval += irn[i] * kyy[i];
- }
- /* Get hash table entry */
- i = kval % (*ldkey << 1) + 1;
- /* Search for unused location */
- for (itp = i; itp <= *ldkey << 1; ++itp) {
- ii = key2[itp];
- if (ii == kval) {
- goto L240;
- } else if (ii < 0) {
- key2[itp] = kval;
- dlp[itp] = 1.;
- dsp[itp] = 1.;
- goto L240;
- }
- }
-
- for (itp = 1; itp <= i - 1; ++itp) {
- ii = key2[itp];
- if (ii == kval) {
- goto L240;
- } else if (ii < 0) {
- key2[itp] = kval;
- dlp[itp] = 1.;
- goto L240;
- }
- }
-
- /* KH
- prterr(6, "LDKEY is too small.\n"
- "It is not possible to give the value of LDKEY required,\n"
- "but you could try doubling LDKEY (and possibly LDSTP).");
- */
- prterr(6, "LDKEY is too small for this problem.\n"
- "Try increasing the size of the workspace.");
- }
-
-L240:
- ipsh = (1);
- /* Recover pastp */
- ipn = ipoin[ipo + ikkey];
- pastp = stp[ipn + ikstp];
- ifreq = ifrq[ipn + ikstp];
- /* Compute shortest and longest path */
- if (k1 > 1) {
- obs2 = obs - fact[ico[kb + 1]] - fact[ico[kb + 2]] - ddf;
- for (i = 3; i <= k1; ++i) {
- obs2 -= fact[ico[kb + i]];
- }
- if (dlp[itp] > 0.) {
- dspt = obs - obs2 - ddf;
- /* Compute longest path */
- dlp[itp] = 0.;
- f3xact(&nro2, &irn[nrb], &k1, &ico[kb + 1], &dlp[itp],
- &ntot, fact, &iwk[i31], &iwk[i32], &iwk[i33],
- &iwk[i34], &iwk[i35], &iwk[i36], &iwk[i37],
- &iwk[i38], &iwk[i39], &rwk[i310], &rwk[i311], &tol);
- dlp[itp] = min(0., dlp[itp]);
- /* Compute shortest path */
- dsp[itp] = dspt;
- f4xact(&nro2, &irn[nrb], &k1, &ico[kb + 1], &dsp[itp], fact,
- &iwk[i47], &iwk[i41], &iwk[i42], &iwk[i43],
- &iwk[i44], &iwk[i45], &iwk[i46], &rwk[i48], &tol);
- dsp[itp] = min(0., dsp[itp] - dspt);
- /* Use chi-squared approximation? */
- if ((irn[nrb] * ico[kb + 1]) > ntot * emn) {
- ncell = 0;
- for (i = 0; i < nro2; ++i)
- for (j = 1; j <= k1; ++j)
- if (irn[nrb + i] * ico[kb + j] >= ntot * *expect)
- ncell++;
-
- if (ncell * 100 >= k1 * nro2 * *percnt) {
- tmp = 0.;
- for (i = 0; i < nro2; ++i)
- tmp += (fact[irn[nrb + i]] -
- fact[irn[nrb + i] - 1]);
- tmp *= k1 - 1;
- for (j = 1; j <= k1; ++j)
- tmp += (nro2 - 1) * (fact[ico[kb + j]] -
- fact[ico[kb + j] - 1]);
- df = (double) ((nro2 - 1) * (k1 - 1));
- tmp += df * 1.83787706640934548356065947281;
- tmp -= (nro2 * k1 - 1) * (fact[ntot] - fact[ntot - 1]);
- tm[itp] = (obs - dro) * -2. - tmp;
- } else {
- /* tm(itp) set to a flag value */
- tm[itp] = -9876.;
- }
- } else {
- tm[itp] = -9876.;
- }
- }
- obs3 = obs2 - dlp[itp];
- obs2 -= dsp[itp];
- if (tm[itp] == -9876.) {
- chisq = (0);
- } else {
- chisq = (1);
- tmp = tm[itp];
- }
- } else {
- obs2 = obs - drn - dro;
- obs3 = obs2;
- }
-
-L300:
- /* Process node with new PASTP */
- if (pastp <= obs3) {
- /* Update pre */
- *pre += (double) ifreq * exp(pastp + drn);
- } else if (pastp < obs2) {
- if (chisq) {
- df = (double) ((nro2 - 1) * (k1 - 1));
-#ifdef USING_R
- pv = pgamma(fmax2(0., tmp + (pastp + drn) * 2.) / 2.,
- df / 2., /*scale = */ 1.,
- /*lower_tail = */FALSE, /*log_p = */ FALSE);
-#else
- d1 = max(0., tmp + (pastp + drn) * 2.) / 2.;
- d2 = df / 2.;
- pv = 1. - gammds(&d1, &d2, &ifault);
-#endif
- *pre += (double) ifreq * exp(pastp + drn) * pv;
- } else {
- /* Put daughter on queue */
- d1 = pastp + ddf;
- f5xact(&d1, &tol, &kval, &key[jkey], ldkey, &ipoin[jkey],
- &stp[jstp], ldstp, &ifrq[jstp], &ifrq[jstp2],
- &ifrq[jstp3], &ifrq[jstp4], &ifreq, &itop, &ipsh);
- ipsh = (0);
- }
- }
- /* Get next PASTP on chain */
- ipn = ifrq[ipn + ikstp2];
- if (ipn > 0) {
- pastp = stp[ipn + ikstp];
- ifreq = ifrq[ipn + ikstp];
- goto L300;
- }
- /* Generate a new daughter node */
- f7xact(&kmax, &iro[1], &idif[1], &kd, &ks, &iflag);
- if (iflag != 1) {
- goto L150;
- }
-
-L310:
- /* Go get a new mother from stage K */
- do {
- iflag = 1;
- f6xact(&nro, &iro[1], &iflag, &kyy[1], &key[ikkey + 1], ldkey,
- &last, &ipo);
- /* Update pointers */
- if (iflag != 3)
- goto Outer_Loop;
- /* else iflag == 3 : no additional nodes to process */
- --k;
- itop = 0;
- ikkey = jkey - 1;
- ikstp = jstp - 1;
- ikstp2 = jstp2 - 1;
- jkey = *ldkey - jkey + 2;
- jstp = *ldstp - jstp + 2;
- jstp2 = (*ldstp << 1) + jstp;
- for (i = 1; i <= *ldkey << 1; ++i)
- key2[i] = -9999;
-
- } while (k >= 2);
-}
-
-/*
- -----------------------------------------------------------------------
- Name: F3XACT
- Purpose: Computes the shortest path length for a given table.
- Usage: F3XACT (NROW, IROW, NCOL, ICOL, DLP, MM, FACT, ICO, IRO,
- IT, LB, NR, NT, NU, ITC, IST, STV, ALEN, TOL)
- Arguments:
- NROW - The number of rows in the table.(Input)
- IROW - Vector of length NROW containing the row sums
- for the table.(Input)
- NCOL - The number of columns in the table.(Input)
- ICOL - Vector of length K containing the column sums
- for the table.(Input)
- DLP - The longest path for the table.(Output)
- MM - The total count in the table.(Output)
- FACT - Vector containing the logarithms of factorials.(Input)
- ICO - Work vector of length MAX(NROW,NCOL).
- IRO - Work vector of length MAX(NROW,NCOL).
- IT - Work vector of length MAX(NROW,NCOL).
- LB - Work vector of length MAX(NROW,NCOL).
- NR - Work vector of length MAX(NROW,NCOL).
- NT - Work vector of length MAX(NROW,NCOL).
- NU - Work vector of length MAX(NROW,NCOL).
- ITC - Work vector of length 400.
- IST - Work vector of length 400.
- STV - Work vector of length 400.
- ALEN - Work vector of length MAX(NROW,NCOL).
- TOL - Tolerance.(Input)
- -----------------------------------------------------------------------
-*/
-
-void
-f3xact(int *nrow, int *irow, int *ncol, int *icol, double *dlp,
- int *mm, double *fact, int *ico, int *iro, int *it,
- int *lb, int *nr, int *nt, int *nu, int *itc, int *ist,
- double *stv, double *alen, const double *tol)
-{
- /* Initialized data */
- static int ldst = 200;
- static int nst = 0;
- static int nitc = 0;
-
- /* Local variables */
- static int xmin;
- static int i, k;
- static double v;
- static int n11, n12, ii, nn, ks, ic1, ic2, nc1, nn1;
- static int nr1, nco;
- static double val;
- static int nct, ipn, irl, key, lev, itp, nro;
- static double vmn;
- static int nrt, kyy, nc1s;
-
- /* Parameter adjustments */
- --stv;
- --ist;
- --itc;
- --nu;
- --nt;
- --nr;
- --lb;
- --it;
- --iro;
- --ico;
- --icol;
- --irow;
-
- /* Function Body */
- for (i = 0; i <= *ncol; ++i) {
- alen[i] = 0.;
- }
- for (i = 1; i <= 400; ++i) {
- ist[i] = -1;
- }
- /* nrow is 1 */
- if (*nrow <= 1) {
- if (*nrow > 0) {
- *dlp -= fact[icol[1]];
- for (i = 2; i <= *ncol; ++i) {
- *dlp -= fact[icol[i]];
- }
- }
- return;
- }
- /* ncol is 1 */
- if (*ncol <= 1) {
- if (*ncol > 0) {
- *dlp = *dlp - fact[irow[1]] - fact[irow[2]];
- for (i = 3; i <= *nrow; ++i) {
- *dlp -= fact[irow[i]];
- }
- }
- return;
- }
- /* 2 by 2 table */
- if (*nrow * *ncol == 4) {
- n11 = (irow[1] + 1) * (icol[1] + 1) / (*mm + 2);
- n12 = irow[1] - n11;
- *dlp = *dlp - fact[n11] - fact[n12] - fact[icol[1] - n11]
- - fact[icol[2] - n12];
- return;
- }
- /* Test for optimal table */
- val = 0.;
- xmin = (0);
- if (irow[*nrow] <= irow[1] + *ncol) {
- f10act(nrow, &irow[1], ncol, &icol[1], &val, &xmin, fact,
- &lb[1], &nu[1], &nr[1]);
- }
- if (! xmin) {
- if (icol[*ncol] <= icol[1] + *nrow) {
- f10act(ncol, &icol[1], nrow, &irow[1], &val, &xmin, fact,
- &lb[1], &nu[1], &nr[1]);
- }
- }
-
- if (xmin) {
- *dlp -= val;
- return;
- }
- /* Setup for dynamic programming */
- nn = *mm;
- /* Minimize ncol */
- if (*nrow >= *ncol) {
- nro = *nrow;
- nco = *ncol;
- for (i = 1; i <= *nrow; ++i) {
- iro[i] = irow[i];
- }
- ico[1] = icol[1];
- nt[1] = nn - ico[1];
- for (i = 2; i <= *ncol; ++i) {
- ico[i] = icol[i];
- nt[i] = nt[i - 1] - ico[i];
- }
- } else {
- nro = *ncol;
- nco = *nrow;
- ico[1] = irow[1];
- nt[1] = nn - ico[1];
- for (i = 2; i <= *nrow; ++i) {
- ico[i] = irow[i];
- nt[i] = nt[i - 1] - ico[i];
- }
- for (i = 1; i <= *ncol; ++i)
- iro[i] = icol[i];
- }
- /* Initialize pointers */
- vmn = 1e10;
- nc1s = nco - 1;
- irl = 1;
- ks = 0;
- k = ldst;
- kyy = ico[nco] + 1;
-
- LnewNode: /* Setup to generate new node */
-
- lev = 1;
- nr1 = nro - 1;
- nrt = iro[irl];
- nct = ico[1];
- lb[1] = (int) ((double) ((nrt + 1) * (nct + 1)) /
- (double) (nn + nr1 * nc1s + 1) - *tol) - 1;
- nu[1] = (int) ((double) ((nrt + nc1s) * (nct + nr1)) /
- (double) (nn + nr1 + nc1s)) - lb[1] + 1;
- nr[1] = nrt - lb[1];
-
- LoopNode: /* Generate a node */
- --nu[lev];
- if (nu[lev] == 0) {
- if (lev == 1)
- goto L200;
-
- --lev;
- goto LoopNode;
- }
- ++lb[lev];
- --nr[lev];
-L120:
- alen[lev] = alen[lev - 1] + fact[lb[lev]];
- if (lev < nc1s) {
- nn1 = nt[lev];
- nrt = nr[lev];
- ++lev;
- nc1 = nco - lev;
- nct = ico[lev];
- lb[lev] = (int) ((double) ((nrt + 1) * (nct + 1)) /
- (double) (nn1 + nr1 * nc1 + 1) - *tol);
- nu[lev] = (int) ((double) ((nrt + nc1) * (nct + nr1)) /
- (double) (nn1 + nr1 + nc1) - lb[lev] + 1);
- nr[lev] = nrt - lb[lev];
- goto L120;
- }
- alen[nco] = alen[lev] + fact[nr[lev]];
- lb[nco] = nr[lev];
-
- v = val + alen[nco];
- if (nro == 2) {
- /* Only 1 row left */
- v = v + fact[ico[1] - lb[1]] + fact[ico[2] - lb[2]];
- for (i = 3; i <= nco; ++i) {
- v += fact[ico[i] - lb[i]];
- }
- if (v < vmn) {
- vmn = v;
- }
- } else if (nro == 3 && nco == 2) {
- /* 3 rows and 2 columns */
- nn1 = nn - iro[irl] + 2;
- ic1 = ico[1] - lb[1];
- ic2 = ico[2] - lb[2];
- n11 = (iro[irl + 1] + 1) * (ic1 + 1) / nn1;
- n12 = iro[irl + 1] - n11;
- v = v + fact[n11] + fact[n12] + fact[ic1 - n11]
- + fact[ic2 - n12];
- if (v < vmn) {
- vmn = v;
- }
- } else {
- /* Column marginals are new node */
- for (i = 1; i <= nco; ++i) {
- it[i] = ico[i] - lb[i];
- }
- /* Sort column marginals */
- if (nco == 2) {
- if (it[1] > it[2]) {
- ii = it[1];
- it[1] = it[2];
- it[2] = ii;
- }
- } else if (nco == 3) {
- ii = it[1];
- if (ii > it[3]) {
- if (ii > it[2]) {
- if (it[2] > it[3]) {
- it[1] = it[3];
- it[3] = ii;
- } else {
- it[1] = it[2];
- it[2] = it[3];
- it[3] = ii;
- }
- } else {
- it[1] = it[3];
- it[3] = it[2];
- it[2] = ii;
- }
- } else if (ii > it[2]) {
- it[1] = it[2];
- it[2] = ii;
- } else if (it[2] > it[3]) {
- ii = it[2];
- it[2] = it[3];
- it[3] = ii;
- }
- } else {
- isort(&nco, &it[1]);
- }
- /* Compute hash value */
- key = it[1] * kyy + it[2];
- for (i = 3; i <= nco; ++i) {
- key = it[i] + key * kyy;
- }
- if(key < 0)
- error("Bug in FEXACT: gave negative key \n");
-
- /* Table index */
- ipn = key % ldst + 1;
-
- /* Find empty position */
- for (itp = ipn, ii = ks + ipn; itp <= ldst; ++itp, ++ii) {
- if (ist[ii] < 0) {
- goto L180;
- } else if (ist[ii] == key) {
- goto L190;
- }
- }
-
- for (itp = 1, ii = ks + 1; itp <= ipn - 1; ++itp, ++ii) {
- if (ist[ii] < 0) {
- goto L180;
- } else if (ist[ii] == key) {
- goto L190;
- }
- }
-
- error("Stack length exceeded in f3xact");
-
- L180: /* Push onto stack */
- ist[ii] = key;
- stv[ii] = v;
- ++nst;
- ii = nst + ks;
- itc[ii] = itp;
- goto LoopNode;
-
- L190: /* Marginals already on stack */
- stv[ii] = min(v, stv[ii]);
- }
- goto LoopNode;
-
-
- L200: /* Pop item from stack */
- if (nitc > 0) {
- /* Stack index */
- itp = itc[nitc + k] + k;
- --nitc;
- val = stv[itp];
- key = ist[itp];
- ist[itp] = -1;
- /* Compute marginals */
- for (i = nco; i >= 2; --i) {
- ico[i] = key % kyy;
- key /= kyy;
- }
- ico[1] = key;
- /* Set up nt array */
- nt[1] = nn - ico[1];
- for (i = 2; i <= nco; ++i)
- nt[i] = nt[i - 1] - ico[i];
-
- /* Test for optimality (L90) */
- xmin = (0);
- if (iro[nro] <= iro[irl] + nco) {
- f10act(&nro, &iro[irl], &nco, &ico[1], &val, &xmin, fact,
- &lb[1], &nu[1], &nr[1]);
- }
- if (!xmin && ico[nco] <= ico[1] + nro)
- f10act(&nco, &ico[1], &nro, &iro[irl], &val, &xmin, fact,
- &lb[1], &nu[1], &nr[1]);
- if (xmin) {
- if (vmn > val)
- vmn = val;
- goto L200;
- }
- else goto LnewNode;
-
- } else if (nro > 2 && nst > 0) {
- /* Go to next level */
- nitc = nst;
- nst = 0;
- k = ks;
- ks = ldst - ks;
- nn -= iro[irl];
- ++irl;
- --nro;
- goto L200;
- }
-
- *dlp -= vmn;
-}
-
-/*
- -----------------------------------------------------------------------
- Name: F4XACT
- Purpose: Computes the longest path length for a given table.
- Usage: CALL F4XACT (NROW, IROW, NCOL, ICOL, DSP, FACT, ICSTK,
- NCSTK, LSTK, MSTK, NSTK, NRSTK, IRSTK, YSTK,
- TOL)
- Arguments:
- NROW - The number of rows in the table.(Input)
- IROW - Vector of length NROW containing the row sums for the
- table. (Input)
- NCOL - The number of columns in the table. (Input)
- ICOL - Vector of length K containing the column sums for the
- table. (Input)
- DSP - The shortest path for the table.(Output)
- FACT - Vector containing the logarithms of factorials. (Input)
- ICSTK - NCOL by NROW+NCOL+1 work array.
- NCSTK - Work vector of length NROW+NCOL+1.
- LSTK - Work vector of length NROW+NCOL+1.
- MSTK - Work vector of length NROW+NCOL+1.
- NSTK - Work vector of length NROW+NCOL+1.
- NRSTK - Work vector of length NROW+NCOL+1.
- IRSTK - NROW by MAX(NROW,NCOL) work array.
- YSTK - Work vector of length NROW+NCOL+1.
- TOL - Tolerance. (Input)
- -----------------------------------------------------------------------
-*/
-
-void
-f4xact(int *nrow, int *irow, int *ncol, int *icol, double *dsp,
- double *fact, int *icstk, int *ncstk, int *lstk, int *mstk,
- int *nstk, int *nrstk, int *irstk, double *ystk, const double *tol)
-{
- /* System generated locals */
- int ikh;
-
- /* Local variables */
- int i, j, k, l, m, n, mn, ic1, ir1, ict, irt, istk, nco, nro;
- double y, amx;
-
- /* Parameter adjustments */
- irstk -= *nrow + 1;
- --irow;
- icstk -= *ncol + 1;
- --icol;
- --ncstk;
- --lstk;
- --mstk;
- --nstk;
- --nrstk;
- --ystk;
-
- /* Function Body */
- /* Take care of the easy cases first */
- if (*nrow == 1) {
- for (i = 1; i <= *ncol; ++i) {
- *dsp -= fact[icol[i]];
- }
- return;
- }
- if (*ncol == 1) {
- for (i = 1; i <= *nrow; ++i) {
- *dsp -= fact[irow[i]];
- }
- return;
- }
- if (*nrow * *ncol == 4) {
- if (irow[2] <= icol[2]) {
- *dsp = *dsp - fact[irow[2]] - fact[icol[1]]
- - fact[icol[2] - irow[2]];
- } else {
- *dsp = *dsp - fact[icol[2]] - fact[irow[1]]
- - fact[irow[2] - icol[2]];
- }
- return;
- }
- /* initialization before loop */
- for (i = 1; i <= *nrow; ++i) {
- irstk[i + *nrow] = irow[*nrow - i + 1];
- }
- for (j = 1; j <= *ncol; ++j) {
- icstk[j + *ncol] = icol[*ncol - j + 1];
- }
-
- nro = *nrow;
- nco = *ncol;
- nrstk[1] = nro;
- ncstk[1] = nco;
- ystk[1] = 0.;
- y = 0.;
- istk = 1;
- l = 1;
- amx = 0.;
-
- /* First LOOP */
- do {
- ir1 = irstk[istk * *nrow + 1];
- ic1 = icstk[istk * *ncol + 1];
- if (ir1 > ic1) {
- if (nro >= nco) {
- m = nco - 1;n = 2;
- } else {
- m = nro;n = 1;
- }
- } else if (ir1 < ic1) {
- if (nro <= nco) {
- m = nro - 1;n = 1;
- } else {
- m = nco;n = 2;
- }
- } else {
- if (nro <= nco) {
- m = nro - 1;n = 1;
- } else {
- m = nco - 1;n = 2;
- }
- }
-
- L60:
- if (n == 1) {
- i = l; j = 1;
- } else {
- i = 1; j = l;
- }
-
- irt = irstk[i + istk * *nrow];
- ict = icstk[j + istk * *ncol];
- mn = irt;
- if (mn > ict) {
- mn = ict;
- }
- y += fact[mn];
- if (irt == ict) {
- --nro;
- --nco;
- f11act(&irstk[istk * *nrow + 1], &i, &nro,
- &irstk[(istk + 1) * *nrow + 1]);
- f11act(&icstk[istk * *ncol + 1], &j, &nco,
- &icstk[(istk + 1) * *ncol + 1]);
- } else if (irt > ict) {
- --nco;
- f11act(&icstk[istk * *ncol + 1], &j, &nco,
- &icstk[(istk + 1) * *ncol + 1]);
- ikh = irt - ict;
- f8xact(&irstk[istk * *nrow + 1], &ikh, &i,
- &nro, &irstk[(istk + 1) * *nrow + 1]);
- } else {
- --nro;
- f11act(&irstk[istk * *nrow + 1], &i, &nro,
- &irstk[(istk + 1) * *nrow + 1]);
- ikh = ict - irt;
- f8xact(&icstk[istk * *ncol + 1], &ikh, &j,
- &nco, &icstk[(istk + 1) * *ncol + 1]);
- }
-
- if (nro == 1) {
- for (k = 1; k <= nco; ++k) {
- y += fact[icstk[k + (istk + 1) * *ncol]];
- }
- break;
- }
- if (nco == 1) {
- for (k = 1; k <= nro; ++k) {
- y += fact[irstk[k + (istk + 1) * *nrow]];
- }
- break;
- }
-
- lstk[istk] = l;
- mstk[istk] = m;
- nstk[istk] = n;
- ++istk;
- nrstk[istk] = nro;
- ncstk[istk] = nco;
- ystk[istk] = y;
- l = 1;
- } while(1);/* end do */
-
- /* L90:*/
- if (y > amx) {
- amx = y;
- if (*dsp - amx <= *tol) {
- *dsp = 0.;
- return;
- }
- }
-
-L100:
- --istk;
- if (istk == 0) {
- *dsp -= amx;
- if (*dsp - amx <= *tol) {
- *dsp = 0.;
- }
- return;
- }
- l = lstk[istk] + 1;
-
- /* L110: */
- for(;; ++l) {
- if (l > mstk[istk])goto L100;
-
- n = nstk[istk];
- nro = nrstk[istk];
- nco = ncstk[istk];
- y = ystk[istk];
- if (n == 1) {
- if (irstk[l + istk * *nrow] <
- irstk[l - 1 + istk * *nrow])goto L60;
- }
- else if (n == 2) {
- if (icstk[l + istk * *ncol] <
- icstk[l - 1 + istk * *ncol])goto L60;
- }
- }
-}
-
-/*
- -----------------------------------------------------------------------
- Name: F5XACT
- Purpose: Put node on stack in network algorithm.
- Usage: CALL F5XACT (PASTP, TOL, KVAL, KEY, LDKEY, IPOIN, STP,
- LDSTP, IFRQ, NPOIN, NR, NL, IFREQ, ITOP,
- IPSH)
- Arguments:
- PASTP - The past path length.(Input)
- TOL - Tolerance for equivalence of past path lengths. (Input)
- KVAL - Key value. (Input)
- KEY - Vector of length LDKEY containing the key values.(in/out)
- LDKEY - Length of vector KEY. (Input)
- IPOIN - Vector of length LDKEY pointing to the
- linked list of past path lengths. (in/out)
- STP - Vector of length LSDTP containing the
- linked lists of past path lengths. (in/out)
- LDSTP - Length of vector STP. (Input)
- IFRQ - Vector of length LDSTP containing the past path
- frequencies. (in/out)
- NPOIN - Vector of length LDSTP containing the pointers to
- the next past path length. (in/out)
- NR - Vector of length LDSTP containing the right object
- pointers in the tree of past path lengths. (in/out)
- NL - Vector of length LDSTP containing the left object
- pointers in the tree of past path lengths. (in/out)
- IFREQ - Frequency of the current path length. (Input)
- ITOP - Pointer to the top of STP. (Input)
- IPSH - Option parameter. (Input)
- If IPSH is true, the past path length is found in the
- table KEY. Otherwise the location of the past path
- length is assumed known and to have been found in
- a previous call. ==>>>>> USING "static" variables
- -----------------------------------------------------------------------
-*/
-
-void
-f5xact(double *pastp, const double *tol, int *kval, int *key, int *ldkey,
- int *ipoin, double *stp, int *ldstp, int *ifrq, int *npoin,
- int *nr, int *nl, int *ifreq, int *itop, int *ipsh)
-{
- /* Local variables */
- static int itmp, ird, ipn, itp;
- double test1, test2;
-
- /* Parameter adjustments */
- --nl;
- --nr;
- --npoin;
- --ifrq;
- --stp;
- --ipoin;
- --key;
-
- /* Function Body */
- if (*ipsh) {
- /* Convert KVAL to int in range 1, ..., LDKEY. */
- ird = *kval % *ldkey + 1;
- /* Search for an unused location */
- for (itp = ird; itp <= *ldkey; ++itp) {
- if (key[itp] == *kval) {
- goto L40;
- }
- if (key[itp] < 0) {
- goto L30;
- }
- }
- for (itp = 1; itp <= ird - 1; ++itp) {
- if (key[itp] == *kval) {
- goto L40;
- }
- if (key[itp] < 0) {
- goto L30;
- }
- }
- /* Return if KEY array is full */
- /* KH
- prterr(6, "LDKEY is too small for this problem.\n"
- "It is not possible to estimate the value of LDKEY "
- "required,\n"
- "but twice the current value may be sufficient.");
- */
- prterr(6, "LDKEY is too small for this problem.\n"
- "Try increasing the size of the workspace.");
-
- /* Update KEY */
-L30:
- key[itp] = *kval;
- ++(*itop);
- ipoin[itp] = *itop;
- /* Return if STP array full */
- if (*itop > *ldstp) {
- /* KH
- prterr(7, "LDSTP is too small for this problem.\n"
- "It is not possible to estimate the value of LDSTP "
- "required,\n"
- "but twice the current value may be sufficient.");
- */
- prterr(7, "LDSTP is too small for this problem.\n"
- "Try increasing the size of the workspace.");
- }
- /* Update STP, etc. */
- npoin[*itop] = -1;
- nr[*itop] = -1;
- nl[*itop] = -1;
- stp[*itop] = *pastp;
- ifrq[*itop] = *ifreq;
- return;
- }
-
- /* Find location, if any, of pastp */
-L40:
- ipn = ipoin[itp];
- test1 = *pastp - *tol;
- test2 = *pastp + *tol;
-
-L50:
- if (stp[ipn] < test1) {
- ipn = nl[ipn];
- if (ipn > 0) {
- goto L50;
- }
- } else if (stp[ipn] > test2) {
- ipn = nr[ipn];
- if (ipn > 0) {
- goto L50;
- }
- } else {
- ifrq[ipn] += *ifreq;
- return;
- }
- /* Return if STP array full */
- ++(*itop);
- if (*itop > *ldstp) {
- /*
- prterr(7, "LDSTP is too small for this problem.\n"
- "It is not possible to estimate the value of LDSTP "
- "required,\n"
- "but twice the current value may be sufficient.");
- */
- prterr(7, "LDSTP is too small for this problem.\n"
- "Try increasing the size of the workspace.");
- return;
- }
- /* Find location to add value */
- ipn = ipoin[itp];
- itmp = ipn;
-
-L60:
- if (stp[ipn] < test1) {
- itmp = ipn;
- ipn = nl[ipn];
- if (ipn > 0) {
- goto L60;
- } else {
- nl[itmp] = *itop;
- }
- } else if (stp[ipn] > test2) {
- itmp = ipn;
- ipn = nr[ipn];
- if (ipn > 0) {
- goto L60;
- } else {
- nr[itmp] = *itop;
- }
- }
- /* Update STP, etc. */
- npoin[*itop] = npoin[itmp];
- npoin[itmp] = *itop;
- stp[*itop] = *pastp;
- ifrq[*itop] = *ifreq;
- nl[*itop] = -1;
- nr[*itop] = -1;
-}
-
-/*
- -----------------------------------------------------------------------
- Name: F6XACT
- Purpose: Pop a node off the stack.
- Usage: CALL F6XACT (NROW, IROW, IFLAG, KYY, KEY, LDKEY, LAST, IPN)
- Arguments:
- NROW - The number of rows in the table.(Input)
- IROW - Vector of length nrow containing the row sums on
- output.(Output)
- IFLAG - Set to 3 if there are no additional nodes to process.
- (Output)
- KYY - Constant mutlipliers used in forming the hash
- table key.(Input)
- KEY - Vector of length LDKEY containing the hash table
- keys.(In/out)
- LDKEY - Length of vector KEY.(Input)
- LAST - Index of the last key popped off the stack.(In/out)
- IPN - Pointer to the linked list of past path lengths.(Output)
- -----------------------------------------------------------------------
-*/
-void
-f6xact(int *nrow, int *irow, int *iflag, int *kyy, int *key, int
- *ldkey, int *last, int *ipn)
-{
- int kval, j;
-
- /* Parameter adjustments */
- --key;
- --kyy;
- --irow;
-
- /* Function Body */
-L10:
- ++(*last);
- if (*last <= *ldkey) {
- if (key[*last] < 0) {
- goto L10;
- }
- /* Get KVAL from the stack */
- kval = key[*last];
- key[*last] = -9999;
- for (j = *nrow; j >= 2; --j) {
- irow[j] = kval / kyy[j];
- kval -= irow[j] * kyy[j];
- }
- irow[1] = kval;
- *ipn = *last;
- } else {
- *last = 0;
- *iflag = 3;
- }
- return;
-}
-
-/*
- -----------------------------------------------------------------------
- Name: F7XACT
- Purpose: Generate the new nodes for given marinal totals.
- Usage: CALL F7XACT (NROW, IMAX, IDIF, K, KS, IFLAG)
- Arguments:
- NROW - The number of rows in the table.(Input)
- IMAX - The row marginal totals.(Input)
- IDIF - The column counts for the new column.(in/out)
- K - Indicator for the row to decrement.(in/out)
- KS - Indicator for the row to increment.(in/out)
- IFLAG - Status indicator.(Output)
- If IFLAG is zero, a new table was generated. For
- IFLAG = 1, no additional tables could be generated.
- -----------------------------------------------------------------------
-*/
-
-void
-f7xact(int *nrow, int *imax, int *idif, int *k, int *ks,
- int *iflag)
-
-{
- int i, m, k1, mm;
-
- /* Parameter adjustments */
- --idif;
- --imax;
-
- /* Function Body */
- *iflag = 0;
- /* Find node which can be incremented, ks */
- if (*ks == 0)
- do {
- ++(*ks);
- } while (idif[*ks] == imax[*ks]);
-
- /* Find node to decrement (>ks) */
- if (idif[*k] > 0 && *k > *ks) {
- --idif[*k];
- do {
- --(*k);
- } while(imax[*k] == 0);
-
- m = *k;
-
- /* Find node to increment (>=ks) */
- while (idif[m] >= imax[m]) {
- --m;
- }
- ++idif[m];
- /* Change ks */
- if (m == *ks) {
- if (idif[m] == imax[m]) {
- *ks = *k;
- }
- }
- }
- else {
- Loop:
- /* Check for finish */
- for (k1 = *k + 1; k1 <= *nrow; ++k1) {
- if (idif[k1] > 0) {
- goto L70;
- }
- }
- *iflag = 1;
- return;
-
- L70:
- /* Reallocate counts */
- mm = 1;
- for (i = 1; i <= *k; ++i) {
- mm += idif[i];
- idif[i] = 0;
- }
- *k = k1;
-
- do {
- --(*k);
- m = min(mm, imax[*k]);
- idif[*k] = m;
- mm -= m;
- } while (mm > 0 && *k != 1);
-
- /* Check that all counts reallocated */
- if (mm > 0) {
- if (k1 != *nrow) {
- *k = k1;
- goto Loop;
- }
- *iflag = 1;
- return;
- }
- /* Get ks */
- --idif[k1];
- *ks = 0;
- do {
- ++(*ks);
- if (*ks > *k) {
- return;
- }
- } while (idif[*ks] >= imax[*ks]);
- }
-}
-
-/*
- -----------------------------------------------------------------------
- Name: F8XACT
- Purpose: Routine for reducing a vector when there is a zero
- element.
- Usage: CALL F8XACT (IROW, IS, I1, IZERO, NEW)
- Arguments:
- IROW - Vector containing the row counts.(Input)
- IS - Indicator.(Input)
- I1 - Indicator.(Input)
- IZERO - Position of the zero.(Input)
- NEW - Vector of new row counts.(Output)
- -----------------------------------------------------------------------
-*/
-
-void
-f8xact(int *irow, int *is, int *i1, int *izero, int *knew)
-{
- int i;
-
- /* Parameter adjustments */
- --knew;
- --irow;
-
- /* Function Body */
- for (i = 1; i < *i1; ++i)
- knew[i] = irow[i];
-
- for (i = *i1; i <= *izero - 1; ++i) {
- if (*is >= irow[i + 1])
- break;
- knew[i] = irow[i + 1];
- }
-
- knew[i] = *is;
-
- for(;;) {
- ++i;
- if (i > *izero)
- return;
- knew[i] = irow[i];
- }
-}
-
-/*
- -----------------------------------------------------------------------
- Name: F9XACT
- Purpose: Computes the log of a multinomial coefficient.
- Usage: F9XACT(N, MM, IR, FACT)
- Arguments:
- N - Length of IR.(Input)
- MM - Number for factorial in numerator.(Input)
- IR - Vector of length N containing the numbers for
- the denominator of the factorial.(Input)
- FACT - Table of log factorials.(Input)
- F9XACT - The log of the multinomal coefficient.(Output)
- -----------------------------------------------------------------------
-*/
-
-double
-f9xact(int *n, int *mm, int *ir, double *fact)
-{
- double d;
- int k;
-
- d = fact[*mm];
- for (k = 0; k < *n; k++)
- d -= fact[ir[k]];
- return d;
-}
-
-/*
- -----------------------------------------------------------------------
- Name: F10ACT
- Purpose: Computes the shortest path length for special tables.
- Usage: F10ACT (NROW, IROW, NCOL, ICOL, VAL, XMIN, FACT, ND, NE, M)
- Arguments:
- NROW - The number of rows in the table.(Input)
- IROW - Vector of length NROW containing the row totals.(Input)
- NCOL - The number of columns in the table. (Input)
- ICO - Vector of length NCOL containing the column totals.(Input)
- VAL - The shortest path. (Output)
- XMIN - Set to true if shortest path obtained. (Output)
- FACT - Vector containing the logarithms of factorials. (Input)
- ND - Workspace vector of length NROW.(Input)
- NE - Workspace vector of length NCOL.(Input)
- M - Workspace vector of length NCOL.(Input)
-
- Chapter: STAT/LIBRARY Categorical and Discrete Data Analysis
- -----------------------------------------------------------------------
-*/
-
-void
-f10act(int *nrow, int *irow, int *ncol, int *icol, double *val,
- int *xmin, double *fact, int *nd, int *ne, int *m)
-{
- /* Local variables */
- int i, is, ix, nrw1;
-
- /* Parameter adjustments */
- --m;
- --ne;
- --nd;
- --icol;
- --irow;
-
- /* Function Body */
- for (i = 1; i <= *nrow - 1; ++i)
- nd[i] = 0;
-
- is = icol[1] / *nrow;
- ix = icol[1] - *nrow * is;
- ne[1] = is;
- m[1] = ix;
- if (ix != 0)
- ++nd[ix];
-
- for (i = 2; i <= *ncol; ++i) {
- ix = icol[i] / *nrow;
- ne[i] = ix;
- is += ix;
- ix = icol[i] - *nrow * ix;
- m[i] = ix;
- if (ix != 0)
- ++nd[ix];
- }
-
- for (i = *nrow - 2; i >= 1; --i)
- nd[i] += nd[i + 1];
-
- ix = 0;
- nrw1 = *nrow + 1;
- for (i = *nrow; i >= 2; --i) {
- ix = ix + is + nd[nrw1 - i] - irow[i];
- if (ix < 0)
- return;
- }
-
- for (i = 1; i <= *ncol; ++i) {
- ix = ne[i];
- is = m[i];
- *val = *val + is * fact[ix + 1] + (*nrow - is) * fact[ix];
- }
- *xmin = (1);
-
- return;
-}
-
-/*
- -----------------------------------------------------------------------
- Name: F11ACT
- Purpose: Routine for revising row totals.
- Usage: CALL F11ACT (IROW, I1, I2, KNEW)
- Arguments:
- IROW - Vector containing the row totals.(Input)
- I1 - Indicator.(Input)
- I2 - Indicator. (Input)
- KNEW - Vector containing the row totals.(Output)
- -----------------------------------------------------------------------
-*/
-void
-f11act(int *irow, int *i1, int *i2, int *knew)
-{
- int i;
-
- /* Parameter adjustments */
- --knew;
- --irow;
-
- for (i = 1; i <= (*i1 - 1); ++i)knew[i] = irow[i];
- for (i = *i1; i <= *i2; ++i)knew[i] = irow[i + 1];
-
- return;
-}
-
-void prterr(int icode, char *mes)
-{ error("FEXACT error "+int2str(icode)); }
-
-/*
- -----------------------------------------------------------------------
- Name: iwork
- Purpose: Routine for allocating workspace.
- Usage: iwork (iwkmax, iwkpt, number, itype)
- Arguments:
- iwkmax - Maximum (int) amount of workspace.(Input)
- iwkpt - Amount of (int) workspace currently allocated.(in/out)
- number - Number of elements of workspace desired.(Input)
- itype - Workspace type.(Input)
- ITYPE TYPE
- 2 integer
- 3 float
- 4 double
- iwork(): Index in rwrk, dwrk, or iwrk of the beginning of
- the first free element in the workspace array.(Output)
- -----------------------------------------------------------------------
-*/
-int
-iwork(int iwkmax, int *iwkpt, int number, int itype)
-{
- int i;
-
- i = *iwkpt;
- if (itype == 2 || itype == 3)
- *iwkpt += number;
- else { /* double */
- if (i % 2 != 0)
- ++i;
- *iwkpt += (number << 1);
- i /= 2;
- }
- if (*iwkpt > iwkmax)
- prterr(40, "Out of workspace.");
-
- return i;
-}
-
-#ifndef USING_R
-
-void isort(int *n, int *ix)
-{
- /*
- -----------------------------------------------------------------------
- Name: ISORT
- Purpose: Shell sort for an int vector.
- Usage: CALL ISORT (N, IX)
- Arguments:
- N - Lenth of vector IX.(Input)
- IX - Vector to be sorted.(in/out)
- -----------------------------------------------------------------------
- */
- static int ikey, i, j, m, il[10], kl, it, iu[10], ku;
-
- /* Parameter adjustments */
- --ix;
-
- /* Function Body */
- m = 1;
- i = 1;
- j = *n;
-
-L10:
- if (i >= j) {
- goto L40;
- }
- kl = i;
- ku = j;
- ikey = i;
- ++j;
- /* Find element in first half */
-L20:
- ++i;
- if (i < j) {
- if (ix[ikey] > ix[i]) {
- goto L20;
- }
- }
- /* Find element in second half */
-L30:
- --j;
- if (ix[j] > ix[ikey]) {
- goto L30;
- }
- /* Interchange */
- if (i < j) {
- it = ix[i];
- ix[i] = ix[j];
- ix[j] = it;
- goto L20;
- }
- it = ix[ikey];
- ix[ikey] = ix[j];
- ix[j] = it;
- /* Save upper and lower subscripts of the array yet to be sorted */
- if (m < 11) {
- if (j - kl < ku - j) {
- il[m - 1] = j + 1;
- iu[m - 1] = ku;
- i = kl;
- --j;
- } else {
- il[m - 1] = kl;
- iu[m - 1] = j - 1;
- i = j + 1;
- j = ku;
- }
- ++m;
- goto L10;
- } else {
- prterr(20, "This should never occur.");
- }
- /* Use another segment */
-L40:
- --m;
- if (m == 0) {
- return;
- }
- i = il[m - 1];
- j = iu[m - 1];
- goto L10;
-}
-
-double gammds(double *y, double *p, int *ifault)
-{
- /*
- -----------------------------------------------------------------------
- Name: GAMMDS
- Purpose: Cumulative distribution for the gamma distribution.
- Usage: PGAMMA (Q, ALPHA,IFAULT)
- Arguments:
- Q - Value at which the distribution is desired. (Input)
- ALPHA - Parameter in the gamma distribution. (Input)
- IFAULT - Error indicator.(Output)
- IFAULT DEFINITION
- 0 No error
- 1 An argument is misspecified.
- 2 A numerical error has occurred.
- PGAMMA - The cdf for the gamma distribution with parameter alpha
- evaluated at Q. (Output)
- -----------------------------------------------------------------------
-
- Algorithm AS 147 APPL. Statist. (1980) VOL. 29, P. 113
-
- Computes the incomplete gamma integral for positive parameters Y, P
- using and infinite series.
- */
-
- static double a, c, f, g;
- static int ifail;
-
- /* Checks for the admissibility of arguments and value of F */
- *ifault = 1;
- g = 0.;
- if (*y <= 0. || *p <= 0.) {
- return g;
- }
- *ifault = 2;
-
- /*
- ALOGAM is natural log of gamma function no need to test ifail as
- an error is impossible
- */
-
- a = *p + 1.;
- f = exp(*p * log(*y) - alogam(&a, &ifail) - *y);
- if (f == 0.) {
- return g;
- }
- *ifault = 0;
-
- /* Series begins */
- c = 1.;
- g = 1.;
- a = *p;
-L10:
- a += 1.;
- c = c * *y / a;
- g += c;
- if (c / g > 1e-6) {
- goto L10;
- }
- g *= f;
- return g;
-}
-
-/*
- -----------------------------------------------------------------------
- Name: ALOGAM
- Purpose: Value of the log-gamma function.
- Usage: ALOGAM (X, IFAULT)
- Arguments:
- X - Value at which the log-gamma function is to be evaluated.
- (Input)
- IFAULT - Error indicator. (Output)
- IFAULT DEFINITION
- 0 No error
- 1 X < 0
- ALGAMA - The value of the log-gamma function at XX. (Output)
- -----------------------------------------------------------------------
-
- Algorithm ACM 291, Comm. ACM. (1966) Vol. 9, P. 684
-
- Evaluates natural logarithm of gamma(x) for X greater than zero.
-*/
-
-double alogam(double *x, int *ifault)
-{
- /* Initialized data */
-
- static double a1 = .918938533204673;
- static double a2 = 5.95238095238e-4;
- static double a3 = 7.93650793651e-4;
- static double a4 = .002777777777778;
- static double a5 = .083333333333333;
-
- /* Local variables */
- static double f, y, z;
-
- *ifault = 1;
- if (*x < 0.) {
- return(0.);
- }
- *ifault = 0;
- y = *x;
- f = 0.;
- if (y >= 7.) {
- goto L30;
- }
- f = y;
-L10:
- y += 1.;
- if (y >= 7.) {
- goto L20;
- }
- f *= y;
- goto L10;
-L20:
- f = -log(f);
-L30:
- z = 1. / (y * y);
- return(f + (y - .5) * log(y) - y + a1 +
- (((-a2 * z + a3) * z - a4) * z + a5) / y);
-}
-
-#endif /* not USING_R */
-
Deleted: trunk/packages/plink/trunk/nlist.cpp
===================================================================
--- trunk/packages/plink/trunk/nlist.cpp 2008-04-09 09:43:11 UTC (rev 1725)
+++ trunk/packages/plink/trunk/nlist.cpp 2008-04-09 10:09:22 UTC (rev 1726)
@@ -1,260 +0,0 @@
-
-
-//////////////////////////////////////////////////////////////////
-// //
-// PLINK (c) 2005-2007 Shaun Purcell //
-// //
-// This file is distributed under the GNU General Public //
-// License, Version 2. Please see the file COPYING for more //
-// details //
-// //
-//////////////////////////////////////////////////////////////////
-
-#include <algorithm>
-#include <string>
-#include <cstdio>
-#include <vector>
-
-#include "nlist.h"
-
-vector<int> NList::deparseNumberList(string input)
-{
-
- // We have a string that should contain integers, possibily
- // separated by "," and "-" characters
-
- // Use 1-N coding up until the last moment, instead of
- // standard 0..N-1 coding
-
- firstWord = "1";
- lastWord = int2str(maxcat);
-
- vector<string> tok = tokenize(input);
- vector<int> nums;
- for (int i=0; i<tok.size(); i++)
- {
- int j;
- if ( tok[i] == range_string )
- nums.push_back(-1);
- else
- {
- if ( ! from_string<int>( j, tok[i] , std::dec))
- continue;
- nums.push_back(j);
- }
- }
- return expandNumberList(nums);
-}
-
-
-vector<int> NList::deparseStringList(string input, map<string,int> * mapping)
-{
-
- // Assume map contains a 0..N-1 coding in the map
-
- map<string,int>::iterator im = mapping->begin();
- while ( im != mapping->end() )
- {
- int i = im->second;
-
- if ( i == 0 )
- firstWord = im->first;
-
- if ( i == maxcat - 1 )
- lastWord = im->first;
-
- ++im;
- }
-
-
- ////////////////////////////////////////////////////////////////
- // Convert string codes to numbers, then call deparseNumberList
-
- // But incrememnt to 1..N coding here, i.e. so that it is
- // similar process to the human-friendly 1..N coding for a
- // numeric list
-
- vector<string> tok = tokenize(input);
- vector<int> nums;
- for (int i=0; i<tok.size(); i++)
- {
- map<string,int>::iterator im = mapping->find(tok[i]);
- if ( im != mapping->end() )
- nums.push_back(im->second + 1);
- else if ( tok[i] == range_string )
- nums.push_back(-1);
- else
- error("Cannot find value: " + tok[i] + "\n");
- }
- return expandNumberList(nums);
-}
-
-
-
-vector<int> NList::expandNumberList(vector<int> & nlist)
-{
-
- // Convert
- vector<int> n;
-
- //////////////////
- // Check ranges
-
- for (int i=0; i<nlist.size(); i++)
- {
- if ( nlist[i] == -1 )
- {
- if ( nlist[i-1] > nlist[i+1] )
- {
- int tmp = nlist[i+1];
- nlist[i+1] = nlist[i-1];
- nlist[i-1] = tmp;
- }
- }
- }
-
-
- //////////////////
- // Expand ranges
-
- for (int i=0; i<nlist.size(); i++)
- {
-
- if ( nlist[i]>0 )
- {
- // Only add valid codes
- if ( nlist[i] <= maxcat )
- n.push_back(nlist[i]);
- }
- else
- {
- int start = nlist[i-1]+1;
- int end = nlist[i+1]-1;
- if ( end > maxcat )
- end = maxcat;
-
- for (int j=start; j<=end; j++)
- n.push_back(j);
- }
- }
-
- // Sort and uniquify
- stable_sort(n.begin(),n.end());
- vector<int>::iterator ne = unique(n.begin(),n.end());
- n.erase(ne, n.end());
-
- // Shift to 0..N-1 coding
- for (int i=0; i<n.size(); i++)
- --n[i];
-
-
- // Or is it a negative complement?
- if ( negmode )
- {
-
- vector<bool> k(maxcat,false);
- for (int i=0; i<n.size(); i++)
- k[ n[i] ] = true;
-
- vector<int> n2 = n;
- n.clear();
-
- for (int i=0; i<maxcat; i++)
- {
- if ( ! k[i] )
- {
- n.push_back(i);
- }
- }
- }
-
- return n;
-
-}
-
-
-vector<string> NList::tokenize(string s)
-{
-
- vector<string> t;
-
- string word = "";
- bool range_set = false;
- bool number_given = false;
-
- for (int i=0; i<s.length(); i++)
- {
-
- // Skip spaces
- if (s[i] == ' ')
- continue;
-
- if ( s[i] == ',' || s[i] == range_char )
- {
-
- // If we've built up a meaningful word,
- // we can add it now
-
- if ( word != "" )
- {
- t.push_back(word);
- word = "";
- number_given = true;
- range_set = false;
- }
-
-
- // Start a range?
- if ( s[i]== range_char )
- {
-
- // Can't do twice
- if (range_set)
- error("Invalid list: " + s + "\n");
-
- range_set = true;
-
- // But was a start point given? Make one
- // if not
-
- if ( ! number_given )
- {
- t.push_back("1");
- word="";
- }
-
- number_given = false;
-
- t.push_back(range_string);
-
- }
-
- // And if the "-" is also the last
- // character, we need an end point
-
- if ( s[i]== range_char && i == s.length()-1 )
- t.push_back( lastWord );
-
- // Here, fill in here
- if ( s[i]==',')
- {
- number_given = false;
- if ( range_set )
- {
- range_set = false;
- t.push_back( lastWord );
- }
- }
- }
- else if ( i == s.length()-1 )
- {
- // Is this a last number?
- t.push_back(word+s[i]);
- number_given = true;
- }
- else
- word += s[i];
- }
-
- return t;
-}
Deleted: trunk/packages/plink/trunk/parse.cpp
===================================================================
--- trunk/packages/plink/trunk/parse.cpp 2008-04-09 09:43:11 UTC (rev 1725)
+++ trunk/packages/plink/trunk/parse.cpp 2008-04-09 10:09:22 UTC (rev 1726)
@@ -1,3387 +0,0 @@
-
-
-//////////////////////////////////////////////////////////////////
-// //
-// PLINK (c) 2005-2008 Shaun Purcell //
-// //
-// This file is distributed under the GNU General Public //
-// License, Version 2. Please see the file COPYING for more //
-// details //
-// //
-//////////////////////////////////////////////////////////////////
-
-
-#include <iostream>
-#include <string>
-#include <cmath>
-#include <cstdlib>
-#include "options.h"
-#include "helper.h"
-
-
-void getOutputFilename(CArgs & a)
-{
-
- ////////////////////////////////////////
- // Insert commands from a file
-
- if (a.find("--script"))
- {
- string f = a.value("--script");
- a.fromScript(f);
- }
-
- if (a.find("--rerun"))
- {
- string f = a.value("--rerun");
- a.fromPriorLog(f);
- }
-
- if (a.find("--out"))
- {
- par::output_file_name = a.value("--out");
- }
-
- if (a.find("--gplink"))
- {
- if ( par::cli )
- error("Cannot specify --gplink and --interactive");
- par::silent = true;
- par::gplink = true;
- }
-
- if (a.find("--silent"))
- {
- if ( par::cli )
- error("Cannot specify --silent and --interactive");
-
- par::silent = true;
- }
-
- if (a.find("--interactive"))
- {
- if ( a.find("--script") )
- error("Cannot specify --script and --interactive");
- par::cli = true;
- }
-}
-
-void setOptions(CArgs & a)
-{
-
-
- /////////////////////////////////////
- // Web-based functions
-
- if (a.find("--noweb")) par::web_check = false;
-
- if (a.find("--lookup"))
- {
- par::lookup = true;
- par::lookup_single_snp = true;
- par::lookup_snp = a.value("--lookup");
- }
-
- if (a.find("--lookup-list"))
- {
- par::lookup = true;
- par::lookup_to_file = true;
- par::lookup_single_snp = false;
- par::lookup_snp = a.value("--lookup-list");
- }
-
- if (a.find("--lookup-save"))
- par::lookup_to_file = true;
-
- if (a.find("--lookup-gene"))
- {
- par::lookup = true;
- par::lookup_to_file = true;
- par::lookup_gene = true;
- par::lookup_gene_name = a.value("--lookup-gene");
- }
-
- if (a.find("--R"))
- {
- par::run_R_script = true;
- par::R_script = a.value("--R");
- }
-
- if (a.find("--R-debug"))
- par::run_R_write_script = true;
-
- if (a.find("--R-chisq"))
- par::run_R_chisq = true;
-
- if (a.find("--R-z"))
- par::run_R_z = true;
-
- if (a.find("--R-nsnps"))
- par::run_R_nsnps = a.value_int("--R-nsnps");
-
- if (a.find("--seed"))
- par::random_seed = a.value_lui("--seed");
-
- ////////////////////////////////////////
- // A data-generation option?
-
- bool makedata = false;
-
- if (a.find("--recode"))
- {
- makedata = true;
- par::recode = true;
- // and unless otherwise specified, set GENO = 1 and MAF = 0
- if (!a.find("--maf")) par::min_af = 0.0;
- if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
- if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
- }
-
- if (a.find("--recode12"))
- {
- makedata = true;
- par::recode_12 = true;
- // and unless otherwise specified, set GENO = 1 and MAF = 0
- if (!a.find("--maf")) par::min_af = 0.0;
- if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
- if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
- }
-
- if (a.find("--recodeHV"))
- {
- makedata = true;
- par::recode_HV = true;
- // and unless otherwise specified, set GENO = 1 and MAF = 0
- if (!a.find("--maf")) par::min_af = 0.0;
- if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
- if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
- }
-
- if (a.find("--recode-whap"))
- {
- makedata = true;
- par::recode_whap = true;
- // and unless otherwise specified, set GENO = 1 and MAF = 0
- if (!a.find("--maf")) par::min_af = 0.0;
- if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
- if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
- }
-
- if (a.find("--recode-fastphase"))
- {
- makedata = true;
- par::recode_fastphase = true;
- // and unless otherwise specified, set GENO = 1 and MAF = 0
- if (!a.find("--maf")) par::min_af = 0.0;
- if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
- if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
- }
-
- if (a.find("--recode-structure"))
- {
- makedata = true;
- par::recode_structure = true;
- // and unless otherwise specified, set GENO = 1 and MAF = 0
- if (!a.find("--maf")) par::min_af = 0.0;
- if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
- if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
- }
-
- if (a.find("--recode-bimbam"))
- {
- makedata = true;
- par::recode_bimbam = true;
- // and unless otherwise specified, set GENO = 1 and MAF = 0
- if (!a.find("--maf")) par::min_af = 0.0;
- if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
- if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
- }
-
- if (a.find("--recodeAD"))
- {
- makedata = true;
- par::recode_AD = true;
- // and unless otherwise specified, set GENO = 1 and MAF = 0
- if (!a.find("--maf")) par::min_af = 0.0;
- if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
- if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
- }
-
-
- if (a.find("--recodeA"))
- {
- makedata = true;
- par::recode_AD = true;
- par::recode_AD_Aonly = true;
- // and unless otherwise specified, set GENO = 1 and MAF = 0
- if (!a.find("--maf")) par::min_af = 0.0;
- if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
- if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
- }
-
- if (a.find("--recode-allele"))
- {
- if ( ! par::recode_AD )
- error("Must specify --recodeA or --recodeAD with --recode-allele");
- par::recode_allele_coding = true;
- par::recode_allele_coding_file = a.value("--recode-allele");
- }
-
- if (a.find("--make-bed"))
- {
- makedata = true;
- par::write_bitfile = true;
-
- // and unless otherwise specified, set GENO = 1 and MAF = 0
- if (!a.find("--maf")) par::min_af = 0.0;
- if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
- if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
- }
-
-
- //////////////////////////////////////////////
- // Modifiers of main data generation commands
-
- if (a.find("--alleleACGT")) par::recode_ACGT = true;
-
- if (a.find("--allele1234")) par::recode_1234 = true;
-
- if (a.find("--transpose"))
- {
- if ( ! ( a.find("--recode") || a.find("--recode12") ) )
- error("--transpose requires --recode or --recode12");
- par::recode_transpose = true;
- }
-
- if (a.find("--tab"))
- {
- if ( ! makedata )
- error("You can only specify --tab with a --recode* option");
- par::recode_delimit = "\t";
- }
-
-
- ////////////////////////////////////////////////////
- // Convenience functions and obligatory missingness
-
- // Zero out genotypes for a particular SNP / cluster
-
- if ( a.find("--zero-cluster") )
- {
- if ( ! a.find("--within") )
- error("You must specify --within with --zero-cluster");
- par::zero_cluster = true;
- par::zero_cluster_filename = a.value("--zero-cluster");
- }
-
- // Perform genotyping rate calculations, but allow for
- // obligatory missingness
-
- if ( a.find("--oblig-missing") )
- {
- if ( ! a.find("--oblig-clusters") )
- error("You must specify --oblig-clusters with --oblig-missing");
- par::oblig_missing = true;
- par::oblig_missing_filename = a.value("--oblig-missing");
- }
-
- if ( a.find("--oblig-clusters") )
- {
- if ( ! a.find("--oblig-missing") )
- error("You must specify --oblig-missing with --oblig-clusters");
- par::oblig_clusters_filename = a.value("--oblig-clusters");
- }
-
- // Multiple category phenotype
-
- if ( a.find("--loop-assoc") )
- {
- if ( a.find("--within") )
- error("You cannot specify --within with --loop-assoc");
-
- par::all_pheno = true;
- par::assoc_test = true;
- par::loop_over = true;
- par::include_cluster_from_file = true;
- par::include_cluster_filename = a.value("--loop-assoc");
- }
-
-
-
- ////////////////////////////////////////
- // TODO list
-
- if (a.find("--todo"))
- {
- cout << "TODO list\n"
- << " * Make --min/--max thresholds apply w/ read-segment\n"
- << " * Odds ratio for DFAM\n"
- << " * X chromosome phasing\n"
- << " * Check Hotelling's T2 imputation of missing genotypes\n"
- << " * Add improved familial phasing to haplotype tests\n"
- << " * Add OR and CI to haplotype tests\n"
- << " * Add CMH and/or permutation to haplotype tests\n"
- << " \n"
- << " * Include weighted association test, for haplotypes (effN)\n"
- << " * Phasing option to merge with current data\n"
- << " \n"
- << " * Check new X models\n"
- << " * Check new MM tests\n"
- << " \n";
- exit(0);
- }
-
-
- ///////////////////////////////////////
- // Output options
-
- if (a.find("--flag")) par::flag = true;
-
- if (a.find("--verbose")) par::verbose = true;
-
- if (a.find("--pedigree")) par::dumpped = true;
-
- if (a.find("--debug")) par::debug = true;
-
-
- ///////////////////////////////////////
- // IBD analyses, output options
-
- if (a.find("--multi")) par::multi_output = true;
-
- if (a.find("--gmulti")) par::gmulti_output = true;
-
- if (a.find("--summarise-ibd")) par::summary_ibd_output = true;
-
- if (a.find("--genome"))
- {
- // By default, include everybody
- if (!a.find("--min"))
- {
- par::MIN_PIHAT = 0;
- par::pihat_filter = false;
- }
-
- par::genome_output = true;
- }
-
- if (a.find("--impossible"))
- {
- if (!a.find("--genome"))
- error("Can only specify --impossible with --genome");
- par::show_impossible_IBD = false;
- }
-
- if (a.find("--nudge"))
- {
- if (!a.find("--genome"))
- error("Can only specify --nudge with --genome");
- if (a.find("--impossible"))
- error("Cannot specify --impossible and --nudge together");
- par::nudge = true;
- }
-
- if (a.find("--unbounded"))
- {
- par::bound = false;
- par::MIN_PIHAT = -1;
- par::MAX_PIHAT = 1;
- }
-
- if (a.find("--genome-lists"))
- {
- if ( ! ( a.find("--genome") || a.find("--segment") ) )
- error("Must specify --genome or --segment with --genome-lists");
- par::genome_2sets = true;
- vector<string> s = a.value("--genome-lists",2);
- par::genome_setlist1 = s[0];
- par::genome_setlist2 = s[1];
- }
-
- if (a.find("--segment-within"))
- {
- if ( !a.find("--within"))
- error("You need to specify a --within {clusterfile}");
- par::IBD_within = true;
- }
-
- if (a.find("--genome-test"))
- {
- par::genome_test = true;
- par::genome_test_threshold = a.value_double("--genome-test");
- par::plink = true;
- par::permute = true;
- par::adaptive_perm = false;
- par::replicates = 100000;
- }
-
- if (a.find("--ibs-test"))
- {
- par::ibs_test = true;
- par::permute = true;
- par::adaptive_perm = false;
- par::replicates = 100000;
- }
-
- if (a.find("--ibs-test2"))
- {
- par::ibs_test = true;
- par::ibs_test_method2 = true;
- par::permute = true;
- par::adaptive_perm = false;
- par::replicates = 100000;
- }
-
-
- if (a.find("--segment-match-snp"))
- par::genome_test_min_snp = a.value_int("--segment-match-snp");
-
-
-
-
- ///////////////////////////////////////////
- // WGAS main options: summary stats and QC
-
-
- if (a.find("--missing")) { par::report_missing = true; }
-
- if (a.find("--mendel")) { par::MENDEL_report = true; }
-
- if (a.find("--hardy")) par::HWD_report = true;
-
- if (a.find("--hardy2")) par::HWD_report = par::HWD_standard = true;
-
- if (a.find("--het")) par::inbreeding = true;
-
- if (a.find("--check-sex"))
- par::check_sex = true;
-
- if (a.find("--impute-sex"))
- par::check_sex = par::impute_sex = true;
-
- if (a.find("--test-missing")) { par::test_missing = true; }
-
- if (a.find("--test-mishap")) { par::mishap_test = true; }
-
- if (a.find("--mishap-window"))
- {
- par::mishap_test = true;
- par::mishap_window = a.value_int("--mishap-window");
- }
-
- if (a.find("--score"))
- {
- par::score_risk = true;
- par::score_risk_file = a.value("--score");
- }
-
-
-
- ////////////////////////////////////////////////////////////////
- // Proxy association methods
-
- if (a.find("--proxy-assoc"))
- {
- par::proxy_assoc = true;
- par::proxy_CC = true;
- par::proxy_assoc_snp = a.value("--proxy-assoc");
- if ( par::proxy_assoc_snp == "all" || par::proxy_assoc_snp == "ALL" )
- par::proxy_all = true;
-
- if ( a.find("--proxy-glm") )
- par::proxy_glm = true;
- }
-
-
- if (a.find("--proxy-drop"))
- {
- if ( a.find("--proxy-impute"))
- error("Cannot have --proxy-drop and --proxy-impute (already implied)");
- par::proxy_leave_out = true;
- }
-
- if (a.find("--proxy-exclude"))
- {
- par::proxy_exclude = true;
- par::proxy_exclude_from_file = false;
- par::proxy_exclude_list = a.value("--proxy-exclude");
- }
- else if (a.find("--proxy-exclude-list"))
- {
- par::proxy_exclude = true;
- par::proxy_exclude_from_file = true;
- par::proxy_exclude_list = a.value("--proxy-exclude-list");
- }
-
- if (a.find("--proxy-include-reference"))
- par::proxy_include_reference = true;
-
- if (a.find("--proxy-impute"))
- {
- par::proxy_assoc = true;
- par::proxy_impute = true;
- par::proxy_assoc_snp = a.value("--proxy-impute");
- if ( par::proxy_assoc_snp == "all" || par::proxy_assoc_snp == "ALL" )
- par::proxy_all = true;
-
- if ( a.find("--proxy-dosage") )
- par::proxy_record_dosage = true;
-
- if ( a.find("--proxy-replace") )
- par::proxy_impute_replace = true;
-
-// if ( a.find("--proxy-preserve") )
-// par::proxy_impute_preserve_genotyped = true;
-
- if ( a.find("--proxy-genotypic-concordance") )
- par::proxy_impute_genotypic_concordance = true;
-
- }
-
- if (a.find("--proxy-error")) par::proxy_error = true;
-
-
- if (a.find("--proxy-impute-threshold"))
- par::proxy_impute_threshold = a.value_double("--proxy-impute-threshold");
-
- if (a.find("--proxy-tdt"))
- {
- par::proxy_assoc = true;
- par::proxy_TDT = true;
- par::proxy_assoc_snp = a.value("--proxy-tdt");
- if ( par::proxy_assoc_snp == "all" || par::proxy_assoc_snp == "ALL" )
- par::proxy_all = true;
- }
-
- if ( par::proxy_assoc )
- {
- // Reset default missing rate per individual
- // per haplotype (more relevant if the window
- // size is small (i.e. to goal is not to discard
- // too many / any individuals, but rely on E-M
- // phasing to reconstruct missing genotypes. Normally
- // the default value here is 0.5; it can be modified as
- // --hap-miss option is below this one
-
- par::hap_missing_geno = 0.9;
-
- }
-
- if (a.find("--proxy-verbose"))
- par::proxy_full_report = true;
-
- if (a.find("--proxy-window"))
- par::proxy_window = a.value_int("--proxy-window");
-
- if (a.find("--proxy-flanking"))
- {
- if ( par::proxy_all )
- error("Cannt specify --proxy-flanking with >1 reference SNP");
-
- par::proxy_list = true;
- par::proxy_list_file = a.value("--proxy-flanking");
- }
-
- if (a.find("--proxy-list"))
- {
- if ( a.find("--proxy-flanking"))
- error("Cannt specify --proxy-flanking with >1 reference SNP");
-
- par::proxy_all_list = true;
- par::proxy_all_list_file = a.value("--proxy-list");
- }
-
- if (a.find("--proxy-kb"))
- par::proxy_kb = a.value_double("--proxy-kb");
-
- if (a.find("--proxy-sub-r2"))
- par::proxy_r2 = a.value_double("--proxy-sub-r2");
-
- if (a.find("--proxy-maf"))
- par::proxy_maf = a.value_double("--proxy-maf");
-
- if (a.find("--proxy-geno"))
- par::proxy_geno = a.value_double("--proxy-geno");
-
- if (a.find("--proxy-mhf"))
- par::proxy_mhf = a.value_double("--proxy-mhf");
-
- if (a.find("--proxy-sub-maxsnp"))
- par::proxy_maxhap = a.value_int("--proxy-sub-maxsnp");
-
- if (a.find("--proxy-no-r2-filter"))
- par::proxy_r2_filter = false;
-
- if (a.find("--proxy-r2"))
- {
- par::proxy_r2_filter = true;
- vector<string> s = a.value("--proxy-r2",3);
- par::proxy_r2_filter_A = getDouble(s[0], "--proxy-r2");
- par::proxy_r2_filter_B = getDouble(s[1], "--proxy-r2");
- par::proxy_r2_filter_C = getDouble(s[2], "--proxy-r2");
- }
-
- if (a.find("--proxy-maxsnp"))
- par::proxy_snp_filter = a.value_int("--proxy-maxsnp");
-
- if (a.find("--proxy-show-proxies"))
- par::proxy_list_proxies = true;
-
- if (a.find("--proxy-r2-reference-only"))
- par::proxy_reference_only = true;
-
-
- ////////////////////////////////////////
- // Segmental options
-
- // --segment-match-snp
- // --homozyg
- // --homozyg-window-kb
- // --homozyg-window-snp
- // --homozyg-window-het
- // --homozyg-window-missing
- // --homozyg-snp
- // --homozyg-kb
- // --homozyg-density
- // --homozyg-gap
- // --homozyg-group
- // --homozyg-match
- // --homozyg-het
- // --homozyg-verbose
-
- // --segment
- // --segment-gap
- // --segment-length
- // --segment-thresholds
- // --segment-minimal
-
- // --segment-group
- // --segment-spanning
- // --segment-from
- // --segment-to
- // --segment-force
- // --segment-match
- // --segment-verbose
-
- // --pool-size
-
- // Read segments back in
- // --read-segment
- // --read-segment-minimal
-
-
- //////////////////////////
- // CNV/other segment types
-
- if (a.find("--cnv-list"))
- {
- par::cnv_list = true;
- par::cnv_listname = a.value("--cnv-list");
-
- // Remove any missing individuals
- if (!a.find("--cnv-missing-phenotypes"))
- par::ignore_phenotypes = false;
-
- if (a.find("--cnv-make-map"))
- par::cnv_makemap = true;
-
- if (a.find("--cnv-write"))
- {
- par::cnv_writelist = true;
- if (a.find("--with-phenotype"))
- par::dump_covar_with_phenotype = true;
- }
-
- if (a.find("--cnv-intersect"))
- {
- if ( a.find("--cnv-exclude") ||
- a.find("--cnv-count"))
- error("Cannot specify --cnv-count/intersect/exclude together");
-
- par::cnv_intersect = true;
- par::cnv_intersect_file = a.value("--cnv-intersect");
- }
-
- if (a.find("--cnv-exclude"))
- {
- if ( a.find("--cnv-count") ||
- a.find("--cnv-intersect"))
- error("Cannot specify --cnv-count/intersect/exclude together");
-
- par::cnv_intersect = par::cnv_exclude = true;
- par::cnv_intersect_file = a.value("--cnv-exclude");
- }
-
- if (a.find("--cnv-count"))
- {
- if ( a.find("--cnv-exclude") ||
- a.find("--cnv-intersect"))
- error("Cannot specify --cnv-count/intersect/exclude together");
- par::cnv_intersect = par::cnv_count = true;
- par::cnv_intersect_file = a.value("--cnv-count");
- }
-
- if (a.find("--cnv-freq-exclude-above"))
- {
- par::cnv_freq_include = true;
- par::cnv_freq_include_below = true;
- par::cnv_freq_include_cnt = a.value_int("--cnv-freq-exclude-above");
- }
-
- if (a.find("--cnv-freq-exclude-below"))
- {
- par::cnv_freq_include = true;
- par::cnv_freq_include_below = false;
- par::cnv_freq_include_cnt = a.value_int("--cnv-freq-exclude-below");
- }
-
- if (a.find("--cnv-unique"))
- {
- par::cnv_unique = true;
- }
-
- if (a.find("--cnv-report-regions") )
- {
- if ( ! ( a.find("--cnv-intersect") || a.find("--cnv-exclude")) )
- error("Must specify either --cnv-intersect or --cnv-exclude with --cnv-report-regions");
- par::cnv_intersect_writeback = true;
- }
-
- if (a.find("--cnv-overlap"))
- {
- par::cnv_defined_overlap = true;
- par::cnv_overlap = a.value_double("--cnv-overlap");
- }
-
- if (a.find("--cnv-kb"))
- par::cnv_min_kb = a.value_int("--cnv-kb");
- if (a.find("--cnv-score"))
- par::cnv_min_score = a.value_double("--cnv-score");
- if (a.find("--cnv-sites"))
- par::cnv_min_sites = a.value_int("--cnv-sites");
-
- if (a.find("--cnv-max-kb"))
- par::cnv_max_kb = a.value_int("--cnv-max-kb");
- if (a.find("--cnv-max-score"))
- par::cnv_max_score = a.value_double("--cnv-max-score");
- if (a.find("--cnv-max-sites"))
- par::cnv_max_sites = a.value_int("--cnv-max-sites");
-
- if (a.find("--cnv-del"))
- par::cnv_del_only = true;
- if (a.find("--cnv-dup"))
- par::cnv_dup_only = true;
-
- if (a.find("--cnv-test-window"))
- {
- par::seg_test_window = true;
- par::seg_test_window_bp = a.value_double("--cnv-test-window") * 1000;
- }
-
- if (a.find("--cnv-test-2sided"))
- par::segment_test_1sided = false;
-
- if (a.find("--cnv-indiv-perm"))
- par::cnv_indiv_perm = true;
-
- if (a.find("--cnv-drop-no-segment"))
- par::cnv_drop_no_segment = true;
-
- if (a.find("--merge") || a.find("--bmerge"))
- error("Cannot specify --cnv-list and merge SNP data together");
-
- if (a.find("--exclude") || a.find("--extract"))
- error("Cannot specify --cnv-list and exclude/extract markers SNP data together");
-
- if (a.find("--maf") || a.find("--geno") || a.find("--mind"))
- error("Cannot specify --cnv-list and filter on SNP data together");
-
- }
-
- ///////////////////////
- // Runs of homozygosity
-
- if (a.find("--homozyg"))
- {
- par::homo_run = true;
- }
-
- if (a.find("--read-homozyg"))
- {
- par::homo_run = true;
- par::read_segment_filename = a.value("--read-homozyg");
- par::read_segment_file = true;
- }
-
- if (a.find("--homozyg-snp"))
- {
- par::homo_run = true;
- par::homo_run_snps = true;
- par::homo_run_length_snps = a.value_int("--homozyg-snp");
- }
-
- if (a.find("--homozyg-kb"))
- {
- par::homo_run = true;
- par::homo_run_kb = true;
- par::homo_run_length_kb = a.value_int("--homozyg-kb");
- }
-
- if (a.find("--homozyg-density"))
- {
- par::homo_run = true;
- par::homo_run_density = a.value_double("--homozyg-density");
- }
-
- if (a.find("--homozyg-gap"))
- {
- par::homo_run = true;
- par::homo_run_gap = a.value_int("--homozyg-gap");
- }
-
- if (a.find("--homozyg-window-snp"))
- {
- par::homo_run = true;
- par::homo_windowSize = a.value_int("--homozyg-window-snp");
- }
-
- if (a.find("--homozyg-window-kb"))
- {
- par::homo_run = true;
- par::homo_windowKB = a.value_int("--homozyg-window-kb");
- }
-
- if (a.find("--homozyg-window-het"))
- {
- par::homo_run = true;
- par::homo_windowAllowedHet = a.value_int("--homozyg-window-het");
- }
-
- if (a.find("--homozyg-window-missing"))
- {
- par::homo_run = true;
- par::homo_windowAllowedMissing = a.value_int("--homozyg-window-missing");
- }
-
- if (a.find("--homozyg-window-threshold"))
- {
- par::homo_run = true;
- par::homo_threshold = a.value_double("--homozyg-window-threshold");
- }
-
- if (a.find("--homozyg-group"))
- {
- par::homo_summary_allelic_match = true;
- par::fuzzy_homo = 0.95;
- }
-
- if (a.find("--homozyg-match"))
- {
- par::homo_summary_allelic_match = true;
- par::fuzzy_homo = a.value_double("--homozyg-match");
- }
-
- if (a.find("--homozyg-het"))
- {
- if (! ( a.find("--homozyg-snp") ||
- a.find("--homozyg-kb") ) )
- error("Must specify --homozyg-snp or --homozyg-kb with --homozyg-het");
- par::homo_run_het = a.value_int("--homozyg-het");
- }
-
- if (a.find("--homozyg-verbose"))
- par::homozyg_verbose = true;
-
- if (a.find("--consensus-match"))
- {
- par::homo_run_consensus_match = true;
- }
-
- if (a.find("--pool-size"))
- {
- par::pool_size_min = a.value_int("--pool-size");
- }
-
- if (a.find("--ibs")) par::ibs_run = true;
-
- if (a.find("--ibs2")) par::ibs_run = par::ibs_2only = true;
-
- if (a.find("--ibs-density"))
- {
- par::ibs_run = true;
- par::ibs_run_density = a.value_double("--ibs-density");
- }
-
- if (a.find("--ibs-kb"))
- {
- par::ibs_run = true;
- par::ibs_run_length_kb = a.value_int("--ibs-kb");
- }
-
- if (a.find("--ibs-snp"))
- {
- par::ibs_run = true;
- par::ibs_run_length_snps = a.value_int("--ibs-snp");
- }
-
-// static int ibs_inner_run_length_kb;
-
-// if (a.find("--ibs-join-snp"))
-// {
-// par::ibs_run = true;
-// par::ibs_join_snp = a.value_int("--ibs-join-snp");
-// }
-
-// if (a.find("--ibs-join-snp"))
-// {
-// par::ibs_run = true;
-// par::ibs_join_snp = a.value_int("--ibs-join-snp");
-// }
-
-// if (a.find("--ibs-join-kb"))
-// {
-// par::ibs_run = true;
-// par::ibs_join_kb = a.value_int("--ibs-join-kb") * 1000;
-// }
-
-
- if (a.find("--ibs-gap"))
- {
- par::ibs_run = true;
- par::ibs_inter_snp_distance = a.value_int("--ibs-gap") * 1000;
- }
-
- if (a.find("--ibs-miss"))
- {
- par::ibs_run = true;
- par::ibs_run_missing = a.value_int("--ibs-miss");
- }
-
- if (a.find("--ibs-err"))
- {
- par::ibs_run = true;
- par::ibs_run_0 = a.value_int("--ibs-err");
- }
-
- if (a.find("--miss-run-snps"))
- {
- par::miss_run = true;
- par::miss_run_length = a.value_int("--miss-run-snps");
- }
-
- if (a.find("--miss-run"))
- {
- cerr << "\n*** WARNING -- use --miss-run-snps N option instead\n\n";
- par::miss_run = true;
- par::miss_run_length_kb = true;
- par::miss_run_length = a.value_int("--miss-run");
- }
-
- if (a.find("--miss-run-level"))
- {
- par::miss_run = true;
- par::miss_run_level = a.value_double("--miss-run-level");
- }
-
-
- ///////////////////////////
- // Segmental sharing tests
-
-// if (a.find("--plink"))
-// {
-// par::plink = true;
-// par::nudge = true;
-// }
-
- if (a.find("--segment"))
- {
- par::plink = true;
- par::nudge = true;
- par::segment_output = true;
- }
-
- if (a.find("--segment-ibs"))
- {
- par::segment_validate = true;
- }
-
- if (a.find("--segment-minimal"))
- {
- par::plink = true;
- par::nudge = true;
- par::segment_output = true;
- par::segment_minimal = true;
- }
-
- if (a.find("--segment-test-individual"))
- {
- // Instead of pairwise Case-Case versus non-Case-Case,
- // just count the number of segs that any one individual
- // has, and compare count in cases to count in controls
- par::segment_test_individual = true;
- if (a.find("--specific"))
- par::segment_test_specific_segs = true;
- }
-
- if (a.find("--segment-test-ignore-discordant"))
- {
- // Pairwise Case-Case versus Control-Control
- par::segment_test_ignore_discordant = true;
- }
-
- if (a.find("--segment-test-fisher"))
- {
- par::segment_test_fisher = true;
- }
-
- if (a.find("--segment-test-2sided"))
- par::segment_test_1sided = false;
-
- if (a.find("--segment-group"))
- {
- // Use HBD segment match routine, but without
- // allelic identity option
- par::segment_overlap = true;
- par::homo_summary_allelic_match = false;
- par::fuzzy_homo = 0.95;
- }
-
- if (a.find("--segment-spanning"))
- {
-
-// if (!a.find("--segment-group"))
-// error("Must specify --segment-group for --segment-spanning\n");
-
- par::segment_overlap = true;
- par::homo_summary_allelic_match = false;
- par::fuzzy_homo = 0.95;
- par::segment_m1 = par::segment_m2 = a.value("--segment-spanning");
- }
-
- if (a.find("--segment-from"))
- {
- if (!a.find("--segment-group"))
- error("Must specify --segment-group for --segment-from\n");
- par::segment_m1 = a.value("--segment-from");
- }
-
- if (a.find("--segment-to"))
- {
- if (!a.find("--segment-group"))
- error("Must specify --segment-group for --segment-to\n");
- par::segment_m2 = a.value("--segment-to");
- }
-
- if (a.find("--segment-force"))
- {
- if (! ( a.find("--segment-from") && a.find("--segment-to") ) )
- error("Can only use --segment-force with --segment-from/to \n");
- par::force_span = true;
- }
-
- if (a.find("--segment-match"))
- {
- // Use HBD segment match routine, but without
- // allelic identity option
- par::segment_overlap = true;
- par::homo_summary_allelic_match = true;
- par::fuzzy_homo = a.value_double("--segment-match");
- }
-
- if (a.find("--segment-verbose"))
- par::segment_verbose = true;
-
- if (a.find("--read-segment"))
- {
- if (par::segment_output)
- error("Cannot specify both --segment and --read-segment\n");
- par::read_segment_filename = a.value("--read-segment");
- par::read_segment_file = true;
- par::plink = true;
- par::segment_output = true;
- }
-
- if (a.find("--read-segment-minimal"))
- {
- if (par::segment_output)
- error("Cannot specify both --segment and --read-segment\n");
- par::read_segment_filename = a.value("--read-segment-minimal");
- par::read_segment_file = true;
- par::plink = true;
- par::segment_output = true;
- par::segment_minimal = true;
- }
-
- if (a.find("--segment-gap"))
- {
- par::segment_output = true;
- par::segment_inter_snp_distance = a.value_int("--segment-gap");
- }
-
- if (a.find("--segment-length"))
- {
- // Min length in kb, convert to bp
- par::segment_length = a.value_int("--segment-length") * 1000;
- }
-
- if (a.find("--segment-snp"))
- {
- // Min length in SNPs
- par::segment_snp = a.value_int("--segment-snp");
- }
-
- if (a.find("--segment-thresholds"))
- {
- par::segment_output = true;
- vector<string> s = a.value("--segment-thresholds",2);
- par::segment_threshold_start = getDouble(s[0], "--segment-thresholds");
- par::segment_threshold_finish = getDouble(s[1], "--segment-thresholds");
- }
-
-
- //////////////////////////
- // Misc, external functions
-
- if (a.find("--rare-test"))
- {
- vector<string> s = a.value("--rare-test",2);
- par::rarer_maf_threshold = getDouble(s[0], "--rare-test");
- par::rarer_dist_threshold = getDouble(s[1], "--rare-test") * 1000;
- par::rare_test = true;
- }
-
-
- //////////////////////////
- // Association testing
-
- if (a.find("--assoc")) par::assoc_test = true;
-
- if (a.find("--qt-means"))
- {
- if ( ! a.find("--assoc"))
- error("Can only specify --qt-means with --assoc for a QTL test");
- par::qt_means = true;
- }
-
- if (a.find("--logistic")) par::assoc_test = par::assoc_glm = true;
-
- if (a.find("--linear")) par::assoc_test = par::assoc_glm = true;
-
- if (a.find("--no-snp")) par::assoc_glm_without_main_snp = true;
-
- if (a.find("--vif")) par::vif_threshold = a.value_double("--vif");
-
- if (a.find("--genotypic"))
- {
- if ( a.find("--hethom"))
- par::twoDFmodel_hethom = true;
-
- par::twoDFmodel = true;
- }
-
- if (a.find("--interaction")) par::simple_interaction = true;
-
- if (a.find("--parameters"))
- {
- string ilist = a.value("--parameters");
- par::parameter_list = parse2int(ilist);
- par::glm_user_parameters = true;
- }
-
- if (a.find("--tests"))
- {
- string ilist = a.value("--tests");
- par::test_list = parse2int(ilist);
- par::glm_user_test = true;
- }
-
- if (a.find("--condition"))
- {
- if (! ( a.find("--linear") ||
- a.find("--logistic") ||
- a.find("--proxy-glm") ||
- a.find("--chap")) )
- error("Can only use --condition with --linear, --logistic, --proxy-glm or --chap");
-
- par::conditioning_snp_single = true;
- par::conditioning_snps = true;
- par::conditioning_snp_name = a.value("--condition");
- }
-
- if (a.find("--condition-list"))
- {
- if ( ! ( a.find("--linear") || a.find("--logistic") || a.find("--chap")) )
- error("Can only use --condition-list with --linear, --logistic or --chap");
-
- par::conditioning_snps = true;
- par::conditioning_snps_file = a.value("--condition-list");
- }
-
- if (a.find("--test-all")) par::test_full_model = true;
-
- if (a.find("--sex"))
- par::glm_sex_effect = true;
-
- if (a.find("--no-x-sex"))
- par::glm_no_auto_sex_effect = true;
-
- if (a.find("--xchr-model"))
- {
- par::xchr_model = a.value_int("--xchr-model");
- if (par::xchr_model < 0 || par::xchr_model >4)
- error("--xchr-model must have a value between 1 and 4");
- }
-
- if (a.find("--dominant"))
- {
- if ( ! ( a.find("--linear") || a.find("--logistic")) )
- error("Can only use --condition-list with --linear or --logistic");
- if (a.find("--genotypic"))
- error("Cannot specify --dominant and --genotypic together");
- par::glm_dominant = true;
-
- // Only consider autosomes
- par::xchr_model = 0;
- }
-
- if (a.find("--recessive"))
- {
- if ( ! ( a.find("--linear") || a.find("--logistic")) )
- error("Can only use --condition-list with --linear or --logistic");
- if (a.find("--genotypic"))
- error("Cannot specify --recessive and --genotypic together");
- if (a.find("--dominant"))
- error("Cannot specify --recessive and --dominant together");
- par::glm_recessive = true;
-
- // Only consider autosomes
- par::xchr_model = 0;
- }
-
- if (a.find("--qfam"))
- {
-
- if (a.find("--within"))
- error("Cannot specify --within and --qfam together");
- if (a.find("--family"))
- error("Cannot specify --family and --qfam together");
- if (a.find("--genedrop"))
- error("Cannot specify --genedrop and --qfam together");
-
- par::QTDT_test = true;
- par::assoc_test = true;
- par::QFAM_within1 = true;
- }
-
- if (a.find("--qfam-total"))
- {
-
- if (a.find("--within"))
- error("Cannot specify --within and --qfam-total together");
- if (a.find("--family"))
- error("Cannot specify --family and --qfam-total together");
- if (a.find("--genedrop"))
- error("Cannot specify --genedrop and --qfam-total together");
-
- par::QTDT_test = true;
- par::assoc_test = true;
- par::QFAM_total = true;
- }
-
- if (a.find("--qfam-between"))
- {
-
- if (a.find("--within"))
- error("Cannot specify --within and --qfam-between together");
- if (a.find("--family"))
- error("Cannot specify --family and --qfam-between together");
- if (a.find("--genedrop"))
- error("Cannot specify --genedrop and --qfam-between together");
-
- par::QTDT_test = true;
- par::assoc_test = true;
- par::QFAM_between = true;
- }
-
- if (a.find("--qfam-parents"))
- {
-
- if (a.find("--within"))
- error("Cannot specify --within and --qfam-parents together");
- if (a.find("--family"))
- error("Cannot specify --family and --qfam-parents together");
- if (a.find("--genedrop"))
- error("Cannot specify --genedrop and --qfam-parents together");
-
- par::QTDT_test = true;
- par::assoc_test = true;
- par::QFAM_within2 = true;
- }
-
-
- if (a.find("--mh") || a.find("--mh1")) par::CMH_test_1 = par::assoc_test = true;
- if (a.find("--mh-ord")) par::CMH_test_ORD = par::assoc_test = true;
- if (a.find("--mh2")) par::CMH_test_2 = par::assoc_test = true;
-
- if (a.find("--bd"))
- {
- par::CMH_test_1 = par::assoc_test = true;
- par::breslowday = true;
- }
-
- if (a.find("--homog")) par::assoc_test = par::OR_homog_test = true;
-
- if (a.find("--model") ||
- a.find("--model-dom") ||
- a.find("--model-rec") ||
- a.find("--model-trend") ||
- a.find("--model-gen") )
- {
- par::assoc_test = true;
- par::full_model_assoc = true;
-
- if (a.find("--model-gen")) par::model_perm_gen = true;
- else if (a.find("--model-dom")) par::model_perm_dom = true;
- else if (a.find("--model-rec")) par::model_perm_rec = true;
- else if (a.find("--model-trend")) par::model_perm_trend = true;
- else par::model_perm_best = true;
-
- if ( (par::model_perm_gen || par::model_perm_best )
- && a.find("--adjust") )
- error("Cannot specific --model-gen or --model-best with --adjust\n"
- "Add --model-dom, --model-rec or --model-trend instead");
-
- }
-
- if (a.find("--fisher"))
- {
- par::assoc_test = true;
- par::fisher_test = true;
- par::min_geno_cell = 0;
- }
-
- if (a.find("--cell"))
- par::min_geno_cell = a.value_int("--cell");
-
- if (a.find("--gxe"))
- {
- if (!a.find("--covar"))
- error ("--covar {filename} must be specified with --gxe");
- par::assoc_gxe = true;
- }
-
- if (a.find("--tdt"))
- {
- par::TDT_test = true;
- par::perm_TDT_basic = true;
- par::perm_TDT_parent = false;
- }
-
- if (a.find("--mating"))
- {
- par::TDT_test = true;
- par::mating_tests = true;
- }
-
- if (a.find("--dfam"))
- {
- if ( a.find("--family") )
- error("Cannot specify --family with --dfam\n");
-
- par::TDT_test = true;
- par::sibTDT_test = true;
- }
-
- if (a.find("--dfam-no-tdt"))
- par::dfam_tdt = false;
-
- if (a.find("--dfam-no-sibs"))
- par::dfam_sibs = false;
-
- if (a.find("--dfam-no-unrelateds"))
- par::dfam_unrelateds = false;
-
- if (a.find("--parentdt1"))
- {
- par::TDT_test = true;
- par::perm_TDT_basic = false;
- par::perm_TDT_parent = true;
- }
-
- if (a.find("--parentdt2"))
- {
- par::TDT_test = true;
- par::perm_TDT_basic = false;
- par::perm_TDT_parent = false;
- }
-
- if (a.find("--poo"))
- {
- if (!a.find("--tdt"))
- error("Parent-of-origin analysis requires --tdt option");
-
- if (a.find("--hap-tdt"))
- error("Parent-of-origin analysis not yet implemented for haplotypic TDT");
-
- par::TDT_test = true;
- par::parent_of_origin = true;
-
- // flavour of permutation?
- if (a.find("--pat"))
- {
- par::perm_POO_poo = false;
- par::perm_POO_pat = true;
- }
- else if (a.find("--mat"))
- {
- par::perm_POO_poo = false;
- par::perm_POO_mat = true;
- }
- else if (a.find("--best"))
- {
- par::perm_POO_poo = false;
- par::perm_POO_best = true;
- }
- }
-
-
-
-
- if (a.find("--sharing")) par::ibs_sharing_test = true;
-
- if (a.find("--boot")) par::boot = true;
-
- if (a.find("--r"))
- {
- par::disp_r1 = true;
- if ( ! a.find("--matrix") )
- par::disp_r_window = true;
- }
-
- if (a.find("--r2"))
- {
- par::disp_r2 = true;
- if ( ! a.find("--matrix") )
- par::disp_r_window = true;
- }
-
- if (a.find("--ld"))
- {
- par::calc_SNPSNP_LD = true;
- vector<string> s = a.value("--ld",2);
- par::ld_SNP1 = s[0];
- par::ld_SNP2 = s[1];
- }
-
- if (a.find("--ld-window"))
- {
- if (a.find("--matrix"))
- error("Cannot specify --matrix and --ld-window together\n");
- par::disp_r_window = true;
- par::disp_r_window_snp = a.value_int("--ld-window");
- }
-
- if (a.find("--ld-window-kb"))
- {
- if (a.find("--matrix"))
- error("Cannot specify --matrix and --ld-window together\n");
- par::disp_r_window = true;
- // Store in base-pair units
- par::disp_r_window_kb = a.value_int("--ld-window-kb") * 1000;
- }
-
- if (a.find("--ld-window-r2"))
- {
- if (a.find("--matrix"))
- error("Cannot specify --matrix and --ld-window together\n");
- par::disp_r_window = true;
- par::disp_r_window_r2 = a.value_double("--ld-window-r2");
- }
-
- if (a.find("--indep"))
- {
- if (makedata)
- {
- string msg = "Cannot specify --indep with --make-bed or --recode\n";
- msg += " use --extract/--exclude with *.prune.in/*.prune.out files";
- error(msg);
- }
-
- par::prune_ld = true;
- vector<string> s = a.value("--indep",3);
- par::prune_ld_win = getInt(s[0].c_str(),"--indep");
- par::prune_ld_step = getInt(s[1].c_str(),"--indep");
- par::prune_ld_vif = getDouble(s[2].c_str(),"--indep");
-
- if (par::prune_ld_win<2)
- error("Cannot have a window size < 2 for --indep {window} {step} {VIF}");
-
- if (par::prune_ld_step<1)
- error("Cannot have a window step < 1 for --indep {window} {step} {VIF}");
-
- if (par::prune_ld_vif<1)
- error("Cannot have a VIF threshold < 1 for --indep {window} {step} {VIF}");
-
- }
-
- if (a.find("--indep-pairwise"))
- {
- if (makedata)
- {
- string msg = "Cannot specify --indep with --make-bed or --recode\n";
- msg += " use --extract/--exclude with *.prune.in/*.prune.out files";
- error(msg);
- }
-
- par::prune_ld = true;
- par::prune_ld_pairwise = true;
- vector<string> s = a.value("--indep-pairwise",3);
- par::prune_ld_win = getInt(s[0].c_str(),"--indep-pairwise");
- par::prune_ld_step = getInt(s[1].c_str(),"--indep-pairwise");
- par::prune_ld_r2 = getDouble(s[2].c_str(),"--indep-pairwise");
-
- if (par::prune_ld_win<2)
- error("Cannot have a window size < 2 for --indep-pairwise {window} {step} {R^2}");
- if (par::prune_ld_step<1)
- error("Cannot have a window step < 1 for --indep-pairwise {window} {step} {R^2}");
- if (par::prune_ld_r2<0)
- error("Cannot have an R^2 threshold < 0 for --indep-pairwise {window} {step} {R^2}");
- if (par::prune_ld_r2>1)
- error("Cannot have an R^2 threshold > 1 for --indep-pairwise {window} {step} {R^2}");
-
- // Compare to correlation
- par::prune_ld_r2 = sqrt(par::prune_ld_r2);
-
- }
-
- if (a.find("--T2") || a.find("--t2") )
- {
- if ( a.find("--set-test") || a.find("--assoc") )
- error("Cannot specify --T2 and other association commands");
- par::set_test = true;
- par::hotel = true;
- }
-
- if (a.find("--set")) {
- par::setfile = a.value("--set");
-
- }
-
- if (a.find("--set-test"))
- {
- par::set_test = true;
- if ( (!a.find("--gene")) && ( a.find("--assoc") || a.find("--tdt") ) &&
- ( !a.find("--mperm") ) )
- error("Must use --mperm N with set association tests");
-
- }
-
- if (a.find("--set-score"))
- {
- par::set_test = true;
- par::set_score = true;
- par::set_score_p = a.value_double("--set-score");
- par::set_max = par::set_min = 1;
- if ( a.find("--set-min") || a.find("--set-max") )
- error("Cannot specify --set-min or --set-max with --set-score");
- }
-
- if (a.find("--set-p2"))
- par::set_p2 = true;
-
- if (a.find("--set-min"))
- {
- if (!a.find("--set"))
- error ("You need to specify --set also");
- par::set_min = a.value_int("--set-min");
- }
-
- if (a.find("--set-max"))
- {
- if (!a.find("--set"))
- error ("You need to specify --set also");
- par::set_max = a.value_int("--set-max");
- }
-
- //////////////////////////
- // Impute tagged SNPs
-
- if (a.find("--hap-impute"))
- {
- par::impute_tags = true;
- par::phase_snps = true;
- // par::hap_missing_geno = 1;
- }
-
- if (a.find("--hap-impute-verbose"))
- {
- par::impute_tags = true;
- par::phase_snps = true;
-
- // par::hap_missing_geno = 1;
- par::impute_verbose = true;
- }
-
- if (a.find("--hap"))
- {
- par::phase_snps = true;
- par::tagfile = a.value("--hap");
- }
-
- if (a.find("--hap-window"))
- {
- par::phase_snps = true;
- par::sliding_window = true;
- par::phase_hap_all = true;
- if (a.find("--hap"))
- error("Cannot specify both --hap-window and --hap {file}\n");
- par::sliding_window_size = a.value_int("--hap-window");
-
- if (a.find("--hap-window-step"))
- par::sliding_window_step = a.value_int("--hap-window-step");
- }
-
-
- // Using all SNPs, perform a basic sliding window analysis and look
- // for haplotypically shared regions, i.e. given that we now have
- // much more highly heterozygous markers
-
- if (a.find("--homozyg-haplo-track"))
- {
- if ( ! a.find("--hap-window"))
- error("The 'haplo-track' option requires --hap-window {s} to be specified");
-
- par::segment_haplotrack = true;
- vector<string> s = a.value("--homozyg-haplo-track",2);
- par::segment_haplotrack_fid1 = s[0];
- par::segment_haplotrack_iid1 = s[1];
- par::segment_haplotrack_fid2 = par::segment_haplotrack_fid1;
- par::segment_haplotrack_iid2 = par::segment_haplotrack_iid1;
- }
-
- if (a.find("--segment-haplo-track"))
- {
- if ( ! a.find("--hap-window"))
- error("The 'haplo-track' option requires --hap-window {s} to be specified");
-
- par::segment_haplotrack = true;
- vector<string> s = a.value("--segment-haplo-track",4);
- par::segment_haplotrack_fid1 = s[0];
- par::segment_haplotrack_iid1 = s[1];
- par::segment_haplotrack_fid2 = s[2];
- par::segment_haplotrack_iid2 = s[3];
- }
-
-
-
- /////////////////////////////////
- // EM Phasing options
-
- if (a.find("--em-verbose"))
- par::haplo_plem_verbose = true;
-
- if (a.find("--em-follow"))
- {
- par::haplo_plem_follow = true;
- vector<string> s = a.value("--em-follow",2);
- par::haplo_plem_follow_fid = s[0];
- par::haplo_plem_follow_iid = s[1];
- }
-
- if (a.find("--em-window"))
- par::haplo_plem_window = a.value_int("--em-window");
-
- if (a.find("--em-overlap"))
- par::haplo_plem_overlap = a.value_int("--em-overlap");
-
- if (a.find("--em-window-iter"))
- par::haplo_plem_iter = a.value_int("--em-window-iter");
-
- if (a.find("--em-window-prune-phase"))
- par::haplo_plem_window_prune_phase =
- a.value_double("--em-window-prune-phase");
-
- if (a.find("--em-window-likelihood"))
- par::haplo_plem_likelihood_iter = a.value_int("--em-window-likilood");
-
- if (a.find("--em-window-tol"))
- par::haplo_plem_window_tol = a.value_double("--em-window-tol");
-
- if (a.find("--em-window-prune-haplotype"))
- {
- par::haplo_plem_zero_threshold = a.value_double("--em-window-prune-haplotype");
- if ( par::haplo_plem_zero_threshold == 0 )
- par::haplo_plem_nonzero_threshold = false;
- else
- par::haplo_plem_nonzero_threshold = true;
- }
-
-
-
- //////////////////////////
- // Meta EM parameters
-
- if (a.find("--em-meta-window"))
- {
- par::haplo_plem_meta_window = a.value_int("--em-meta-window");
- if ( par::haplo_plem_meta_window < 2 )
- error("--em-meta-window must be >1");
- }
-
- if (a.find("--em-meta-prune-haplotype"))
- par::haplo_plem_meta_prune_haplotype =
- a.value_double("--em-meta-prune-haplotype");
-
- if (a.find("--em-meta-prune-phase"))
- par::haplo_plem_meta_prune_phase =
- a.value_double("--em-meta-prune-phase");
-
- if (a.find("--em-meta-iter"))
- par::haplo_plem_meta_iter = a.value_int("--em-meta-iter");
-
- if (a.find("--em-meta-likilood"))
- par::haplo_plem_meta_likelihood_iter = a.value_int("--em-meta-likilood");
-
- if (a.find("--em-meta-tol"))
- par::haplo_plem_meta_tol = a.value_double("--em-meta-tol");
-
-
- /////////////////////////////
- // Other haplotype options
-
- if (a.find("--hap-all"))
- {
- if (!a.find("--hap"))
- error("--hap-all modifies --hap, but you have not specified --hap");
- par::phase_hap_all = true;
- }
-
- if (a.find("--whap"))
- {
- par::phase_snps = true;
- par::tagfile = a.value("--whap");
- par::weighted_mm = true;
- }
-
- if (a.find("--hap-pp")) { par::hap_post_prob = a.value_double("--hap-pp"); }
- if (a.find("--hap-miss")) { par::hap_missing_geno = a.value_double("--hap-miss"); }
-
- if (a.find("--hap-freq"))
- {
- if ( par::impute_tags )
- error("Cannot specify --hap-impute and --hap-freq\n");
- par::display_hap_freqs = true;
- }
-
- if (a.find("--hap-assoc"))
- {
- if ( par::impute_tags )
- error("Cannot specify --hap-impute and --hap-assoc\n");
- par::test_hap_CC = true;
- }
-
- if (a.find("--chap"))
- {
- if ( ! a.find("--hap-snps"))
- error("--chap requires --hap-snps");
-
- par::chap_test = true;
-
- if ( a.find("--null-group") )
- {
- par::chap_specified_groups = true;
- par::chap_model0 = a.value("--null-group");
- }
-
- if ( a.find("--alt-group") )
- {
- par::chap_specified_groups = true;
- par::chap_model1 = a.value("--alt-group");
- }
-
- if ( a.find("--null-snp") )
- {
- if ( par::chap_specified_groups )
- error("Cannot specify SNPs and groups for --chap tests");
- par::chap_specified_snps = true;
- par::chap_model0 = a.value("--null-snp");
- }
-
- if ( a.find("--alt-snp") )
- {
- if ( par::chap_specified_groups )
- error("Cannot specify SNPs and groups for --chap tests");
- par::chap_specified_snps = true;
- par::chap_model1 = a.value("--alt-snp");
- }
-
- if ( a.find("--control") )
- {
- if ( par::chap_specified_groups ||
- par::chap_specified_snps )
- error("Cannot use --control and other --chap options");
- par::chap_sole_variant = true;
- par::chap_entity = a.value("--control");
- }
- else if ( a.find("--independent-effect") )
- {
- if ( par::chap_specified_groups ||
- par::chap_specified_snps )
- error("Cannot use --independent-effect and other --chap options");
- par::chap_independent_effect = true;
- par::chap_entity = a.value("--independent-effect");
- }
- else if ( a.find("--specific-haplotype") )
- {
- if ( par::chap_specified_groups ||
- par::chap_specified_snps )
- error("Cannot use --specific-haplotype and other --chap options");
- par::chap_haplotype_specific = true;
- par::chap_entity = a.value("--specific-haplotype");
- }
-
- if ( a.find("--test-snp") )
- {
- if ( ! ( a.find("--condition") || a.find("--condition-list") ) )
- error("You must first specify conditioning SNPs");
- par::chap_drop_snps = true;
- par::chap_drop_snps_list = a.value("--test-snp");
- }
-
- if ( a.find("--each-vs-others") )
- par::chap_add_grp_specifics = true;
-
- if ( a.find("--each-versus-others") )
- par::chap_add_grp_specifics = true;
-
- }
-
-
- if (a.find("--hap-snps"))
- {
- if ( a.find("--hap") || a.find("--hap-window"))
- error("Cannot specify --hap-snps with --hap or --hap-window");
- par::phase_snps = true;
- par::hap_specific_snps = true;
- par::hap_specific_snps_list = a.value("--hap-snps");
- par::phase_hap_all = true;
- }
-
- if (a.find("--hap-tdt"))
- {
- if ( par::impute_tags )
- error("Cannot specify --hap-impute and --hap-tdt\n");
- par::test_hap_TDT = true;
- }
-
- if (a.find("--hap-phase"))
- {
- if ( par::impute_tags )
- error("Cannot specify --hap-impute and --hap-phase\n");
- par::display_phase_probs = true;
- }
-
- if (a.find("--hap-phase-wide"))
- {
- if ( par::impute_tags )
- error("Cannot specify --hap-impute and --hap-phase-wide\n");
- par::display_phase_probs = par::display_phase_probs_wide = true;
- }
-
- if (a.find("--hap-only")) { par::test_hap_only = true; }
-
- if (a.find("--hap-max-phase"))
- {
- par::hap_max_nf_phases = a.value_int("--hap-max-phase");
- if ( par::hap_max_nf_phases < 1 ) error("Invalid --hap-max-phase value (should be >0)");
- }
-
- if (a.find("--hap-min-phase-prob"))
- {
- par::hap_min_phase_prob = a.value_double("--hap-min-phase-prob");
- if ( par::hap_min_phase_prob < 0 || par::hap_min_phase_prob > 1 )
- error("Invalid --hap-min-phase-prob value, should be between 0 and 1");
- }
-
-
-
-
- //////////////////////////
- // Epistasis
-
- if (a.find("--epistasis"))
- {
- if (a.find("--set"))
- par::set_test = true;
- par::epistasis = true;
- }
-
- // Use odds-ratio test as default fast-epistasis method
- if (a.find("--fast-epistasis"))
- {
- par::fast_epistasis = par::epistasis = true;
- if ( a.find("--set"))
- par::set_test = true;
- }
-
- if (a.find("--case-only"))
- {
- par::epistasis = true;
- par::fast_epistasis = true;
- par::epi_caseonly = true;
- if (a.find("--epistasis"))
- error("--case-only requires --fast-epistasis");
- }
-
- if (a.find("--gap"))
- {
- if (!a.find("--case-only")) error("--gap option only valid when --caseonly is in effect");
- par::epi_caseonly_kb_gap = a.value_double("--gap");
- }
- if (a.find("--nop")) par::epi_quickscan = true;
- if (a.find("--set-by-all")) par::set_by_set = false;
- if (a.find("--epi1")) par::epi_alpha1 = a.value_double("--epi1");
- if (par::epi_alpha1 > 1) par::epi_filter = false;
- if (a.find("--epi2")) par::epi_alpha2 = a.value_double("--epi2");
- if (a.find("--twolocus"))
- {
- par::list_twolocus = true;
- vector<string> s = a.value("--twolocus",2);
- par::twolocus_snp1 = s[0];
- par::twolocus_snp2 = s[1];
- }
-
- if (a.find("--genepi"))
- {
- par::set_test = true;
- par::epi_genebased = true;
- }
-
-
-
- ///////////////////////////////////////
- // Gene X environment / heterogeneity
-
-
-
-
- //////////////////////////////////////
- // File output options
-
- if (a.find("--freq"))
- {
- par::af_write = true;
- // and unless otherwise specified, set GENO = 1 and MAF = 0
-// if (!a.find("--maf")) par::min_af = 0.0;
-// if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
-// if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
-
- // display MAF counts instead of freqs?
- if (a.find("--counts"))
- {
- if (a.find("--within"))
- error("Cannot specify --counts and --within together\n");
- par::af_count = true;
- }
- }
-
-
-
- if (a.find("--nonfounders")) par::summ_nonfounders = true;
-
- if (a.find("--make-founders")) par::make_founders = true;
-
- if (a.find("--allow-no-sex")) par::ignore_missing_sex = true;
-
- if (a.find("--read-freq"))
- {
- par::af_read = true;
- par::af_file = a.value("--read-freq");
- }
-
- if (a.find("--read-genome"))
- {
- par::ibd_read = true;
- par::ibd_file = a.value("--read-genome");
- }
-
- if (a.find("--list"))
- {
- par::list_by_allele = true;
- // and unless otherwise specified, set GENO = 1 and MAF = 0
- if (!a.find("--maf")) par::min_af = 0.0;
- if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
- if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
- }
-
- if (a.find("--report"))
- {
- par::indiv_report = true;
- vector<string> s = a.value("--report",2);
- par::indiv_report_fid = s[0];
- par::indiv_report_iid = s[1];
- if (!a.find("--maf")) par::min_af = 0.0;
- if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
- if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
- }
-
- if (a.find("--plist"))
- {
- par::plist = true;
- // and unless otherwise specified, set GENO = 1 and MAF = 0
- if (!a.find("--maf")) par::min_af = 0.0;
- if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
- if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
-
- vector<string> s = a.value("--plist",4);
- par::plist_fid1 = s[0];
- par::plist_iid1 = s[1];
- par::plist_fid2 = s[2];
- par::plist_iid2 = s[3];
-
- }
-
-
- if (a.find("--fix-allele"))
- {
- if (! (a.find("--recodeAD") || a.find("--recodeA")) )
- error("--fix-allele option only works with --recodeA/--recodeAD options");
- else par::recode_AD_fixed = true;
- }
-
- if (a.find("--merge"))
- {
- if (a.find("--merge-list") || a.find("--bmerge") )
- error("Can only specify --merge or --bmerge or --merge-list");
-
- par::merge_data = true;
- vector<string> s = a.value("--merge",2);
- par::merge_pedfile = s[0];
- par::merge_mapfile = s[1];
- }
-
- if (a.find("--bmerge"))
- {
- if (a.find("--merge-list") || a.find("--merge") )
- error("Can only specify --bmerge or --merge or --merge-list");
-
- par::merge_data = true;
- par::merge_binary = true;
-
- vector<string> s = a.value("--bmerge",3);
- par::merge_bedfile = s[0];
- par::merge_bimfile = s[1];
- par::merge_famfile = s[2];
-
- }
-
- if (a.find("--merge-list"))
- {
- if (a.find("--merge") || a.find("--bmerge") )
- error("Can only specify --merge or --bmerge or --merge-list");
-
- par::merge_data = true;
- par::merge_list = true;
- par::merge_list_filename = a.value("--merge-list");
- }
-
- if (a.find("--merge-mode"))
- {
- if (! (a.find("--merge") || a.find("--merge-list") || a.find("--bmerge") ) )
- error("Can only specify --merge-mode when --merge or --bmerge or --merge-list is used");
- par::merge_mode = a.value_int("--merge-mode");
- if (par::merge_mode < 1 || par::merge_mode > 7)
- error("--merge-mode N, where N must be between 1 and 7");
-
- if (par::merge_list && par::merge_mode >= 6)
- error("Can not specify --merge-mode 6/7 (diff) and --merge-list");
- }
-
-
- if (a.find("--flip"))
- {
- par::flip_strand = true;
- par::flip_file = a.value("--flip");
- }
-
- // Remove these individuals from a file
- if (a.find("--remove"))
- {
- par::remove_indiv = true;
- par::remove_indiv_list = a.value("--remove");
- }
-
- // Keep only these individuals from a file
- if (a.find("--keep"))
- {
- par::keep_indiv = true;
- par::keep_indiv_list = a.value("--keep");
- }
-
- // By default, remove then keep
- if (a.find("--keep-before-remove"))
- par::remove_before_keep = false;
-
- // Extract only these SNPs from a file
- if (a.find("--extract"))
- {
- par::extract_set = true;
- par::extract_file = a.value("--extract");
- }
-
- // Exclude these SNPs from a file
- if (a.find("--exclude"))
- {
- par::exclude_set = true;
- par::exclude_file = a.value("--exclude");
- }
-
- // Select a set of SNPs based on different physical
- // positions: this modifies the behavior of --extract
- // and --exclude
-
- if (a.find("--range"))
- {
- if ( ! ( a.find("--extract") || a.find("--exclude") ) )
- error("Must specify --extract or --exclude with --range");
- par::snp_range_list = true;
- }
-
- // By default, extract before exclude
- if (a.find("--exclude-before-extract"))
- par::extract_before_exclude = false;
-
-
- // Extract SNPs in a certain GENE (specified in the SET file)
- if (a.find("--gene"))
- {
- if (!a.find("--set"))
- error("You must also specify --set {file} with --gene {gene}\n");
- par::extract_set = true;
- par::dump_gene = true;
- par::dump_genename = a.value("--gene");
- }
-
-
- if (a.find("--ind-major"))
- {
- if (!a.find("--make-bed"))
- error("You can only specify --ind-major when --make-bed is in effect");
- par::out_SNP_major = false;
- }
-
-
- if (a.find("--include")) par::inc_write = true;
-
- if (a.find("--read-include"))
- {
- par::inc_read = true;
- par::inc_file = a.value("--read-include");
- }
-
-
- ///////////////////////////
- // Plink phenotype definition
-// // Squared differences (quantitative trait)
-// if (a.find("--SD")) { par::SD = true; par::CP = false; }
-// // Cross-product (quantitative trait)
-// if (a.find("--CP")) { par::CP = true; par::SD = false; }
-// // Fix the prevalence of a binary trait
-// if (a.find("--prev")) {
-// par::fix_prev=true;
-// par::fixed_prev = a.value_double("--prev");
-// }
-// // Remove unaffected pairs from analysis
-// // i.e. so only discordant versus affected concordant
-// // i.e. need at least 1 affected "1A"
-// if (a.find("--1aff")) { par::remove_unaffected_pairs = true; }
-
-
-
- ///////////////////////////
- // Some basic filters
-
- if (a.find("--prune"))
- par::ignore_phenotypes = false;
- if (a.find("--filter-cases"))
- par::filter_cases = true;
- if (a.find("--filter-controls"))
- par::filter_controls = true;
- if (a.find("--filter-females"))
- par::filter_females = true;
- if (a.find("--filter-males"))
- par::filter_males = true;
- if (a.find("--filter-founders"))
- par::filter_founders = true;
- if (a.find("--filter-nonfounders"))
- par::filter_nonfounders = true;
-
- if (a.find("--filter-cases") && a.find("--filter-controls"))
- error("Cannot filter on both cases and controls");
- if (a.find("--filter-males") && a.find("--filter-females"))
- error("Cannot filter on both males and females");
- if (a.find("--filter-founders") && a.find("--filter-nonfounders"))
- error("Cannot filter on both founders and nonfounders");
-
-
- /////////////////////////////////
- // Basic input file processing
-
- if (a.find("--dummy"))
- {
- vector<string> s = a.value("--dummy",2);
- par::dummy = true;
- par::dummy_nind = getInt(s[0].c_str(),"--dummy");
- par::dummy_nsnp = getInt(s[1].c_str(),"--dummy");
- }
-
- if (a.find("--simulate"))
- {
- par::simul = true;
- par::simul_file = a.value("--simulate");
- }
-
- if (a.find("--simulate-ncases"))
- par::simul_ncases = a.value_int("--simulate-ncases");
-
- if (a.find("--simulate-ncontrols"))
- par::simul_ncontrols = a.value_int("--simulate-ncontrols");
-
- if (a.find("--simulate-prevalence"))
- par::simul_prevalence = a.value_double("--simulate-prevalence");
-
- if (a.find("--file"))
- {
- if (a.find("--map") || a.find("--ped") )
- error("Use either --file {root} OR --ped {name} --map {name}");
- par::read_ped = true;
- par::fileroot = a.value("--file");
- par::pedfile = par::fileroot + ".ped";
- par::mapfile = par::fileroot + ".map";
- }
-
-
- if (a.find("--tfile"))
- {
- if (a.find("--tfam") || a.find("--tped") )
- error("Use either --tfile {root} OR --tped {name} --tfam {name}");
-
- par::tfile_input = true;
- par::fileroot = a.value("--tfile");
- par::tpedfile = par::fileroot + ".tped";
- par::tfamfile = par::fileroot + ".tfam";
- }
-
- if (a.find("--tped"))
- {
- par::tpedfile = a.value("--tped");
- if (par::tpedfile == "-")
- par::ped_from_stdin = true;
- par::tfile_input = true;
- }
-
- if (a.find("--tfam"))
- {
- par::tfamfile = a.value("--tfam");
- par::tfile_input = true;
- }
-
- // Long file format
-
- if (a.find("--lfile"))
- {
- if (a.find("--fam") || a.find("--lgen") || a.find("--map") )
- error("Use either --lfile {root} OR --lgen {name} --map (name) --fam {name}");
-
- par::lfile_input = true;
- par::fileroot = a.value("--lfile");
- par::lpedfile = par::fileroot + ".lgen";
- par::lfamfile = par::fileroot + ".fam";
- par::mapfile = par::fileroot + ".map";
- }
-
- if (a.find("--lgen"))
- {
- par::lpedfile = a.value("--lgen");
- if (par::lpedfile == "-")
- par::ped_from_stdin = true;
- par::lfile_input = true;
- }
-
-
- /////////////////////
- // Generic variant
-
- if (a.find("--gfile"))
- {
- // Analyse generic variants
- par::gvar = true;
- // Primarily load these; this will be changed
- // if another load is not specified
- par::load_gvar = false;
- par::fileroot = a.value("--gfile");
- par::gmapfile = par::fileroot + ".map";
- par::gfamfile = par::fileroot + ".fam";
-
- if (a.find("--gvar-verbose"))
- par::gvar_verbose_association = true;
-
- if (a.find("--gvar-all"))
- par::gvar_include_all_variants = true;
- }
-
- // Text-file modifiers
-
- if (a.find("--map3"))
- par::map3 = true;
-
- if (a.find("--no-sex"))
- par::ped_skip_sex = true;
-
- if (a.find("--no-parents"))
- par::ped_skip_parents = true;
-
- if (a.find("--no-fid"))
- par::ped_skip_fid = true;
-
- if (a.find("--no-pheno"))
- par::ped_skip_pheno = true;
-
- if (a.find("--liability"))
- par::liability = true;
-
- if (a.find("--bfile"))
- {
- if (a.find("--bim") || a.find("--bed") || a.find("--fam") )
- error("Use either --bfile {root} OR --bed {name} --bim {name} --fam {name}");
-
- par::read_bitfile = true;
- par::fileroot = a.value("--bfile");
- par::bitfilename = par::pedfile = par::fileroot + ".bed";
- par::bitfilename_map = par::mapfile = par::fileroot + ".bim";
- par::bitfilename_fam = par::fileroot + ".fam";
- }
-
- if (a.find("--bfile-faster"))
- par::fast_binary = true;
-
- if (a.find("--ped"))
- {
- par::read_ped = true;
- par::pedfile = a.value("--ped");
- if (par::pedfile == "-")
- par::ped_from_stdin = true;
- }
-
- if (a.find("--map")) par::mapfile = a.value("--map");
-
-
- if (a.find("--bed"))
- {
- par::read_bitfile = true;
- par::bitfilename = par::pedfile = a.value("--bed");
- }
-
- if (a.find("--fam"))
- {
- par::read_bitfile = true;
- par::bitfilename_fam = a.value("--fam");
- }
-
- if (a.find("--bim"))
- {
- par::read_bitfile = true;
- par::bitfilename_map = par::mapfile = a.value("--bim");
- }
-
-
- // Single phenotype in file specified
- if (a.find("--pheno"))
- {
- par::pheno_file = true;
- par::pheno_filename = a.value("--pheno");
- }
-
- // Binary 0/1 coding instead of 1/2
- if (a.find("--1"))
- par::coding01 = true;
-
-
- // Multiple phenotypes in a file specified
- if (a.find("--mpheno"))
- {
- if (!a.find("--pheno"))
- error("You need to specify --pheno {file} with --mpheno {N}");
- par::mult_pheno = a.value_int("--mpheno");
- }
-
- // Select phenotype ny name
- if (a.find("--pheno-name"))
- {
- if (!a.find("--pheno"))
- error("You need to specify --pheno {file} with --pheno-name {name}");
- if (a.find("--mpheno"))
- error("You cannot specify --mpheno and --pheno-name together");
-
- par::name_pheno = a.value("--pheno-name");
-
- }
-
- if (a.find("--values"))
- {
- par::number_list_string = a.value("--values");
- }
-
- if (a.find("--valueless"))
- {
- par::number_list_string = a.value("--valueless");
- par::number_list_positive = false;
- }
-
- // Loop over all phenotypes
- if (a.find("--all-pheno"))
- {
- if (!a.find("--pheno"))
- error("You need to specify --pheno {file} with --all-pheno");
-
- if (a.find("--mpheno"))
- error("You cannot specify --mpheno {N} with --all-pheno");
- par::mult_pheno = 1;
- par::all_pheno = true;
-
- }
-
-
- // Single covariate in file specified
- if (a.find("--covar"))
- {
- par::covar_file = true;
- par::covar_filename = a.value("--covar");
-
- // Multiple covariates in file specified, read all of them
- if ( a.find("--linear") ||
- a.find("--logistic") ||
- a.find("--R") ||
- a.find("--chap") ||
- a.find("--proxy-glm") )
- par::clist = true;
-
- // Request to dump back out all covariates
- if (makedata)
- {
- par::clist = true;
- if (a.find("--dummy-coding"))
- par::dump_covar_dummy_coding = true;
- }
-
- if (a.find("--with-phenotype"))
- par::dump_covar_with_phenotype = true;
-
- }
-
-
- // Multiple covariates in file specified, select one
- if (a.find("--mcovar"))
- {
- if (!a.find("--covar"))
- error("You need to specify --covar {file} with --mcovar {N}");
- par::mult_covar = a.value_int("--mcovar");
- }
-
- // Selection fields for covariates
- if (a.find("--covar-number"))
- {
- par::clist_selection = par::clist_selection_number = true;
- par::clist_selection_string = a.value("--covar-number");
- }
-
- if ( a.find("--covar-name") )
- {
- par::clist_selection = par::clist_selection_name = true;
- par::clist_selection_string = a.value("--covar-name");
- }
-
- // Request to dump back out all covariates
- if (a.find("--write-covar"))
- {
- if (makedata)
- error("No need to specify --write-covar separately");
- if (!a.find("--covar"))
- error("You must specify a --covar {file} with --write-covar");
-
- if (a.find("--with-phenotype"))
- par::dump_covar_with_phenotype = true;
-
- if (a.find("--dummy-coding"))
- par::dump_covar_dummy_coding = true;
-
- par::dump_covar = true;
- par::clist = true;
- }
-
- if (a.find("--write-snplist"))
- par::write_snplist = true;
-
- if (a.find("--update-map"))
- {
- if (a.find("--update-cm"))
- par::update_cm = true;
- par::update_map = true;
- par::update_mapfile = a.value("--update-map");
- }
-
- // Examine only a subset of the data?
- if (a.find("--filter"))
- {
- par::filter_on_covar = true;
-
- vector<string> s = a.value("--filter",2);
-
- par::filter_filename = s[0];
- par::filter_value = getInt(s[1].c_str(),"--filter");
-
- }
-
- if (a.find("--mfilter"))
- {
- if (!a.find("--filter"))
- error("You can only specify --mfilter with --filter\n");
- par::mult_filter = a.value_int("--mfilter");
- }
-
- // Different species other than human?
- // i.e. alters chromosome definitions
-
- if (a.find("--dog")) par::species_dog = true;
- if (a.find("--cow")) par::species_cow = true;
- if (a.find("--sheep")) par::species_sheep = true;
- if (a.find("--horse")) par::species_horse = true;
-
- //////////////////////////////////
- // Multipoint and singlepoint
-
- // if (a.find("--singlepoint")) par::singlepoint = true;
-
- if (a.find("--fringe"))
- {
- par::singlepoint = false;
- par::fringe = a.value_double("--fringe");
- }
-
- if (a.find("--grid"))
- {
- par::singlepoint = false;
- par::grid = a.value_double("--grid");
- par::inter_grid = 0;
- }
-
- if (a.find("--step"))
- {
- par::singlepoint = false;
- par::inter_grid = a.value_int("--step");
- }
-
- if (a.find("--cm")) par::cm_map = true;
-
- if (a.find("--ci"))
- {
- par::display_ci = true;
- par::ci_level = a.value_double("--ci");
- if ( par::ci_level < 0.01 || par::ci_level >= 1 )
- error("CI level (--ci) must be between 0 and 1\n");
- par::ci_zt = ltqnorm( 1 - (1 - par::ci_level) / 2 );
- }
-
- if (a.find("--pfilter"))
- {
- par::pfilter = true;
- par::pfvalue = a.value_double("--pfilter");
- }
-
- if (a.find("--clump"))
- {
- par::clumpld = true;
- par::clumpld_results = a.value("--clump");
-
- if (a.find("--clump-field"))
- par::clumpld_column = a.value("--clump-field");
-
- if (a.find("--clump-verbose"))
- par::clumpld_verbose = true;
-
- if (a.find("--clump-p1"))
- par::clumpld_p1 = a.value_double("--clump-p1");
-
- if (a.find("--clump-p2"))
- par::clumpld_p2 = a.value_double("--clump-p2");
-
- if (a.find("--clump-r2"))
- par::clumpld_r2 = a.value_double("--clump-r2");
-
- if (a.find("--clump-kb"))
- par::clumpld_kb = a.value_int("--clump-kb") * 1000;
-
- if (a.find("--clump-index-first"))
- par::clumpld_index1 = true;
-
- if (a.find("--clump-replicate"))
- par::clumpld_only_show_replications = true;
-
- if (a.find("--clump-annotate"))
- {
- par::clumpld_annot = true;
- par::clumpld_annot_fields = a.value("--clump-annotate");
- }
-
- if (a.find("--clump-allow-overlap"))
- par::clumpld_indep = false;
-
- }
-
-
- if (a.find("--adjust"))
- { par::multtest = true; }
-
- if (a.find("--log10"))
- { par::logscale = true; }
-
- if (a.find("--qq-plot"))
- { par::qq_plot = true; }
-
- if (a.find("--lambda"))
- {
- par::fix_lambda = true;
- par::lambda = a.value_double("--lambda");
- if ( par::lambda < 1 )
- par::lambda = 1;
- }
-
- if (a.find("--gc"))
- {
- if (!a.find("--adjust"))
- error("Must specify --adjust to use --gc");
- par::use_GC = true;
- }
-
- ///////////////////////
- // Permutation options
-
-
-
- // Use permutations (default is adaptive)
- if (a.find("--perm"))
- {
- par::permute = true;
- }
-
- // Return counts not p-values (i.e. number of times exceeded)
- if ( a.find("--perm-count") )
- {
- par::perm_count = true;
- }
-
- // Specify parameters for adaptive permutation
- if (a.find("--aperm"))
- {
- if (a.find("--segment"))
- error("--segment options requires --pperm option");
-
- if (a.find("--set"))
- error("Cannot use --aperm with --set option (use --mperm N instead)");
-
- par::permute = true;
- par::adaptive_perm = true;
- vector<string> s = a.value("--aperm",6);
-
- par::adaptive_min = getInt(s[0].c_str(),"--aperm");
- par::adaptive_max = getInt(s[1].c_str(),"--aperm");
- par::adaptive_alpha = getDouble(s[2].c_str(),"--aperm");
- par::adaptive_ci = getDouble(s[3].c_str(),"--aperm");
- par::adaptive_interval = getInt(s[4].c_str(),"--aperm");
- par::adaptive_interval2 = getDouble(s[5].c_str(),"--aperm");
- }
-
-
- // Non-adaptive (maxT) permutations
- if (a.find("--mperm"))
- {
- par::permute = true;
- par::adaptive_perm = false;
- par::replicates = a.value_int("--mperm");
-
- if (a.find("--mperm-save"))
- par::mperm_save_best = true;
- else if (a.find("--mperm-save-all"))
- par::mperm_save_all = true;
-
-
- // But make special fix for QFAM tests
- if ( par::QTDT_test )
- {
- par::adaptive_perm = true;
- par::adaptive_min = par::replicates;
- par::adaptive_max = par::replicates;
- par::adaptive_alpha = 0;
- par::adaptive_ci = 0;
- par::adaptive_interval = par::replicates+1;
- par::adaptive_interval2 = 0;
- par::QFAM_adaptive = true;
- }
- }
-
-
- if (a.find("--rank"))
- {
- if (! a.find("--mperm") )
- error("--rank requires --mperm to be specified");
- par::mperm_rank = true;
- }
-
- // PLINK permutations
-
- if (a.find("--pperm"))
- {
- if (! ( a.find("--segment") || a.find("--read-segment") ) )
- error("--pperm options requires --segment or --read-segment option");
- par::permute = true;
- par::adaptive_perm = false;
- par::replicates = a.value_int("--pperm");
- }
-
-
- if (a.find("--p2"))
- {
- if ((!a.find("--perm"))
- && (!a.find("--mperm"))
- && (!a.find("--aperm")))
- error("--p2 option also requires--perm, --aperm or --mperm");
- else if (!a.find("--assoc"))
- error("The --p2 option can only be specified with --assoc");
- else par::assoc_test_alt_perm = true;
- }
-
-
- /////////////////////
- // Gene-dropping
-
- if (a.find("--genedrop"))
- par::perm_genedrop = par::permute = true;
-
- if (a.find("--swap-sibs"))
- {
- if (!a.find("--genedrop"))
- error("--swap-sibs only makes sense when --genedrop specified");
- par::perm_genedrop_sibships = true;
- par::perm_genedrop_and_swap = true;
- }
-
- if (a.find("--swap-parents"))
- {
- if (!a.find("--genedrop"))
- error("--swap-parents only makes sense when --genedrop specified");
- par::perm_genedrop_parents = true;
- par::perm_genedrop_and_swap = true;
- }
-
- if (a.find("--swap-unrel"))
- {
- if (!a.find("--genedrop"))
- error("--swap-unrel only makes sense when --genedrop specified");
-
- par::perm_genedrop_unrel = true;
- par::perm_genedrop_and_swap = true;
- }
-
-
- ///////////////////////
- // Misc. options
-
- if (a.find("--missing-genotype"))
- par::missing_genotype = a.value("--missing-genotype");
-
- if (a.find("--missing-phenotype"))
- par::missing_phenotype = a.value("--missing-phenotype");
-
- if (a.find("--FIX")) {
- par::FIXED = par::FIXED_p = true;
- vector<string> p = a.value("--FIX",4);
-
- par::FIX_IBD.z0 = getDouble(p[0].c_str(),"--FIX");
- par::FIX_IBD.z1 = getDouble(p[1].c_str(),"--FIX");
- par::FIX_IBD.z2 = getDouble(p[2].c_str(),"--FIX");
- par::FIX_p = getDouble(p[3].c_str(),"--FIX");
- cout << "Fixing Z0, Z1, Z2 and p to "
- << par::FIX_IBD.z0 << " "
- << par::FIX_IBD.z1 << " "
- << par::FIX_IBD.z2 << " "
- << par::FIX_p << "\n(p must refer to '1' allele in '1/2' genotype)\n";
- }
-
-
- if (a.find("--fix-ibd")) {
- par::FIXED = true;
- vector<string> p = a.value("--fix-ibd",3);
-
- par::FIX_IBD.z0 = getDouble(p[0].c_str(),"--fix-ibd");
- par::FIX_IBD.z1 = getDouble(p[1].c_str(),"--fix-ibd");
- par::FIX_IBD.z2 = getDouble(p[2].c_str(),"--fib-ibd");
-
- }
-
-
- if (a.find("--batch"))
- par::BATCH_SIZE = a.value_int("--batch");
-
- if (a.find("--min"))
- par::MIN_PIHAT = a.value_double("--min");
-
- if (a.find("--max"))
- par::MAX_PIHAT = a.value_double("--max");
-
- if (a.find("--all-pairs"))
- {
- if (a.find("--min"))
- error("Cannot specify --min and --all-pairs\n");
- par::include_all_pairs = true;
- }
-
-
-// if (a.find("--lock"))
-// { par::locked = true; }
-
-// if (a.find("--unlock"))
-// { par::locked = false; }
-
-
- ///////////////////////////////
- // Basic filters
-
- if (a.find("--all"))
- {
- par::min_af = 0.0;
- par::MAX_GENO_MISSING = 1;
- par::MAX_IND_MISSING = 1;
- }
-
- if (a.find("--geno"))
- par::MAX_GENO_MISSING = a.value_double("--geno");
-
- if (a.find("--mind"))
- par::MAX_IND_MISSING = a.value_double("--mind");
-
- if (a.find("--maf"))
- par::min_af = a.value_double("--maf");
-
- if (a.find("--max-maf"))
- {
- par::max_af = a.value_double("--max-maf");
- if (par::max_af < par::min_af)
- error("Cannot set --max-maf less than --maf\n");
- }
-
- if (a.find("--mhf"))
- par::min_hf = a.value_double("--mhf");
-
- if (a.find("--max-mhf"))
- {
- par::max_hf = a.value_double("--max-mhf");
- if (par::max_hf < par::min_hf)
- error("Cannot set --max-mhf less than --mhf\n");
- }
-
- if (a.find("--hwe"))
- {
- par::HWD_test = true;
- par::HWD_limit = a.value_double("--hwe");
- }
-
- if (a.find("--hwe2"))
- {
- par::HWD_test = true;
- par::HWD_standard = true;
- par::HWD_limit = a.value_double("--hwe2");
- }
-
- if (a.find("--hwe-all"))
- {
- par::HWD_filter_on_all = true;
- }
-
-
- if (a.find("--me"))
- {
- par::MENDEL_test = true;
- vector<string> s = a.value("--me",2);
- par::MENDEL_ind = getDouble(s[0],"--me");
- par::MENDEL_snp = getDouble(s[1],"--me");
- }
-
-
-
- //////////////////////////////////
- // IBS clustering
-
- if (a.find("--cluster"))
- {
-
- par::cluster = true;
-
- if (a.find("--within"))
- par::force_initial_cluster = true;
-
- }
-
- if (a.find("--euclidean"))
- {
- if (!a.find("--cluster"))
- error("Cannot specify --euclidean without --cluster");
- par::cluster_euclidean = true;
- }
-
-
-
- if (a.find("--pick1"))
- {
- par::cluster_selcon = true;
- par::cluster_selcon_file = a.value("--pick1");
- }
-
- if (a.find("--cluster-missing"))
- {
- par::cluster = true;
- par::cluster_missing = true;
- par::matrix = true;
-
- if (!a.find("--maf")) par::min_af = 0.0;
- if (!a.find("--geno")) par::MAX_GENO_MISSING = 1;
- if (!a.find("--mind")) par::MAX_IND_MISSING = 1;
-
- par::merge_p = 0;
- if (a.find("--ppc"))
- error("Cannot specify --ppc with --cluster-missing");
- }
-
- if (a.find("--K"))
- {
- if (!a.find("--cluster"))
- error("Must specify --cluster also if --K used");
- par::max_cluster_N = a.value_int("--K");
- }
-
- if (a.find("--neighbour"))
- {
- vector<string> s = a.value("--neighbour",2);
- par::min_neighbour = getInt(s[0].c_str(),"--neighbour");
- par::max_neighbour = getInt(s[1].c_str(),"--neighbour");
- par::outlier_detection = true;
- }
-
- if (a.find("--matrix")) par::matrix = true;
- if (a.find("--distance-matrix")) { par::distance_matrix = par::matrix = true; }
-
- if (a.find("--mds-plot"))
- {
- par::cluster_plot = true;
- par::cluster_mds_dim = a.value_int("--mds-plot");
- }
-
- if (a.find("--mds-cluster"))
- par::mds_by_individual = false;
-
- if (a.find("--pmerge"))
- error("--pmerge is depreciated: use --ppc instead\n");
-
- if (a.find("--ppc"))
- {
- if (!par::cluster) error("--ppc options requires --cluster");
- par::merge_p = a.value_double("--ppc");
- }
-
- if (a.find("--pibs-gap"))
- error("--pibs-gap is depreciated: please use --ppc-gap\n");
-
- if (a.find("--ppc-gap"))
- {
- par::ibstest_gap = 1000 * a.value_int("--ppc-gap");
- }
-
- if (a.find("--ibm"))
- {
- if (! a.find("--cluster"))
- error("Can only use --ibm with --cluster\n");
- par::cluster_ibm_constraint = true;
- par::cluster_ibm_constraint_value = a.value_double("--ibm");
- }
-
- if (a.find("--mc"))
- {
- if (!par::cluster) error("--mc options requires --cluster");
- par::max_cluster_size = a.value_int("--mc");
- }
-
- if (a.find("--cc"))
- {
- if (!par::cluster) error("--cc options requires --cluster");
- par::cluster_on_phenotype = true;
- }
-
- if (a.find("--mcc"))
- {
- if (!par::cluster) error("--mcc options requires --cluster");
- par::cluster_on_mcc = true;
- vector<string> s = a.value("--mcc",2);
- par::max_cluster_case = getInt(s[0].c_str(),"--mcc");
- par::max_cluster_control = getInt(s[1].c_str(),"--mcc");
- if (a.find("--mc") || a.find("-cc"))
- error("Cannot specify --mc N and/or --cc as well as --mcc N1 N2\n");
- }
-
-
-
- /////////////////////////////////
- // External criteria to match on
-
- // Categorical binary traits,
- // by default
- // e.g. { A, A } is a match and so are pairable
- // { A, B } is not
- //
- // if match-type file is also specifed, then matches
- // can potentially be otherwise, e.g.
- // { A, B } are pairable
- // { A, A } are not
-
- if (a.find("--match"))
- {
- par::bmatch = true;
- par::bmatch_filename = a.value("--match");
- }
-
- if (a.find("--match-type"))
- {
- if (!a.find("--match"))
- error("Must specify a --match {file} with the --match-type {file} option");
- par::bmatch_usertype = true;
- par::bmatch_direction_filename = a.value("--match-type");
- }
-
- // Quantitative trait match
- // Based on difference exceeding a certain threshold
- // e.g. (X-Y)>T => no match
- // (X-Y)<=T => match
-
- // T is specified by including an extra individual in the qmatch file
- // with the Family ID and Individual ID "_T_"
-
- if (a.find("--qmatch"))
- {
- if (!a.find("--qt"))
- error("You need to specify a --qt file when using --qmatch");
- par::qmatch_threshold_filename = a.value("--qt");
- par::qmatch = true;
- par::qmatch_filename = a.value("--qmatch");
- }
-
-
-
- //////////////////////////
- // Permutation clustering
-
- if (a.find("--family"))
- {
- par::sol_family = true;
- par::permute_within_sol = true;
- par::include_cluster = true;
- }
-
- if (a.find("--within"))
- {
- par::permute_within_sol = par::include_cluster = true;
- par::include_cluster_from_file = true;
- par::include_cluster_filename = a.value("--within");
- }
-
- if (a.find("--mwithin"))
- {
- if (!a.find("--within"))
- error("You can only specify --mwithin with --within");
- par::mult_clst = a.value_int("--mwithin");
- }
-
-
- //////////////////////////////////
- // Specific scan region selected
-
- if (!a.find("--from"))
- {
-
- // Specify a specific chromosome
- string c="0"; // Default all chromosomes
-
- if (a.find("--chr"))
- c = a.value("--chr");
-
- if (c=="X" || c=="x") par::run_chr = 23;
- else if (c=="Y" || c=="y") par::run_chr = 24;
- else par::run_chr = getInt(c,"--chr");
- }
-
- if (a.find("--from"))
- {
-
- if (!a.find("--to"))
- error("Must also specify --to {marker} when using --from {marker}");
-
- par::m1 = a.value("--from");
- par::m2 = a.value("--to");
- par::run_chr = -1;
- }
-
- if (a.find("--snp"))
- {
- par::m1 = a.value("--snp");
- par::m2 = a.value("--snp");
- par::run_chr = -1;
- }
-
- if (a.find("--snps"))
- {
- par::extract_set = true;
- par::snp_include_from_cl = true;
- par::snp_include_range = a.value("--snps");
-
- if ( a.find("--snp") || a.find("--window") || a.find("--extract") || a.find("--exclude") )
- error("Cannot specify multiple SNP-selection options with --snps");
-
- }
-
- if ( a.find("--d") )
- {
- par::range_delimiter = a.value("--d");
- if ( par::range_delimiter.length() > 1 )
- error("Range delimiter can only be 1 character");
- if ( par::range_delimiter == "," )
- error("Cannot set range delimiter to comma");
- }
-
- if (a.find("--window"))
- {
- if (!a.find("--snp")) error("Must specify --snp with --window");
- par::window = a.value_double("--window");
- }
-
- if (a.find("--from-bp"))
- {
- if (!a.find("--to-bp")) error("Must specify --to-bp with --from-bp");
- par::from_window = a.value_int("--from-bp");
- par::position_window = true;
- }
-
- if (a.find("--from-kb"))
- {
- if (!a.find("--to-kb")) error("Must specify --to-kb with --from-kb");
- par::from_window = int(a.value_double("--from-kb") * 1000);
- par::position_window = true;
- }
-
- if (a.find("--from-mb"))
- {
- if (!a.find("--to-mb")) error("Must specify --to-mb with --from-mb");
- double v = a.value_double("--from-mb");
- if (v>1000) error("Too large a value for --from-mb");
- par::from_window = int(v * 1000 * 1000);
- par::position_window = true;
- }
-
-
- if (a.find("--to-bp"))
- {
- if (!a.find("--from-bp")) error("Must specify --from-bp with --to-bp");
- par::to_window = a.value_int("--to-bp");
- par::position_window = true;
- }
-
- if (a.find("--to-kb"))
- {
- if (!a.find("--from-kb")) error("Must specify --from-kb with --to-kb");
- par::to_window = int(a.value_double("--to-kb") * 1000);
- par::position_window = true;
- }
-
- if (a.find("--to-mb"))
- {
- if (!a.find("--from-mb")) error("Must specify --from-mb with --to-mb");
- double v = a.value_double("--to-mb");
- if (v>1000) error("Too large a value for --to-mb");
- par::to_window = int(v * 1000 * 1000);
- par::position_window = true;
- }
-
- if (par::position_window)
- if (!a.find("--chr"))
- error("You must specify which chromosome (--chr N) also");
-
-
-
- ////////////////////////////////////////////
- // General warnings
-
- if ( a.find("--assoc") && a.find("--covar") )
- error("Cannot specify --covar with --assoc");
-
- if ( a.find("--model") && a.find("--covar") )
- error("Cannot specify --covar with --model");
-
- if ( a.find("--assoc") && a.find("--linear") )
- error("Cannot specify --assoc with --linear");
-
- if ( a.find("--assoc") && a.find("--logistic") )
- error("Cannot specify --assoc with --logistic");
-
- if ( a.find("--model") && a.find("--linear") )
- error("Cannot specify --model with --linear");
-
- if ( a.find("--model") && a.find("--logistic") )
- error("Cannot specify --model with --logistic");
-
- if ( a.find("--model") && a.find("--assoc") )
- error("Cannot specify --model with --assoc");
-
-
- /////////////////////////////////////////////
- // Help -- display all options
-
-
- if (a.find("--help") || a.find("-h"))
- {
-
- cout << "\n"
- << "Please visit the PLINK website for a complete list of options\n"
- << "\n"
- << "A few common options are listed here:\n"
- << "\n";
-
- cout << "plink --file {fileroot} Specify .ped and .map files \n"
- << " --bfile {fileroot} Specify .bed, .fam and .map \n"
- << "\n"
- << " --out {fileroot} Specify output root filename\n"
- << "\n"
- << " --missing-genotype {0} Missing genotype code \n"
- << " --missing-phenotype {-9} Missing phenotype code \n"
- << "\n"
- << " --pheno {phenofile} Specify .phe file \n"
- << " --within {file} Specify cluster file \n"
- << " --cov {covarfile} Specify .cov file \n"
- << "\n"
- << " --extract {snplist} Extract list of SNPs \n"
- << " --exclude {snplist} Exclude list of SNPs \n"
- << " --remove {indlist} Remove these individuals \n"
- << " --keep {indlist} Keep these individuals \n"
- << "\n"
- << " --make-bed Make .bed, .fam and .bim \n"
- << " --recode Output new PED and MAP files\n"
- << " --recode12 As above, with 1/2 alleles \n"
- << " --recodeAD As above, but: 1/0/-1, 0/1/0\n"
- << " --recodeA As above, but: 1/0/-1 only \n"
- << "\n"
- << " --snp {marker} Specify this single SNP \n"
- << " --snps {marker list} Specify list,range of SNPs \n"
- << " --window {kb} Select +/- kb around --snp \n"
- << " --chr {N} Analyse chromosome \n"
- << " --from-kb {KB} Start scan here (kilobase) \n"
- << " --to-kb {KB} End scan here \n"
- << "\n"
- << " --all Set filters to include all \n"
- << " --maf {0.01} Minor allele frequency \n"
- << " --geno {0.1} Maximum per-SNP missing \n"
- << " --mind {0.1} Maximum per-person missing \n"
- << "\n"
- << " --freq Output allele frequencies \n"
- << " --hardy Hardy-Weinberg tests \n"
- << " --missing Genotyping rate information \n"
- << " --het Individual inbreeding \n"
- << " --genome Genome-wide IBS/IBD \n"
- << " --cluster Perform IBS clustering \n"
- << "\n"
- << " --assoc Case/control, QT association\n"
- << " --model Full-model C/C association \n"
- << " --tdt Family-based TDT association\n"
- << " --linear Linear regression model \n"
- << " --logistic Logistic regression model \n"
- << "\n"
- << " --perm Apaptive permutations \n"
- << " --mperm {1000} max(T) permutations \n"
- << "\n"
- << " --hap {tagfilename} Multimarker predictor list \n"
- << " --hap-window {N} Phase sliding window \n"
- << " --hap-snps {snp list} Phase this set of SNPs \n"
- << " --hap-assoc Haplotype-based association \n"
- << " --hap-tdt Haplotype-based TDT \n"
- << " --chap Conditional haplotype tests \n"
- << " --hap-phase Report haplotype phases \n"
- << " --hap-freq Report haplotype frequencies\n"
- << "\n";
-
- cout << "\nPlease visit the PLINK website for a complete list of options\n\n";
-
- shutdown();
- }
-
-
- // By default, most tests are SNP major
-
- par::SNP_major = true;
-
- // Exceptions are:
- // TDT ( family structure confuses things)
- // Whole genome / IBS clustering
- // PLINK
-
- if (par::TDT_test ||
- par::MENDEL_test ||
- par::MENDEL_report ||
- par::genome_output ||
- par::cluster ||
- par::plink )
- par::SNP_major = false;
-
- if (a.find("--ind"))
- par::SNP_major = false;
-
-
- // If recoding data, the default will be not to set heterozygous
- // haploid genotypes to missing. Likewise for Mendel errors. Merge
- // operations will also specify a recode/make-bed, so they are also
- // captured here. The one special case where we want to allow to
- // preserve males hets on the X is the --check-sex
-
-
- if ( par::check_sex )
- par::preserve_all_genotypes = true;
-
- if ( par::write_bitfile ||
- par::recode ||
- par::recode_HV ||
- par::recode_whap ||
- par::recode_12 ||
- par::recode_AD )
- {
-
- // Unless flag given, these options will not replace haploid
- // heterozygotes with a missing genotype
-
- if ( a.find("--set-hh-missing") )
- par::preserve_all_genotypes = false;
- else
- par::preserve_all_genotypes = true;
-
- if ( a.find("--set-me-missing") )
- par::preserve_mendel_errors = false;
- else
- par::preserve_mendel_errors = true;
-
- }
-
-
-}
-
Deleted: trunk/packages/plink/trunk/perm.cpp
===================================================================
--- trunk/packages/plink/trunk/perm.cpp 2008-04-09 09:43:11 UTC (rev 1725)
+++ trunk/packages/plink/trunk/perm.cpp 2008-04-09 10:09:22 UTC (rev 1726)
@@ -1,510 +0,0 @@
-
-
-//////////////////////////////////////////////////////////////////
-// //
-// PLINK (c) 2005-2008 Shaun Purcell //
-// //
-// This file is distributed under the GNU General Public //
-// License, Version 2. Please see the file COPYING for more //
-// details //
-// //
-//////////////////////////////////////////////////////////////////
-
-
-#include "perm.h"
-#include "helper.h"
-#include <iostream>
-#include <cmath>
-#include <cstdlib>
-#include <algorithm>
-#include <vector>
-
-Perm::Perm(Plink & pref) : P(pref)
-{
-
- if (par::adaptive_perm)
- {
- adaptive = true;
- replicates = par::adaptive_max;
- }
- else
- {
- adaptive = false;
- replicates = par::replicates;
- }
-
- count = par::perm_count;
-
- min = par::adaptive_min;
-
- interval = par::adaptive_interval;
- performed = 0;
-
- dump_all = par::mperm_save_all;
- dump_best = par::mperm_save_best;
-
- if (dump_all)
- {
- PDUMP.open((par::output_file_name+".mperm.dump.all").c_str(),ios::out);
- }
- else if (dump_best)
- {
- PDUMP.open((par::output_file_name+".mperm.dump.best").c_str(),ios::out);
- }
-}
-
-
-void Perm::setTests(int x)
-{
-
- performed = 0;
-
- t = x;
- R.resize(t);
-
- if (adaptive)
- {
- N.resize(t);
- test.resize(t);
- for (int i=0; i<t; i++)
- {
- test[i] = true;
- R[i] = N[i] = 0;
- }
-
- // Given t tests, set the threshold to be
- // p +/- Phi^{-1} (1 - \gamma/2t ) sqrt( p(1-p)/N )
- zt = ltqnorm( 1 - par::adaptive_ci / ( 2 * t ) ) ;
-
- }
- else
- {
- maxR.resize(t);
- for (int i=0; i<t; i++)
- R[i] = maxR[i] = 0;
- }
-
- // For gene-dropping, set up some family-information
- if (par::perm_genedrop)
- preGeneDrop();
-
-}
-
-void Perm::originalOrder()
-{
- for (int i=0; i<P.n; i++)
- P.sample[i]->pperson = P.sample[i];
-}
-
-
-bool Perm::finished()
-{
- if (performed>=replicates) return true;
- else return false;
-}
-
-
-void Perm::permuteInCluster()
-{
-
- // Store remapped IDs
- vector<vector< long int> > i(ns);
-
- // Permute phenotypes, within cluster
- for (int k=0; k<ns; k++)
- {
- vector<long int> p(s[k].size());
- permute(p);
- i[k]=p;
- }
-
- //////////////////////////
- // Post-permutation:
- // Iterate over clusters { s[][] }
- // i[][] holds the permuted codes
- // s[][] points to individuals (non-missing)
-
- // Genotype = sample[s[j][k]];
- // Matching phenotype = sample[s[j][(int)i[j][k]]];
-
- // Create pheno[] with label-swapped codes
- for (int j=0; j<s.size(); j++)
- for (int k=0; k<s[j].size(); k++)
- P.sample[s[j][k]]->pperson = P.sample[s[j][(int)i[j][k]]];
-
-}
-
-void Perm::setPermClusters(Plink & P)
-{
-
- // Permute within clusters only
- // (stored in sample[i]->sol)
-
- // Get list of non-missing individuals, and number of solutions
- // These are always numbered 0,1,2,3,..
-
- // -1 indicates do not permute this individual
- // 0..ns indicate cluster numbers
-
- // Count the number of clusters: 'ns'
-
- ns=-1;
- for (int i=0; i<P.n; i++)
- if ((!P.sample[i]->missing) && P.sample[i]->sol > ns)
- ns=P.sample[i]->sol;
- ns++;
-
- // store set membership is 's'
-
- s.resize(ns);
- for (int i=0; i<P.n; i++)
- if (!P.sample[i]->missing && P.sample[i]->sol>=0)
- s[P.sample[i]->sol].push_back(i);
-
- pheno.resize(P.n);
-
- if (par::permute && ! par::QTDT_test )
- P.printLOG("Set to permute within "+int2str(ns)+" cluster(s)\n");
-
-}
-
-void Perm::setOriginalRanking(vector_t & original)
-{
- vector<Pair2> o;
- for (int i=0; i<original.size(); i++)
- {
- Pair2 p;
- p.p = original[i];
- p.l = i;
- o.push_back(p);
- }
-
- sort(o.begin(),o.end());
-
- order.clear();
- reorder.resize(original.size());
- for(int i=0; i<original.size(); i++)
- {
- order.push_back(o[i].l);
- reorder[o[i].l] = i;
- }
-}
-
-
-
-bool Perm::update(vector<double> & result, vector<double> & original)
-{
-
- // Increment number of permutations performed
- performed++;
-
- // Finished all perms?
- bool done = false;
-
- //////////////////////////////
- // Update number of successes
-
- if (!adaptive)
- {
- for (int l=0; l<t; l++)
- if (result[l] >= original[l] || !realnum(original[l]) ) R[l]++;
- }
- else
- {
- for (int l=0; l<t; l++)
- if (test[l])
- {
- if (result[l] >= original[l] || !realnum(original[l]) ) R[l]++;
- N[l]++;
- }
- }
-
-
- // Stopping rules for adaptive permutation?
- int todo = 0;
- if (adaptive &&
- performed > min &&
- performed % interval == 0)
- {
-
- // Update interval
- interval = (int)(par::adaptive_interval + performed * par::adaptive_interval2);
-
- // Consider each test
- for (int l=0; l<t; l++)
- {
- if (test[l])
- {
-
- // Check for at least one success
- if (R[l]>0)
- {
- double pv = (double)(R[l]+1)/(double)(performed+1);
-
- double sd = sqrt( pv * (1-pv) / performed );
- double lower = pv - zt * sd;
- double upper = pv + zt * sd;
- //double cv = sd/(performed*pv);
- if (lower<0) lower = 0;
- if (lower>1) upper = 0;
-
- // Is lower bound greater than threshold, or
- // upper bound smaller than threshold?
- if (lower > par::adaptive_alpha || upper < par::adaptive_alpha )
- {
- N[l] = performed;
- test[l] = false;
- }
-
- else
- todo++;
- }
- else todo++;
- }
- }
-
- if (!par::silent)
- cout << "Adaptive permutation: "
- << performed
- << " of (max) "
- << replicates
- << " : "
- << todo
- << " SNPs left"
- << " \r";
-
- if (todo==0) done = true;
- }
-
-
- ///////////////////////////////////////////////////////////
- // For non-adaptive permutation, keep track of the maximum
-
- if (!adaptive)
- {
-
- if (dump_all)
- {
- if (performed==1)
- {
- PDUMP << 0 << " ";
- for (int l=0; l<t; l++)
- PDUMP << original[l] << " ";
- PDUMP << "\n";
- }
- PDUMP << performed << " ";
- }
-
-
- if (dump_best)
- {
- if (performed==1)
- {
- PDUMP << 0 << " ";
-
- double mx=0;
- for (int l=0; l<t; l++)
- if (original[l]>mx)
- if ( realnum(mx))
- mx=original[l];
- PDUMP << mx << "\n";
- }
- PDUMP << performed << " ";
- }
-
-
- // Find maximum, or sort all results
-
- double mx=0;
-
- if ( par::mperm_rank )
- {
- // Ranked permutation
- // populate mx vector
-
- // Set any NA to 0
- for (int l=0; l<t; l++)
- if ( ! realnum(result[l]) )
- result[l] = 0;
-
- // Sort (ascending)
- sort(result.begin(),result.end());
-
- // Save max in any case
- mx = result[result.size()-1];
- }
- else
- {
- // Standard max(T)
- // populate single mx variable
- for (int l=0; l<t; l++)
- if (result[l]>mx)
- if ( realnum(mx) )
- mx=result[l];
- }
-
-
- if (dump_best)
- PDUMP << mx << "\n";
- else if (dump_all)
- {
- for (int l=0; l<t; l++)
- PDUMP << result[l] << " ";
- PDUMP << "\n";
- }
-
-
- // Max(T) permutation -- compare against best
-
- if ( ! par::mperm_rank )
- {
- for (int l=0; l<t; l++)
- if (mx >= original[l] || !realnum(original[l]) ) maxR[l]++;
- }
-
- // Rank(T) permutation -- compare against similar rank
-
- else
- {
- for (int l=0; l<t; l++)
- {
- //cout << "P " << order[l] << " " << original[order[l]] << " , pr = " << result[l] << " ";
-
- if (result[l] >= original[order[l]] || !realnum(original[order[l]]) )
- maxR[order[l]]++;
-
- }
- }
-
-
-
- if (!par::silent)
- {
- cout << "maxT permutation: "
- << performed
- << " of "
- << par::replicates
- << " \r";
- cout.flush();
- }
- }
-
- // Have we hit the maximum number of replicates?
- if (performed>=replicates) done = true;
-
- return done;
-}
-
-void Perm::nextSNP()
-{
- // Reset Perm class for next SNP when in adaptive, SNP-by-SNP mode
- performed = 0;
- originalOrder();
-}
-
-bool Perm::updateSNP(double result, double original, int l)
-{
-
- /////////////////////////////////////////
- // Single SNP adaptive permutation update
- /////////////////////////////////////////
-
- // Increment number of permutations performed for this SNP
- performed++;
-
- // Finished all perms for this SNP?
- bool done = false;
-
- //////////////////////////////
- // Update number of successes
-
- if (test[l])
- {
- if (result >= original || !realnum(original) ) R[l]++;
- N[l]++;
- }
-
-
- // Stopping rules for adaptive permutation?
-
- if (adaptive &&
- performed > min &&
- performed % interval == 0)
- {
-
- // Update interval
- interval = (int)(par::adaptive_interval + performed * par::adaptive_interval2);
-
- // Consider this specific SNP
- if (test[l])
- {
-
- // Check for at least one success
- if (R[l]>0)
- {
-
- double pv = (double)(R[l]+1)/(double)(performed+1);
-
- double sd = sqrt( pv * (1-pv) / performed );
- double lower = pv - zt * sd;
- double upper = pv + zt * sd;
- //double cv = sd/(performed*pv);
- if (lower<0) lower = 0;
- if (lower>1) upper = 0;
-
- // Is lower bound greater than threshold, or
- // upper bound smaller than threshold?
- if (lower > par::adaptive_alpha || upper < par::adaptive_alpha )
- {
- N[l] = performed;
- test[l] = false;
- done = true;
- }
- }
- }
-
- }
-
- // Have we hit the maximum number of replicates?
- if (performed>=replicates) done = true;
-
- return done;
-}
-
-
-int Perm::rank(int l)
-{
- if ( ! par::mperm_rank )
- return 0;
- else
- return t - reorder[l];
-
-}
-
-double Perm::pvalue(int l)
-{
- if (count)
- return (double)R[l];
- if (adaptive)
- return (double)(R[l]+1) / (double)(N[l]+1);
- else
- return (double)(R[l]+1) / (double)(replicates+1);
-}
-
-
-double Perm::max_pvalue(int l)
-{
- if (adaptive)
- return -1;
- else
- return (double)(maxR[l]+1) / (double)(replicates+1);
-}
-
-int Perm::reps_done(int l)
-{
- if (adaptive)
- return N[l];
- else
- return replicates;
-}
-
Deleted: trunk/packages/plink/trunk/plink.log
===================================================================
--- trunk/packages/plink/trunk/plink.log 2008-04-09 09:43:11 UTC (rev 1725)
+++ trunk/packages/plink/trunk/plink.log 2008-04-09 10:09:22 UTC (rev 1726)
@@ -1,13 +0,0 @@
-
- at ----------------------------------------------------------@
-| PLINK! | v1.02 | 27/Mar/2008 |
-|----------------------------------------------------------|
-| (C) 2008 Shaun Purcell, GNU General Public License, v2 |
-|----------------------------------------------------------|
-| For documentation, citation & bug-report instructions: |
-| http://pngu.mgh.harvard.edu/purcell/plink/ |
- at ----------------------------------------------------------@
-
-
-Analysis finished: Wed Apr 9 00:35:19 2008
-
Deleted: trunk/packages/plink/trunk/prephap.cpp
===================================================================
--- trunk/packages/plink/trunk/prephap.cpp 2008-04-09 09:43:11 UTC (rev 1725)
+++ trunk/packages/plink/trunk/prephap.cpp 2008-04-09 10:09:22 UTC (rev 1726)
@@ -1,657 +0,0 @@
-
-
-//////////////////////////////////////////////////////////////////
-// //
-// PLINK (c) 2005-2007 Shaun Purcell //
-// //
-// This file is distributed under the GNU General Public //
-// License, Version 2. Please see the file COPYING for more //
-// details //
-// //
-//////////////////////////////////////////////////////////////////
-
-
-#include <iostream>
-#include <iomanip>
-#include <fstream>
-#include <sstream>
-#include <cmath>
-#include <vector>
-#include <map>
-#include <cstdlib>
-
-#include "plink.h"
-#include "options.h"
-#include "phase.h"
-#include "helper.h"
-#include "nlist.h"
-
-extern ofstream LOG;
-
-using namespace std;
-
-// Format for haplotype file: format 1:
-// SNP_ID CHR CM BP A1 A2 HAP SNP1 SNP2 ...
-
-// i.e. length of Pred_allele indicate how many SNPs to expect:
-// rs10001 5 0 10203 A G TTC rs001 rs002 rs003
-
-// Alternatively: using wild cards: format 2:
-// will name haplotype "H1_TTC_", "H1_CTT_", "H2_AA_", etc
-// * rs001 rs002 rs003
-// * rs002 rs004
-
-// Alternatively: using wild cards: format 3:
-// will name haplotype "MYHAP1_1_TTC_", "MYHAP1_1_CTT_", "GENEB_2_AA_", etc
-// ** MYHAP1 rs001 rs002 rs003
-// ** GENEB rs002 rs004
-
-// Alternatively: using --whap weighted multimarker tests
-// rs10001 5 0 10203 A G rs001 rs002 rs003 / TTC / CCT 0.9 / TCT 0.1
-// i.e. "/" separator used, if number omitted, then assume PP=1
-
-void HaploPhase::readTagFile()
-{
-
- P.printLOG("\n");
-
- ///////////////////////////////////////////////
- // Haplotype inference is a SNP-major function
-
- if (!par::SNP_major) P.Ind2SNP();
-
-
- ////////////////////////
- // Lookup table for SNPs
-
- map<string,int> mlocus;
- map<string,int>::iterator ilocus;
-
- for (int l=0;l<P.nl_all;l++)
- mlocus.insert(make_pair(P.locus[l]->name,l));
-
-
- ///////////////////
- // Affection coding
-
- if (par::bt) affCoding(P);
-
-
- ////////////////////////////////
- // Read list of tags/haplotypes
-
- checkFileExists(par::tagfile);
- ifstream TAG(par::tagfile.c_str(), ios::in);
- TAG.clear();
-
- string f2 = par::output_file_name + ".mishap";
- ofstream MISHAP(f2.c_str(), ios::out);
- bool all_okay = true;
-
- // Count of new haplotypes we want to infer
- int hc=1;
-
- while(!TAG.eof())
- {
- char c[500000];
- TAG.getline(c,500000,'\n');
-
- string l = c;
-
- // Catch blank lines or DOS carriage-returns
- if (l=="" || l=="\r") continue;
-
- // Tokenize line
- string buf;
- stringstream ss(l);
- vector<string> tokens;
- while (ss >> buf)
- tokens.push_back(buf);
-
- // whitepsace line?
- if (tokens.size() == 0)
- continue;
-
- // tokens[0] predicted SNP rs#
- // tokens[1] predicted SNP chromosome
- // tokens[2] predicted SNP Morgan position
- // tokens[3] predicted SNP base pair position
- // tokens[4] predicted SNP allele 1
- // tokens[5] predicted SNP allele 2
- // tokens[6] tag allele (-> allele 1)
- // tokens[7+] predictor rs#(s)
-
-
- // If we see one or more wildcard specifications, then
- // automatically set phase_all_haps to be true;
-
- if ( tokens[0] == "*" )
- {
- // Wildcard format
-
- if ( tokens.size() == 1 )
- error("Problem with " +
- par::tagfile + " line\n" + l
- + "\n: must have atleast one SNP list\n");
-
- par::phase_hap_all = true;
- }
- else if ( tokens[0] == "**")
- {
- // Wildcard2 format
-
- if (tokens.size() == 2 )
- error("Problem with "
- + par::tagfile + " line\n" + l
- + "\n: must have atleast one SNP list\n");
-
- par::phase_hap_all = true;
-
- }
- else if ( ! par::weighted_mm )
- {
- // Standard format
-
- if (tokens.size() < 8 )
- {
- string e = "Problem with " +
- par::tagfile + " line\n"
- + l +
- "\n (expecting at least 8 items, or to start with */** wildcard)\n";
- error(e);
- }
- }
- else
- {
- // Weighted multi-marker format
-
-
- if (tokens.size() < 9 )
- {
- string e = "Problem with " + par::tagfile + " line\n"
- + l
- + "\n (expecting at least 9 items for --whap format file)\n";
- error(e);
- }
-
-
- }
-
- if (tokens[0].substr(tokens[0].size()-1) == "_" )
- error("Cannot use '_' in tag/haplotype name: reserved for wildcards\n");
-
-
- int len; // length of haplotype
- vector<int> locusList; // list of predictor #s
-
-
- // Is this particular line a wildcard? okay?
- bool wildcard = tokens[0] == "*" || tokens[0] == "**" ? true : false ;
- string wildname = "H";
-
- // Take the name from the second position? (** wildcard?)
- if ( tokens[0] == "**" )
- {
- wildname = tokens[1]+"_";
- tokens.erase(tokens.begin()+1);
- }
-
- bool okay = true;
-
-
-
-
- /////////////////////////////////
- // Fully-specified haplotype
-
- if (!wildcard)
- {
-
- int offset = 7;
- if ( par::weighted_mm ) offset = 6;
-
- if ( par::weighted_mm )
-
- {
- // Find first "/" separator
- int sep = 6;
- while ( tokens[++sep] != "/" ) { }
- len = sep - 6;
- }
- else
- {
- len = tokens[6].length();
-
- if (len != tokens.size() - offset )
- {
- string e = "Problem with " +
- par::tagfile + " line\n" + l + "\n";
- error(e);
- }
- }
-
-
- //////////////////////
- // Lookup locus name
-
- for (int i=0; i<len; i++)
- {
- ilocus = mlocus.find(tokens[i+offset]);
- if (ilocus != mlocus.end())
- {
- locusList.push_back(ilocus->second);
- }
- else
- {
- MISHAP << "NOSNP\t" << tokens[0]
- << "\t" << tokens[i+offset] << "\n";
- okay = false;
- }
- }
-
-
- /////////////////////////////////
- // Check specified alleles exist
-
- if (okay)
- {
-
- // Just one haplotype to check for non-weighted test version
-
- if ( ! par::weighted_mm )
- {
- for (int s=0;s<len;s++)
- if ( ! ( P.locus[locusList[s]]->allele1
- == tokens[6].substr(s,1)
- || P.locus[locusList[s]]->allele2
- == tokens[6].substr(s,1) ) )
- {
- MISHAP << "NOALLELE\t" << tokens[0]
- << "\t" << P.locus[locusList[s]]->name
- << "\t" << tokens[6] << "\n";
- okay = false;
- }
- }
-
- // Otherwise, we must parse through and check each
- else
- {
-
- int allele = 5 + len + 2;
-
- for ( int i = allele ; i < tokens.size() ; i++ )
- {
- // Is this a haplotype?
- if ( i == allele || tokens[i-1] == "/" )
- {
- for (int s=0;s<len;s++)
- if ( ! ( P.locus[locusList[s]]->allele1
- == tokens[i].substr(s,1)
- || P.locus[locusList[s]]->allele2
- == tokens[i].substr(s,1) ) )
- {
- MISHAP << "NOALLELE\t" << tokens[0]
- << "\t" << P.locus[locusList[s]]->name
- << "\t" << tokens[i] << "\n";
- okay = false;
- }
- }
- }
- }
-
- }
-
-
- }
- else
- {
- /////////////////////////////////
- // Wildcard selection
-
- len = tokens.size()-1;
-
- // Lookup locus name
- for (int i=0; i<len; i++)
- {
- ilocus = mlocus.find(tokens[i+1]);
- if (ilocus != mlocus.end())
- {
- locusList.push_back(ilocus->second);
- }
- else
- {
- MISHAP << "NOSNP\t" << tokens[i+1] << "\n";
- okay = false;
- }
- }
- }
-
-
-
- //////////////////////////////////////
- // Should we try to add this haploype
-
- if (!okay)
- {
- all_okay = false;
- continue;
- }
-
-
- ////////////////////////////////////////////////////////
- // Check that all predictors are on the same chromosome
-
- if (!wildcard)
- {
- for (int ck=0; ck<len; ck++)
- if (P.locus[locusList[ck]]->chr != atoi(tokens[1].c_str()))
- {
- MISHAP << "DIFF_CHR\t" << P.locus[locusList[ck]]->name
- << "\t" << tokens[0] << "\n";
- okay = false;
- }
- }
- else
- {
- for (int ck=0; ck<len-1; ck++)
- if (P.locus[locusList[ck]]->chr != P.locus[locusList[ck+1]]->chr)
- {
- MISHAP << "DIFF_CHR\t" << P.locus[locusList[ck]]->name
- << "\t" << P.locus[locusList[ck+1]]->name << "\n";
- okay = false;
- }
- }
-
- if (!okay)
- {
- all_okay = false;
- continue;
- }
-
-
- ///////////////////////////////////////
- // Standard approach -- only one entry
-
- if ( ( !wildcard) && (!par::weighted_mm) )
- {
-
- // Add numbers for predictors to list
- new_pred_locus.push_back(locusList);
-
- // Add which allele to look for (corresponding to allele1)
- new_pred_allele.push_back(tokens[6]);
-
- // Make new entry in MAP file
- Locus * loc = new Locus;
- loc->name = tokens[0];
- loc->chr = getChromosomeCode( tokens[1] );
- loc->pos = atof(tokens[2].c_str());
- loc->bp = atoi(tokens[3].c_str());
- loc->allele1 = tokens[4];
- loc->allele2 = tokens[5];
-
- // Add this new locus to the list
- new_map.push_back(loc);
- }
- else if ( ( !wildcard ) && par::weighted_mm )
- {
-
- ///////////////////////////////////////
- // Weighted MM test -- only one entry
-
- // Add numbers for predictors to list
- new_pred_locus.push_back(locusList);
-
- // Add which allele(s) to look for (corresponding to allele 1)
- // and then the corresponding weights
-
- map<string,double> whap;
-
- int index = 5 + len + 2;
- while ( index < tokens.size() )
- {
- // Read haplotype, then optionally a weight
-
- if ( tokens[index].size() != len )
- error("Problem with " + par::tagfile + " line\n" + l + "\n");
-
- // Read weight, or advance to next haplotype?
- if ( index == tokens.size() - 1 )
- {
- whap.insert(make_pair( tokens[index] , 1 ) );
- break;
- }
- else if ( tokens[index+1] == "/" )
- {
- whap.insert(make_pair( tokens[index] , 1 ) );
- index += 2;
- }
- else
- {
- double w = atof(tokens[index+1].c_str());
- if ( ( !realnum(w) ) ||
- w < 0 || w > 1 )
- error("Problem with specified weight in line:\n"+l+"\n");
- whap.insert(make_pair( tokens[index] , w ) );
- index += 3;
- }
- }
-
-
- new_pred_weighted_allele.push_back(whap);
-
- // Make new entry in MAP file
- Locus * loc = new Locus;
- loc->name = tokens[0];
- loc->chr = getChromosomeCode( tokens[1] );
- loc->pos = atof(tokens[2].c_str());
- loc->bp = atoi(tokens[3].c_str());
- loc->allele1 = tokens[4];
- loc->allele2 = tokens[5];
-
- // Add this new locus to the list
- new_map.push_back(loc);
-
-
-
- }
- else
- {
-
- ////////////////////////////////////////////
- // Wildcard approach -- just a single entry
-
- // Add numbers for predictors to list
- new_pred_locus.push_back(locusList);
-
- // Put in a dummy allele code (we ignore this...)
- string hstr="";
- for (int s=0;s<S.size();s++)
- hstr += P.locus[locusList[s]]->allele1;
-
- new_pred_allele.push_back(hstr);
-
- // Make new entry in MAP file
-
- Locus * loc = new Locus;
- loc->name = wildname +int2str(hc)+"_DUMMY_"+hstr+"_";
- loc->chr = P.locus[locusList[0]]->chr;
- loc->pos = 0;
- loc->bp = hc;
- loc->allele1 = "1";
- loc->allele2 = "2";
-
- // Add this new locus to the list
- new_map.push_back(loc);
-
-
- }
-
- // Increment new haplotype count
- hc++;
-
- // Read next in TAG file
-
- }
-
- TAG.close();
- MISHAP.close();
-
- if (!all_okay)
- P.printLOG("Warning: misspecified haplotypes found: listed in [ " + f2 + " ]\n");
-
-
- // End of reading haplotype list -- did we encounter any problems?
-
- P.printLOG("Read " + int2str(new_map.size()) +
- " haplotypes from [ " + par::tagfile + " ]\n");
-
-}
-
-
-
-void HaploPhase::makeSlidingWindow(int winsize, int winstep)
-{
-
- P.printLOG("\n");
-
- ///////////////////////////////////////////////
- // Haplotype inference is a SNP-major function
-
- if (!par::SNP_major)
- P.Ind2SNP();
-
-
- //////////////////////////////////
- // Set up haplotype entries
-
- int w=1;
-
- int start = 0;
-
- while ( 1 )
- {
-
- // Beyond last SNP?
-
- if ( start >= P.nl_all )
- break;
-
- // Make a window, as large as possible
- vector<int> snps;
-
- // Make sure it is restricted to one chromosome
- int chr = P.locus[start]->chr;
-
- bool fail = false;
- bool newChromosome = false;
- int actualStop = start;
-
- // Add SNPs to window
-
- for (int s = start; s < start + winsize; s++)
- {
-
- // No more SNPs left
- if ( s == P.nl_all )
- {
- fail = true;
- break;
- }
-
- // Next chromosome?
- if ( P.locus[s]->chr != chr )
- {
- newChromosome = true;
- actualStop = s;
- break;
- }
-
- snps.push_back(s);
- actualStop = s;
- }
-
- // Have we come up to the end of this chromosome in an exact number?
- if ( actualStop == P.nl_all-1 )
- {
- fail = true;
- }
- else if ( ( ! newChromosome ) && P.locus[actualStop+1]->chr != chr )
- {
- newChromosome = true;
- actualStop++;
- }
-
-
- // Finished constructing this particular window:
- // do we have anything to add?
-
- if ( snps.size() == 0 )
- {
- if ( newChromosome )
- start = actualStop;
- else
- start += winstep;
- continue;
- }
-
- Locus * tmploc = new Locus;
- tmploc->name = "WIN"+int2str(w++);
- tmploc->chr = P.locus[start]->chr;
- tmploc->pos = P.locus[start]->pos;
- tmploc->bp = P.locus[start]->bp;
- tmploc->allele1 = "1";
- tmploc->allele2 = "2";
- new_pred_locus.push_back(snps);
-
- new_map.push_back(tmploc);
- new_pred_allele.push_back("");
-
-
- // Advance window
-
- if ( fail )
- break;
-
- if ( newChromosome )
- start = actualStop;
- else
- start += winstep;
-
- }
-
- P.printLOG("Created " + int2str(w-1) + " sliding windows\n");
-
-}
-
-
-void HaploPhase::setSpecificSNPs(string snps)
-{
-
- map<string,int> mapping;
- for (int l=0; l<P.nl_all;l++)
- mapping.insert(make_pair(P.locus[l]->name,l));
-
- NList nl(P.nl_all,true);
- vector<int> snplist = nl.deparseStringList(snps,&mapping);
-
- if (snplist.size() == 0 )
- return;
-
- int start = snplist[0];
-
- new_pred_locus.push_back(snplist);
-
- Locus * tmploc = new Locus;
- tmploc->name = "WIN1";
- tmploc->chr = P.locus[start]->chr;
- tmploc->pos = P.locus[start]->pos;
- tmploc->bp = P.locus[start]->bp;
- tmploc->allele1 = "1";
- tmploc->allele2 = "2";
- new_map.push_back(tmploc);
-
- new_pred_allele.push_back("");
-
- return;
-}
Deleted: trunk/packages/plink/trunk/proxy.cpp
===================================================================
--- trunk/packages/plink/trunk/proxy.cpp 2008-04-09 09:43:11 UTC (rev 1725)
+++ trunk/packages/plink/trunk/proxy.cpp 2008-04-09 10:09:22 UTC (rev 1726)
@@ -1,1594 +0,0 @@
-
-
-//////////////////////////////////////////////////////////////////
-// //
-// PLINK (c) 2005-2008 Shaun Purcell //
-// //
-// This file is distributed under the GNU General Public //
-// License, Version 2. Please see the file COPYING for more //
-// details //
-// //
-//////////////////////////////////////////////////////////////////
-
-
-#include <algorithm>
-#include <iostream>
-#include <iomanip>
-#include <string>
-#include <set>
-#include <cmath>
-#include "options.h"
-#include "helper.h"
-#include "plink.h"
-#include "phase.h"
-#include "model.h"
-#include "linear.h"
-
-//////////////////////////////////////////
-// Helper classes to store and sort proxies
-
-class ProxyResult {
-public:
-
- string name;
- double f;
- double r2;
- double odds;
- double chisq;
- double pvalue;
-
- ProxyResult(string n, double frq, double r, double o, double c, double p)
- : name(n), f(frq), r2(r), odds(o), chisq(c), pvalue(p) { }
-
- bool operator< (const ProxyResult & b) const
- {
- return ( pvalue == b.pvalue ?
- name < b.name :
- pvalue < b.pvalue );
- }
-};
-
-
-class LDPair {
-public:
- int s1;
- int s2;
- double ld;
- bool operator< (const LDPair & b) const
- {
- return ld > b.ld;
- }
-
-
-};
-
-
-///////////////////////////////////////////////
-// Helper function to find n of m combinations
-
-void combinations_recursive(const vector<int> &elems,
- unsigned long req_len,
- vector<unsigned long> &pos,
- unsigned long depth,
- unsigned long margin,
- vector<vector<int> > & collection)
-{
-
- if (depth >= req_len) {
- vector<int> t;
- for (unsigned long ii = 0; ii < pos.size(); ++ii)
- t.push_back( elems[pos[ii]] );
- collection.push_back(t);
- return;
- }
-
- if ((elems.size() - margin) < (req_len - depth))
- return;
-
- for (unsigned long ii = margin; ii < elems.size(); ++ii) {
- pos[depth] = ii;
- combinations_recursive(elems, req_len, pos, depth + 1, ii + 1, collection);
- }
- return;
-}
-
-
-///////////////////////////////////////////////
-// For locus l, main proxy association function
-
-void Plink::performProxyTests(int l)
-{
-
- // Consider a particular SNP, l, and
- // a) form haplotypes surrounding SNPs
- // b) calculate association with phenotype for these SNPs
- // c) calculate r^2 (haplotypic) with allele and haplotypes
-
- // Form haplotypes based on the surrounding SNPs
- // Form phenotype based on the patterns of missingness for test SNP
- // Is there any association?
-
- bool old_silent = par::silent;
-
-
- ////////////////////////////////////////////////////////////////
- // If we are in 'impute' mode: this is to evaluate imputation, in a
- // 'leave-one-out' manner; here we assume the reference panel is
- // coded as 'missing phenotype' and the rest of the sample is coded
- // as non-missing phenotype; so we must first blank out (but later
- // replace) any genotype data for these individuals
-
- vector<bool> tmp1, tmp2;
-
- if ( par::proxy_impute || par::proxy_leave_out )
- {
-
- // Pretend that we do not have these genotypes, except for the
- // reference panel (i.e. individuals with a missing phenotype)
-
- for ( int i = 0 ; i < n ; i++ )
- if ( ! sample[i]->missing )
- {
- tmp1.push_back( SNP[l]->one[i] );
- tmp2.push_back( SNP[l]->two[i] );
-
- SNP[l]->one[i] = true;
- SNP[l]->two[i] = false;
- }
- }
-
-
- ///////////////////////
- // Form test haplotypes
-
- CSNP * s = SNP[l];
-
- vector<int> proxyHaplotypePlusSNP;
-
-
- // Add reference SNP
-
- proxyHaplotypePlusSNP.push_back(l);
-
-
- // Either a fixed maximum number of SNPs left and right (allowing
- // for different filters, and chromosome ends) or read from a file
- // (these SNPs must be on same chromosome)
-
- if ( par::proxy_list )
- {
- checkFileExists( par::proxy_list_file );
- printLOG("Reading proxy list from [ "
- + par::proxy_list_file + " ]\n");
- ifstream PL( par::proxy_list_file.c_str(), ios::in);
-
- map<string,int> mlocus;
- for (int j=0;j<nl_all;j++)
- mlocus.insert(make_pair(locus[j]->name,j));
-
- while ( ! PL.eof() )
- {
- string psnp;
- PL >> psnp;
-
- if ( psnp == "" )
- continue;
-
- map<string,int>::iterator m = mlocus.find( psnp );
-
- if ( m == mlocus.end() )
- continue;
-
- // Add if okay w/ MAF and genotyping thresholds
-
- int pn = m->second;
-
- if ( pn != l &&
- locus[pn]->chr == locus[l]->chr &&
- locus[pn]->freq >= par::proxy_maf &&
- abs(double((locus[l]->bp - locus[pn]->bp)/1000.0))
- <= par::proxy_kb &&
- locus[pn]->pos <= par::proxy_geno )
- {
- proxyHaplotypePlusSNP.push_back( pn );
- }
-
- }
-
- PL.close();
- }
-
- else // ... use window approach
- {
-
- int i = l-1;
- int added = 0;
-
- while ( added < par::proxy_window )
- {
- if ( i >= 0 &&
- locus[i]->chr == locus[l]->chr )
- {
-
- // Add MAF and genotyping thresholds here
- if ( locus[i]->freq >= par::proxy_maf &&
- abs(double((locus[l]->bp - locus[i]->bp)/1000.0))
- <= par::proxy_kb &&
- locus[i]->pos <= par::proxy_geno )
- {
- proxyHaplotypePlusSNP.push_back(i);
- added++;
- }
-
- // Shift left
- --i;
- }
- else
- {
- // Cannot add any more
- added = par::proxy_window;
- }
- }
-
- // Now move right
- added = 0;
- i = l+1;
- while ( added < par::proxy_window )
- {
- if ( i < nl_all &&
- locus[i]->chr == locus[l]->chr )
- {
-
- // Add MAF, kb and genotyping thresholds here
- if ( locus[i]->freq >= par::proxy_maf &&
- abs(double((locus[l]->bp - locus[i]->bp)/1000.0))
- <= par::proxy_kb &&
- locus[i]->pos <= par::proxy_geno )
- {
- proxyHaplotypePlusSNP.push_back(i);
- added++;
- }
-
- //Shift right
- ++i;
- }
- else
- {
- // Cannot add any more
- added = par::proxy_window;
- }
-
- }
- }
-
-
- ///////////////////////////////////////////////////////////////////////////
- //
- // Optionally, filter list based on LD with reference and with eachother
- //
- ///////////////////////////////////////////////////////////////////////////
-
-
- if ( par::proxy_r2_filter )
- {
-
- // Only use Reference Panel for these r-sq calculations at this stage;
- // add flag to modify this
-
- if ( par::proxy_reference_only )
- haplo->reference_only = true;
-
- set<int> added;
-
- // Examine SNP number list: proxyHaplotypePlusSNP
- // First entry is always the reference SNP
-
- set<LDPair> proxies;
-
- // Skip first entry
- for (int i=1; i<proxyHaplotypePlusSNP.size(); i++)
- {
- LDPair p;
- p.s1 = l;
- p.s2 = proxyHaplotypePlusSNP[i];
- p.ld = haplo->rsq(p.s1,p.s2);
- //p.ld = haplo->dprime(p.s1,p.s2);
-
- proxies.insert(p);
- }
-
- set<LDPair>::iterator pi = proxies.begin();
-
- while ( pi != proxies.end() )
- {
-
- // Enough proxies already?
-
- if ( added.size() >= par::proxy_snp_filter )
- break;
-
- if ( ( added.size() < 2 && pi->ld >= par::proxy_r2_filter_A ) // low filter
- || pi->ld >= par::proxy_r2_filter_B ) // higher filter, once 2 proxies found
- {
-
- // But does this already correlate too strongly with an
- // existing proxy?
-
- set<int>::iterator si = added.begin();
-
- bool okay = true;
- while ( si != added.end() )
- {
- int2 snps;
-
- if ( snps.p1 > snps.p2 )
- {
- snps.p1 = *si;
- snps.p2 = pi->s2;
- }
- else
- {
- snps.p1 = pi->s2;
- snps.p2 = *si;
- }
-
- double ld;
-
- map<int2,double>::iterator f = proxyLD.find(snps);
- if ( f != proxyLD.end() )
- {
- ld = f->second;
- }
- else
- {
- ld = haplo->rsq(snps.p1,snps.p2);
- //ld = haplo->dprime(snps.p1,snps.p2);
- proxyLD.insert(make_pair(snps,ld));
- }
-
- if ( ld > par::proxy_r2_filter_C )
- {
- okay = false;
- break;
- }
- ++si;
- }
-
- if ( okay )
- {
- added.insert( pi->s2 );
- }
- }
-
- // Consider next proxy SNP
- ++pi;
- }
-
-
- //////////////////////////////////
- // Update the fitlered proxy list
-
- proxyHaplotypePlusSNP.clear();
- proxyHaplotypePlusSNP.push_back(l);
- set<int>::iterator si = added.begin();
- while ( si != added.end() )
- {
- proxyHaplotypePlusSNP.push_back( *si );
- ++si;
-
- }
-
-
- // And remember to reset
- haplo->reference_only = false;
- }
-
-
- //////////////////////////////////////////////////////////////////
- //
- // Sort, and select reference SNP
- //
- //////////////////////////////////////////////////////////////////
-
- sort( proxyHaplotypePlusSNP.begin(),
- proxyHaplotypePlusSNP.end());
-
- int cnt = proxyHaplotypePlusSNP.size();
- int ref;
-
- for (int i=0; i<cnt; i++)
- if ( proxyHaplotypePlusSNP[i] == l )
- {
- ref = i;
- break;
- }
-
-
- //////////////////////////////////////////////////////////////////////
- //
- // Do we actually have 1 or more proxy SNP selected? If not, leave now
- // if in verbose mode
- //
- //////////////////////////////////////////////////////////////////////
-
- if ( par::proxy_all && ( ! par::proxy_full_report ) )
- {
-
- if ( proxyHaplotypePlusSNP.size() == 1 )
- {
-
- if ( ! par::proxy_impute )
- {
- haplo->HTEST << setw( 4 ) << locus[l]->chr << " "
- << setw(par::pp_maxsnp) << locus[l]->name << " "
- << setw(12) << locus[l]->bp << " "
- << setw(4) << locus[l]->allele1 << " "
- << setw(4) << locus[l]->allele2 << " "
- << setw(10) << locus[l]->pos << " "
- << setw(4) << proxyHaplotypePlusSNP.size() - 1 << " "
- << setw(8) << "NA" << " ";
-
- // Display C/C or T/U for case/control and TDT, else F and BETA
-
- if ( par::qt || par::proxy_glm )
- haplo->HTEST << setw(8) << "NA" << " "
- << setw(8) << "NA" << " ";
- else
- haplo->HTEST << setw(8) << "NA" << " "
- << setw(8) << "NA" << " "
- << setw(8) << "NA" << " ";
-
- haplo->HTEST << setw(10) << "NA" << " ";
-
- if ( par::proxy_list_proxies )
- haplo->HTEST << "(NONE)";
-
- haplo->HTEST << "\n";
-
-
- // Finally, replace any genotypes made temporarily missing
-
- if ( par::proxy_leave_out )
- {
- int cnt = 0;
- for ( int i = 0 ; i < n ; i++ )
- if ( ! sample[i]->missing )
- {
- SNP[l]->one[i] = tmp1[cnt];
- SNP[l]->two[i] = tmp2[cnt++];
- }
-
- tmp1.clear();
- tmp2.clear();
- }
-
- return;
- }
-
- }
- }
-
-
-
-
- ///////////////////////////////////////////////////////////////////////////////
- //
- // Phase haplotypes
- //
- ///////////////////////////////////////////////////////////////////////////////
-
- haplo->reset();
-
- haplo->new_pred_locus.resize(1);
-
- haplo->new_map.resize(1);
-
- haplo->new_pred_locus[0] = proxyHaplotypePlusSNP;
-
- haplo->new_map[0] = locus[l];
-
- par::silent = true;
-
- haplo->phaseAllHaplotypes();
-
- haplo->hname = locus[l]->name;
-
- par::silent = old_silent;
-
-
-
- ///////////////////////////////////////////////////////////////////////////////
- //
- // Per-genotype error screen
- //
- ///////////////////////////////////////////////////////////////////////////////
-
- if ( par::proxy_error )
- {
-
- // Identify test SNP with respect to 0..cnt phased region
- // (i.e. 'ref' and not 'l')
-
- haplo->queryGenotype( ref );
- return;
- }
-
-
- //////////////////////////////////////////////////////////////////////////////
- //
- // If we are in 'impute' mode: do not perform association tests, but
- // just compare imputed genotypes to actual (left-out) genotypes,
- // and report on this.
- //
- //////////////////////////////////////////////////////////////////////////////
-
- if ( par::proxy_leave_out )
- {
- int cnt = 0;
- for ( int i = 0 ; i < n ; i++ )
- if ( ! sample[i]->missing )
- {
- SNP[l]->one[i] = tmp1[cnt];
- SNP[l]->two[i] = tmp2[cnt++];
- }
-
- tmp1.clear();
- tmp2.clear();
- }
- else if ( par::proxy_impute )
- {
-
- ////////////////////////////////////////
- // Replace genotypes, and record dosage
-
- // For imputation quality score
- boolvec_t m1(cnt,false);
- m1[ref] = true;
- boolvec_t a1(cnt,false);
- map<int,int> tests = haplo->makeTestSet(m1,a1);
- set<int> hs;
- map<int,int>::iterator i1 = tests.begin();
- while ( i1 != tests.end() )
- {
- if ( i1->second == 0 )
- hs.insert( i1->first);
- ++i1;
- }
-
- haplo->calculateEmpiricalVariance(hs);
-
-
- int con[4][4];
- for (int j=0;j<4;j++)
- for (int k=0; k<4; k++)
- con[j][k] = 0;
-
-
- if ( par::proxy_record_dosage )
- OUTFILE << locus[l]->name << "\t"
- << locus[l]->allele1 << "\t"
- << locus[l]->allele2 << "\t"
- << haplo->ratio << "\t";
-
- int cnt = 0;
- for ( int i = 0 ; i < n ; i++ )
- if ( ! sample[i]->missing )
- {
-
- // Actual genotypes
-
- bool a1 = tmp1[cnt];
- bool a2 = tmp2[cnt++];
-
- int og;
-
- if ( a1 )
- {
- if ( a2 )
- og = 2;
- else
- og = 3;
- }
- else
- {
- if ( a2 )
- og = 1;
- else
- og = 0;
- }
-
- // Imputed genotypes
-
- vector_t g = haplo->imputeGenotype(i,ref);
-
- // Call imputed
-
- bool i1, i2;
-
- if ( g[0] > par::proxy_impute_threshold )
- {
- i1 = i2 = false;
- con[og][0]++;
- }
- else if ( g[1] > par::proxy_impute_threshold )
- {
- i1 = false;
- i2 = true;
- con[og][1]++;
- }
- else if ( g[2] > par::proxy_impute_threshold )
- {
- i1 = i2 = true;
- con[og][2]++;
- }
- else
- {
- i1 = true; i2 = false;
- con[og][3]++;
- }
-
- if ( par::proxy_full_report )
- haplo->HTEST << locus[l]->name << "\t"
- << sample[i]->fid << " "
- << sample[i]->iid << "\t"
- << a1<<a2 << " "
- << i1<<i2 << " "
- << g[0] << " "
- << g[1] << " "
- << g[2] << "\n";
-
- if ( par::proxy_record_dosage )
- OUTFILE << g[0] + 0.5 * g[1] << "\t";
-
-
- ////////////////////////////////////////////////
- // Replace missing slots with imputed genotypes
-
- // Note: because genotyping rate is pre-calculated, this
- // will not be a problem in practice (as long as panel is
- // sufficiently large) -- i.e. in smaller cases, we need
- // to worry about whether this imputed SNP now might be
- // used as a proxy for another SNP -- wouldn't be terrible,
- // but probably better to try to use real SNPs as
- // proxies whenever possible.
-
-// if ( ! par::proxy_impute_preserve_genotyped )
-// {
- if ( par::proxy_impute_replace || ( a1 && ! a2 ) )
- {
- SNP[l]->one[i] = i1;
- SNP[l]->two[i] = i2;
- }
- else
- {
- SNP[l]->one[i] = a1;
- SNP[l]->two[i] = a2;
- }
-// }
- }
-
-
- if ( par::proxy_record_dosage )
- OUTFILE << "\n";
-
- // Report matrix of concordance
-
- int total = 0;
- for (int j=0;j<4;j++)
- for (int k=0;k<4;k++)
- total += con[j][k];
-
- int concordant = con[0][0] + con[1][1] + con[2][2];
-
- int both_geno = 0;
- for (int j=0;j<3;j++)
- for (int k=0;k<3;k++)
- both_geno += con[j][k];
-
- int observed_geno = total;
- for (int j=0; j<4; j++)
- observed_geno -= con[3][j];
-
- int imputed_geno = total;
- for (int j=0; j<4; j++)
- imputed_geno -= con[j][3];
-
- double rate = both_geno == 0 ?
- -1 :
- (double)concordant/(double)both_geno ;
-
- double rate_obs = (double)observed_geno/(double)total;
- double rate_imp = (double)imputed_geno/(double)total;
- double rate_ovr = (double)both_geno/(double)total;
-
- if ( par::proxy_full_report )
- {
- haplo->HTEST << "\nImputation matrix "
- << "(rows observed, columns imputed)\n\n";
-
- for (int j=0;j<4;j++)
- {
-
- if ( j == 0 )
- haplo->HTEST << locus[l]->allele1 << "/"
- << locus[l]->allele1 << "\t";
- else if ( j == 1 )
- haplo->HTEST << locus[l]->allele1 << "/"
- << locus[l]->allele2 << "\t";
- else if ( j == 2 )
- haplo->HTEST << locus[l]->allele2 << "/"
- << locus[l]->allele2 << "\t";
- else
- haplo->HTEST << par::missing_genotype << "/"
- << par::missing_genotype << "\t";
-
- for (int k=0; k<4; k++)
- haplo->HTEST << con[j][k] << "\t";
-
- haplo->HTEST << "\n";
-
- }
-
- haplo->HTEST << setw(4) << "CHR" << " "
- << setw(par::pp_maxsnp) << "SNP" << " "
- << setw(8) << "INFO" << " "
- << setw(4) << "NPRX" << " "
- << setw(8) << "TOTAL_N" << " "
- << setw(8) << "OBSERVD" << " "
- << setw(8) << "IMPUTED" << " "
- << setw(8) << "OVERLAP" << " "
- << setw(8) << "CONCORD" << " ";
-
- if ( par::proxy_impute_genotypic_concordance )
- haplo->HTEST << setw(8) << "F_AA" << " "
- << setw(8) << "I_AA" << " "
- << setw(8) << "C_AA" << " "
- << setw(8) << "F_AB" << " "
- << setw(8) << "I_AB" << " "
- << setw(8) << "C_AB" << " "
- << setw(8) << "F_BB" << " "
- << setw(8) << "I_BB" << " "
- << setw(8) << "C_BB" << " ";
-
- if ( par::proxy_list_proxies )
- haplo->HTEST << "SNPS";
-
- haplo->HTEST << "\n";
-
- }
-
-
- haplo->HTEST << setw(4) << locus[l]->chr << " "
- << setw(par::pp_maxsnp) << locus[l]->name << " "
- << setw(4) << proxyHaplotypePlusSNP.size() - 1 << " "
- << setw(8) << haplo->ratio << " "
- << setw(8) << total << " "
- << setw(8) << rate_obs << " "
- << setw(8) << rate_imp << " "
- << setw(8) << rate_ovr << " ";
-
- if ( rate >= 0 )
- haplo->HTEST << setw(8) << rate << " ";
- else
- haplo->HTEST << setw(8) << "NA" << " ";
-
- if ( par::proxy_impute_genotypic_concordance )
- {
-
- int impAA = con[0][0]+con[0][1]+con[0][2];
- int impAB = con[1][0]+con[1][1]+con[1][2];
- int impBB = con[2][0]+con[2][1]+con[2][2];
-
- double fAA = (double)(con[0][0]+con[1][0]+con[2][0]) / (double)observed_geno;
- double fAB = (double)(con[0][1]+con[1][1]+con[2][1]) / (double)observed_geno;
- double fBB = (double)(con[0][2]+con[1][2]+con[2][2]) / (double)observed_geno;
-
- if ( observed_geno == 0 )
- fAA = fAB = fBB = 0;
-
- int obsAA = impAA+con[0][3];
- int obsAB = impAB+con[1][3];
- int obsBB = impBB+con[2][3];
-
- haplo->HTEST << setw(8) << fAA << " ";
-
- if ( obsAA == 0 )
- haplo->HTEST << setw(8) << "NA" << " ";
- else
- haplo->HTEST << setw(8) << (double)(impAA)/(double)(impAA+con[0][3]) << " ";
-
- if ( impAA==0 )
- haplo->HTEST << setw(8) << "NA" << " ";
- else
- haplo->HTEST << setw(8) << (double)(con[0][0])/(double)impAA << " ";
-
- haplo->HTEST << setw(8) << fAB << " ";
- if ( obsAB == 0 )
- haplo->HTEST << setw(8) << "NA" << " ";
- else
- haplo->HTEST << setw(8) << (double)(impAB)/(double)(impAB+con[1][3]) << " ";
- if ( impAB==0 )
- haplo->HTEST << setw(8) << "NA" << " ";
- else
- haplo->HTEST << setw(8) << (double)(con[1][1])/(double)impAB << " ";
-
- haplo->HTEST << setw(8) << fBB << " ";
- if ( obsBB == 0 )
- haplo->HTEST << setw(8) << "NA" << " ";
- else
- haplo->HTEST << setw(8) << (double)(impBB)/(double)(impBB+con[2][3]) << " ";
- if ( impBB==0 )
- haplo->HTEST << setw(8) << "NA" << " ";
- else
- haplo->HTEST << setw(8) << (double)(con[2][2])/(double)impBB << " ";
-
- }
-
-
- if ( par::proxy_list_proxies )
- {
- bool printed = false;
- for (int l0=0; l0< proxyHaplotypePlusSNP.size(); l0++)
- {
- if ( proxyHaplotypePlusSNP[ l0 ] != l )
- {
- if ( printed )
- haplo->HTEST << "|";
- haplo->HTEST << locus[ proxyHaplotypePlusSNP[ l0 ] ]->name;
- printed = true;
-
- }
- }
- }
-
-
- haplo->HTEST << endl;
-
- tmp1.clear();
- tmp2.clear();
-
- return;
- }
-
-
-
-
- ////////////////////////////////////////////////////////////////
- // //
- // Consider all subsets of subhaplotypes, if in verbose mode //
- // //
- ////////////////////////////////////////////////////////////////
-
-
- if ( ! ( par::proxy_all || par::proxy_full_report ) )
- printLOG("Estimated haplotype frequencies: now considering combinations...\n");
-
-
-
-
- ////////////////////////////////////////////////////
- // Display haplotype frequencies and individual r^2
-
- if ( ( ! par::proxy_all ) || par::proxy_full_report )
- haplo->HTEST << "\n"
- << " *** Proxy haplotype association report for "
- << haplo->hname << " *** \n\n";
-
-
- ////////////////////////////////////////
- // Report SNPs and single-SNP r-squared
-
- if ( ( ! par::proxy_all) || par::proxy_full_report )
- {
- haplo->HTEST << setw(par::pp_maxsnp) << "SNP" << " "
- << setw(8) << "MAF" << " "
- << setw(8) << "GENO" << " "
- << setw(8) << "KB" << " "
- << setw(8) << "RSQ" << " ";
- if ( par::qt )
- haplo->HTEST << setw(8) << "BETA" << " "
- << setw(8) << "STAT" << " "
- << setw(8) << "P" << "\n";
- else
- haplo->HTEST << setw(8) << "OR" << " "
- << setw(8) << "CHISQ" << " "
- << setw(8) << "P" << "\n";
-
-
- for ( int s = 0 ; s < cnt ; s++)
- {
- haplo->HTEST << setw(par::pp_maxsnp)
- << locus[ haplo->new_pred_locus[0][s] ]->name << " "
- << setw(8)
- << locus[ haplo->new_pred_locus[0][s] ]->freq << " "
- << setw(8)
- << locus[ haplo->new_pred_locus[0][s] ]->pos << " "
- << setw(8)
- << (double)(locus[ haplo->new_pred_locus[0][s]]->bp -
- locus[ haplo->new_pred_locus[0][ref]]->bp)/1000<<" ";
-
- // R-squared
-
- if ( s == ref )
- haplo->HTEST << setw(8) << "*" << " ";
- else
- haplo->HTEST << setw(8) << haplo->rsq_internal(s,ref) << " ";
-
- // Single SNP association
-
- boolvec_t snpmask(cnt,false);
- boolvec_t dummy_allele(cnt,true);
- snpmask[s] = true;
-
- haplo->testSet = haplo->makeTestSet(snpmask,dummy_allele);
-
- if (par::proxy_CC)
- {
- // GLM or standard test?
- if ( par::proxy_glm )
- {
- glmAssoc(false,*pperm);
- }
- else
- {
- if ( par::qt )
- haplo->haplotypicQTL(haplo->testSet,2,false);
- else
- haplo->haplotypicCC(haplo->testSet,2,false);
- }
- }
- else if (par::proxy_TDT)
- {
-
- haplo->subhaplotypes = true;
- haplo->downcoding = haplo->testSet;
-
- haplo->trans.clear();
- haplo->untrans.clear();
-
- haplo->trans.resize(2,0);
- haplo->untrans.resize(2,0);
- haplo->nt = 2;
-
- // First rescore transmissions
-
- for (int i=0; i<n; i++)
- {
- if ( (!sample[i]->founder) &&
- haplo->include[i] )
- {
- haplo->transmissionCount(i,haplo->phasemap[i]);
- }
- }
-
-
- // Then recount T:U based on revised transmission counts
-
- haplo->haplotypicTDT(haplo->testSet,2,false);
-
- haplo->subhaplotypes = false;
- haplo->downcoding.clear();
-
- }
-
- // Recover main results from Model, if GLM used
- if ( par::proxy_glm )
- {
- vector_t coef = model->getCoefs();
- haplo->odds = par::bt ? exp(coef[1]) : coef[1];
- haplo->result = model->isValid() ? model->getStatistic() : 0;
- haplo->pvalue = par::bt ? chiprobP(haplo->result,1) : ((LinearModel*)model)->getPValue();
- delete model;
- }
-
- haplo->HTEST << setw(8) << haplo->odds << " "
- << setw(8) << haplo->result << " "
- << setw(8) << haplo->pvalue << "\n";
-
- }
-
- haplo->HTEST << "\n\n";
- }
-
-
- ///////////////////////////////////////////////////
- // Report haplotypes and frequencies, and also all
- // haplotype-specific tests and omnibus test result
-
- if ( ( ! par::proxy_all ) || par::proxy_full_report )
- {
- string str = "";
- for ( int i = 0; i < cnt ; i++)
- if ( i == ref )
- str += "*";
- else
- str += ".";
-
- haplo->HTEST << setw(14) << str << " "
- << setw(10) << "FREQ" << " ";
- if ( par::qt )
- haplo->HTEST << setw(8) << "BETA" << " "
- << setw(8) << "STAT" << " "
- << setw(8) << "P" << "\n";
- else
- haplo->HTEST << setw(10) << "OR" << " "
- << setw(10) << "CHISQ" << " "
- << setw(10) << "P" << "\n";
-
- for (int h=0; h< haplo->nh; h++)
- {
- if (haplo->f[h] >= par::proxy_mhf )
- {
- haplo->HTEST << setw(14) << haplo->haplotypeName(h) << " "
- << setw(10) << haplo->f[h] << " ";
-
-
- haplo->testSet.clear();
-
- for (int h2=0; h2 < haplo->nh; h2++)
- {
- if ( haplo->f[h2] >= par::proxy_mhf)
- {
- if (h==h2)
- {
- haplo->testSet.insert(make_pair(h2,0));
- }
- else
- {
- haplo->testSet.insert(make_pair(h2,1));
- }
- }
- }
-
- if (par::proxy_CC)
- {
- // GLM or standard test?
- if ( par::proxy_glm )
- {
- glmAssoc(false,*pperm);
- }
- else
- {
-
- if ( par::qt)
- haplo->haplotypicQTL(haplo->testSet,2,false);
- else
- haplo->haplotypicCC(haplo->testSet,2,false);
- }
- }
- else if (par::proxy_TDT)
- {
-
- haplo->subhaplotypes = true;
- haplo->downcoding = haplo->testSet;
-
- haplo->trans.clear();
- haplo->untrans.clear();
-
- haplo->trans.resize(2,0);
- haplo->untrans.resize(2,0);
- haplo->nt = 2;
-
- // First rescore transmissions
-
- for (int i=0; i<n; i++)
- {
- if ( (!sample[i]->founder) &&
- haplo->include[i] )
- {
- haplo->transmissionCount(i,haplo->phasemap[i]);
- }
- }
-
- // Then recount T:U based on revised transmission counts
-
- haplo->haplotypicTDT(haplo->testSet,2,false);
-
- haplo->subhaplotypes = false;
- haplo->downcoding.clear();
-
-
- }
-
- // Recover main results from Model, if GLM used
- if ( par::proxy_glm )
- {
- vector_t coef = model->getCoefs();
- haplo->odds = par::bt ? exp(coef[1]) : coef[1];
- haplo->result = model->isValid() ? model->getStatistic() : 0;
- haplo->pvalue = par::bt ? chiprobP(haplo->result,1) : ((LinearModel*)model)->getPValue();
- delete model;
- }
-
- haplo->HTEST << setw(10) << haplo->odds << " "
- << setw(10) << haplo->result << " "
- << setw(10) << haplo->pvalue << "\n";
-
- }
- }
-
-
- haplo->HTEST << "\nHaplotype frequency estimation based on "
- << haplo->validN;
-
- if ( haplo->X )
- {
- int found_chr = 0;
- for (int i=0; i<n; i++)
- if ( sample[i]->founder )
- {
- if ( sample[i]->sex )
- found_chr++;
- else
- found_chr+=2;
- }
- haplo->HTEST << " of " << found_chr << " founder chromosomes\n";
- }
- else if ( haplo->haploid )
- haplo->HTEST << " of " << haplo->cnt_f << " founder chromosomes\n";
- else
- haplo->HTEST << " of " << haplo->cnt_f * 2 << " founder chromosomes\n";
-
-
-
- ///////////////////////////////////
- // Omnibus test: C/C only
-
- if ( par::proxy_CC && ! par::qt )
- {
-
- map<int,int> tests;
- int nch=0;
- for (int h=0; h < haplo->nh; h++)
- if ( haplo->f[h] >= par::proxy_mhf)
- tests.insert(make_pair(h,nch++));
-
- if (nch>2)
- {
- haplo->haplotypicCC(tests,nch,false);
-
- haplo->HTEST << "Omnibus haplotype test statistic: "
- << haplo->result << ", df = " << nch-1 << ", "
- << "p = "
- << chiprobP( haplo->result , nch-1 ) << "\n\n";
- }
-
- }
- }
-
-
-
- ///////////////////////////////////////////
- // Create masks
-
- // Reference SNP (fix here)
- boolvec_t m1(cnt,false);
- m1[ref] = true;
- boolvec_t a1(cnt,true);
-
- // Proxy haplotype (populated below)
- boolvec_t m2(cnt,false);
- boolvec_t a2(cnt);
-
-
- ///////////////////////////////////////////
- // Report actual SNP //
- ///////////////////////////////////////////
-
- haplo->testSet = haplo->makeTestSet(m1,a1);
-
- if (par::proxy_CC)
- {
- // GLM or standard test?
- if ( par::proxy_glm )
- {
- glmAssoc(false,*pperm);
- }
- else
- {
- if ( par::qt )
- haplo->haplotypicQTL(haplo->testSet,2,false);
- else
- haplo->haplotypicCC(haplo->testSet,2,false);
- }
- }
- else if (par::proxy_TDT)
- {
-
- haplo->subhaplotypes = true;
- haplo->downcoding = haplo->testSet;
-
- haplo->trans.clear();
- haplo->untrans.clear();
-
- haplo->trans.resize(2,0);
- haplo->untrans.resize(2,0);
- haplo->nt = 2;
-
- // First rescore transmissions
-
- for (int i=0; i<n; i++)
- {
- if ( (!sample[i]->founder) &&
- haplo->include[i] )
- {
- haplo->transmissionCount(i,haplo->phasemap[i]);
- }
- }
-
-
- // Then recount T:U based on revised transmission counts
-
- haplo->haplotypicTDT(haplo->testSet,2,false);
-
- haplo->subhaplotypes = false;
- haplo->downcoding.clear();
-
- // Also, calculate the information score
- set<int> t1 = haplo->makeSetFromMap(haplo->testSet);
- haplo->calculateEmpiricalVariance(t1);
-
- }
-
-
- // Recover main results from Model, if GLM used
- if ( par::proxy_glm )
- {
- vector_t coef = model->getCoefs();
- haplo->odds = par::bt ? exp(coef[1]) : coef[1];
- haplo->result = model->isValid() ? model->getStatistic() : 0;
- haplo->pvalue = par::bt ? chiprobP(haplo->result,1) : ((LinearModel*)model)->getPValue();
- delete model;
-
- // Also, calculate the information score
- set<int> t1 = haplo->makeSetFromMap(haplo->testSet);
- haplo->calculateEmpiricalVariance(t1);
- }
-
-
- //////////////////////////////////////////////////////////////////////
- //
- // Just report the single SNP result (based on haplotype test)?
- //
- //////////////////////////////////////////////////////////////////////
-
- if ( par::proxy_all && ( ! par::proxy_full_report ) )
- {
-
-
-
- haplo->HTEST << setw( 4 ) << locus[l]->chr << " "
- << setw(par::pp_maxsnp) << locus[l]->name << " "
- << setw(12) << locus[l]->bp << " "
- << setw(4) << locus[l]->allele1 << " "
- << setw(4) << locus[l]->allele2 << " "
- << setw(10) << locus[l]->pos << " "
- << setw(4) << proxyHaplotypePlusSNP.size() - 1 << " "
- << setw(8) << haplo->ratio << " ";
-
- // Display C/C or T/U for case/control and TDT, else F and BETA
-
- if ( haplo->pvalue < -1 )
- {
- // Not a valid test, e.g. monomorphic, and so p value is returned as -9
- if ( par::qt || par::proxy_glm )
- haplo->HTEST << setw(8) << "NA" << " "
- << setw(8) << "NA" << " "
- << setw(8) << "NA" << " ";
- else
- haplo->HTEST << setw(8) << "NA" << " "
- << setw(8) << "NA" << " "
- << setw(8) << "NA" << " "
- << setw(8) << "NA" << " ";
- }
- else
- {
-
- if ( par::qt || par::proxy_glm )
- {
- // Get population haplotype frequency for imputed SNP
- double f = haplo->freq(m1,a1);
-
- haplo->HTEST << setw(8) << f << " "
- << setw(8) << haplo->odds << " ";
- }
- else
- haplo->HTEST << setw(8) << haplo->case_freq << " "
- << setw(8) << haplo->control_freq << " "
- << setw(8) << haplo->odds << " ";
-
- haplo->HTEST << setw(10) << haplo->pvalue << " ";
- }
-
-
- if ( par::proxy_list_proxies )
- {
- bool printed = false;
- for (int l0=0; l0< proxyHaplotypePlusSNP.size(); l0++)
- {
- if ( proxyHaplotypePlusSNP[ l0 ] != l )
- {
- if ( printed )
- haplo->HTEST << "|";
- haplo->HTEST << locus[ proxyHaplotypePlusSNP[ l0 ] ]->name;
- printed = true;
- }
- }
- }
-
-
- haplo->HTEST << endl;
-
- return;
- }
-
-
-
-
-
-
- ////////////////////////////////////////////////////////////////////////
- //
- // Rest of this function is for the extended report mode
- //
- ////////////////////////////////////////////////////////////////////////
-
-
-
- ///////////////////////////////////////////
- // Consider from cnt-1 to 1 SNP haplotypes
-
- int search_cnt = par::proxy_include_reference ? cnt : cnt - 1;
- int maxsnp = search_cnt < par::proxy_maxhap ? search_cnt : par::proxy_maxhap;
-
- int num_subhaps_total = 0;
- int num_subhaps_valid = 0;
-
- set<ProxyResult> presults;
-
- for (int i=1; i<=maxsnp; i++)
- {
-
- // For an i-SNP of cnt-SNP, consider all the permutations
-
- vector<unsigned long> pos1(i);
- vector<int> d(search_cnt);
- int i2=0;
- for (int z=0; z<cnt; z++)
- if ( z != ref )
- {
- d[i2]=i2;
- i2++;
- }
-
- vector<vector<int> > collection;
- combinations_recursive(d,i,pos1,0,0,collection);
-
- vector<int> mapback;
- for (int s=0;s<cnt;s++)
- if ( par::proxy_include_reference || s != ref )
- mapback.push_back(s);
-
- // Consider each combination of i of cnt-1 SNP haplotypes
- // then consider each possible haplotype within that
-
- for (int c1 = 0; c1 < collection.size() ; c1++ )
- {
-
- // For biallelic haplotypes (SNPs)
- // skip the second allele
-
- // Which SNPs do we want to look up?
-
- vector<int> posit = collection[c1];
-
- // Now consider all search_cnt SNP haplotypes
- // i.e. excluding reference SNP
-
- int hapcnt = (int)pow((double)2,i);
- int h = 0;
- while ( h < hapcnt )
- {
-
- // Skip redundant second allele of
- // SNPs
-
- if ( collection[c1].size() == 1 &&
- h == 1 )
- {
- h++;
- continue;
- }
-
-
- vector<bool> tmp;
- unsigned int p=1;
- for (int s=0;s<i;s++)
- {
- if ( h & p ) tmp.push_back(false);
- else tmp.push_back(true);
- p <<= 1;
- }
-
-
- ///////////////////////
- // Set proxy haplotype
-
- // Insert i-SNP haplotype back into
- // full cnt-SNP space
-
- fill(m2.begin(), m2.end(), false);
-
- int t1=0;
- for (int t2=0; t2<i; t2++)
- {
- m2[ mapback[ posit[t2] ] ] = true;
- a2[ mapback[ posit[t2] ] ] = tmp[ t2 ];
- }
-
-
- ////////////////////////////////
- // Check frequency of haplotype
-
- double f = haplo->freq(m2,a2);
-
-
- ////////////////////////////////////////////////////////////
- // Calculate r^2 between haplotype and reference SNP allele
-
- double r2 = haplo->rsq_internal(m1,a1,m2,a2);
-
-
-
- ////////////////////////////////////////////
- // Is this haplotype not worth considering?
-
-
- ++num_subhaps_total;
-
- if ( r2 < par::proxy_r2 ||
- f < par::proxy_mhf )
- {
- h++;
- continue;
- }
-
-
- ++num_subhaps_valid;
-
-
- ////////////////////////////////////////////////////////////
- // Calculte association between proxy haplotype and disease
-
- // If only two haplotypes, report only 1
- // (note may only be two *common* haplotypes, but
- // in that case, we should report both
-
- haplo->testSet = haplo->makeTestSet(m2,a2);
-
- if (par::proxy_CC)
- {
- // GLM or standard test?
- if ( par::proxy_glm )
- {
- glmAssoc(false,*pperm);
- }
- else
- {
- if ( par::qt )
- haplo->haplotypicQTL(haplo->testSet,2,false);
- else
- haplo->haplotypicCC(haplo->testSet,2,false);
- }
- }
- else if (par::proxy_TDT)
- {
-
- haplo->subhaplotypes = true;
- haplo->downcoding = haplo->testSet;
-
- haplo->trans.clear();
- haplo->untrans.clear();
-
- haplo->trans.resize(2,0);
- haplo->untrans.resize(2,0);
- haplo->nt = 2;
-
- // First rescore transmissions
-
- for (int i=0; i<n; i++)
- {
- if ( (!sample[i]->founder) &&
- haplo->include[i] )
- {
- haplo->transmissionCount(i,haplo->phasemap[i]);
- }
- }
-
- // Then recount T:U based on revised transmission counts
-
- haplo->haplotypicTDT(haplo->testSet,2,false);
-
- haplo->subhaplotypes = false;
- haplo->downcoding.clear();
- }
-
- string str = par::proxy_include_reference ?
- haplo->getSubHaplotypeName(m2,a2,-1) :
- haplo->getSubHaplotypeName(m2,a2,ref);
-
-
- // Recover main results from Model, if GLM used
- if ( par::proxy_glm )
- {
- vector_t coef = model->getCoefs();
- haplo->odds = par::bt ? exp(coef[1]) : coef[1];
- haplo->result = model->isValid() ? model->getStatistic() : 0;
- haplo->pvalue = par::bt ? chiprobP(haplo->result,1) : ((LinearModel*)model)->getPValue();
- delete model;
- }
-
-
- //////////////////////
- // Store this result
-
- ProxyResult r(str,f,r2,
- haplo->odds,
- haplo->result,
- haplo->pvalue);
- presults.insert(r);
-
- // Consider next haplotype
-
- h++;
-
- }
- }
- }
-
-
- haplo->HTEST << "Of " << num_subhaps_total << " subhaplotypes considered, "
- << num_subhaps_valid << " met proxy criteria\n\n";
-
- // Report results
-
- if ( presults.size() == 0 )
- haplo->HTEST << "No proxies found above r-sq " << par::proxy_r2 << "\n";
- else
- {
-
- haplo->HTEST << setw(14) << "HAP" << " "
- << setw(10) << "FREQ" << " "
- << setw(10) << "RSQ" << " ";
- if ( par::qt )
- haplo->HTEST << setw(8) << "BETA" << " "
- << setw(8) << "STAT" << " "
- << setw(8) << "P" << "\n";
- else
- haplo->HTEST << setw(10) << "OR" << " "
- << setw(10) << "CHISQ" << " "
- << setw(10) << "P" << "\n";
-
-
- set<ProxyResult>::iterator i = presults.begin();
-
- while ( i != presults.end() )
- {
- haplo->HTEST << setw(14) << i->name << " "
- << setw(10) << i->f << " "
- << setw(10) << i->r2 << " "
- << setw(10) << i->odds << " "
- << setw(10) << i->chisq << " "
- << setw(10) << i->pvalue << "\n";
- i++;
- }
- }
-
- if ( par::proxy_full_report )
- haplo->HTEST << "\n+--------------------------------------------------------------------+\n\n";
-
- return;
-
-}
-
-
More information about the debian-med-commit
mailing list