8 changed files:
- Python/ReadMe.md
- plink2.cc
- plink2_glm.cc
- plink2_help.cc
- plink2_import.cc
- plink2_misc.cc
@@ -1,6 +1,23 @@
-This provides a basic Python API for pgenlib; see python_api.txt for details.
+This provides a basic Python API for pgenlib; see [python_api.txt](python_api.txt) for details.
+##### Build this with, e.g.:
Cython and NumPy must be installed.
+python3 setup.py build_ext
+[sudo] python3 setup.py install
+##### Example usage:
+#write a 2 sample file
+import numpy as np
+import pgenlib as pg
+with pg.PgenWriter("test.pgen".encode("utf-8"), 2, 3, False) as writer:
+ writer.append_alleles(np.array([0,1,1,1],dtype=np.int32))
+ writer.append_alleles(np.array([0,1,0,0],dtype=np.int32))
+ writer.append_alleles(np.array([0,0,0,0],dtype=np.int32))
-Build this with e.g.
- python3 setup.py build_ext
- [sudo] python3 setup.py install
@@ -71,10 +71,10 @@ static const char ver_str[] = "PLINK v2.00a3"
#ifdef USE_MKL
" Intel"
- " (1 Jul 2021)";
+ " (16 Aug 2021)";
static const char ver_str2[] =
// include leading space if day < 10, so character length stays the same
- " "
+ ""
#ifndef LAPACK_ILP64
" "
@@ -4957,6 +4957,7 @@ BoolErr AllocAndInitReportedTestNames(const uintptr_t* parameter_subset, const c
test_name_buf_iter = iter_next;
uint32_t pred_uidx = 2 + domdev_present;
+ // bugfix (16 Aug 2021): sex + interaction?
for (uint32_t covar_idx = 0; covar_idx != covar_ct; ++covar_idx, ++pred_uidx) {
if (parameter_subset && (!IsSet(parameter_subset, pred_uidx))) {
@@ -10910,9 +10911,10 @@ static const double kSexMaleToCovarD[2] = {2.0, 1.0};
void SexInteractionReshuffle(uint32_t first_interaction_pred_uidx, uint32_t raw_covar_ct, uint32_t domdev_present, uint32_t biallelic_raw_predictor_ctl, uintptr_t* __restrict parameters_or_tests, uintptr_t* __restrict parameter_subset_reshuffle_buf) {
ZeroWArr(biallelic_raw_predictor_ctl, parameter_subset_reshuffle_buf);
CopyBitarrRange(parameters_or_tests, 0, 0, first_interaction_pred_uidx - 1, parameter_subset_reshuffle_buf);
- const uint32_t raw_interaction_ct = raw_covar_ct * (domdev_present + 1);
- CopyBitarrRange(parameters_or_tests, first_interaction_pred_uidx - 1, first_interaction_pred_uidx, raw_interaction_ct, parameter_subset_reshuffle_buf);
- const uint32_t first_sex_parameter_idx = first_interaction_pred_uidx - 1 + raw_interaction_ct;
+ // bugfix (16 Aug 2021): raw_covar_ct includes sex
+ const uint32_t raw_nonsex_interaction_ct = (raw_covar_ct - 1) * (domdev_present + 1);
+ CopyBitarrRange(parameters_or_tests, first_interaction_pred_uidx - 1, first_interaction_pred_uidx, raw_nonsex_interaction_ct, parameter_subset_reshuffle_buf);
+ const uint32_t first_sex_parameter_idx = first_interaction_pred_uidx - 1 + raw_nonsex_interaction_ct;
if (IsSet(parameters_or_tests, first_sex_parameter_idx)) {
SetBit(first_interaction_pred_uidx - 1, parameter_subset_reshuffle_buf);
@@ -2059,9 +2059,9 @@ PglErr DispHelp(const char* const* argvk, uint32_t param_ct) {
" otherwise, column 3 is assumed. Use 'col-num=' to force a column number.\n"
" * Only the first character in the sex column is processed. By default,\n"
" '1'/'M'/'m' is interpreted as male, '2'/'F'/'f' is interpreted as female,\n"
-" and '0'/'N' is interpreted as unknown-sex. To change this to '0'/'M'/'m'\n"
-" = male, '1'/'F'/'f' = female, anything else other than '2' = unknown-sex,\n"
-" add 'male0'.\n"
+" and '0'/'N'/'U'/'u' is interpreted as unknown-sex. To change this to\n"
+" '0'/'M'/'m' = male, '1'/'F'/'f' = female, anything else other than '2' =\n"
+" unknown-sex, add 'male0'.\n"
// don't make --real-ref-alleles apply to e.g. Oxford import, since
// explicit 'ref-first'/'ref-last' modifiers are clearer
@@ -2935,7 +2935,7 @@ PglErr VcfToPgen(const char* vcfname, const char* preexisting_psamname, const ch
if (unlikely((!sample_ct) && (!no_samples_ok))) {
logerrputs("Error: No samples in --vcf file. (This is only permitted when you haven't\nspecified another operation which requires genotype or sample information.)\n");
- goto VcfToPgen_ret_INCONSISTENT_INPUT;
+ goto VcfToPgen_ret_DEGENERATE_DATA;
vic.vibc.sample_ct = sample_ct;
// bugfix (5 Jun 2018): must initialize qual_field_ct to zero
@@ -3208,7 +3208,7 @@ PglErr VcfToPgen(const char* vcfname, const char* preexisting_psamname, const ch
} else if (unlikely(!variant_ct)) {
logerrputs("Error: No variants in --vcf file.\n");
- goto VcfToPgen_ret_INCONSISTENT_INPUT;
+ goto VcfToPgen_ret_DEGENERATE_DATA;
putc_unlocked('\r', stdout);
@@ -3875,6 +3875,9 @@ PglErr VcfToPgen(const char* vcfname, const char* preexisting_psamname, const ch
reterr = kPglRetThreadCreateFail;
+ reterr = kPglRetDegenerateData;
+ break;
CleanupSpgw(&spgw, &reterr);
@@ -7352,7 +7355,7 @@ PglErr BcfToPgen(const char* bcfname, const char* preexisting_psamname, const ch
if (unlikely((!sample_ct) && (!no_samples_ok))) {
logerrputs("Error: No samples in BCF text header block. (This is only permitted when you\nhaven't specified another operation which requires genotype or sample\ninformation.)\n");
- goto BcfToPgen_ret_INCONSISTENT_INPUT;
+ goto BcfToPgen_ret_DEGENERATE_DATA;
if (unlikely(sample_ct >= (1 << 24))) {
snprintf(g_logbuf, kLogbufSize, "Error: BCF text header block has %u sample IDs, which is larger than the BCF limit of 2^24 - 1.\n", sample_ct);
@@ -8012,7 +8015,7 @@ PglErr BcfToPgen(const char* bcfname, const char* preexisting_psamname, const ch
} else if (unlikely(!variant_ct)) {
logerrputs("Error: No variants in --bcf file.\n");
- goto BcfToPgen_ret_INCONSISTENT_INPUT;
+ goto BcfToPgen_ret_DEGENERATE_DATA;
const uintptr_t variant_skip_ct = vrec_idx - 1 - variant_ct;
@@ -8983,6 +8986,9 @@ PglErr BcfToPgen(const char* bcfname, const char* preexisting_psamname, const ch
reterr = kPglRetThreadCreateFail;
+ reterr = kPglRetDegenerateData;
+ break;
CleanupSpgw(&spgw, &reterr);
@@ -9387,7 +9393,7 @@ PglErr OxSampleToPsam(const char* samplename, const char* const_fid, const char*
const uint32_t sample_ct = line_idx - 3;
if (unlikely(!sample_ct)) {
logerrputs("Error: No samples in .sample file.\n");
- goto OxSampleToPsam_ret_INCONSISTENT_INPUT;
+ goto OxSampleToPsam_ret_DEGENERATE_DATA;
const char* all_ids_iter = all_ids_start;
uint32_t nz_fid_present = 0;
@@ -9695,6 +9701,9 @@ PglErr OxSampleToPsam(const char* samplename, const char* const_fid, const char*
reterr = kPglRetInconsistentInput;
+ OxSampleToPsam_ret_DEGENERATE_DATA:
+ reterr = kPglRetDegenerateData;
+ break;
CleanupTextStream2(".sample file", &sample_txs, &reterr);
@@ -9896,7 +9905,7 @@ PglErr OxGenToPgen(const char* genname, const char* samplename, const char* cons
goto OxGenToPgen_ret_TSTREAM_FAIL;
logerrputs("Error: Empty .gen file.\n");
- goto OxGenToPgen_ret_INCONSISTENT_INPUT;
+ goto OxGenToPgen_ret_DEGENERATE_DATA;
uint32_t is_v2 = 0;
@@ -10327,6 +10336,9 @@ PglErr OxGenToPgen(const char* genname, const char* samplename, const char* cons
reterr = kPglRetInconsistentInput;
+ reterr = kPglRetDegenerateData;
+ break;
CleanupSpgw(&spgw, &reterr);
@@ -11820,7 +11832,7 @@ PglErr OxBgenToPgen(const char* bgenname, const char* samplename, const char* co
const uint32_t raw_variant_ct = initial_uints[2];
if (unlikely(!raw_variant_ct)) {
logerrputs("Error: Empty .bgen file.\n");
- goto OxBgenToPgen_ret_INCONSISTENT_INPUT;
+ goto OxBgenToPgen_ret_DEGENERATE_DATA;
if (unlikely(fseeko(bgenfile, initial_uints[1], SEEK_SET))) {
@@ -13688,6 +13700,9 @@ PglErr OxBgenToPgen(const char* bgenname, const char* samplename, const char* co
reterr = kPglRetThreadCreateFail;
+ reterr = kPglRetDegenerateData;
+ break;
if (reterr == kPglRetMalformedInput) {
@@ -14474,7 +14489,7 @@ PglErr LoadMap(const char* mapname, MiscFlags misc_flags, ChrInfo* cip, uint32_t
if (unlikely(!line_start)) {
if (!TextStreamErrcode2(&map_txs, &reterr)) {
logerrputs("Error: Empty .map file.\n");
- goto LoadMap_ret_INCONSISTENT_INPUT;
+ goto LoadMap_ret_DEGENERATE_DATA;
goto LoadMap_ret_TSTREAM_FAIL;
@@ -14687,6 +14702,9 @@ PglErr LoadMap(const char* mapname, MiscFlags misc_flags, ChrInfo* cip, uint32_t
reterr = kPglRetInconsistentInput;
+ reterr = kPglRetDegenerateData;
+ break;
// ForgetExtraChrNames(1, cip);
@@ -15305,7 +15323,7 @@ PglErr Plink1DosageToPgen(const char* dosagename, const char* famname, const cha
if (unlikely(!variant_ct)) {
if (!variant_skip_ct) {
logerrputs("Error: Empty --import-dosage file.\n");
- goto Plink1DosageToPgen_ret_INCONSISTENT_INPUT;
+ goto Plink1DosageToPgen_ret_DEGENERATE_DATA;
logerrprintfww("Error: All %" PRIuPTR " variant%s in --import-dosage file skipped.\n", variant_skip_ct, (variant_skip_ct == 1)? "" : "s");
goto Plink1DosageToPgen_ret_INCONSISTENT_INPUT;
@@ -15604,6 +15622,9 @@ PglErr Plink1DosageToPgen(const char* dosagename, const char* famname, const cha
reterr = kPglRetInconsistentInput;
+ Plink1DosageToPgen_ret_DEGENERATE_DATA:
+ reterr = kPglRetDegenerateData;
+ break;
CleanupSpgw(&spgw, &reterr);
@@ -2307,10 +2307,12 @@ PglErr UpdateSampleSexes(const uintptr_t* sample_include, const SampleIdInfo* si
} else if (ujj == 70) {
// 'F'/'f'
sexval = 2;
- } else if (unlikely((!male0) && (sexval != 30))) {
+ } else if (unlikely((!male0) && (sexval != 30) && (ujj != 85))) {
// allow 'N' = missing to make 1/2/NA work
+ // allow 'U'/'u' since this is actually being used by Illumina
+ // GenCall and Affymetrix APT
// don't permit 'n' for now
- snprintf(g_logbuf, kLogbufSize, "Error: Invalid sex value on line %" PRIuPTR " of --update-sex file. (Acceptable values: 1/M/m = male, 2/F/f = female, 0/N = missing.)\n", line_idx);
+ snprintf(g_logbuf, kLogbufSize, "Error: Invalid sex value on line %" PRIuPTR " of --update-sex file. (Acceptable values: 1/M/m = male, 2/F/f = female, 0/N/U = missing.)\n", line_idx);
goto UpdateSampleSexes_ret_MALFORMED_INPUT_WW;
} else {
// with 'male0', everything else is treated as missing
View it on GitLab: https://salsa.debian.org/med-team/plink2/-/commit/478fa15a9c3f8ddd39b3c7a13256f694a14f8cbe
