[med-svn] [Git][med-team/plink2][debian/experimental] 11 commits: Add -latomic on armel
Michael R. Crusoe
gitlab at salsa.debian.org
Sat Dec 26 18:06:52 GMT 2020
Michael R. Crusoe pushed to branch debian/experimental at Debian Med / plink2
Commits:
820e592d by Michael R. Crusoe at 2020-12-23T16:29:54+01:00
Add -latomic on armel
- - - - -
5e40e318 by Michael R. Crusoe at 2020-12-23T16:31:52+01:00
New upstream version 2.00~a3-201212+dfsg
- - - - -
aa663733 by Michael R. Crusoe at 2020-12-23T16:31:54+01:00
Update upstream source from tag 'upstream/2.00_a3-201212+dfsg'
Update to upstream version '2.00~a3-201212+dfsg'
with Debian dir 473acdfe6aa5098bb88498c2b9ed8d2941f9fd5d
- - - - -
0cfd251b by Michael R. Crusoe at 2020-12-23T16:32:13+01:00
New upstream release.
- - - - -
8b974c54 by Michael R. Crusoe at 2020-12-23T16:33:19+01:00
routine-update: Standards-Version: 4.5.1
- - - - -
e47fd820 by Michael R. Crusoe at 2020-12-23T16:36:04+01:00
Mark use_packaged_libdeflate.patch as being Debian specific
- - - - -
17fe74ca by Michael R. Crusoe at 2020-12-23T17:44:17+01:00
debian/patches/portable_m128i_setting: use portable methods to set __m128i, should fix build on ppc64el.
- - - - -
585c49a5 by Michael R. Crusoe at 2020-12-23T18:16:07+01:00
release 2.00~a3-201212+dfsg-1
- - - - -
7dfe5357 by Michael R. Crusoe at 2020-12-26T17:44:37+01:00
Experimental rebuild with libsimde-dev 0.7.0~rc-1 or later.
- - - - -
8d863a83 by Michael R. Crusoe at 2020-12-26T17:44:37+01:00
Add '-latomic' for mipsel, m68k, and powerpc as well
- - - - -
d108abe6 by Michael R. Crusoe at 2020-12-26T18:53:43+01:00
Improve portable __m128i setting patch to include __m256i setting as well. Also forward it upstream.
- - - - -
12 changed files:
- debian/changelog
- debian/control
- + debian/patches/portable_m128i_setting
- debian/patches/series
- debian/patches/use_packaged_libdeflate.patch
- debian/rules
- plink2.cc
- plink2_cmdline.cc
- plink2_cmdline.h
- plink2_export.cc
- plink2_help.cc
- plink2_matrix_calc.cc
Changes:
=====================================
debian/changelog
=====================================
@@ -1,3 +1,25 @@
+plink2 (2.00~a3-201212+dfsg-2~0exp0simde) experimental; urgency=medium
+
+ * Team upload.
+ * Experimental rebuild with libsimde-dev 0.7.0~rc-1 or later.
+ * Add '-latomic' for mipsel, m68k, and powerpc as well
+ * Improve portable __m128i setting patch to include __m256i setting as well.
+ Also forward it upstream.
+
+ -- Michael R. Crusoe <crusoe at debian.org> Sat, 26 Dec 2020 17:41:43 +0100
+
+plink2 (2.00~a3-201212+dfsg-1) unstable; urgency=medium
+
+ * Team upload.
+ * Add -latomic on armel
+ * New upstream release.
+ * Standards-Version: 4.5.1 (routine-update)
+ * Mark use_packaged_libdeflate.patch as being Debian specific
+ * debian/patches/portable_m128i_setting: use portable methods to set
+ __m128i, should fix build on ppc64el.
+
+ -- Michael R. Crusoe <crusoe at debian.org> Wed, 23 Dec 2020 18:15:47 +0100
+
plink2 (2.00~a3-201028+dfsg-2) unstable; urgency=medium
* Inject -I/usr/include/simde
=====================================
debian/control
=====================================
@@ -9,9 +9,9 @@ Build-Depends: debhelper-compat (= 13),
liblapack-dev,
libzstd-dev (>= 1.4.4),
zlib1g-dev,
- libsimde-dev (>= 0.0.0.git.20200531),
+ libsimde-dev (>= 0.7.0~rc-1),
libdeflate-dev
-Standards-Version: 4.5.0
+Standards-Version: 4.5.1
Vcs-Browser: https://salsa.debian.org/med-team/plink2
Vcs-Git: https://salsa.debian.org/med-team/plink2.git
Homepage: https://www.cog-genomics.org/plink/2.0/
=====================================
debian/patches/portable_m128i_setting
=====================================
@@ -0,0 +1,193 @@
+Author: Michael R. Crusoe <crusoe at debian.org>
+Description: Use portable methods to set __m{128,256}i from static values
+Forwarded: https://github.com/chrchang/plink-ng/pull/164
+--- plink2.orig/plink2_glm.cc
++++ plink2/plink2_glm.cc
+@@ -2171,20 +2171,20 @@
+ # ifdef FVEC_32
+ static inline VecF fmath_exp_ps(VecF xxv) {
+ __m256 xx = R_CAST(__m256, xxv);
+- const __m256i mask7ff = {0x7fffffff7fffffffLLU, 0x7fffffff7fffffffLLU, 0x7fffffff7fffffffLLU, 0x7fffffff7fffffffLLU};
++ const __m256i mask7ff = _mm256_set1_epi32(UINT32_C(0x7fffffff));
+ // 88
+- const __m256i max_x = {0x42b0000042b00000LLU, 0x42b0000042b00000LLU, 0x42b0000042b00000LLU, 0x42b0000042b00000LLU};
++ const __m256i max_x = _mm256_set1_epi32(UINT32_C(0x42b00000));
+ // -88
+ // more sensible 0xc2b00000... not used here due to "narrowing conversion"
+ // warning
+- const __m256i min_x = {-0x3d4fffff3d500000LL, -0x3d4fffff3d500000LL, -0x3d4fffff3d500000LL, -0x3d4fffff3d500000LL};
++ const __m256i min_x = _mm256_set1_epi64x(INT64_C(-0x3d4fffff3d500000));
+ // 2^10 / log(2)
+- const __m256i const_aa = {0x44b8aa3b44b8aa3bLLU, 0x44b8aa3b44b8aa3bLLU, 0x44b8aa3b44b8aa3bLLU, 0x44b8aa3b44b8aa3bLLU};
++ const __m256i const_aa = _mm256_set1_epi32(UINT32_C(0x44b8aa3b));
+ // log(2) / 2^10
+- const __m256i const_bb = {0x3a3172183a317218LLU, 0x3a3172183a317218LLU, 0x3a3172183a317218LLU, 0x3a3172183a317218LLU};
+- const __m256i f1 = {0x3f8000003f800000LLU, 0x3f8000003f800000LLU, 0x3f8000003f800000LLU, 0x3f8000003f800000LLU};
+- const __m256i mask_s = {0x3ff000003ffLLU, 0x3ff000003ffLLU, 0x3ff000003ffLLU, 0x3ff000003ffLLU};
+- const __m256i i127s = {0x1fc000001fc00LLU, 0x1fc000001fc00LLU, 0x1fc000001fc00LLU, 0x1fc000001fc00LLU};
++ const __m256i const_bb = _mm256_set1_epi32(UINT64_C(0x3a317218));
++ const __m256i f1 = _mm256_set1_epi32(UINT32_C(0x3f800000));
++ const __m256i mask_s = _mm256_set1_epi32(UINT64_C(0x000003ff));
++ const __m256i i127s = _mm256_set1_epi32(UINT32_C(0x0001fc00));
+ const __m256i limit = _mm256_castps_si256(_mm256_and_ps(xx, R_CAST(__m256, mask7ff)));
+ const int32_t over = _mm256_movemask_epi8(_mm256_cmpgt_epi32(limit, max_x));
+ if (over) {
+@@ -2291,22 +2291,22 @@
+
+ static inline VecF fmath_exp_ps(VecF xxv) {
+ __m128 xx = xxv;
+- const __m128i mask7ff = {0x7fffffff7fffffffLLU, 0x7fffffff7fffffffLLU};
++ const __m128i mask7ff = _mm_set1_epi32(UINT32_C(0x7fffffff));
+
+ // 88
+- const __m128i max_x = {0x42b0000042b00000LLU, 0x42b0000042b00000LLU};
++ const __m128i max_x = _mm_set1_epi32(UINT32_C(0x42b00000));
+ // -88
+ // more sensible 0xc2b00000... not used here due to "narrowing conversion"
+ // warning
+- const __m128i min_x = {-0x3d4fffff3d500000LL, -0x3d4fffff3d500000LL};
++ const __m128i min_x = _mm_set1_epi64x(INT64_C(-0x3d4fffff3d500000));
+ // 2^10 / log(2)
+- const __m128i const_aa = {0x44b8aa3b44b8aa3bLLU, 0x44b8aa3b44b8aa3bLLU};
++ const __m128i const_aa = _mm_set1_epi32(UINT32_C(0x44b8aa3b));
+ // log(2) / 2^10
+- const __m128i const_bb = {0x3a3172183a317218LLU, 0x3a3172183a317218LLU};
++ const __m128i const_bb = _mm_set1_epi32(UINT32_C(0x3a317218));
+
+- const __m128i f1 = {0x3f8000003f800000LLU, 0x3f8000003f800000LLU};
+- const __m128i mask_s = {0x3ff000003ffLLU, 0x3ff000003ffLLU};
+- const __m128i i127s = {0x1fc000001fc00LLU, 0x1fc000001fc00LLU};
++ const __m128i f1 = _mm_set1_epi32(UINT32_C(0x3f800000));
++ const __m128i mask_s = _mm_set1_epi64x(UINT64_C(0x3ff000003ff));
++ const __m128i i127s = _mm_set1_epi32(UINT32_C(0x0001fc00));
+ const __m128i limit = _mm_castps_si128(_mm_and_ps(xx, R_CAST(__m128, mask7ff)));
+ const int32_t over = _mm_movemask_epi8(_mm_cmpgt_epi32(limit, max_x));
+ if (over) {
+--- plink2.orig/plink2_ld.cc
++++ plink2/plink2_ld.cc
+@@ -2157,11 +2157,11 @@
+ void FillDosageUhet(const Dosage* dosage_vec, uint32_t dosagev_ct, Dosage* dosage_uhet) {
+ const __m256i* dosage_vvec_iter = R_CAST(const __m256i*, dosage_vec);
+ # if defined(__APPLE__) && ((!defined(__cplusplus)) || (__cplusplus < 201103L))
+- const __m256i all_n32768 = {0x8000800080008000LLU, 0x8000800080008000LLU, 0x8000800080008000LLU, 0x8000800080008000LLU};
+- const __m256i all_n16384 = {0xc000c000c000c000LLU, 0xc000c000c000c000LLU, 0xc000c000c000c000LLU, 0xc000c000c000c000LLU};
++ const __m256i all_n32768 = _mm256_set1_epi16(UINT16_C(0x8000));
++ const __m256i all_n16384 = _mm256_set1_epi16(UINT16_C(0xc000));
+ # else
+- const __m256i all_n32768 = {-0x7fff7fff7fff8000LL, -0x7fff7fff7fff8000LL, -0x7fff7fff7fff8000LL, -0x7fff7fff7fff8000LL};
+- const __m256i all_n16384 = {-0x3fff3fff3fff4000LL, -0x3fff3fff3fff4000LL, -0x3fff3fff3fff4000LL, -0x3fff3fff3fff4000LL};
++ const __m256i all_n32768 = _mm256_set1_epi64x(INT64_C(-0x7fff7fff7fff8000));
++ const __m256i all_n16384 = _mm256_set1_epi64x(INT64_C(-0x3fff3fff3fff4000));
+ # endif
+ const __m256i all0 = _mm256_setzero_si256();
+ const __m256i all1 = _mm256_cmpeq_epi16(all0, all0);
+@@ -2195,7 +2195,7 @@
+ uint64_t DenseDosageSum(const Dosage* dosage_vec, uint32_t vec_ct) {
+ // end of dosage_vec assumed to be missing-padded (0-padded also ok)
+ const __m256i* dosage_vvec_iter = R_CAST(const __m256i*, dosage_vec);
+- const __m256i m16 = {kMask0000FFFF, kMask0000FFFF, kMask0000FFFF, kMask0000FFFF};
++ const __m256i m16 = _mm256_set1_epi64x(kMask0000FFFF);
+ const __m256i all1 = _mm256_cmpeq_epi16(m16, m16);
+ uint64_t sum = 0;
+ for (uint32_t vecs_left = vec_ct; ; ) {
+@@ -2231,7 +2231,7 @@
+ // end of dosage_vec assumed to be missing-padded (0-padded also ok)
+ const __m256i* dosage_vvec_iter = R_CAST(const __m256i*, dosage_vec);
+ const __m256i* dosage_mask_vvec_iter = R_CAST(const __m256i*, dosage_mask_vec);
+- const __m256i m16 = {kMask0000FFFF, kMask0000FFFF, kMask0000FFFF, kMask0000FFFF};
++ const __m256i m16 = _mm256_set1_epi64x(kMask0000FFFF);
+ const __m256i all1 = _mm256_cmpeq_epi16(m16, m16);
+ uint64_t sum = 0;
+ for (uint32_t vecs_left = vec_ct; ; ) {
+@@ -2267,7 +2267,7 @@
+ uint64_t DosageUnsignedDotprod(const Dosage* dosage_vec0, const Dosage* dosage_vec1, uint32_t vec_ct) {
+ const __m256i* dosage_vvec0_iter = R_CAST(const __m256i*, dosage_vec0);
+ const __m256i* dosage_vvec1_iter = R_CAST(const __m256i*, dosage_vec1);
+- const __m256i m16 = {kMask0000FFFF, kMask0000FFFF, kMask0000FFFF, kMask0000FFFF};
++ const __m256i m16 = _mm256_set1_epi64x(kMask0000FFFF);
+ const __m256i all1 = _mm256_cmpeq_epi16(m16, m16);
+ uint64_t dotprod = 0;
+ for (uint32_t vecs_left = vec_ct; ; ) {
+@@ -2312,7 +2312,7 @@
+ uint64_t DosageUnsignedNomissDotprod(const Dosage* dosage_vec0, const Dosage* dosage_vec1, uint32_t vec_ct) {
+ const __m256i* dosage_vvec0_iter = R_CAST(const __m256i*, dosage_vec0);
+ const __m256i* dosage_vvec1_iter = R_CAST(const __m256i*, dosage_vec1);
+- const __m256i m16 = {kMask0000FFFF, kMask0000FFFF, kMask0000FFFF, kMask0000FFFF};
++ const __m256i m16 = _mm256_set1_epi64x(kMask0000FFFF);
+ uint64_t dotprod = 0;
+ for (uint32_t vecs_left = vec_ct; ; ) {
+ __m256i dotprod_lo = _mm256_setzero_si256();
+@@ -2350,8 +2350,8 @@
+ int64_t DosageSignedDotprod(const SDosage* dphase_delta0, const SDosage* dphase_delta1, uint32_t vec_ct) {
+ const __m256i* dphase_delta0_iter = R_CAST(const __m256i*, dphase_delta0);
+ const __m256i* dphase_delta1_iter = R_CAST(const __m256i*, dphase_delta1);
+- const __m256i m16 = {kMask0000FFFF, kMask0000FFFF, kMask0000FFFF, kMask0000FFFF};
+- const __m256i all_4096 = {0x1000100010001000LLU, 0x1000100010001000LLU, 0x1000100010001000LLU, 0x1000100010001000LLU};
++ const __m256i m16 = _mm256_set1_epi64x(kMask0000FFFF);
++ const __m256i all_4096 = _mm256_set1_epi16(UINT16_C(0x1000));
+ uint64_t dotprod = 0;
+ for (uint32_t vecs_left = vec_ct; ; ) {
+ __m256i dotprod_lo = _mm256_setzero_si256();
+@@ -2400,11 +2400,11 @@
+ void FillDosageUhet(const Dosage* dosage_vec, uint32_t dosagev_ct, Dosage* dosage_uhet) {
+ const __m128i* dosage_vvec_iter = R_CAST(const __m128i*, dosage_vec);
+ # if defined(__APPLE__) && ((!defined(__cplusplus)) || (__cplusplus < 201103L))
+- const __m128i all_n32768 = {0x8000800080008000LLU, 0x8000800080008000LLU};
+- const __m128i all_n16384 = {0xc000c000c000c000LLU, 0xc000c000c000c000LLU};
++ const __m128i all_n32768 = _mm_set1_epi16(UINT16_C(0x8000));
++ const __m128i all_n16384 = _mm_set1_epi16(UINT16_C(0xc000));
+ # else
+- const __m128i all_n32768 = {-0x7fff7fff7fff8000LL, -0x7fff7fff7fff8000LL};
+- const __m128i all_n16384 = {-0x3fff3fff3fff4000LL, -0x3fff3fff3fff4000LL};
++ const __m128i all_n32768 = _mm_set1_epi64x(INT64_C(-0x7fff7fff7fff8000));
++ const __m128i all_n16384 = _mm_set1_epi64x(INT64_C(-0x3fff3fff3fff4000));
+ # endif
+ const __m128i all0 = _mm_setzero_si128();
+ const __m128i all1 = _mm_cmpeq_epi16(all0, all0);
+@@ -2435,7 +2435,7 @@
+ uint64_t DenseDosageSum(const Dosage* dosage_vec, uint32_t vec_ct) {
+ // end of dosage_vec assumed to be missing-padded (0-padded also ok)
+ const __m128i* dosage_vvec_iter = R_CAST(const __m128i*, dosage_vec);
+- const __m128i m16 = {kMask0000FFFF, kMask0000FFFF};
++ const __m128i m16 = _mm_set1_epi64x(kMask0000FFFF);
+ const __m128i all1 = _mm_cmpeq_epi16(m16, m16);
+ uint64_t sum = 0;
+ for (uint32_t vecs_left = vec_ct; ; ) {
+@@ -2471,7 +2471,7 @@
+ // end of dosage_vec assumed to be missing-padded (0-padded also ok)
+ const __m128i* dosage_vvec_iter = R_CAST(const __m128i*, dosage_vec);
+ const __m128i* dosage_mask_vvec_iter = R_CAST(const __m128i*, dosage_mask_vec);
+- const __m128i m16 = {kMask0000FFFF, kMask0000FFFF};
++ const __m128i m16 = _mm_set1_epi64x(kMask0000FFFF);
+ const __m128i all1 = _mm_cmpeq_epi16(m16, m16);
+ uint64_t sum = 0;
+ for (uint32_t vecs_left = vec_ct; ; ) {
+@@ -2507,7 +2507,7 @@
+ uint64_t DosageUnsignedDotprod(const Dosage* dosage_vec0, const Dosage* dosage_vec1, uint32_t vec_ct) {
+ const __m128i* dosage_vvec0_iter = R_CAST(const __m128i*, dosage_vec0);
+ const __m128i* dosage_vvec1_iter = R_CAST(const __m128i*, dosage_vec1);
+- const __m128i m16 = {kMask0000FFFF, kMask0000FFFF};
++ const __m128i m16 = _mm_set1_epi64x(kMask0000FFFF);
+ const __m128i all1 = _mm_cmpeq_epi16(m16, m16);
+ uint64_t dotprod = 0;
+ for (uint32_t vecs_left = vec_ct; ; ) {
+@@ -2550,7 +2550,7 @@
+ uint64_t DosageUnsignedNomissDotprod(const Dosage* dosage_vec0, const Dosage* dosage_vec1, uint32_t vec_ct) {
+ const __m128i* dosage_vvec0_iter = R_CAST(const __m128i*, dosage_vec0);
+ const __m128i* dosage_vvec1_iter = R_CAST(const __m128i*, dosage_vec1);
+- const __m128i m16 = {kMask0000FFFF, kMask0000FFFF};
++ const __m128i m16 = _mm_set1_epi64x(kMask0000FFFF);
+ uint64_t dotprod = 0;
+ for (uint32_t vecs_left = vec_ct; ; ) {
+ __m128i dotprod_lo = _mm_setzero_si128();
+@@ -2588,8 +2588,8 @@
+ int64_t DosageSignedDotprod(const SDosage* dphase_delta0, const SDosage* dphase_delta1, uint32_t vec_ct) {
+ const __m128i* dphase_delta0_iter = R_CAST(const __m128i*, dphase_delta0);
+ const __m128i* dphase_delta1_iter = R_CAST(const __m128i*, dphase_delta1);
+- const __m128i m16 = {kMask0000FFFF, kMask0000FFFF};
+- const __m128i all_4096 = {0x1000100010001000LLU, 0x1000100010001000LLU};
++ const __m128i m16 = _mm_set1_epi64x(kMask0000FFFF);
++ const __m128i all_4096 = _mm_set1_epi16(UINT16_C(0x1000));
+ uint64_t dotprod = 0;
+ for (uint32_t vecs_left = vec_ct; ; ) {
+ __m128i dotprod_lo = _mm_setzero_si128();
=====================================
debian/patches/series
=====================================
@@ -1,2 +1,3 @@
+portable_m128i_setting
Fix_Makefile.patch
use_packaged_libdeflate.patch
=====================================
debian/patches/use_packaged_libdeflate.patch
=====================================
@@ -1,5 +1,6 @@
-Author: Michael R. Crusoe <michael.crusoe at gmail.com>
+Author: Michael R. Crusoe <crusoe at debian.org>
Description: Use the Debian packaged version of libdeflate
+Forwarded: not-needed
--- a/Makefile.src
+++ b/Makefile.src
@@ -5,7 +5,7 @@
=====================================
debian/rules
=====================================
@@ -5,6 +5,9 @@ ifeq (ppc64el,$(DEB_HOST_ARCH))
DEB_CFLAGS_MAINT_APPEND+=-mcpu=power8
DEB_CXXFLAGS_MAINT_APPEND+=-mcpu=power8
endif
+ifneq (,$(filter $(DEB_HOST_ARCH),armel mispel m68k powerpc))
+ DEB_LDFLAGS_MAINT_APPEND+=-latomic
+endif
export DEB_BUILD_MAINT_OPTIONS = hardening=+all
export DEB_CFLAGS_MAINT_APPEND+=-DSIMDE_ENABLE_OPENMP -fopenmp-simd -O3 -I/usr/include/simde
export DEB_CXXFLAGS_MAINT_APPEND+=-DSIMDE_ENABLE_OPENMP -fopenmp-simd -O3 -I/usr/include/simde
=====================================
plink2.cc
=====================================
@@ -70,7 +70,7 @@ static const char ver_str[] = "PLINK v2.00a3"
#ifdef USE_MKL
" Intel"
#endif
- " (28 Oct 2020)";
+ " (12 Dec 2020)";
static const char ver_str2[] =
// include leading space if day < 10, so character length stays the same
""
=====================================
plink2_cmdline.cc
=====================================
@@ -710,6 +710,15 @@ BoolErr bigstack_calloc_u64(uintptr_t ct, uint64_t** u64_arr_ptr) {
return 0;
}
+BoolErr bigstack_calloc_v(uintptr_t ct, VecW** v_arr_ptr) {
+ *v_arr_ptr = S_CAST(VecW*, bigstack_alloc(ct * kBytesPerVec));
+ if (unlikely(!(*v_arr_ptr))) {
+ return 1;
+ }
+ ZeroVecArr(ct, *v_arr_ptr);
+ return 0;
+}
+
BoolErr bigstack_calloc_cp(uintptr_t ct, char*** cp_arr_ptr) {
*cp_arr_ptr = S_CAST(char**, bigstack_alloc(ct * sizeof(intptr_t)));
if (unlikely(!(*cp_arr_ptr))) {
@@ -2005,6 +2014,24 @@ void CopyBitarrRange(const uintptr_t* __restrict src_bitarr, uintptr_t src_start
}
}
+void VerticalCounterUpdate(const uintptr_t* acc1, uint32_t acc1_vec_ct, uint32_t* rem15_and_255d15, VecW* acc4_8_32) {
+ VcountIncr1To4(acc1, acc1_vec_ct, acc4_8_32);
+ rem15_and_255d15[0] -= 1;
+ if (!rem15_and_255d15[0]) {
+ const uint32_t acc4_vec_ct = acc1_vec_ct * 4;
+ VecW* acc8 = &(acc4_8_32[acc4_vec_ct]);
+ Vcount0Incr4To8(acc4_vec_ct, acc4_8_32, acc8);
+ rem15_and_255d15[0] = 15;
+ rem15_and_255d15[1] -= 1;
+ if (!rem15_and_255d15[1]) {
+ const uint32_t acc8_vec_ct = acc4_vec_ct * 2;
+ VecW* acc32 = &(acc8[acc8_vec_ct]);
+ Vcount0Incr8To32(acc8_vec_ct, acc8, acc32);
+ rem15_and_255d15[1] = 17;
+ }
+ }
+}
+
// advances forward_ct set bits; forward_ct must be positive. (stays put if
// forward_ct == 1 and current bit is set. may want to tweak this interface,
// easy to introduce off-by-one bugs...)
=====================================
plink2_cmdline.h
=====================================
@@ -628,6 +628,8 @@ BoolErr bigstack_calloc_w(uintptr_t ct, uintptr_t** w_arr_ptr);
BoolErr bigstack_calloc_u64(uintptr_t ct, uint64_t** u64_arr_ptr);
+BoolErr bigstack_calloc_v(uintptr_t ct, VecW** v_arr_ptr);
+
BoolErr bigstack_calloc_cp(uintptr_t ct, char*** cp_arr_ptr);
BoolErr bigstack_calloc_kcp(uintptr_t ct, const char*** kcp_arr_ptr);
@@ -1652,6 +1654,8 @@ HEADER_INLINE void Vcount0Incr8To32(uint32_t acc8_vec_ct, VecW* acc8_iter, VecW*
}
}
+void VerticalCounterUpdate(const uintptr_t* acc1, uint32_t acc1_vec_ct, uint32_t* rem15_and_255d15, VecW* acc4_8_32);
+
// forward_ct must be positive. Stays put if forward_ct == 1 and current bit
// is set.
=====================================
plink2_export.cc
=====================================
@@ -9718,7 +9718,7 @@ PglErr Export012Smaj(const char* outname, const uintptr_t* orig_sample_include,
// (calc_thread_ct * kDosagePerCacheline * 2.375 + variant_ct * 2)
uintptr_t bytes_avail = bigstack_left();
// account for rounding
- const uintptr_t round_ceil = kCacheline + calc_thread_ct * (3 * kCacheline + kDosagePerCacheline * (2 * sizeof(intptr_t) + sizeof(Dosage)));
+ const uintptr_t round_ceil = kCacheline + calc_thread_ct * (3 * kCacheline + kDosagePerCacheline * (2 * sizeof(intptr_t)));
if (unlikely(bytes_avail < round_ceil)) {
goto Export012Smaj_ret_NOMEM;
}
=====================================
plink2_help.cc
=====================================
@@ -1370,9 +1370,10 @@ PglErr DispHelp(const char* const* argvk, uint32_t param_ct) {
" not suppressed.)\n"
);
HelpPrint("vcf\0bcf\0bgen\0double-id\0const-fid\0id-delim\0", &help_ctrl, 0,
-" --double-id : Set both FIDs and IIDs to the VCF/.bgen sample ID.\n"
-" --const-fid [ID] : Set all FIDs to the given constant. If '0' (the\n"
-" default), no FID column is created.\n"
+" --double-id : When importing single-part sample IDs, set both FID and\n"
+" IID to the original ID.\n"
+" --const-fid [ID] : When importing single-part sample IDs, set FID to the\n"
+" given constant and IID to the original ID.\n"
" --id-delim [d] : Normally parses single-delimiter sample IDs as\n"
" <FID><d><IID>, and double-delimiter IDs as\n"
" <FID><d><IID><d><SID>; default delimiter is '_'.\n"
=====================================
plink2_matrix_calc.cc
=====================================
@@ -5838,8 +5838,7 @@ PglErr ScoreReport(const uintptr_t* sample_include, const SampleIdInfo* siip, co
}
# endif
#endif
- if (unlikely((g_bigstack_base > g_bigstack_end) ||
- bigstack_end_alloc_u32(raw_variant_ctl, &variant_include_cumulative_popcounts) ||
+ if (unlikely(bigstack_end_alloc_u32(raw_variant_ctl, &variant_include_cumulative_popcounts) ||
bigstack_end_calloc_w(BitCtToWordCt(S_CAST(uint64_t, qsr_ct) * variant_ct), &qsr_include) ||
bigstack_end_alloc_cp(qsr_ct, &range_names))) {
goto ScoreReport_ret_NOMEM;
@@ -5980,7 +5979,7 @@ PglErr ScoreReport(const uintptr_t* sample_include, const SampleIdInfo* siip, co
goto ScoreReport_ret_TSTREAM_FAIL;
}
}
- uint32_t lines_to_skip_p1 = 1 + ((flags / kfScoreHeaderIgnore) & 1);
+ const uint32_t lines_to_skip_p1 = 1 + ((flags / kfScoreHeaderIgnore) & 1);
char* line_start;
for (uint32_t uii = 0; uii != lines_to_skip_p1; ++uii) {
++line_idx;
@@ -6124,13 +6123,20 @@ PglErr ScoreReport(const uintptr_t* sample_include, const SampleIdInfo* siip, co
raw_allele_ct = allele_idx_offsets[raw_variant_ct];
}
const uintptr_t raw_allele_ctl = BitCtToWordCt(raw_allele_ct);
+ const uintptr_t qsr_ct_nz = qsr_ct + (qsr_ct == 0);
uint32_t* sample_include_cumulative_popcounts = nullptr;
uintptr_t* sex_nonmale_collapsed = nullptr;
uintptr_t* genovec_buf = nullptr;
uintptr_t* dosage_present_buf = nullptr;
Dosage* dosage_main_buf = nullptr;
- uintptr_t* missing_acc1 = nullptr;
- uintptr_t* missing_male_acc1 = nullptr;
+ uintptr_t* missing_bitvec = nullptr;
+ uintptr_t* missing_male_bitvec = nullptr;
+ VecW* missing_accx = nullptr;
+ VecW* missing_male_accx = nullptr;
+ uint32_t* allele_ct_bases;
+ int32_t* male_allele_ct_deltas;
+ uint32_t* variant_ct_rems;
+ uint32_t* variant_hap_ct_rems;
uint64_t* ddosage_sums;
uint64_t* ddosage_incrs;
uintptr_t* already_seen_variants;
@@ -6141,15 +6147,20 @@ PglErr ScoreReport(const uintptr_t* sample_include, const SampleIdInfo* siip, co
bigstack_alloc_d(kScoreVariantBlockSize * score_final_col_ct, &(ctx.score_coefs_cmaj[0])) ||
bigstack_alloc_d(kScoreVariantBlockSize * score_final_col_ct, &(ctx.score_coefs_cmaj[1])) ||
bigstack_calloc_d(score_final_col_ct * sample_ct, &ctx.final_scores_cmaj) ||
- // bugfix (4 Nov 2017): need raw_sample_ctl here, not sample_ctl
bigstack_alloc_u32(raw_sample_ctl, &sample_include_cumulative_popcounts) ||
bigstack_alloc_w(sample_ctl, &sex_nonmale_collapsed) ||
bigstack_alloc_w(sample_ctl2, &genovec_buf) ||
bigstack_alloc_w(sample_ctl, &dosage_present_buf) ||
bigstack_alloc_dosage(sample_ct, &dosage_main_buf) ||
- bigstack_alloc_w(45 * acc1_vec_ct * kWordsPerVec, &missing_acc1) ||
- bigstack_alloc_w(45 * acc1_vec_ct * kWordsPerVec, &missing_male_acc1) ||
- bigstack_calloc_u64(sample_ct, &ddosage_sums) ||
+ bigstack_alloc_w(acc1_vec_ct * kWordsPerVec, &missing_bitvec) ||
+ bigstack_alloc_w(acc1_vec_ct * kWordsPerVec, &missing_male_bitvec) ||
+ bigstack_calloc_v(11 * acc4_vec_ct * qsr_ct_nz, &missing_accx) ||
+ bigstack_calloc_v(11 * acc4_vec_ct * qsr_ct_nz, &missing_male_accx) ||
+ bigstack_calloc_u32(qsr_ct_nz, &allele_ct_bases) ||
+ bigstack_calloc_i32(qsr_ct_nz, &male_allele_ct_deltas) ||
+ bigstack_alloc_u32(qsr_ct_nz * 2, &variant_ct_rems) ||
+ bigstack_alloc_u32(qsr_ct_nz * 2, &variant_hap_ct_rems) ||
+ bigstack_calloc_u64(qsr_ct_nz * sample_ct, &ddosage_sums) ||
bigstack_calloc_u64(sample_ct, &ddosage_incrs) ||
bigstack_calloc_w(raw_variant_ctl, &already_seen_variants) ||
bigstack_calloc_w(raw_allele_ctl, &already_seen_alleles) ||
@@ -6158,18 +6169,12 @@ PglErr ScoreReport(const uintptr_t* sample_include, const SampleIdInfo* siip, co
}
SetThreadFuncAndData(CalcScoreThread, &ctx, &tg);
- VecW* missing_diploid_acc4 = &(R_CAST(VecW*, missing_acc1)[acc1_vec_ct]);
- VecW* missing_diploid_acc8 = &(missing_diploid_acc4[acc4_vec_ct]);
- VecW* missing_diploid_acc32 = &(missing_diploid_acc8[acc8_vec_ct]);
- VecW* missing_haploid_acc4 = &(R_CAST(VecW*, missing_male_acc1)[acc1_vec_ct]);
- VecW* missing_haploid_acc8 = &(missing_haploid_acc4[acc4_vec_ct]);
- VecW* missing_haploid_acc32 = &(missing_haploid_acc8[acc8_vec_ct]);
- ZeroVecArr(acc4_vec_ct, missing_diploid_acc4);
- ZeroVecArr(acc8_vec_ct, missing_diploid_acc8);
- ZeroVecArr(acc8_vec_ct * 4, missing_diploid_acc32);
- ZeroVecArr(acc4_vec_ct, missing_haploid_acc4);
- ZeroVecArr(acc8_vec_ct, missing_haploid_acc8);
- ZeroVecArr(acc8_vec_ct * 4, missing_haploid_acc32);
+ for (uintptr_t qsr_idx = 0; qsr_idx != qsr_ct_nz; ++qsr_idx) {
+ variant_ct_rems[2 * qsr_idx] = 15;
+ variant_ct_rems[2 * qsr_idx + 1] = 17;
+ variant_hap_ct_rems[2 * qsr_idx] = 15;
+ variant_hap_ct_rems[2 * qsr_idx + 1] = 17;
+ }
FillCumulativePopcounts(sample_include, raw_sample_ctl, sample_include_cumulative_popcounts);
CopyBitarrSubset(sex_male, sample_include, sample_ct, sex_nonmale_collapsed);
AlignedBitarrInvert(sample_ct, sex_nonmale_collapsed);
@@ -6211,12 +6216,6 @@ PglErr ScoreReport(const uintptr_t* sample_include, const SampleIdInfo* siip, co
double geno_slope = kRecipDosageMax;
double geno_intercept = 0.0;
double cur_allele_freq = 0.0;
- uint32_t variant_ct_rem15 = 15;
- uint32_t variant_ct_rem255d15 = 17;
- uint32_t variant_hap_ct_rem15 = 15;
- uint32_t variant_hap_ct_rem255d15 = 17;
- uint32_t allele_ct_base = 0;
- int32_t male_allele_ct_delta = 0;
uint32_t valid_variant_ct = 0;
uintptr_t missing_var_id_ct = 0;
uintptr_t duplicated_var_id_ct = 0;
@@ -6325,9 +6324,9 @@ PglErr ScoreReport(const uintptr_t* sample_include, const SampleIdInfo* siip, co
const uint32_t is_y = (chr_idx == y_code);
ZeroTrailingNyps(sample_ct, genovec_buf);
- GenoarrToMissingnessUnsafe(genovec_buf, sample_ct, missing_acc1);
+ GenoarrToMissingnessUnsafe(genovec_buf, sample_ct, missing_bitvec);
if (dosage_ct) {
- BitvecInvmask(dosage_present_buf, sample_ctl, missing_acc1);
+ BitvecInvmask(dosage_present_buf, sample_ctl, missing_bitvec);
}
FillCurDdosageInts(genovec_buf, dosage_present_buf, dosage_main_buf, sample_ct, dosage_ct, 2 - is_nonx_haploid, ddosage_incrs);
double ploidy_d;
@@ -6339,25 +6338,47 @@ PglErr ScoreReport(const uintptr_t* sample_include, const SampleIdInfo* siip, co
const uintptr_t sample_idx = BitIter1(sex_nonmale_collapsed, &sample_idx_base, &sex_nonmale_collapsed_bits);
ddosage_incrs[sample_idx] = 0;
}
- male_allele_ct_delta += is_new_variant;
- BitvecInvmask(sex_nonmale_collapsed, sample_ctl, missing_acc1);
- } else {
- allele_ct_base += is_new_variant;
+ if (is_new_variant) {
+ if (!qsr_ct) {
+ male_allele_ct_deltas[0] += 1;
+ } else {
+ const uintptr_t bit_idx_base = RawToSubsettedPos(variant_include, variant_include_cumulative_popcounts, variant_uidx) * qsr_ct;
+ for (uintptr_t qsr_idx = 0; qsr_idx != qsr_ct; ++qsr_idx) {
+ if (IsSet(qsr_include, qsr_idx + bit_idx_base)) {
+ male_allele_ct_deltas[qsr_idx] += 1;
+ }
+ }
+ }
+ }
+ BitvecInvmask(sex_nonmale_collapsed, sample_ctl, missing_bitvec);
+ } else if (is_new_variant) {
+ if (!qsr_ct) {
+ allele_ct_bases[0] += 1;
+ } else {
+ const uintptr_t bit_idx_base = RawToSubsettedPos(variant_include, variant_include_cumulative_popcounts, variant_uidx) * qsr_ct;
+ for (uintptr_t qsr_idx = 0; qsr_idx != qsr_ct; ++qsr_idx) {
+ if (IsSet(qsr_include, qsr_idx + bit_idx_base)) {
+ allele_ct_bases[qsr_idx] += 1;
+ }
+ }
+ }
}
if (is_new_variant) {
- VcountIncr1To4(missing_acc1, acc1_vec_ct, missing_haploid_acc4);
- if (!(--variant_hap_ct_rem15)) {
- Vcount0Incr4To8(acc4_vec_ct, missing_haploid_acc4, missing_haploid_acc8);
- variant_hap_ct_rem15 = 15;
- if (!(--variant_hap_ct_rem255d15)) {
- Vcount0Incr8To32(acc8_vec_ct, missing_haploid_acc8, missing_haploid_acc32);
- variant_hap_ct_rem255d15 = 17;
+ if (!qsr_ct) {
+ VerticalCounterUpdate(missing_bitvec, acc1_vec_ct, variant_hap_ct_rems, missing_male_accx);
+ } else {
+ const uintptr_t bit_idx_base = RawToSubsettedPos(variant_include, variant_include_cumulative_popcounts, variant_uidx) * qsr_ct;
+ for (uintptr_t qsr_idx = 0; qsr_idx != qsr_ct; ++qsr_idx) {
+ if (IsSet(qsr_include, qsr_idx + bit_idx_base)) {
+ allele_ct_bases[qsr_idx] += 2;
+ VerticalCounterUpdate(missing_bitvec, acc1_vec_ct, &(variant_ct_rems[2 * qsr_idx]), &(missing_male_accx[acc4_vec_ct * 11 * qsr_idx]));
+ }
}
}
}
if (is_y) {
- memcpy(missing_male_acc1, missing_acc1, sample_ctl * sizeof(intptr_t));
- BitvecOr(sex_nonmale_collapsed, sample_ctl, missing_acc1);
+ memcpy(missing_male_bitvec, missing_bitvec, sample_ctl * sizeof(intptr_t));
+ BitvecOr(sex_nonmale_collapsed, sample_ctl, missing_bitvec);
}
ploidy_d = 1.0;
} else {
@@ -6368,35 +6389,39 @@ PglErr ScoreReport(const uintptr_t* sample_include, const SampleIdInfo* siip, co
const uintptr_t sample_idx = BitIter0(sex_nonmale_collapsed, &sample_idx_base, &sex_nonmale_collapsed_inv_bits);
ddosage_incrs[sample_idx] /= 2;
}
- BitvecInvmaskCopy(missing_acc1, sex_nonmale_collapsed, sample_ctl, missing_male_acc1);
- BitvecAnd(sex_nonmale_collapsed, sample_ctl, missing_acc1);
+ BitvecInvmaskCopy(missing_bitvec, sex_nonmale_collapsed, sample_ctl, missing_male_bitvec);
+ BitvecAnd(sex_nonmale_collapsed, sample_ctl, missing_bitvec);
}
if (is_new_variant) {
- VcountIncr1To4(missing_acc1, acc1_vec_ct, missing_diploid_acc4);
- if (!(--variant_ct_rem15)) {
- Vcount0Incr4To8(acc4_vec_ct, missing_diploid_acc4, missing_diploid_acc8);
- variant_ct_rem15 = 15;
- if (!(--variant_ct_rem255d15)) {
- Vcount0Incr8To32(acc8_vec_ct, missing_diploid_acc8, missing_diploid_acc32);
- variant_ct_rem255d15 = 17;
+ if (!qsr_ct) {
+ allele_ct_bases[0] += 2;
+ VerticalCounterUpdate(missing_bitvec, acc1_vec_ct, variant_ct_rems, missing_accx);
+ } else {
+ const uintptr_t bit_idx_base = RawToSubsettedPos(variant_include, variant_include_cumulative_popcounts, variant_uidx) * qsr_ct;
+ for (uintptr_t qsr_idx = 0; qsr_idx != qsr_ct; ++qsr_idx) {
+ if (IsSet(qsr_include, qsr_idx + bit_idx_base)) {
+ allele_ct_bases[qsr_idx] += 2;
+ VerticalCounterUpdate(missing_bitvec, acc1_vec_ct, &(variant_ct_rems[2 * qsr_idx]), &(missing_accx[acc4_vec_ct * 11 * qsr_idx]));
+ }
}
}
- allele_ct_base += 2;
}
if (is_relevant_x) {
if (is_new_variant) {
- --male_allele_ct_delta;
- VcountIncr1To4(missing_male_acc1, acc1_vec_ct, missing_haploid_acc4);
- if (!(--variant_hap_ct_rem15)) {
- Vcount0Incr4To8(acc4_vec_ct, missing_haploid_acc4, missing_haploid_acc8);
- variant_hap_ct_rem15 = 15;
- if (!(--variant_hap_ct_rem255d15)) {
- Vcount0Incr8To32(acc8_vec_ct, missing_haploid_acc8, missing_haploid_acc32);
- variant_hap_ct_rem255d15 = 17;
+ if (!qsr_ct) {
+ male_allele_ct_deltas[0] -= 1;
+ VerticalCounterUpdate(missing_male_bitvec, acc1_vec_ct, variant_hap_ct_rems, missing_male_accx);
+ } else {
+ const uintptr_t bit_idx_base = RawToSubsettedPos(variant_include, variant_include_cumulative_popcounts, variant_uidx) * qsr_ct;
+ for (uintptr_t qsr_idx = 0; qsr_idx != qsr_ct; ++qsr_idx) {
+ if (IsSet(qsr_include, qsr_idx + bit_idx_base)) {
+ male_allele_ct_deltas[qsr_idx] -= 1;
+ VerticalCounterUpdate(missing_male_bitvec, acc1_vec_ct, &(variant_hap_ct_rems[2 * qsr_idx]), &(missing_male_accx[acc4_vec_ct * 11 * qsr_idx]));
+ }
}
}
}
- BitvecOr(missing_male_acc1, sample_ctl, missing_acc1);
+ BitvecOr(missing_male_bitvec, sample_ctl, missing_bitvec);
}
if (!domrec) {
ploidy_d = 2.0;
@@ -6421,8 +6446,20 @@ PglErr ScoreReport(const uintptr_t* sample_include, const SampleIdInfo* siip, co
ploidy_d = 1.0;
}
}
- for (uint32_t sample_idx = 0; sample_idx != sample_ct; ++sample_idx) {
- ddosage_sums[sample_idx] += ddosage_incrs[sample_idx];
+ if (!qsr_ct) {
+ for (uint32_t sample_idx = 0; sample_idx != sample_ct; ++sample_idx) {
+ ddosage_sums[sample_idx] += ddosage_incrs[sample_idx];
+ }
+ } else {
+ const uintptr_t bit_idx_base = RawToSubsettedPos(variant_include, variant_include_cumulative_popcounts, variant_uidx) * qsr_ct;
+ for (uintptr_t qsr_idx = 0; qsr_idx != qsr_ct; ++qsr_idx) {
+ if (IsSet(qsr_include, qsr_idx + bit_idx_base)) {
+ uint64_t* cur_ddosage_sums = &(ddosage_sums[qsr_idx * sample_ct]);
+ for (uint32_t sample_idx = 0; sample_idx != sample_ct; ++sample_idx) {
+ cur_ddosage_sums[sample_idx] += ddosage_incrs[sample_idx];
+ }
+ }
+ }
}
if (allele_freqs) {
cur_allele_freq = GetAlleleFreq(&(allele_freqs[allele_idx_offset_base - variant_uidx]), cur_allele_idx, cur_allele_ct);
@@ -6449,7 +6486,7 @@ PglErr ScoreReport(const uintptr_t* sample_include, const SampleIdInfo* siip, co
// wraparound
geno_intercept = (-1.0 * kDosageMax) * ploidy_d * cur_allele_freq * geno_slope;
}
- const uint32_t missing_ct = PopcountWords(missing_acc1, sample_ctl);
+ const uint32_t missing_ct = PopcountWords(missing_bitvec, sample_ctl);
const uint32_t nm_sample_ct = sample_ct - missing_ct;
if (missing_ct) {
double missing_effect = 0.0;
@@ -6460,40 +6497,40 @@ PglErr ScoreReport(const uintptr_t* sample_include, const SampleIdInfo* siip, co
if (is_y || is_relevant_x) {
ZeroDArr(sample_ct, cur_dosages_vmaj_iter);
if (!no_meanimpute) {
- const uint32_t male_missing_ct = PopcountWords(missing_male_acc1, sample_ctl);
- uintptr_t missing_male_acc1_bits = missing_male_acc1[0];
+ const uint32_t male_missing_ct = PopcountWords(missing_male_bitvec, sample_ctl);
+ uintptr_t missing_male_bits = missing_male_bitvec[0];
for (uint32_t male_missing_idx = 0; male_missing_idx != male_missing_ct; ++male_missing_idx) {
- const uintptr_t sample_idx = BitIter1(missing_male_acc1, &sample_idx_base, &missing_male_acc1_bits);
+ const uintptr_t sample_idx = BitIter1(missing_male_bitvec, &sample_idx_base, &missing_male_bits);
cur_dosages_vmaj_iter[sample_idx] = missing_effect;
}
if (is_relevant_x) {
- // missing_male_acc1 not used after this point, so okay to
+ // missing_male_bitvec not used after this point, so okay to
// use buffer for nonmales
- BitvecAndCopy(missing_acc1, sex_nonmale_collapsed, sample_ctl, missing_male_acc1);
+ BitvecAndCopy(missing_bitvec, sex_nonmale_collapsed, sample_ctl, missing_male_bitvec);
missing_effect *= 2;
// bugfix (8 Jul 2018): need to reset sample_idx
sample_idx_base = 0;
- missing_male_acc1_bits = missing_male_acc1[0];
- const uint32_t nonmale_missing_ct = PopcountWords(missing_male_acc1, sample_ctl);
+ missing_male_bits = missing_male_bitvec[0];
+ const uint32_t nonmale_missing_ct = PopcountWords(missing_male_bitvec, sample_ctl);
for (uint32_t nonmale_missing_idx = 0; nonmale_missing_idx != nonmale_missing_ct; ++nonmale_missing_idx) {
- const uintptr_t sample_idx = BitIter1(missing_male_acc1, &sample_idx_base, &missing_male_acc1_bits);
+ const uintptr_t sample_idx = BitIter1(missing_male_bitvec, &sample_idx_base, &missing_male_bits);
cur_dosages_vmaj_iter[sample_idx] = missing_effect;
}
}
}
} else {
missing_effect *= ploidy_d;
- uintptr_t missing_acc1_bits = missing_acc1[0];
+ uintptr_t missing_bits = missing_bitvec[0];
for (uint32_t missing_idx = 0; missing_idx != missing_ct; ++missing_idx) {
- const uintptr_t sample_idx = BitIter1(missing_acc1, &sample_idx_base, &missing_acc1_bits);
+ const uintptr_t sample_idx = BitIter1(missing_bitvec, &sample_idx_base, &missing_bits);
cur_dosages_vmaj_iter[sample_idx] = missing_effect;
}
}
}
uintptr_t sample_idx_base = 0;
- uintptr_t missing_acc1_inv_bits = ~missing_acc1[0];
+ uintptr_t missing_inv_bits = ~missing_bitvec[0];
for (uint32_t nm_sample_idx = 0; nm_sample_idx != nm_sample_ct; ++nm_sample_idx) {
- const uintptr_t sample_idx = BitIter0(missing_acc1, &sample_idx_base, &missing_acc1_inv_bits);
+ const uintptr_t sample_idx = BitIter0(missing_bitvec, &sample_idx_base, &missing_inv_bits);
cur_dosages_vmaj_iter[sample_idx] = u63tod(ddosage_incrs[sample_idx]) * geno_slope + geno_intercept;
}
if (se_mode) {
@@ -6533,7 +6570,7 @@ PglErr ScoreReport(const uintptr_t* sample_include, const SampleIdInfo* siip, co
cur_score_coefs_iter = &(cur_score_coefs_iter[kScoreVariantBlockSize]);
} else {
const uintptr_t bit_idx_base = RawToSubsettedPos(variant_include, variant_include_cumulative_popcounts, variant_uidx) * qsr_ct;
- for (uint32_t qsr_idx = 0; qsr_idx != qsr_ct; ++qsr_idx) {
+ for (uintptr_t qsr_idx = 0; qsr_idx != qsr_ct; ++qsr_idx) {
double cur_coef = raw_coef * u31tod(IsSet(qsr_include, qsr_idx + bit_idx_base));
*cur_score_coefs_iter = cur_coef;
cur_score_coefs_iter = &(cur_score_coefs_iter[kScoreVariantBlockSize]);
@@ -6579,10 +6616,18 @@ PglErr ScoreReport(const uintptr_t* sample_include, const SampleIdInfo* siip, co
if (unlikely(TextStreamErrcode2(&score_txs, &reterr))) {
goto ScoreReport_ret_TSTREAM_FAIL;
}
- VcountIncr4To8(missing_diploid_acc4, acc4_vec_ct, missing_diploid_acc8);
- VcountIncr8To32(missing_diploid_acc8, acc8_vec_ct, missing_diploid_acc32);
- VcountIncr4To8(missing_haploid_acc4, acc4_vec_ct, missing_haploid_acc8);
- VcountIncr8To32(missing_haploid_acc8, acc8_vec_ct, missing_haploid_acc32);
+ for (uintptr_t qsr_idx = 0; qsr_idx != qsr_ct_nz; ++qsr_idx) {
+ VecW* missing_diploid_acc4 = &(missing_accx[acc4_vec_ct * 11 * qsr_idx]);
+ VecW* missing_diploid_acc8 = &(missing_diploid_acc4[acc4_vec_ct]);
+ VecW* missing_diploid_acc32 = &(missing_diploid_acc8[acc8_vec_ct]);
+ VecW* missing_haploid_acc4 = &(missing_male_accx[acc4_vec_ct * 11 * qsr_idx]);
+ VecW* missing_haploid_acc8 = &(missing_haploid_acc4[acc4_vec_ct]);
+ VecW* missing_haploid_acc32 = &(missing_haploid_acc8[acc8_vec_ct]);
+ VcountIncr4To8(missing_diploid_acc4, acc4_vec_ct, missing_diploid_acc8);
+ VcountIncr8To32(missing_diploid_acc8, acc8_vec_ct, missing_diploid_acc32);
+ VcountIncr4To8(missing_haploid_acc4, acc4_vec_ct, missing_haploid_acc8);
+ VcountIncr8To32(missing_haploid_acc8, acc8_vec_ct, missing_haploid_acc32);
+ }
const uint32_t is_not_first_block = ThreadsAreActive(&tg);
putc_unlocked('\r', stdout);
if (missing_var_id_ct || missing_allele_code_ct || duplicated_var_id_ct) {
@@ -6644,8 +6689,7 @@ PglErr ScoreReport(const uintptr_t* sample_include, const SampleIdInfo* siip, co
logprintf("Variant list written to %s .\n", outname);
}
- const uint32_t qsr_ct_nz = qsr_ct + (qsr_ct == 0);
- for (uint32_t qsr_idx = 0; qsr_idx != qsr_ct_nz; ++qsr_idx) {
+ for (uintptr_t qsr_idx = 0; qsr_idx != qsr_ct_nz; ++qsr_idx) {
char* outname_end2 = outname_end;
if (range_names) {
*outname_end2++ = '.';
@@ -6720,8 +6764,8 @@ PglErr ScoreReport(const uintptr_t* sample_include, const SampleIdInfo* siip, co
}
}
AppendBinaryEoln(&cswritep);
- const uint32_t* scrambled_missing_diploid_cts = R_CAST(uint32_t*, missing_diploid_acc32);
- const uint32_t* scrambled_missing_haploid_cts = R_CAST(uint32_t*, missing_haploid_acc32);
+ const uint32_t* scrambled_missing_diploid_cts = &(R_CAST(uint32_t*, missing_accx)[acc4_vec_ct * (3 + 11 * qsr_idx) * kInt32PerVec]);
+ const uint32_t* scrambled_missing_haploid_cts = &(R_CAST(uint32_t*, missing_male_accx)[acc4_vec_ct * (3 + 11 * qsr_idx) * kInt32PerVec]);
const char* output_missing_pheno = g_output_missing_pheno;
const uint32_t omp_slen = strlen(output_missing_pheno);
@@ -6769,7 +6813,7 @@ PglErr ScoreReport(const uintptr_t* sample_include, const SampleIdInfo* siip, co
cswritep = memcpya(cswritep, output_missing_pheno, omp_slen);
}
const uint32_t scrambled_idx = VcountScramble1(sample_idx);
- uint32_t denom = allele_ct_base + IsSet(sex_male, sample_uidx) * male_allele_ct_delta;
+ uint32_t denom = allele_ct_bases[qsr_idx] + IsSet(sex_male, sample_uidx) * male_allele_ct_deltas[qsr_idx];
const uint32_t nallele = denom - 2 * scrambled_missing_diploid_cts[scrambled_idx] - scrambled_missing_haploid_cts[scrambled_idx];
if (write_nallele) {
*cswritep++ = '\t';
@@ -6784,7 +6828,7 @@ PglErr ScoreReport(const uintptr_t* sample_include, const SampleIdInfo* siip, co
}
if (write_dosage_sum) {
*cswritep++ = '\t';
- cswritep = ddosagetoa(ddosage_sums[sample_idx], cswritep);
+ cswritep = ddosagetoa(ddosage_sums[sample_idx + qsr_idx * sample_ct], cswritep);
}
const double* final_score_col = &(ctx.final_scores_cmaj[sample_idx]);
if (write_score_avgs) {
View it on GitLab: https://salsa.debian.org/med-team/plink2/-/compare/9e04356413eb459b597b974f6875615553fe83c6...d108abe68ae88e372b8b912d48267ffe697d5946
--
View it on GitLab: https://salsa.debian.org/med-team/plink2/-/compare/9e04356413eb459b597b974f6875615553fe83c6...d108abe68ae88e372b8b912d48267ffe697d5946
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20201226/5afb6af8/attachment-0001.html>
More information about the debian-med-commit
mailing list