[med-svn] [Git][med-team/minimac4][master] 13 commits: libomp-jonathonl-dev is available
Andreas Tille (@tille)
gitlab at salsa.debian.org
Tue Jan 24 10:51:58 GMT 2023
Andreas Tille pushed to branch master at Debian Med / minimac4
Commits:
9106c447 by Andreas Tille at 2023-01-18T16:47:42+01:00
libomp-jonathonl-dev is available
- - - - -
aed71b83 by Andreas Tille at 2023-01-18T16:48:01+01:00
routine-update: New upstream version
- - - - -
d69e258f by Andreas Tille at 2023-01-18T16:48:01+01:00
New upstream version 4.1.1
- - - - -
65cd3722 by Andreas Tille at 2023-01-18T16:48:02+01:00
Update upstream source from tag 'upstream/4.1.1'
Update to upstream version '4.1.1'
with Debian dir 0a702b21994bb0cd0ca11ff4c1396d1758104c6d
- - - - -
e9ecd785 by Andreas Tille at 2023-01-18T16:48:02+01:00
routine-update: Standards-Version: 4.6.2
- - - - -
0d99c95c by Andreas Tille at 2023-01-18T16:54:51+01:00
Rename autopkgtest script to run-unit-test
- - - - -
366058e0 by Andreas Tille at 2023-01-24T11:12:59+01:00
First attempt to fix autopkgtest
- - - - -
5889050f by Andreas Tille at 2023-01-24T11:13:31+01:00
New upstream version 4.1.2
- - - - -
7ba66c5c by Andreas Tille at 2023-01-24T11:13:31+01:00
routine-update: New upstream version
- - - - -
ae10eea2 by Andreas Tille at 2023-01-24T11:13:32+01:00
Update upstream source from tag 'upstream/4.1.2'
Update to upstream version '4.1.2'
with Debian dir b2b49836d0f2b898560d4d2adc7a90ae27f9ded8
- - - - -
824fbab8 by Andreas Tille at 2023-01-24T11:43:52+01:00
Refresh patches
- - - - -
32e85434 by Andreas Tille at 2023-01-24T11:47:27+01:00
Switch on build tests provided upstream
- - - - -
9566da07 by Andreas Tille at 2023-01-24T11:51:06+01:00
[skip ci] Build-Depends bcftools needed for build tests
- - - - -
24 changed files:
- + .github/workflows/build_and_test.yml
- CMakeLists.txt
- README.md
- debian/changelog
- debian/control
- debian/patches/lzma.patch
- debian/rules
- debian/tests/control
- debian/tests/run-sample-analysis → debian/tests/run-unit-test
- package-linux.sh
- requirements.txt
- src/dosage_writer.cpp
- src/dosage_writer.hpp
- src/hidden_markov_model.cpp
- src/input_prep.cpp
- src/prog_args.hpp
- src/unique_haplotype.cpp
- src/unique_haplotype.hpp
- src/variant.hpp
- + test/input/ref_panel.vcf
- + test/input/ref_panel_with_haploid_sample.vcf
- + test/input/tar_panel.vcf
- + test/input/tar_panel_with_haploid_sample.vcf
- + test/simple-test.sh
Changes:
=====================================
.github/workflows/build_and_test.yml
=====================================
@@ -0,0 +1,27 @@
+name: build and run tests
+on:
+ push:
+ branches: [ master ]
+ pull_request:
+ branches: [ master ]
+jobs:
+ build:
+ runs-on: ubuntu-20.04
+ steps:
+ - uses: actions/checkout at v2
+ - name: build and test
+ shell: bash
+ run: |
+ set -euo pipefail
+ sudo apt update
+ sudo apt install -y build-essential bcftools cmake git gzip python3-dev python3-pip python3-setuptools
+ pip3 --version
+ pip3 install wheel
+ pip3 install cget
+ cget ignore xz
+ cget install -f ./requirements.txt
+ cmake --version
+ mkdir build && cd build
+ cmake -DBUILD_TESTS=1 -DCMAKE_TOOLCHAIN_FILE=../cget/cget/cget.cmake -DCMAKE_BUILD_TYPE=Debug ..
+ make
+ make CTEST_OUTPUT_ON_FAILURE=1 test
=====================================
CMakeLists.txt
=====================================
@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.2)
-project(minimac4 VERSION 4.1.0)
+project(minimac4 VERSION 4.1.2)
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose build type (Debug|Release|RelWithDebInfo|MinSizeRel)" FORCE)
@@ -32,6 +32,12 @@ add_executable(minimac4
target_link_libraries(minimac4 savvy ${OMPP_LIBRARY} Threads::Threads)
+if(BUILD_TESTS)
+ enable_testing()
+ add_test(simple_test ${CMAKE_SOURCE_DIR}/test/simple-test.sh ${CMAKE_BINARY_DIR}/minimac4 m4_simple_test_output ${CMAKE_SOURCE_DIR}/test/input/ref_panel.vcf ${CMAKE_SOURCE_DIR}/test/input/tar_panel.vcf)
+ add_test(haploid_and_diploid_test ${CMAKE_SOURCE_DIR}/test/simple-test.sh ${CMAKE_BINARY_DIR}/minimac4 m4_haploid_and_diploid_test_output ${CMAKE_SOURCE_DIR}/test/input/ref_panel_with_haploid_sample.vcf ${CMAKE_SOURCE_DIR}/test/input/tar_panel_with_haploid_sample.vcf)
+endif()
+
install(TARGETS minimac4 RUNTIME DESTINATION bin)
set(CPACK_PACKAGE_VERSION_MAJOR ${PROJECT_VERSION_MAJOR})
=====================================
README.md
=====================================
@@ -25,6 +25,15 @@ make # Build.
make install # Install
```
+To build and run tests from build directory:
+```bash
+# bcftools is required to run tests
+cmake -DCMAKE_TOOLCHAIN_FILE=../cget/cget/cget.cmake -DBUILD_TESTS=ON ..
+make
+make CTEST_OUTPUT_ON_FAILURE=1 test
+```
+
+
## Usage
See `minimac4 --help` for detailed usage.
=====================================
debian/changelog
=====================================
@@ -1,9 +1,12 @@
-minimac4 (4.1.0-1) UNRELEASED; urgency=medium
+minimac4 (4.1.2-1) UNRELEASED; urgency=medium
* Team upload.
* New upstream version
* Build-Depends: libsavvy-dev, libomp-jonathonl-dev, libzstd-dev
- TODO: https://github.com/jonathonl/omp
+ * Standards-Version: 4.6.2 (routine-update)
+ * Rename autopkgtest script to run-unit-test
+ * Switch on build tests provided upstream
+ * Build-Depends bcftools needed for build tests
-- Andreas Tille <tille at debian.org> Thu, 22 Sep 2022 09:28:59 +0200
=====================================
debian/control
=====================================
@@ -10,10 +10,11 @@ Build-Depends: debhelper-compat (= 13),
libstatgen-dev,
libshrinkwrap-dev,
libsavvy-dev,
- libomp-jonathonl-dev (>= 0.0+git20211216.5228495),
+ libomp-jonathonl-dev,
liblzma-dev,
- libzstd-dev
-Standards-Version: 4.6.1
+ libzstd-dev,
+ bcftools <!nocheck>
+Standards-Version: 4.6.2
Vcs-Browser: https://salsa.debian.org/med-team/minimac4
Vcs-Git: https://salsa.debian.org/med-team/minimac4.git
Homepage: https://genome.sph.umich.edu/wiki/Minimac4
=====================================
debian/patches/lzma.patch
=====================================
@@ -11,5 +11,5 @@ Description: Make sure lzma can be linked
-target_link_libraries(minimac4 savvy ${OMPP_LIBRARY} Threads::Threads)
+target_link_libraries(minimac4 savvy ${OMPP_LIBRARY} lzma Threads::Threads)
- install(TARGETS minimac4 RUNTIME DESTINATION bin)
-
+ if(BUILD_TESTS)
+ enable_testing()
=====================================
debian/rules
=====================================
@@ -6,3 +6,6 @@ export DEB_CPPFLAGS_MAINT_APPEND = -I/usr/include/libStatGen
%:
dh $@
+
+override_dh_auto_configure:
+ dh_auto_configure -- -DBUILD_TESTS=ON
=====================================
debian/tests/control
=====================================
@@ -1,2 +1,2 @@
-Tests: run-sample-analysis
+Tests: run-unit-test
Depends: @
=====================================
debian/tests/run-sample-analysis → debian/tests/run-unit-test
=====================================
@@ -16,11 +16,17 @@ cp -a /usr/share/doc/${pkg}/examples/* $AUTOPKGTEST_TMP
gzip refPanel.m3vcf
gzip targetStudy.vcf
+set -x
+# asked for help at: https://github.com/statgen/Minimac4/issues/54
+
+minimac4 --update-m3vcf refPanel.m3vcf.gz > refPanel.msav
+
minimac4 \
- --refHaps refPanel.m3vcf.gz \
+ --refHaps refPanel.msav \
--haps targetStudy.vcf.gz \
- --prefix testRun
+ --output testRun
+exit
zcat testRun.dose.vcf.gz > testRun.dose.vcf
# Remove not reproducible headers
=====================================
package-linux.sh
=====================================
@@ -12,6 +12,7 @@ cd ${build_dir}
export CFLAGS="-fPIC"
export CXXFLAGS="-fPIC"
+cget ignore xz
cget install zlib,http://zlib.net/fossils/zlib-1.2.11.tar.gz
rm cget/lib/libz.so
cget install -f ${src_dir}/requirements.txt
=====================================
requirements.txt
=====================================
@@ -1,2 +1,2 @@
statgen/savvy at v2.1.0
-jonathonl/omp at 522849581eb900a8bce9621a557ef1cadf600441
+jonathonl/omp at v1.0.0
=====================================
src/dosage_writer.cpp
=====================================
@@ -345,6 +345,32 @@ bool dosage_writer::has_good_r2(savvy::site_info& site)
return true;
}
+bool dosage_writer::mean_impute(savvy::compressed_vector<float>& sparse_dosages)
+{
+ std::int64_t n = sparse_dosages.size();
+ float s = 0.f;
+ for (auto it = sparse_dosages.begin(); it != sparse_dosages.end(); ++it)
+ {
+ if (savvy::typed_value::is_special_value(*it))
+ --n;
+ else
+ s += *it;
+ }
+
+ if (n > 0)
+ {
+ float mean = s / n;
+ for (auto it = sparse_dosages.begin(); it != sparse_dosages.end(); ++it)
+ {
+ if (savvy::typed_value::is_missing(*it))
+ *it = mean;
+ }
+ return true;
+ }
+
+ return false;
+}
+
bool dosage_writer::write_dosages(const full_dosages_results& hmm_results, const std::vector<target_variant>& tar_variants, const std::vector<target_variant>& tar_only_variants, std::pair<std::size_t, std::size_t> observed_range, const reduced_haplotypes& full_reference_data, const savvy::region& impute_region)
{
assert(hmm_results.dimensions()[0] == full_reference_data.variant_size());
@@ -378,21 +404,25 @@ bool dosage_writer::write_dosages(const full_dosages_results& hmm_results, const
while(tar_only_it != tar_only_variants.end() && tar_only_it->pos <= ref_it->pos)
{
- out_var = savvy::site_info(tar_only_it->chrom, tar_only_it->pos, tar_only_it->ref, {tar_only_it->alt}, ""/*ref_var.id()*/); // TODO: ID
+ out_var = savvy::site_info(tar_only_it->chrom, tar_only_it->pos, tar_only_it->ref, {tar_only_it->alt}, tar_only_it->id);
std::vector<std::int8_t> observed(tar_only_it->gt.begin() + observed_range.first, tar_only_it->gt.begin() + observed_range.second);
- sparse_dosages.assign(observed.begin(), observed.end());
- set_info_fields(out_var, sparse_dosages, {}, observed);
- if (has_good_r2(out_var))
+ sparse_dosages.assign(observed.begin(), observed.end(), savvy::typed_value::reserved_transformation_functor<float>());
+
+ if (mean_impute(sparse_dosages))
{
- if (sites_out_file_)
+ set_info_fields(out_var, sparse_dosages, {}, observed);
+ if (has_good_r2(out_var))
{
- savvy::variant site_var;
- dynamic_cast<savvy::site_info&>(site_var) = out_var;
- sites_out_file_->write(site_var);
- }
+ if (sites_out_file_)
+ {
+ savvy::variant site_var;
+ dynamic_cast<savvy::site_info&>(site_var) = out_var;
+ sites_out_file_->write(site_var);
+ }
- set_format_fields(out_var, sparse_dosages);
- out_file_ << out_var;
+ set_format_fields(out_var, sparse_dosages);
+ out_file_ << out_var;
+ }
}
++tar_only_it;
}
@@ -407,7 +437,7 @@ bool dosage_writer::write_dosages(const full_dosages_results& hmm_results, const
// loo_dosages[j] = float(std::int16_t(hmm_results.loo_dosage(tar_idx, j) * bin_scalar_ + 0.5f)) / bin_scalar_;
// }
- out_var = savvy::site_info(ref_it->chrom, ref_it->pos, ref_it->ref, {ref_it->alt}, ""/*ref_var.id()*/); // TODO: ID
+ out_var = savvy::site_info(ref_it->chrom, ref_it->pos, ref_it->ref, {ref_it->alt}, ref_it->id);
assert(!std::isnan(hmm_results.dosages_[i][0]));
sparse_dosages.assign(hmm_results.dosages_[i].begin(), hmm_results.dosages_[i].end());
if (ref_matches_tar)
@@ -418,7 +448,7 @@ bool dosage_writer::write_dosages(const full_dosages_results& hmm_results, const
if (emp_out_file_ && has_good_r2(out_var))
{
- out_var_emp = savvy::site_info(ref_it->chrom, ref_it->pos, ref_it->ref, {ref_it->alt}, ""/*ref_var.id()*/);
+ out_var_emp = savvy::site_info(ref_it->chrom, ref_it->pos, ref_it->ref, {ref_it->alt}, ref_it->id);
out_var_emp.set_info("TYPED", std::vector<std::int8_t>());
out_var_emp.set_info("IMPUTED", std::vector<std::int8_t>());
out_var_emp.set_format("GT", observed);
@@ -452,21 +482,25 @@ bool dosage_writer::write_dosages(const full_dosages_results& hmm_results, const
while(tar_only_it != tar_only_variants.end() && tar_only_it->pos <= impute_region.to())
{
- out_var = savvy::site_info(tar_only_it->chrom, tar_only_it->pos, tar_only_it->ref, {tar_only_it->alt}, ""/*ref_var.id()*/); // TODO: ID
+ out_var = savvy::site_info(tar_only_it->chrom, tar_only_it->pos, tar_only_it->ref, {tar_only_it->alt}, tar_only_it->id);
std::vector<std::int8_t> observed(tar_only_it->gt.begin() + observed_range.first, tar_only_it->gt.begin() + observed_range.second);
- sparse_dosages.assign(observed.begin(), observed.end());
- set_info_fields(out_var, sparse_dosages, {}, observed);
- if (has_good_r2(out_var))
+ sparse_dosages.assign(observed.begin(), observed.end(), savvy::typed_value::reserved_transformation_functor<float>());
+
+ if (mean_impute(sparse_dosages))
{
- if (sites_out_file_)
+ set_info_fields(out_var, sparse_dosages, {}, observed);
+ if (has_good_r2(out_var))
{
- savvy::variant site_var;
- dynamic_cast<savvy::site_info&>(site_var) = out_var;
- sites_out_file_->write(site_var);
- }
+ if (sites_out_file_)
+ {
+ savvy::variant site_var;
+ dynamic_cast<savvy::site_info&>(site_var) = out_var;
+ sites_out_file_->write(site_var);
+ }
- set_format_fields(out_var, sparse_dosages);
- out_file_ << out_var;
+ set_format_fields(out_var, sparse_dosages);
+ out_file_ << out_var;
+ }
}
++tar_only_it;
}
@@ -607,6 +641,8 @@ void dosage_writer::set_format_fields(savvy::variant& out_var, savvy::compressed
{
if (savvy::typed_value::is_end_of_vector(v))
return savvy::typed_value::end_of_vector_value<std::int8_t>();
+// else if (savvy::typed_value::is_missing(v)) // TODO: This would be necessary without mean-imputation of target only variants
+// return savvy::typed_value::missing_value<std::int8_t>();
return std::int8_t(v < 0.5f ? 0 : 1);
});
out_var.set_format("GT", sparse_gt_);
=====================================
src/dosage_writer.hpp
=====================================
@@ -55,6 +55,7 @@ public:
void print_mean_er2(std::ostream& os) const;
private:
+ static bool mean_impute(savvy::compressed_vector<float>& sparse_dosages);
static std::vector<std::pair<std::string, std::string>> gen_headers(const std::vector<std::string>& fmt_fields, const std::string& chromosome, bool is_temp);
static std::vector<std::pair<std::string, std::string>> gen_emp_headers(const std::string& chromosome);
static savvy::file::format format_from_filename(const std::string& filename, savvy::file::format default_format);
=====================================
src/hidden_markov_model.cpp
=====================================
@@ -313,7 +313,7 @@ void hidden_markov_model::impute_typed_site(double& prob_sum, std::size_t& prev_
// if (template_haps[i])
// p_alt = std::min(1.f, std::max(0.f, prob)); // TODO: + (1. - p_alt) * AF_other to support larger thresholds
dose = template_haps[i] ? 1.f : 0.f;
- loo_dose = observed < 0 ? savvy::typed_value::missing_value<float>() : dose;
+ loo_dose = savvy::typed_value::is_missing(observed) ? savvy::typed_value::missing_value<float>() : dose;
return;
}
}
@@ -344,7 +344,7 @@ void hidden_markov_model::impute_typed_site(double& prob_sum, std::size_t& prev_
dose = std::min(1.f, std::max(0.f, float(p_alt / prob_sum)));
dose = float(std::int16_t(dose * bin_scalar_ + 0.5f)) / bin_scalar_; // bin
- if (observed < 0)
+ if (savvy::typed_value::is_missing(observed))
{
loo_dose = savvy::typed_value::missing_value<float>();
}
=====================================
src/input_prep.cpp
=====================================
@@ -90,7 +90,7 @@ void init_ploidies(std::vector<std::uint8_t>& ploidies, const std::vector<std::i
std::fill(ploidies.begin(), ploidies.end(), max_ploidy);
for (std::size_t i = 0; i < gt_vec.size(); ++i)
{
- if (gt_vec[i] < 0)
+ if (savvy::typed_value::is_end_of_vector(gt_vec[i]))
--ploidies[i / max_ploidy];
}
}
@@ -103,7 +103,7 @@ std::int64_t check_ploidies(const std::vector<std::uint8_t>& ploidies, const std
std::uint8_t p = max_ploidy;
for (std::size_t j = 0; j < max_ploidy; ++j)
{
- if (gt_vec[i * max_ploidy + j] < 0)
+ if (savvy::typed_value::is_end_of_vector(gt_vec[i * max_ploidy + j]))
--p;
}
@@ -150,7 +150,7 @@ bool load_target_haplotypes(const std::string& file_path, const savvy::genomic_r
for (std::size_t i = 0; i < var.alts().size(); ++i)
{
std::size_t allele_idx = i + 1;
- target_sites.push_back({var.chromosome(), var.position(), var.ref(), var.alts()[i], true, false, nan, nan, nan, {}});
+ target_sites.push_back({var.chromosome(), var.position(), var.id(), var.ref(), var.alts()[i], true, false, nan, nan, nan, {}});
if (var.alts().size() == 1)
tmp_geno.swap(target_sites.back().gt);
else
@@ -205,6 +205,7 @@ bool load_reference_haplotypes(const std::string& file_path,
int res;
while ((res = block.deserialize(input, var)) > 0)
{
+ block.remove_eov();
if (block.variants().empty() || block.variants().front().pos > extended_reg.to())
break;
@@ -223,7 +224,7 @@ bool load_reference_haplotypes(const std::string& file_path,
for (std::size_t i = 0; i < tmp_geno.size(); ++i)
tmp_geno[i] = ref_it->gt[block.unique_map()[i]];
- typed_only_reference_data.compress_variant({ref_it->chrom, ref_it->pos, ref_it->ref, ref_it->alt, ref_it->err, ref_it->recom, ref_it->cm}, tmp_geno);
+ typed_only_reference_data.compress_variant({ref_it->chrom, ref_it->pos, ref_it->id, ref_it->ref, ref_it->alt, ref_it->err, ref_it->recom, ref_it->cm}, tmp_geno);
it->af = std::accumulate(tmp_geno.begin(), tmp_geno.end(), 0.f) / tmp_geno.size();
it->af = float((--typed_only_reference_data.end())->ac) / tmp_geno.size();
@@ -308,7 +309,7 @@ bool load_reference_haplotypes(const std::string& file_path,
for (std::size_t i = 0; i < tmp_geno.size(); ++i)
tmp_geno[i] = ref_it->gt[block.unique_map()[i]];
- typed_only_reference_data.compress_variant({ref_it->chrom, ref_it->pos, ref_it->ref, ref_it->alt, ref_it->err, ref_it->recom, ref_it->cm}, tmp_geno);
+ typed_only_reference_data.compress_variant({ref_it->chrom, ref_it->pos, ref_it->id, ref_it->ref, ref_it->alt, ref_it->err, ref_it->recom, ref_it->cm}, tmp_geno);
it->af = std::accumulate(tmp_geno.begin(), tmp_geno.end(), 0.f) / tmp_geno.size();
it->af = float((--typed_only_reference_data.end())->ac) / tmp_geno.size();
@@ -378,6 +379,7 @@ bool load_reference_haplotypes_old_recom_approach(const std::string& file_path,
int res;
while ((res = block.deserialize(input, var)) > 0)
{
+ block.remove_eov();
if (block.variants().empty() || block.variants().front().pos > extended_reg.to())
break;
@@ -403,7 +405,7 @@ bool load_reference_haplotypes_old_recom_approach(const std::string& file_path,
for (std::size_t i = 0; i < tmp_geno.size(); ++i)
tmp_geno[i] = ref_it->gt[block.unique_map()[i]];
- typed_only_reference_data.compress_variant({ref_it->chrom, ref_it->pos, ref_it->ref, ref_it->alt, ref_it->err, ref_it->recom, ref_it->cm}, tmp_geno);
+ typed_only_reference_data.compress_variant({ref_it->chrom, ref_it->pos, ref_it->id, ref_it->ref, ref_it->alt, ref_it->err, ref_it->recom, ref_it->cm}, tmp_geno);
it->af = std::accumulate(tmp_geno.begin(), tmp_geno.end(), 0.f) / tmp_geno.size();
it->af = float((--typed_only_reference_data.end())->ac) / tmp_geno.size();
@@ -501,7 +503,7 @@ bool load_reference_haplotypes_old_recom_approach(const std::string& file_path,
for (std::size_t i = 0; i < tmp_geno.size(); ++i)
tmp_geno[i] = ref_it->gt[block.unique_map()[i]];
- typed_only_reference_data.compress_variant({ref_it->chrom, ref_it->pos, ref_it->ref, ref_it->alt, ref_it->err, ref_it->recom, ref_it->cm}, tmp_geno);
+ typed_only_reference_data.compress_variant({ref_it->chrom, ref_it->pos, ref_it->id, ref_it->ref, ref_it->alt, ref_it->err, ref_it->recom, ref_it->cm}, tmp_geno);
it->af = std::accumulate(tmp_geno.begin(), tmp_geno.end(), 0.f) / tmp_geno.size();
it->af = float((--typed_only_reference_data.end())->ac) / tmp_geno.size();
@@ -795,7 +797,7 @@ bool compress_reference_panel(const std::string& input_path, const std::string&
float flt_nan = std::numeric_limits<float>::quiet_NaN();
unique_haplotype_block block;
- block.compress_variant(reference_site_info(var.chrom(), var.pos(), var.ref(), var.alts().size() ? var.alts()[0] : "", flt_nan, flt_nan, std::numeric_limits<double>::quiet_NaN()), gts);
+ block.compress_variant(reference_site_info(var.chrom(), var.pos(), var.id(), var.ref(), var.alts().size() ? var.alts()[0] : "", flt_nan, flt_nan, std::numeric_limits<double>::quiet_NaN()), gts);
std::size_t variant_cnt = 1;
std::size_t block_cnt = 0;
@@ -821,7 +823,7 @@ bool compress_reference_panel(const std::string& input_path, const std::string&
return std::cerr << "Error: could not read GT from variant record\n", false;
bool ret = block.compress_variant(
- reference_site_info(var.chrom(), var.pos(), var.ref(), var.alts().size() ? var.alts()[0] : "", flt_nan, flt_nan, std::numeric_limits<double>::quiet_NaN()),
+ reference_site_info(var.chrom(), var.pos(), var.id(), var.ref(), var.alts().size() ? var.alts()[0] : "", flt_nan, flt_nan, std::numeric_limits<double>::quiet_NaN()),
gts);
if (!ret)
@@ -851,9 +853,12 @@ bool compress_reference_panel(const std::string& input_path, const std::string&
// if (map_file)
// block.fill_cm(*map_file);
- if (!block.serialize(output_file))
- return std::cerr << "Error: serializing final block failed\n", false;
- ++block_cnt;
+ if (block.variants().size())
+ {
+ if (!block.serialize(output_file))
+ return std::cerr << "Error: serializing final block failed\n", false;
+ ++block_cnt;
+ }
return !input_file.bad() && output_file.good();
-}
\ No newline at end of file
+}
=====================================
src/prog_args.hpp
=====================================
@@ -112,6 +112,8 @@ public:
{"rsid", no_argument, 0, '\x01', nullptr},
//{"passOnly", no_argument, 0, '\x01', nullptr},
{"meta", no_argument, 0, '\x01', nullptr},
+ {"noPhoneHome", no_argument, 0, '\x01', nullptr},
+ {"referenceEstimates", no_argument, 0, '\x01', nullptr},
{"haps", required_argument, 0, '\x02', nullptr},
{"refHaps", required_argument, 0, '\x02', nullptr},
{"prefix", required_argument, 0, '\x02', nullptr},
@@ -123,7 +125,7 @@ public:
{"ChunkOverlapMb", required_argument, 0, '\x02', nullptr},
{"ChunkLengthMb", required_argument, 0, '\x02', nullptr},
{"cpus", required_argument, 0, '\x02', nullptr},
- {"minRatio", no_argument, 0, '\x02', nullptr}
+ {"minRatio", required_argument, 0, '\x02', nullptr}
})
{
}
@@ -259,6 +261,16 @@ public:
meta_ = true;
break;
}
+ else if (std::string(long_options_[long_index].name) == "noPhoneHome")
+ {
+ std::cerr << "Warning: --noPhoneHome is deprecated and ignored\n";
+ break;
+ }
+ else if (std::string(long_options_[long_index].name) == "referenceEstimates")
+ {
+ std::cerr << "Warning: --referenceEstimates is deprecated and ignored\n";
+ break;
+ }
// else pass through to default
case '\x02':
{
=====================================
src/unique_haplotype.cpp
=====================================
@@ -12,7 +12,7 @@ bool unique_haplotype_block::compress_variant(const reference_site_info& site_in
if (alleles.empty())
return false;
- variants_.emplace_back(site_info.chrom, site_info.pos, site_info.ref, site_info.alt, site_info.err, site_info.recom, site_info.cm, 0, std::vector<std::int8_t>());
+ variants_.emplace_back(site_info.chrom, site_info.pos, site_info.id, site_info.ref, site_info.alt, site_info.err, site_info.recom, site_info.cm, 0, std::vector<std::int8_t>());
if (variants_.size() == 1)
{
@@ -23,6 +23,12 @@ bool unique_haplotype_block::compress_variant(const reference_site_info& site_in
for (std::size_t i = 1; i < alleles.size(); ++i)
{
+ if (savvy::typed_value::is_end_of_vector(alleles[i]))
+ {
+ unique_map_[i] = savvy::typed_value::end_of_vector_value<decltype(unique_map_)::value_type>();
+ continue;
+ }
+
std::size_t j = 0;
for (; j < variants_[0].gt.size(); ++j)
{
@@ -49,6 +55,13 @@ bool unique_haplotype_block::compress_variant(const reference_site_info& site_in
for (std::size_t i = 0; i < alleles.size(); ++i)
{
+ if (savvy::typed_value::is_end_of_vector(alleles[i]))
+ {
+ if (!savvy::typed_value::is_end_of_vector(unique_map_[i]))
+ return std::cerr << "Error: sample ploidy is not consistent\n", false;
+ continue;
+ }
+
std::size_t original_hap_idx = unique_map_[i];
if (variants_.back().gt[original_hap_idx] != alleles[i])
{
@@ -102,7 +115,7 @@ bool unique_haplotype_block::compress_variant(const reference_site_info& site_in
variants_.back().ac += cardinalities_[i];
}
- assert(std::accumulate(cardinalities_.begin(), cardinalities_.end(), std::size_t(0)) == unique_map_.size());
+ assert(std::accumulate(cardinalities_.begin(), cardinalities_.end(), std::size_t(0)) == unique_map_.size() - std::count(unique_map_.begin(), unique_map_.end(), savvy::typed_value::end_of_vector_value<decltype(unique_map_)::value_type>()));
return true;
}
@@ -172,7 +185,7 @@ bool unique_haplotype_block::serialize(savvy::writer& output_file)
{
var = savvy::site_info(variants_.front().chrom,
variants_.front().pos,
- variants_.front().ref, {"<BLOCK>"});
+ variants_.front().ref, {"<BLOCK>"}, variants_.front().id);
std::int32_t end_pos = variants_.front().pos;
for (auto it = variants_.begin(); it != variants_.end(); ++it)
@@ -189,7 +202,7 @@ bool unique_haplotype_block::serialize(savvy::writer& output_file)
for (auto it = variants_.begin(); it != variants_.end() && output_file; ++it)
{
- var = savvy::variant(it->chrom, it->pos, it->ref, {it->alt});
+ var = savvy::variant(it->chrom, it->pos, it->ref, {it->alt}, it->id);
var.set_info("AC", std::int32_t(it->ac));
var.set_info("AN", std::int32_t(unique_map_.size()));
if (!std::isnan(it->err))
@@ -228,7 +241,10 @@ int unique_haplotype_block::deserialize(savvy::reader& input_file, savvy::varian
cardinalities_.resize(n_reps);
for (auto it = unique_map_.begin(); it != unique_map_.end(); ++it)
- ++cardinalities_[*it];
+ {
+ if (*it >= 0)
+ ++cardinalities_[*it];
+ }
variants_.reserve(n_variants);
while (input_file >> var)
@@ -240,6 +256,7 @@ int unique_haplotype_block::deserialize(savvy::reader& input_file, savvy::varian
variants_.back().chrom = var.chrom();
variants_.back().pos = var.position();
+ variants_.back().id = var.id();
variants_.back().ref = var.ref();
variants_.back().alt = var.alts().size() ? var.alts()[0] : "";
var.get_info("ERR", variants_.back().err);
@@ -263,6 +280,17 @@ int unique_haplotype_block::deserialize(savvy::reader& input_file, savvy::varian
}
}
+void unique_haplotype_block::remove_eov()
+{
+ for (auto it = unique_map_.begin(); it != unique_map_.end(); )
+ {
+ if (savvy::typed_value::is_end_of_vector(*it))
+ it = unique_map_.erase(it);
+ else
+ ++it;
+ }
+}
+
bool unique_haplotype_block::deserialize(std::istream& is, int m3vcf_version, std::size_t n_haplotypes)
{
clear();
@@ -379,6 +407,7 @@ bool unique_haplotype_block::deserialize(std::istream& is, int m3vcf_version, st
variants_[i].chrom = cols[0];
variants_[i].pos = std::atoll(cols[1].c_str());
+ variants_[i].id = cols[2];
variants_[i].ref = cols[3];
variants_[i].alt = cols[4];
=====================================
src/unique_haplotype.hpp
=====================================
@@ -39,6 +39,9 @@ public:
bool deserialize(std::istream& is, int m3vcf_version, std::size_t n_haplotypes);
int deserialize(savvy::reader& input_file, savvy::variant& var); // return <0 is error, 0 is EOF, >0 good
bool serialize(savvy::writer& output_file);
+
+ // removes end_of_vector values from unique_map. Can cause serialization errors if not handled properly.
+ void remove_eov();
};
class reduced_haplotypes
=====================================
src/variant.hpp
=====================================
@@ -10,6 +10,7 @@ struct target_variant
{
std::string chrom;
std::uint32_t pos;
+ std::string id;
std::string ref;
std::string alt;
bool in_tar; // site exists in target file
@@ -24,6 +25,7 @@ struct reference_site_info
{
std::string chrom;
std::uint32_t pos = 0;
+ std::string id;
std::string ref;
std::string alt;
float err = std::numeric_limits<float>::quiet_NaN();
@@ -35,6 +37,7 @@ struct reference_site_info
reference_site_info(std::string _chrom,
std::uint32_t _pos,
+ std::string _id,
std::string _ref,
std::string _alt,
float _err,
@@ -43,6 +46,7 @@ struct reference_site_info
:
chrom(std::move(_chrom)),
pos(_pos),
+ id(_id),
ref(std::move(_ref)),
alt(std::move(_alt)),
err(_err),
@@ -58,6 +62,7 @@ struct reference_variant : public reference_site_info
reference_variant(const std::string& _chrom,
std::uint32_t _pos,
+ const std::string& _id,
const std::string& _ref,
const std::string& _alt,
float _err,
@@ -66,7 +71,7 @@ struct reference_variant : public reference_site_info
std::size_t _ac,
std::vector<std::int8_t> _gt)
:
- reference_site_info(_chrom, _pos, _ref, _alt, _err, _recom, _cm),
+ reference_site_info(_chrom, _pos, _id, _ref, _alt, _err, _recom, _cm),
ac(_ac),
gt(std::move(_gt))
{
@@ -81,6 +86,7 @@ struct sparse_ref_variant : public reference_site_info
std::vector<std::uint32_t> alt_allele_offsets;
sparse_ref_variant(const std::string& _chrom,
std::uint32_t _pos,
+ const std::string& _id,
const std::string& _ref,
const std::string& _alt,
float _err,
@@ -89,7 +95,7 @@ struct sparse_ref_variant : public reference_site_info
std::size_t _ac,
const std::size_t* off_it, const std::size_t* off_it_end)
:
- reference_site_info(_chrom, _pos, _ref, _alt, _err, _recom, _cm),
+ reference_site_info(_chrom, _pos, _id, _ref, _alt, _err, _recom, _cm),
ac(_ac),
alt_allele_offsets(off_it, off_it_end)
{
=====================================
test/input/ref_panel.vcf
=====================================
@@ -0,0 +1,39 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS,Description="All filters passed">
+##contig=<ID=chr20,length=64444167>
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Alternate Allele Counts">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total Number Allele Counts">
+##phasing=phased
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG01879 HG00551 HG00403 HG00096 HG01583
+chr20 10000775 rs13042390 A G 255 PASS AC=4;AN=10 GT 0|0 0|1 1|0 0|1 1|0
+chr20 10001327 rs141420999 A C 255 PASS AC=1;AN=10 GT 0|0 0|0 0|0 0|0 0|1
+chr20 10001999 rs13038545 A T 255 PASS AC=1;AN=10 GT 0|0 0|0 0|0 0|1 0|0
+chr20 10002000 rs13037970 C T 255 PASS AC=1;AN=10 GT 0|0 0|0 0|0 0|1 0|0
+chr20 10002077 rs6057071 G A 255 PASS AC=8;AN=10 GT 1|1 0|1 1|1 1|1 0|1
+chr20 10002092 rs559251430 A G 255 PASS AC=1;AN=10 GT 0|0 0|0 1|0 0|0 0|0
+chr20 10002161 rs11906973 A T 255 PASS AC=1;AN=10 GT 0|0 0|0 0|0 1|0 0|0
+chr20 10002243 rs60292786 T C 255 PASS AC=1;AN=10 GT 0|0 0|0 0|0 1|0 0|0
+chr20 10002468 rs6118851 G A 255 PASS AC=2;AN=10 GT 0|0 0|0 1|1 0|0 0|0
+chr20 10002652 rs6039649 T C 255 PASS AC=7;AN=10 GT 1|1 0|0 1|1 1|0 1|1
+chr20 10002751 rs74623966 A G 255 PASS AC=1;AN=10 GT 0|0 0|0 0|0 1|0 0|0
+chr20 10004393 rs17507263 A G 255 PASS AC=4;AN=10 GT 0|1 0|0 1|1 0|0 0|1
+chr20 10005015 rs6108374 G A 255 PASS AC=5;AN=10 GT 0|1 0|1 1|1 0|0 0|1
+chr20 10005281 rs4816196 T C 255 PASS AC=3;AN=10 GT 0|0 0|0 1|1 0|0 0|1
+chr20 10005437 rs8115116 A G 255 PASS AC=1;AN=10 GT 0|0 0|1 0|0 0|0 0|0
+chr20 10005695 rs6087099 A G 255 PASS AC=8;AN=10 GT 1|1 1|1 1|1 0|1 0|1
+chr20 10005758 rs6057072 C T 255 PASS AC=4;AN=10 GT 0|1 0|0 1|1 0|0 0|1
+chr20 10005834 rs56242456 T C 255 PASS AC=1;AN=10 GT 0|0 0|1 0|0 0|0 0|0
+chr20 10006207 rs78434004 C A 255 PASS AC=2;AN=10 GT 0|0 0|0 0|1 0|0 0|1
+chr20 10006628 rs8123132 C T 255 PASS AC=3;AN=10 GT 1|0 1|0 0|0 0|1 0|0
+chr20 10006761 rs6118852 A G 255 PASS AC=4;AN=10 GT 0|0 0|1 1|1 0|0 0|1
+chr20 10006826 rs4816197 A G 255 PASS AC=4;AN=10 GT 0|0 0|1 1|1 0|0 0|1
+chr20 10007565 rs8124931 C A 255 PASS AC=1;AN=10 GT 0|1 0|0 0|0 0|0 0|0
+chr20 10008603 rs4816199 G A 255 PASS AC=8;AN=10 GT 1|1 1|1 1|1 0|1 0|1
+chr20 10009048 rs7343217 G A 255 PASS AC=3;AN=10 GT 0|0 0|0 1|1 0|0 0|1
+chr20 10009051 rs881767 A G 255 PASS AC=4;AN=10 GT 1|1 1|0 0|0 0|1 0|0
+chr20 10009180 rs2327258 C T 255 PASS AC=8;AN=10 GT 1|1 1|1 1|1 0|1 0|1
+chr20 10009286 rs4078321 G A 255 PASS AC=1;AN=10 GT 0|0 0|1 0|0 0|0 0|0
+chr20 10009481 rs724674 G A 255 PASS AC=8;AN=10 GT 1|1 1|1 1|1 0|1 0|1
+chr20 10009685 rs35232282 CTA C 255 PASS AC=3;AN=10 GT 0|0 0|0 1|1 0|0 0|1
+chr20 10009927 rs12480468 C T 255 PASS AC=1;AN=10 GT 0|0 0|1 0|0 0|0 0|0
=====================================
test/input/ref_panel_with_haploid_sample.vcf
=====================================
@@ -0,0 +1,39 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS,Description="All filters passed">
+##contig=<ID=chr20,length=64444167>
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Alternate Allele Counts">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total Number Allele Counts">
+##phasing=phased
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG01879 HG00551 HG00403 HG00096 HG01583
+chr20 10000775 rs13042390 A G 255 PASS AC=4;AN=10 GT 0|0 0|1 0 0|1 1|0
+chr20 10001327 rs141420999 A C 255 PASS AC=1;AN=10 GT 0|0 0|0 0 0|0 0|1
+chr20 10001999 rs13038545 A T 255 PASS AC=1;AN=10 GT 0|0 0|0 0 0|1 0|0
+chr20 10002000 rs13037970 C T 255 PASS AC=1;AN=10 GT 0|0 0|0 0 0|1 0|0
+chr20 10002077 rs6057071 G A 255 PASS AC=8;AN=10 GT 1|1 0|1 1 1|1 0|1
+chr20 10002092 rs559251430 A G 255 PASS AC=1;AN=10 GT 0|0 0|0 0 0|0 0|0
+chr20 10002161 rs11906973 A T 255 PASS AC=1;AN=10 GT 0|0 0|0 0 1|0 0|0
+chr20 10002243 rs60292786 T C 255 PASS AC=1;AN=10 GT 0|0 0|0 0 1|0 0|0
+chr20 10002468 rs6118851 G A 255 PASS AC=2;AN=10 GT 0|0 0|0 1 0|0 0|0
+chr20 10002652 rs6039649 T C 255 PASS AC=7;AN=10 GT 1|1 0|0 1 1|0 1|1
+chr20 10002751 rs74623966 A G 255 PASS AC=1;AN=10 GT 0|0 0|0 0 1|0 0|0
+chr20 10004393 rs17507263 A G 255 PASS AC=4;AN=10 GT 0|1 0|0 1 0|0 0|1
+chr20 10005015 rs6108374 G A 255 PASS AC=5;AN=10 GT 0|1 0|1 1 0|0 0|1
+chr20 10005281 rs4816196 T C 255 PASS AC=3;AN=10 GT 0|0 0|0 1 0|0 0|1
+chr20 10005437 rs8115116 A G 255 PASS AC=1;AN=10 GT 0|0 0|1 0 0|0 0|0
+chr20 10005695 rs6087099 A G 255 PASS AC=8;AN=10 GT 1|1 1|1 1 0|1 0|1
+chr20 10005758 rs6057072 C T 255 PASS AC=4;AN=10 GT 0|1 0|0 1 0|0 0|1
+chr20 10005834 rs56242456 T C 255 PASS AC=1;AN=10 GT 0|0 0|1 0 0|0 0|0
+chr20 10006207 rs78434004 C A 255 PASS AC=2;AN=10 GT 0|0 0|0 1 0|0 0|1
+chr20 10006628 rs8123132 C T 255 PASS AC=3;AN=10 GT 1|0 1|0 0 0|1 0|0
+chr20 10006761 rs6118852 A G 255 PASS AC=4;AN=10 GT 0|0 0|1 1 0|0 0|1
+chr20 10006826 rs4816197 A G 255 PASS AC=4;AN=10 GT 0|0 0|1 1 0|0 0|1
+chr20 10007565 rs8124931 C A 255 PASS AC=1;AN=10 GT 0|1 0|0 0 0|0 0|0
+chr20 10008603 rs4816199 G A 255 PASS AC=8;AN=10 GT 1|1 1|1 1 0|1 0|1
+chr20 10009048 rs7343217 G A 255 PASS AC=3;AN=10 GT 0|0 0|0 1 0|0 0|1
+chr20 10009051 rs881767 A G 255 PASS AC=4;AN=10 GT 1|1 1|0 0 0|1 0|0
+chr20 10009180 rs2327258 C T 255 PASS AC=8;AN=10 GT 1|1 1|1 1 0|1 0|1
+chr20 10009286 rs4078321 G A 255 PASS AC=1;AN=10 GT 0|0 0|1 0 0|0 0|0
+chr20 10009481 rs724674 G A 255 PASS AC=8;AN=10 GT 1|1 1|1 1 0|1 0|1
+chr20 10009685 rs35232282 CTA C 255 PASS AC=3;AN=10 GT 0|0 0|0 1 0|0 0|1
+chr20 10009927 rs12480468 C T 255 PASS AC=1;AN=10 GT 0|0 0|1 0 0|0 0|0
=====================================
test/input/tar_panel.vcf
=====================================
@@ -0,0 +1,36 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS,Description="All filters passed">
+##contig=<ID=chr20,length=64444167>
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Alternate Allele Counts">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total Number Allele Counts">
+##phasing=phased
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG01879 HG00551 HG00403 HG00096 HG01583
+chr20 10000775 rs13042390 A G 255 PASS AC=4;AN=10 GT 0|0 0|1 1|0 0|1 1|0
+chr20 10001327 rs141420999 A C 255 PASS AC=1;AN=10 GT 0|0 0|0 0|0 0|0 0|1
+chr20 10002077 rs6057071 G A 255 PASS AC=8;AN=10 GT 1|1 0|1 1|1 1|1 0|1
+chr20 10002161 rs11906973 A T 255 PASS AC=1;AN=10 GT 0|0 0|0 0|0 1|0 0|0
+chr20 10002243 rs60292786 T C 255 PASS AC=1;AN=10 GT 0|0 0|0 0|0 1|0 0|0
+chr20 10002468 rs6118851 G A 255 PASS AC=2;AN=10 GT 0|0 0|0 1|1 0|0 0|0
+chr20 10002652 rs6039649 T C 255 PASS AC=7;AN=10 GT 1|1 0|0 1|1 1|0 1|1
+chr20 10002751 rs74623966 A G 255 PASS AC=1;AN=10 GT 0|0 0|0 0|0 1|0 0|0
+chr20 10004393 rs17507263 A G 255 PASS AC=4;AN=10 GT 0|1 0|0 1|1 0|0 0|1
+chr20 10005015 rs6108374 G A 255 PASS AC=5;AN=10 GT 0|1 0|1 1|1 0|0 0|1
+chr20 10005281 rs4816196 T C 255 PASS AC=3;AN=10 GT 0|0 0|0 1|1 0|0 0|1
+chr20 10005437 rs8115116 A G 255 PASS AC=1;AN=10 GT 0|0 0|1 0|0 0|0 0|0
+chr20 10005695 rs6087099 A G 255 PASS AC=8;AN=10 GT 1|1 1|1 1|1 0|1 0|1
+chr20 10005758 rs6057072 C T 255 PASS AC=4;AN=10 GT 0|1 0|0 1|1 0|0 0|1
+chr20 10005834 rs56242456 T C 255 PASS AC=1;AN=10 GT 0|0 0|1 0|0 0|0 0|0
+chr20 10006207 rs78434004 C A 255 PASS AC=2;AN=10 GT 0|0 0|0 0|1 0|0 0|1
+chr20 10006628 rs8123132 C T 255 PASS AC=3;AN=10 GT 1|0 1|0 0|0 0|1 0|0
+chr20 10006761 rs6118852 A G 255 PASS AC=4;AN=10 GT 0|0 0|1 1|1 0|0 0|1
+chr20 10006826 rs4816197 A G 255 PASS AC=4;AN=10 GT 0|0 0|1 1|1 0|0 0|1
+chr20 10007565 rs8124931 C A 255 PASS AC=1;AN=10 GT 0|1 0|0 0|0 0|0 0|0
+chr20 10008603 rs4816199 G A 255 PASS AC=8;AN=10 GT 1|1 1|1 1|1 0|1 0|1
+chr20 10009048 rs7343217 G A 255 PASS AC=3;AN=10 GT 0|0 0|0 1|1 0|0 0|1
+chr20 10009051 rs881767 A G 255 PASS AC=4;AN=10 GT 1|1 1|0 0|0 0|1 0|0
+chr20 10009180 rs2327258 C T 255 PASS AC=8;AN=10 GT 1|1 1|1 1|1 0|1 0|1
+chr20 10009286 rs4078321 G A 255 PASS AC=1;AN=10 GT 0|0 0|1 0|0 0|0 0|0
+chr20 10009481 rs724674 G A 255 PASS AC=8;AN=10 GT 1|1 1|1 1|1 0|1 0|1
+chr20 10009685 rs35232282 CTA C 255 PASS AC=3;AN=10 GT 0|0 0|0 1|1 0|0 0|1
+chr20 10009927 rs12480468 C T 255 PASS AC=1;AN=10 GT 0|0 0|1 0|0 0|0 0|0
=====================================
test/input/tar_panel_with_haploid_sample.vcf
=====================================
@@ -0,0 +1,36 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS,Description="All filters passed">
+##contig=<ID=chr20,length=64444167>
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Alternate Allele Counts">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total Number Allele Counts">
+##phasing=phased
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG01879 HG00551 HG00403 HG00096 HG01583
+chr20 10000775 rs13042390 A G 255 PASS AC=4;AN=10 GT 0|0 0|1 0 0|1 1|0
+chr20 10001327 rs141420999 A C 255 PASS AC=1;AN=10 GT 0|0 0|0 0 0|0 0|1
+chr20 10002077 rs6057071 G A 255 PASS AC=8;AN=10 GT 1|1 0|1 1 1|1 0|1
+chr20 10002161 rs11906973 A T 255 PASS AC=1;AN=10 GT 0|0 0|0 0 1|0 0|0
+chr20 10002243 rs60292786 T C 255 PASS AC=1;AN=10 GT 0|0 0|0 0 1|0 0|0
+chr20 10002468 rs6118851 G A 255 PASS AC=2;AN=10 GT 0|0 0|0 1 0|0 0|0
+chr20 10002652 rs6039649 T C 255 PASS AC=7;AN=10 GT 1|1 0|0 1 1|0 1|1
+chr20 10002751 rs74623966 A G 255 PASS AC=1;AN=10 GT 0|0 0|0 0 1|0 0|0
+chr20 10004393 rs17507263 A G 255 PASS AC=4;AN=10 GT 0|1 0|0 1 0|0 0|1
+chr20 10005015 rs6108374 G A 255 PASS AC=5;AN=10 GT 0|1 0|1 1 0|0 0|1
+chr20 10005281 rs4816196 T C 255 PASS AC=3;AN=10 GT 0|0 0|0 1 0|0 0|1
+chr20 10005437 rs8115116 A G 255 PASS AC=1;AN=10 GT 0|0 0|1 0 0|0 0|0
+chr20 10005695 rs6087099 A G 255 PASS AC=8;AN=10 GT 1|1 1|1 1 0|1 0|1
+chr20 10005758 rs6057072 C T 255 PASS AC=4;AN=10 GT 0|1 0|0 1 0|0 0|1
+chr20 10005834 rs56242456 T C 255 PASS AC=1;AN=10 GT 0|0 0|1 0 0|0 0|0
+chr20 10006207 rs78434004 C A 255 PASS AC=2;AN=10 GT 0|0 0|0 1 0|0 0|1
+chr20 10006628 rs8123132 C T 255 PASS AC=3;AN=10 GT 1|0 1|0 0 0|1 0|0
+chr20 10006761 rs6118852 A G 255 PASS AC=4;AN=10 GT 0|0 0|1 1 0|0 0|1
+chr20 10006826 rs4816197 A G 255 PASS AC=4;AN=10 GT 0|0 0|1 1 0|0 0|1
+chr20 10007565 rs8124931 C A 255 PASS AC=1;AN=10 GT 0|1 0|0 0 0|0 0|0
+chr20 10008603 rs4816199 G A 255 PASS AC=8;AN=10 GT 1|1 1|1 1 0|1 0|1
+chr20 10009048 rs7343217 G A 255 PASS AC=3;AN=10 GT 0|0 0|0 1 0|0 0|1
+chr20 10009051 rs881767 A G 255 PASS AC=4;AN=10 GT 1|1 1|0 0 0|1 0|0
+chr20 10009180 rs2327258 C T 255 PASS AC=8;AN=10 GT 1|1 1|1 1 0|1 0|1
+chr20 10009286 rs4078321 G A 255 PASS AC=1;AN=10 GT 0|0 0|1 0 0|0 0|0
+chr20 10009481 rs724674 G A 255 PASS AC=8;AN=10 GT 1|1 1|1 1 0|1 0|1
+chr20 10009685 rs35232282 CTA C 255 PASS AC=3;AN=10 GT 0|0 0|0 1 0|0 0|1
+chr20 10009927 rs12480468 C T 255 PASS AC=1;AN=10 GT 0|0 0|1 0 0|0 0|0
=====================================
test/simple-test.sh
=====================================
@@ -0,0 +1,25 @@
+#!/bin/bash
+set -euo pipefail
+
+m4=$1
+d=$2
+mkdir -p $d
+
+
+ref_vcf=$d/$(basename $3).gz
+ref_msav=$d/$(basename $3 .vcf).msav
+tar_vcf=$d/$(basename $4).gz
+imputed_vcf=$d/imputed.vcf.gz
+
+which bcftools || (>&2 echo "Error: bcftools is required to run tests. On debian run 'apt install bcftools'"; exit 1)
+
+bcftools view $3 -Oz -o $ref_vcf
+bcftools index $ref_vcf
+bcftools view $4 -Oz -o $tar_vcf
+bcftools index $tar_vcf
+$m4 --compress-reference $ref_vcf > $ref_msav
+$m4 $ref_msav $tar_vcf -f GT -O vcf.gz > $imputed_vcf
+
+gzip -cd $imputed_vcf | grep -v "^#" | cut -f9- > $d/imputed_gt_matrix.tsv
+gzip -cd $ref_vcf | grep -v "^#" | cut -f9- > $d/ref_gt_matrix.tsv
+diff $d/ref_gt_matrix.tsv $d/imputed_gt_matrix.tsv
\ No newline at end of file
View it on GitLab: https://salsa.debian.org/med-team/minimac4/-/compare/4bf08043478e303c5d21ccea25e20000b06183f7...9566da074d2ce69ed70dd5b9cae6db342d1cd493
--
View it on GitLab: https://salsa.debian.org/med-team/minimac4/-/compare/4bf08043478e303c5d21ccea25e20000b06183f7...9566da074d2ce69ed70dd5b9cae6db342d1cd493
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20230124/d8804cdd/attachment-0001.htm>
More information about the debian-med-commit
mailing list