[med-svn] [Git][med-team/minimac4][master] 13 commits: libomp-jonathonl-dev is available

Andreas Tille (@tille) gitlab at salsa.debian.org
Tue Jan 24 10:51:58 GMT 2023



Andreas Tille pushed to branch master at Debian Med / minimac4


Commits:
9106c447 by Andreas Tille at 2023-01-18T16:47:42+01:00
libomp-jonathonl-dev is available

- - - - -
aed71b83 by Andreas Tille at 2023-01-18T16:48:01+01:00
routine-update: New upstream version

- - - - -
d69e258f by Andreas Tille at 2023-01-18T16:48:01+01:00
New upstream version 4.1.1
- - - - -
65cd3722 by Andreas Tille at 2023-01-18T16:48:02+01:00
Update upstream source from tag 'upstream/4.1.1'

Update to upstream version '4.1.1'
with Debian dir 0a702b21994bb0cd0ca11ff4c1396d1758104c6d
- - - - -
e9ecd785 by Andreas Tille at 2023-01-18T16:48:02+01:00
routine-update: Standards-Version: 4.6.2

- - - - -
0d99c95c by Andreas Tille at 2023-01-18T16:54:51+01:00
Rename autopkgtest script to run-unit-test

- - - - -
366058e0 by Andreas Tille at 2023-01-24T11:12:59+01:00
First attempt to fix autopkgtest

- - - - -
5889050f by Andreas Tille at 2023-01-24T11:13:31+01:00
New upstream version 4.1.2
- - - - -
7ba66c5c by Andreas Tille at 2023-01-24T11:13:31+01:00
routine-update: New upstream version

- - - - -
ae10eea2 by Andreas Tille at 2023-01-24T11:13:32+01:00
Update upstream source from tag 'upstream/4.1.2'

Update to upstream version '4.1.2'
with Debian dir b2b49836d0f2b898560d4d2adc7a90ae27f9ded8
- - - - -
824fbab8 by Andreas Tille at 2023-01-24T11:43:52+01:00
Refresh patches

- - - - -
32e85434 by Andreas Tille at 2023-01-24T11:47:27+01:00
Switch on build tests provided upstream

- - - - -
9566da07 by Andreas Tille at 2023-01-24T11:51:06+01:00
[skip ci] Build-Depends bcftools needed for build tests

- - - - -


24 changed files:

- + .github/workflows/build_and_test.yml
- CMakeLists.txt
- README.md
- debian/changelog
- debian/control
- debian/patches/lzma.patch
- debian/rules
- debian/tests/control
- debian/tests/run-sample-analysis → debian/tests/run-unit-test
- package-linux.sh
- requirements.txt
- src/dosage_writer.cpp
- src/dosage_writer.hpp
- src/hidden_markov_model.cpp
- src/input_prep.cpp
- src/prog_args.hpp
- src/unique_haplotype.cpp
- src/unique_haplotype.hpp
- src/variant.hpp
- + test/input/ref_panel.vcf
- + test/input/ref_panel_with_haploid_sample.vcf
- + test/input/tar_panel.vcf
- + test/input/tar_panel_with_haploid_sample.vcf
- + test/simple-test.sh


Changes:

=====================================
.github/workflows/build_and_test.yml
=====================================
@@ -0,0 +1,27 @@
+name: build and run tests
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+    branches: [ master ]
+jobs:
+  build:
+    runs-on: ubuntu-20.04
+    steps:
+    - uses: actions/checkout at v2
+    - name: build and test
+      shell: bash
+      run: |
+        set -euo pipefail
+        sudo apt update
+        sudo apt install -y build-essential bcftools cmake git gzip python3-dev python3-pip python3-setuptools
+        pip3 --version
+        pip3 install wheel
+        pip3 install cget
+        cget ignore xz
+        cget install -f ./requirements.txt
+        cmake --version
+        mkdir build && cd build
+        cmake -DBUILD_TESTS=1 -DCMAKE_TOOLCHAIN_FILE=../cget/cget/cget.cmake -DCMAKE_BUILD_TYPE=Debug ..
+        make
+        make CTEST_OUTPUT_ON_FAILURE=1 test


=====================================
CMakeLists.txt
=====================================
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.2)
-project(minimac4 VERSION 4.1.0)
+project(minimac4 VERSION 4.1.2)
 
 if(NOT CMAKE_BUILD_TYPE)
     set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose build type (Debug|Release|RelWithDebInfo|MinSizeRel)" FORCE)
@@ -32,6 +32,12 @@ add_executable(minimac4
 
 target_link_libraries(minimac4 savvy ${OMPP_LIBRARY} Threads::Threads)
 
+if(BUILD_TESTS)
+    enable_testing()
+    add_test(simple_test ${CMAKE_SOURCE_DIR}/test/simple-test.sh ${CMAKE_BINARY_DIR}/minimac4 m4_simple_test_output ${CMAKE_SOURCE_DIR}/test/input/ref_panel.vcf ${CMAKE_SOURCE_DIR}/test/input/tar_panel.vcf)
+    add_test(haploid_and_diploid_test ${CMAKE_SOURCE_DIR}/test/simple-test.sh ${CMAKE_BINARY_DIR}/minimac4 m4_haploid_and_diploid_test_output ${CMAKE_SOURCE_DIR}/test/input/ref_panel_with_haploid_sample.vcf ${CMAKE_SOURCE_DIR}/test/input/tar_panel_with_haploid_sample.vcf)
+endif()
+
 install(TARGETS minimac4 RUNTIME DESTINATION bin)
 
 set(CPACK_PACKAGE_VERSION_MAJOR ${PROJECT_VERSION_MAJOR})


=====================================
README.md
=====================================
@@ -25,6 +25,15 @@ make                                                    # Build.
 make install                                            # Install
 ```
 
+To build and run tests from build directory:
+```bash
+# bcftools is required to run tests
+cmake -DCMAKE_TOOLCHAIN_FILE=../cget/cget/cget.cmake -DBUILD_TESTS=ON ..
+make
+make CTEST_OUTPUT_ON_FAILURE=1 test
+```
+
+
 ## Usage
 See `minimac4 --help` for detailed usage.
 


=====================================
debian/changelog
=====================================
@@ -1,9 +1,12 @@
-minimac4 (4.1.0-1) UNRELEASED; urgency=medium
+minimac4 (4.1.2-1) UNRELEASED; urgency=medium
 
   * Team upload.
   * New upstream version
   * Build-Depends: libsavvy-dev, libomp-jonathonl-dev, libzstd-dev
-  TODO: https://github.com/jonathonl/omp
+  * Standards-Version: 4.6.2 (routine-update)
+  * Rename autopkgtest script to run-unit-test
+  * Switch on build tests provided upstream
+  * Build-Depends bcftools needed for build tests
 
  -- Andreas Tille <tille at debian.org>  Thu, 22 Sep 2022 09:28:59 +0200
 


=====================================
debian/control
=====================================
@@ -10,10 +10,11 @@ Build-Depends: debhelper-compat (= 13),
                libstatgen-dev,
                libshrinkwrap-dev,
                libsavvy-dev,
-               libomp-jonathonl-dev (>= 0.0+git20211216.5228495),
+               libomp-jonathonl-dev,
                liblzma-dev,
-               libzstd-dev
-Standards-Version: 4.6.1
+               libzstd-dev,
+               bcftools <!nocheck>
+Standards-Version: 4.6.2
 Vcs-Browser: https://salsa.debian.org/med-team/minimac4
 Vcs-Git: https://salsa.debian.org/med-team/minimac4.git
 Homepage: https://genome.sph.umich.edu/wiki/Minimac4


=====================================
debian/patches/lzma.patch
=====================================
@@ -11,5 +11,5 @@ Description: Make sure lzma can be linked
 -target_link_libraries(minimac4 savvy ${OMPP_LIBRARY} Threads::Threads)
 +target_link_libraries(minimac4 savvy ${OMPP_LIBRARY} lzma Threads::Threads)
  
- install(TARGETS minimac4 RUNTIME DESTINATION bin)
- 
+ if(BUILD_TESTS)
+     enable_testing()


=====================================
debian/rules
=====================================
@@ -6,3 +6,6 @@ export DEB_CPPFLAGS_MAINT_APPEND = -I/usr/include/libStatGen
 
 %:
 	dh $@
+
+override_dh_auto_configure:
+	dh_auto_configure -- -DBUILD_TESTS=ON


=====================================
debian/tests/control
=====================================
@@ -1,2 +1,2 @@
-Tests: run-sample-analysis
+Tests: run-unit-test
 Depends: @


=====================================
debian/tests/run-sample-analysis → debian/tests/run-unit-test
=====================================
@@ -16,11 +16,17 @@ cp -a /usr/share/doc/${pkg}/examples/* $AUTOPKGTEST_TMP
 gzip refPanel.m3vcf
 gzip targetStudy.vcf
 
+set -x
+# asked for help at: https://github.com/statgen/Minimac4/issues/54
+
+minimac4 --update-m3vcf refPanel.m3vcf.gz > refPanel.msav
+
 minimac4 \
-         --refHaps refPanel.m3vcf.gz \
+         --refHaps refPanel.msav \
          --haps targetStudy.vcf.gz \
-         --prefix testRun
+         --output testRun
 
+exit
 zcat testRun.dose.vcf.gz > testRun.dose.vcf
 
 # Remove not reproducible headers


=====================================
package-linux.sh
=====================================
@@ -12,6 +12,7 @@ cd ${build_dir}
 
 export CFLAGS="-fPIC"
 export CXXFLAGS="-fPIC"
+cget ignore xz
 cget install zlib,http://zlib.net/fossils/zlib-1.2.11.tar.gz
 rm cget/lib/libz.so
 cget install -f ${src_dir}/requirements.txt


=====================================
requirements.txt
=====================================
@@ -1,2 +1,2 @@
 statgen/savvy at v2.1.0
-jonathonl/omp at 522849581eb900a8bce9621a557ef1cadf600441
+jonathonl/omp at v1.0.0


=====================================
src/dosage_writer.cpp
=====================================
@@ -345,6 +345,32 @@ bool dosage_writer::has_good_r2(savvy::site_info& site)
   return true;
 }
 
+bool dosage_writer::mean_impute(savvy::compressed_vector<float>& sparse_dosages)
+{
+  std::int64_t n = sparse_dosages.size();
+  float s = 0.f;
+  for (auto it = sparse_dosages.begin(); it != sparse_dosages.end(); ++it)
+  {
+    if (savvy::typed_value::is_special_value(*it))
+      --n;
+    else
+      s += *it;
+  }
+
+  if (n > 0)
+  {
+    float mean = s / n;
+    for (auto it = sparse_dosages.begin(); it != sparse_dosages.end(); ++it)
+    {
+      if (savvy::typed_value::is_missing(*it))
+        *it = mean;
+    }
+    return true;
+  }
+
+  return false;
+}
+
 bool dosage_writer::write_dosages(const full_dosages_results& hmm_results, const std::vector<target_variant>& tar_variants, const std::vector<target_variant>& tar_only_variants, std::pair<std::size_t, std::size_t> observed_range, const reduced_haplotypes& full_reference_data, const savvy::region& impute_region)
 {
   assert(hmm_results.dimensions()[0] == full_reference_data.variant_size());
@@ -378,21 +404,25 @@ bool dosage_writer::write_dosages(const full_dosages_results& hmm_results, const
 
     while(tar_only_it != tar_only_variants.end() && tar_only_it->pos <= ref_it->pos)
     {
-      out_var = savvy::site_info(tar_only_it->chrom, tar_only_it->pos, tar_only_it->ref, {tar_only_it->alt}, ""/*ref_var.id()*/); // TODO: ID
+      out_var = savvy::site_info(tar_only_it->chrom, tar_only_it->pos, tar_only_it->ref, {tar_only_it->alt}, tar_only_it->id);
       std::vector<std::int8_t> observed(tar_only_it->gt.begin() + observed_range.first, tar_only_it->gt.begin() + observed_range.second);
-      sparse_dosages.assign(observed.begin(), observed.end());
-      set_info_fields(out_var, sparse_dosages, {}, observed);
-      if (has_good_r2(out_var))
+      sparse_dosages.assign(observed.begin(), observed.end(), savvy::typed_value::reserved_transformation_functor<float>());
+
+      if (mean_impute(sparse_dosages))
       {
-        if (sites_out_file_)
+        set_info_fields(out_var, sparse_dosages, {}, observed);
+        if (has_good_r2(out_var))
         {
-          savvy::variant site_var;
-          dynamic_cast<savvy::site_info&>(site_var) = out_var;
-          sites_out_file_->write(site_var);
-        }
+          if (sites_out_file_)
+          {
+            savvy::variant site_var;
+            dynamic_cast<savvy::site_info&>(site_var) = out_var;
+            sites_out_file_->write(site_var);
+          }
 
-        set_format_fields(out_var, sparse_dosages);
-        out_file_ << out_var;
+          set_format_fields(out_var, sparse_dosages);
+          out_file_ << out_var;
+        }
       }
       ++tar_only_it;
     }
@@ -407,7 +437,7 @@ bool dosage_writer::write_dosages(const full_dosages_results& hmm_results, const
     //          loo_dosages[j] = float(std::int16_t(hmm_results.loo_dosage(tar_idx, j) * bin_scalar_ + 0.5f)) / bin_scalar_;
     //      }
 
-    out_var = savvy::site_info(ref_it->chrom, ref_it->pos, ref_it->ref, {ref_it->alt}, ""/*ref_var.id()*/); // TODO: ID
+    out_var = savvy::site_info(ref_it->chrom, ref_it->pos, ref_it->ref, {ref_it->alt}, ref_it->id);
     assert(!std::isnan(hmm_results.dosages_[i][0]));
     sparse_dosages.assign(hmm_results.dosages_[i].begin(), hmm_results.dosages_[i].end());
     if (ref_matches_tar)
@@ -418,7 +448,7 @@ bool dosage_writer::write_dosages(const full_dosages_results& hmm_results, const
 
       if (emp_out_file_ && has_good_r2(out_var))
       {
-        out_var_emp = savvy::site_info(ref_it->chrom, ref_it->pos, ref_it->ref, {ref_it->alt}, ""/*ref_var.id()*/);
+        out_var_emp = savvy::site_info(ref_it->chrom, ref_it->pos, ref_it->ref, {ref_it->alt}, ref_it->id);
         out_var_emp.set_info("TYPED", std::vector<std::int8_t>());
         out_var_emp.set_info("IMPUTED", std::vector<std::int8_t>());
         out_var_emp.set_format("GT", observed);
@@ -452,21 +482,25 @@ bool dosage_writer::write_dosages(const full_dosages_results& hmm_results, const
 
   while(tar_only_it != tar_only_variants.end() && tar_only_it->pos <= impute_region.to())
   {
-    out_var = savvy::site_info(tar_only_it->chrom, tar_only_it->pos, tar_only_it->ref, {tar_only_it->alt}, ""/*ref_var.id()*/); // TODO: ID
+    out_var = savvy::site_info(tar_only_it->chrom, tar_only_it->pos, tar_only_it->ref, {tar_only_it->alt}, tar_only_it->id);
     std::vector<std::int8_t> observed(tar_only_it->gt.begin() + observed_range.first, tar_only_it->gt.begin() + observed_range.second);
-    sparse_dosages.assign(observed.begin(), observed.end());
-    set_info_fields(out_var, sparse_dosages, {}, observed);
-    if (has_good_r2(out_var))
+    sparse_dosages.assign(observed.begin(), observed.end(), savvy::typed_value::reserved_transformation_functor<float>());
+
+    if (mean_impute(sparse_dosages))
     {
-      if (sites_out_file_)
+      set_info_fields(out_var, sparse_dosages, {}, observed);
+      if (has_good_r2(out_var))
       {
-        savvy::variant site_var;
-        dynamic_cast<savvy::site_info&>(site_var) = out_var;
-        sites_out_file_->write(site_var);
-      }
+        if (sites_out_file_)
+        {
+          savvy::variant site_var;
+          dynamic_cast<savvy::site_info&>(site_var) = out_var;
+          sites_out_file_->write(site_var);
+        }
 
-      set_format_fields(out_var, sparse_dosages);
-      out_file_ << out_var;
+        set_format_fields(out_var, sparse_dosages);
+        out_file_ << out_var;
+      }
     }
     ++tar_only_it;
   }
@@ -607,6 +641,8 @@ void dosage_writer::set_format_fields(savvy::variant& out_var, savvy::compressed
       {
         if (savvy::typed_value::is_end_of_vector(v))
           return savvy::typed_value::end_of_vector_value<std::int8_t>();
+//        else if (savvy::typed_value::is_missing(v)) // TODO: This would be necessary without mean-imputation of target only variants
+//          return savvy::typed_value::missing_value<std::int8_t>();
         return std::int8_t(v < 0.5f ? 0 : 1);
       });
     out_var.set_format("GT", sparse_gt_);


=====================================
src/dosage_writer.hpp
=====================================
@@ -55,6 +55,7 @@ public:
 
   void print_mean_er2(std::ostream& os) const;
 private:
+  static bool mean_impute(savvy::compressed_vector<float>& sparse_dosages);
   static std::vector<std::pair<std::string, std::string>> gen_headers(const std::vector<std::string>& fmt_fields, const std::string& chromosome, bool is_temp);
   static std::vector<std::pair<std::string, std::string>> gen_emp_headers(const std::string& chromosome);
   static savvy::file::format format_from_filename(const std::string& filename, savvy::file::format default_format);


=====================================
src/hidden_markov_model.cpp
=====================================
@@ -313,7 +313,7 @@ void hidden_markov_model::impute_typed_site(double& prob_sum, std::size_t& prev_
 //      if (template_haps[i])
 //        p_alt = std::min(1.f, std::max(0.f, prob)); // TODO: + (1. - p_alt) * AF_other to support larger thresholds
       dose = template_haps[i] ? 1.f : 0.f;
-      loo_dose = observed < 0 ? savvy::typed_value::missing_value<float>() : dose;
+      loo_dose = savvy::typed_value::is_missing(observed) ? savvy::typed_value::missing_value<float>() : dose;
       return;
     }
   }
@@ -344,7 +344,7 @@ void hidden_markov_model::impute_typed_site(double& prob_sum, std::size_t& prev_
   dose = std::min(1.f, std::max(0.f, float(p_alt / prob_sum)));
   dose = float(std::int16_t(dose * bin_scalar_ + 0.5f)) / bin_scalar_; // bin
 
-  if (observed < 0)
+  if (savvy::typed_value::is_missing(observed))
   {
     loo_dose = savvy::typed_value::missing_value<float>();
   }


=====================================
src/input_prep.cpp
=====================================
@@ -90,7 +90,7 @@ void init_ploidies(std::vector<std::uint8_t>& ploidies, const std::vector<std::i
   std::fill(ploidies.begin(), ploidies.end(), max_ploidy);
   for (std::size_t i = 0; i < gt_vec.size(); ++i)
   {
-    if (gt_vec[i] < 0)
+    if (savvy::typed_value::is_end_of_vector(gt_vec[i]))
       --ploidies[i / max_ploidy];
   }
 }
@@ -103,7 +103,7 @@ std::int64_t check_ploidies(const std::vector<std::uint8_t>& ploidies, const std
     std::uint8_t p = max_ploidy;
     for (std::size_t j = 0; j < max_ploidy; ++j)
     {
-      if (gt_vec[i * max_ploidy + j] < 0)
+      if (savvy::typed_value::is_end_of_vector(gt_vec[i * max_ploidy + j]))
         --p;
     }
 
@@ -150,7 +150,7 @@ bool load_target_haplotypes(const std::string& file_path, const savvy::genomic_r
     for (std::size_t i = 0; i < var.alts().size(); ++i)
     {
       std::size_t allele_idx = i + 1;
-      target_sites.push_back({var.chromosome(), var.position(), var.ref(), var.alts()[i], true, false, nan, nan, nan, {}});
+      target_sites.push_back({var.chromosome(), var.position(), var.id(), var.ref(), var.alts()[i], true, false, nan, nan, nan, {}});
       if (var.alts().size() == 1)
         tmp_geno.swap(target_sites.back().gt);
       else
@@ -205,6 +205,7 @@ bool load_reference_haplotypes(const std::string& file_path,
     int res;
     while ((res = block.deserialize(input, var)) > 0)
     {
+      block.remove_eov();
       if (block.variants().empty() || block.variants().front().pos > extended_reg.to())
         break;
 
@@ -223,7 +224,7 @@ bool load_reference_haplotypes(const std::string& file_path,
             for (std::size_t i = 0; i < tmp_geno.size(); ++i)
               tmp_geno[i] = ref_it->gt[block.unique_map()[i]];
 
-            typed_only_reference_data.compress_variant({ref_it->chrom, ref_it->pos, ref_it->ref, ref_it->alt, ref_it->err, ref_it->recom, ref_it->cm}, tmp_geno);
+            typed_only_reference_data.compress_variant({ref_it->chrom, ref_it->pos, ref_it->id, ref_it->ref, ref_it->alt, ref_it->err, ref_it->recom, ref_it->cm}, tmp_geno);
 
             it->af = std::accumulate(tmp_geno.begin(), tmp_geno.end(), 0.f) / tmp_geno.size();
             it->af = float((--typed_only_reference_data.end())->ac) / tmp_geno.size();
@@ -308,7 +309,7 @@ bool load_reference_haplotypes(const std::string& file_path,
             for (std::size_t i = 0; i < tmp_geno.size(); ++i)
               tmp_geno[i] = ref_it->gt[block.unique_map()[i]];
 
-            typed_only_reference_data.compress_variant({ref_it->chrom, ref_it->pos, ref_it->ref, ref_it->alt, ref_it->err, ref_it->recom, ref_it->cm}, tmp_geno);
+            typed_only_reference_data.compress_variant({ref_it->chrom, ref_it->pos, ref_it->id, ref_it->ref, ref_it->alt, ref_it->err, ref_it->recom, ref_it->cm}, tmp_geno);
 
             it->af = std::accumulate(tmp_geno.begin(), tmp_geno.end(), 0.f) / tmp_geno.size();
             it->af = float((--typed_only_reference_data.end())->ac) / tmp_geno.size();
@@ -378,6 +379,7 @@ bool load_reference_haplotypes_old_recom_approach(const std::string& file_path,
     int res;
     while ((res = block.deserialize(input, var)) > 0)
     {
+      block.remove_eov();
       if (block.variants().empty() || block.variants().front().pos > extended_reg.to())
         break;
 
@@ -403,7 +405,7 @@ bool load_reference_haplotypes_old_recom_approach(const std::string& file_path,
             for (std::size_t i = 0; i < tmp_geno.size(); ++i)
               tmp_geno[i] = ref_it->gt[block.unique_map()[i]];
 
-            typed_only_reference_data.compress_variant({ref_it->chrom, ref_it->pos, ref_it->ref, ref_it->alt, ref_it->err, ref_it->recom, ref_it->cm}, tmp_geno);
+            typed_only_reference_data.compress_variant({ref_it->chrom, ref_it->pos, ref_it->id, ref_it->ref, ref_it->alt, ref_it->err, ref_it->recom, ref_it->cm}, tmp_geno);
 
             it->af = std::accumulate(tmp_geno.begin(), tmp_geno.end(), 0.f) / tmp_geno.size();
             it->af = float((--typed_only_reference_data.end())->ac) / tmp_geno.size();
@@ -501,7 +503,7 @@ bool load_reference_haplotypes_old_recom_approach(const std::string& file_path,
             for (std::size_t i = 0; i < tmp_geno.size(); ++i)
               tmp_geno[i] = ref_it->gt[block.unique_map()[i]];
 
-            typed_only_reference_data.compress_variant({ref_it->chrom, ref_it->pos, ref_it->ref, ref_it->alt, ref_it->err, ref_it->recom, ref_it->cm}, tmp_geno);
+            typed_only_reference_data.compress_variant({ref_it->chrom, ref_it->pos, ref_it->id, ref_it->ref, ref_it->alt, ref_it->err, ref_it->recom, ref_it->cm}, tmp_geno);
 
             it->af = std::accumulate(tmp_geno.begin(), tmp_geno.end(), 0.f) / tmp_geno.size();
             it->af = float((--typed_only_reference_data.end())->ac) / tmp_geno.size();
@@ -795,7 +797,7 @@ bool compress_reference_panel(const std::string& input_path, const std::string&
 
   float flt_nan = std::numeric_limits<float>::quiet_NaN();
   unique_haplotype_block block;
-  block.compress_variant(reference_site_info(var.chrom(), var.pos(), var.ref(), var.alts().size() ? var.alts()[0] : "", flt_nan, flt_nan, std::numeric_limits<double>::quiet_NaN()), gts);
+  block.compress_variant(reference_site_info(var.chrom(), var.pos(), var.id(), var.ref(), var.alts().size() ? var.alts()[0] : "", flt_nan, flt_nan, std::numeric_limits<double>::quiet_NaN()), gts);
   std::size_t variant_cnt = 1;
   std::size_t block_cnt = 0;
 
@@ -821,7 +823,7 @@ bool compress_reference_panel(const std::string& input_path, const std::string&
       return std::cerr << "Error: could not read GT from variant record\n", false;
 
     bool ret = block.compress_variant(
-      reference_site_info(var.chrom(), var.pos(), var.ref(), var.alts().size() ? var.alts()[0] : "", flt_nan, flt_nan, std::numeric_limits<double>::quiet_NaN()),
+      reference_site_info(var.chrom(), var.pos(), var.id(), var.ref(), var.alts().size() ? var.alts()[0] : "", flt_nan, flt_nan, std::numeric_limits<double>::quiet_NaN()),
       gts);
 
     if (!ret)
@@ -851,9 +853,12 @@ bool compress_reference_panel(const std::string& input_path, const std::string&
   // if (map_file)
   //  block.fill_cm(*map_file);
 
-  if (!block.serialize(output_file))
-    return std::cerr << "Error: serializing final block failed\n", false;
-  ++block_cnt;
+  if (block.variants().size())
+  {
+    if (!block.serialize(output_file))
+      return std::cerr << "Error: serializing final block failed\n", false;
+    ++block_cnt;
+  }
 
   return !input_file.bad() && output_file.good();
-}
\ No newline at end of file
+}


=====================================
src/prog_args.hpp
=====================================
@@ -112,6 +112,8 @@ public:
         {"rsid", no_argument, 0, '\x01', nullptr},
         //{"passOnly", no_argument, 0, '\x01', nullptr},
         {"meta", no_argument, 0, '\x01', nullptr},
+        {"noPhoneHome", no_argument, 0, '\x01', nullptr},
+        {"referenceEstimates", no_argument, 0, '\x01', nullptr},
         {"haps", required_argument, 0, '\x02', nullptr},
         {"refHaps", required_argument, 0, '\x02', nullptr},
         {"prefix", required_argument, 0, '\x02', nullptr},
@@ -123,7 +125,7 @@ public:
         {"ChunkOverlapMb", required_argument, 0, '\x02', nullptr},
         {"ChunkLengthMb", required_argument, 0, '\x02', nullptr},
         {"cpus", required_argument, 0, '\x02', nullptr},
-        {"minRatio", no_argument, 0, '\x02', nullptr}
+        {"minRatio", required_argument, 0, '\x02', nullptr}
       })
   {
   }
@@ -259,6 +261,16 @@ public:
           meta_ = true;
           break;
         }
+        else if (std::string(long_options_[long_index].name) == "noPhoneHome")
+        {
+          std::cerr << "Warning: --noPhoneHome is deprecated and ignored\n";
+          break;
+        }
+        else if (std::string(long_options_[long_index].name) == "referenceEstimates")
+        {
+          std::cerr << "Warning: --referenceEstimates is deprecated and ignored\n";
+          break;
+        }
         // else pass through to default
       case '\x02':
         {


=====================================
src/unique_haplotype.cpp
=====================================
@@ -12,7 +12,7 @@ bool unique_haplotype_block::compress_variant(const reference_site_info& site_in
   if (alleles.empty())
     return false;
 
-  variants_.emplace_back(site_info.chrom, site_info.pos, site_info.ref, site_info.alt, site_info.err, site_info.recom, site_info.cm, 0, std::vector<std::int8_t>());
+  variants_.emplace_back(site_info.chrom, site_info.pos, site_info.id, site_info.ref, site_info.alt, site_info.err, site_info.recom, site_info.cm, 0, std::vector<std::int8_t>());
 
   if (variants_.size() == 1)
   {
@@ -23,6 +23,12 @@ bool unique_haplotype_block::compress_variant(const reference_site_info& site_in
 
     for (std::size_t i = 1; i < alleles.size(); ++i)
     {
+      if (savvy::typed_value::is_end_of_vector(alleles[i]))
+      {
+        unique_map_[i] = savvy::typed_value::end_of_vector_value<decltype(unique_map_)::value_type>();
+        continue;
+      }
+
       std::size_t j = 0;
       for (; j < variants_[0].gt.size(); ++j)
       {
@@ -49,6 +55,13 @@ bool unique_haplotype_block::compress_variant(const reference_site_info& site_in
 
     for (std::size_t i = 0; i < alleles.size(); ++i)
     {
+      if (savvy::typed_value::is_end_of_vector(alleles[i]))
+      {
+        if (!savvy::typed_value::is_end_of_vector(unique_map_[i]))
+          return std::cerr << "Error: sample ploidy is not consistent\n", false;
+        continue;
+      }
+
       std::size_t original_hap_idx = unique_map_[i];
       if (variants_.back().gt[original_hap_idx] != alleles[i])
       {
@@ -102,7 +115,7 @@ bool unique_haplotype_block::compress_variant(const reference_site_info& site_in
       variants_.back().ac += cardinalities_[i];
   }
 
-  assert(std::accumulate(cardinalities_.begin(), cardinalities_.end(), std::size_t(0)) == unique_map_.size());
+  assert(std::accumulate(cardinalities_.begin(), cardinalities_.end(), std::size_t(0)) == unique_map_.size() - std::count(unique_map_.begin(), unique_map_.end(), savvy::typed_value::end_of_vector_value<decltype(unique_map_)::value_type>()));
 
   return true;
 }
@@ -172,7 +185,7 @@ bool unique_haplotype_block::serialize(savvy::writer& output_file)
   {
     var = savvy::site_info(variants_.front().chrom,
       variants_.front().pos,
-      variants_.front().ref, {"<BLOCK>"});
+      variants_.front().ref, {"<BLOCK>"}, variants_.front().id);
 
     std::int32_t end_pos = variants_.front().pos;
     for (auto it = variants_.begin(); it != variants_.end(); ++it)
@@ -189,7 +202,7 @@ bool unique_haplotype_block::serialize(savvy::writer& output_file)
 
     for (auto it = variants_.begin(); it != variants_.end() && output_file; ++it)
     {
-      var = savvy::variant(it->chrom, it->pos, it->ref, {it->alt});
+      var = savvy::variant(it->chrom, it->pos, it->ref, {it->alt}, it->id);
       var.set_info("AC", std::int32_t(it->ac));
       var.set_info("AN", std::int32_t(unique_map_.size()));
       if (!std::isnan(it->err))
@@ -228,7 +241,10 @@ int unique_haplotype_block::deserialize(savvy::reader& input_file, savvy::varian
 
   cardinalities_.resize(n_reps);
   for (auto it = unique_map_.begin(); it != unique_map_.end(); ++it)
-    ++cardinalities_[*it];
+  {
+    if (*it >= 0)
+      ++cardinalities_[*it];
+  }
 
   variants_.reserve(n_variants);
   while (input_file >> var)
@@ -240,6 +256,7 @@ int unique_haplotype_block::deserialize(savvy::reader& input_file, savvy::varian
 
     variants_.back().chrom = var.chrom();
     variants_.back().pos = var.position();
+    variants_.back().id = var.id();
     variants_.back().ref = var.ref();
     variants_.back().alt = var.alts().size() ? var.alts()[0] : "";
     var.get_info("ERR", variants_.back().err);
@@ -263,6 +280,17 @@ int unique_haplotype_block::deserialize(savvy::reader& input_file, savvy::varian
   }
 }
 
+void unique_haplotype_block::remove_eov()
+{
+  for (auto it = unique_map_.begin(); it != unique_map_.end(); )
+  {
+    if (savvy::typed_value::is_end_of_vector(*it))
+      it = unique_map_.erase(it);
+    else
+      ++it;
+  }
+}
+
 bool unique_haplotype_block::deserialize(std::istream& is, int m3vcf_version, std::size_t n_haplotypes)
 {
   clear();
@@ -379,6 +407,7 @@ bool unique_haplotype_block::deserialize(std::istream& is, int m3vcf_version, st
 
     variants_[i].chrom = cols[0];
     variants_[i].pos = std::atoll(cols[1].c_str());
+    variants_[i].id = cols[2];
     variants_[i].ref = cols[3];
     variants_[i].alt = cols[4];
 


=====================================
src/unique_haplotype.hpp
=====================================
@@ -39,6 +39,9 @@ public:
   bool deserialize(std::istream& is, int m3vcf_version, std::size_t n_haplotypes);
   int deserialize(savvy::reader& input_file, savvy::variant& var); // return <0 is error, 0 is EOF, >0 good
   bool serialize(savvy::writer& output_file);
+
+  // removes end_of_vector values from unique_map. Can cause serialization errors if not handled properly.
+  void remove_eov();
 };
 
 class reduced_haplotypes


=====================================
src/variant.hpp
=====================================
@@ -10,6 +10,7 @@ struct target_variant
 {
   std::string chrom;
   std::uint32_t pos;
+  std::string id;
   std::string ref;
   std::string alt;
   bool in_tar; // site exists in target file
@@ -24,6 +25,7 @@ struct reference_site_info
 {
   std::string chrom;
   std::uint32_t pos = 0;
+  std::string id;
   std::string ref;
   std::string alt;
   float err = std::numeric_limits<float>::quiet_NaN();
@@ -35,6 +37,7 @@ struct reference_site_info
 
   reference_site_info(std::string _chrom,
     std::uint32_t _pos,
+    std::string _id,
     std::string _ref,
     std::string _alt,
     float _err,
@@ -43,6 +46,7 @@ struct reference_site_info
     :
     chrom(std::move(_chrom)),
     pos(_pos),
+    id(_id),
     ref(std::move(_ref)),
     alt(std::move(_alt)),
     err(_err),
@@ -58,6 +62,7 @@ struct reference_variant : public reference_site_info
 
   reference_variant(const std::string& _chrom,
     std::uint32_t _pos,
+    const std::string& _id,
     const std::string& _ref,
     const std::string& _alt,
     float _err,
@@ -66,7 +71,7 @@ struct reference_variant : public reference_site_info
     std::size_t _ac,
     std::vector<std::int8_t> _gt)
     :
-    reference_site_info(_chrom, _pos, _ref, _alt, _err, _recom, _cm),
+    reference_site_info(_chrom, _pos, _id, _ref, _alt, _err, _recom, _cm),
     ac(_ac),
     gt(std::move(_gt))
   {
@@ -81,6 +86,7 @@ struct sparse_ref_variant : public reference_site_info
   std::vector<std::uint32_t> alt_allele_offsets;
   sparse_ref_variant(const std::string& _chrom,
     std::uint32_t _pos,
+    const std::string& _id,
     const std::string& _ref,
     const std::string& _alt,
     float _err,
@@ -89,7 +95,7 @@ struct sparse_ref_variant : public reference_site_info
     std::size_t _ac,
     const std::size_t* off_it, const std::size_t* off_it_end)
     :
-    reference_site_info(_chrom, _pos, _ref, _alt, _err, _recom, _cm),
+    reference_site_info(_chrom, _pos, _id, _ref, _alt, _err, _recom, _cm),
     ac(_ac),
     alt_allele_offsets(off_it, off_it_end)
   {


=====================================
test/input/ref_panel.vcf
=====================================
@@ -0,0 +1,39 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS,Description="All filters passed">
+##contig=<ID=chr20,length=64444167>
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Alternate Allele Counts">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total Number Allele Counts">
+##phasing=phased
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	HG01879	HG00551	HG00403	HG00096	HG01583
+chr20	10000775	rs13042390	A	G	255	PASS	AC=4;AN=10	GT	0|0	0|1	1|0	0|1	1|0
+chr20	10001327	rs141420999	A	C	255	PASS	AC=1;AN=10	GT	0|0	0|0	0|0	0|0	0|1
+chr20	10001999	rs13038545	A	T	255	PASS	AC=1;AN=10	GT	0|0	0|0	0|0	0|1	0|0
+chr20	10002000	rs13037970	C	T	255	PASS	AC=1;AN=10	GT	0|0	0|0	0|0	0|1	0|0
+chr20	10002077	rs6057071	G	A	255	PASS	AC=8;AN=10	GT	1|1	0|1	1|1	1|1	0|1
+chr20	10002092	rs559251430	A	G	255	PASS	AC=1;AN=10	GT	0|0	0|0	1|0	0|0	0|0
+chr20	10002161	rs11906973	A	T	255	PASS	AC=1;AN=10	GT	0|0	0|0	0|0	1|0	0|0
+chr20	10002243	rs60292786	T	C	255	PASS	AC=1;AN=10	GT	0|0	0|0	0|0	1|0	0|0
+chr20	10002468	rs6118851	G	A	255	PASS	AC=2;AN=10	GT	0|0	0|0	1|1	0|0	0|0
+chr20	10002652	rs6039649	T	C	255	PASS	AC=7;AN=10	GT	1|1	0|0	1|1	1|0	1|1
+chr20	10002751	rs74623966	A	G	255	PASS	AC=1;AN=10	GT	0|0	0|0	0|0	1|0	0|0
+chr20	10004393	rs17507263	A	G	255	PASS	AC=4;AN=10	GT	0|1	0|0	1|1	0|0	0|1
+chr20	10005015	rs6108374	G	A	255	PASS	AC=5;AN=10	GT	0|1	0|1	1|1	0|0	0|1
+chr20	10005281	rs4816196	T	C	255	PASS	AC=3;AN=10	GT	0|0	0|0	1|1	0|0	0|1
+chr20	10005437	rs8115116	A	G	255	PASS	AC=1;AN=10	GT	0|0	0|1	0|0	0|0	0|0
+chr20	10005695	rs6087099	A	G	255	PASS	AC=8;AN=10	GT	1|1	1|1	1|1	0|1	0|1
+chr20	10005758	rs6057072	C	T	255	PASS	AC=4;AN=10	GT	0|1	0|0	1|1	0|0	0|1
+chr20	10005834	rs56242456	T	C	255	PASS	AC=1;AN=10	GT	0|0	0|1	0|0	0|0	0|0
+chr20	10006207	rs78434004	C	A	255	PASS	AC=2;AN=10	GT	0|0	0|0	0|1	0|0	0|1
+chr20	10006628	rs8123132	C	T	255	PASS	AC=3;AN=10	GT	1|0	1|0	0|0	0|1	0|0
+chr20	10006761	rs6118852	A	G	255	PASS	AC=4;AN=10	GT	0|0	0|1	1|1	0|0	0|1
+chr20	10006826	rs4816197	A	G	255	PASS	AC=4;AN=10	GT	0|0	0|1	1|1	0|0	0|1
+chr20	10007565	rs8124931	C	A	255	PASS	AC=1;AN=10	GT	0|1	0|0	0|0	0|0	0|0
+chr20	10008603	rs4816199	G	A	255	PASS	AC=8;AN=10	GT	1|1	1|1	1|1	0|1	0|1
+chr20	10009048	rs7343217	G	A	255	PASS	AC=3;AN=10	GT	0|0	0|0	1|1	0|0	0|1
+chr20	10009051	rs881767	A	G	255	PASS	AC=4;AN=10	GT	1|1	1|0	0|0	0|1	0|0
+chr20	10009180	rs2327258	C	T	255	PASS	AC=8;AN=10	GT	1|1	1|1	1|1	0|1	0|1
+chr20	10009286	rs4078321	G	A	255	PASS	AC=1;AN=10	GT	0|0	0|1	0|0	0|0	0|0
+chr20	10009481	rs724674	G	A	255	PASS	AC=8;AN=10	GT	1|1	1|1	1|1	0|1	0|1
+chr20	10009685	rs35232282	CTA	C	255	PASS	AC=3;AN=10	GT	0|0	0|0	1|1	0|0	0|1
+chr20	10009927	rs12480468	C	T	255	PASS	AC=1;AN=10	GT	0|0	0|1	0|0	0|0	0|0


=====================================
test/input/ref_panel_with_haploid_sample.vcf
=====================================
@@ -0,0 +1,39 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS,Description="All filters passed">
+##contig=<ID=chr20,length=64444167>
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Alternate Allele Counts">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total Number Allele Counts">
+##phasing=phased
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	HG01879	HG00551	HG00403	HG00096	HG01583
+chr20	10000775	rs13042390	A	G	255	PASS	AC=4;AN=10	GT	0|0	0|1	0	0|1	1|0
+chr20	10001327	rs141420999	A	C	255	PASS	AC=1;AN=10	GT	0|0	0|0	0	0|0	0|1
+chr20	10001999	rs13038545	A	T	255	PASS	AC=1;AN=10	GT	0|0	0|0	0	0|1	0|0
+chr20	10002000	rs13037970	C	T	255	PASS	AC=1;AN=10	GT	0|0	0|0	0	0|1	0|0
+chr20	10002077	rs6057071	G	A	255	PASS	AC=8;AN=10	GT	1|1	0|1	1	1|1	0|1
+chr20	10002092	rs559251430	A	G	255	PASS	AC=1;AN=10	GT	0|0	0|0	0	0|0	0|0
+chr20	10002161	rs11906973	A	T	255	PASS	AC=1;AN=10	GT	0|0	0|0	0	1|0	0|0
+chr20	10002243	rs60292786	T	C	255	PASS	AC=1;AN=10	GT	0|0	0|0	0	1|0	0|0
+chr20	10002468	rs6118851	G	A	255	PASS	AC=2;AN=10	GT	0|0	0|0	1	0|0	0|0
+chr20	10002652	rs6039649	T	C	255	PASS	AC=7;AN=10	GT	1|1	0|0	1	1|0	1|1
+chr20	10002751	rs74623966	A	G	255	PASS	AC=1;AN=10	GT	0|0	0|0	0	1|0	0|0
+chr20	10004393	rs17507263	A	G	255	PASS	AC=4;AN=10	GT	0|1	0|0	1	0|0	0|1
+chr20	10005015	rs6108374	G	A	255	PASS	AC=5;AN=10	GT	0|1	0|1	1	0|0	0|1
+chr20	10005281	rs4816196	T	C	255	PASS	AC=3;AN=10	GT	0|0	0|0	1	0|0	0|1
+chr20	10005437	rs8115116	A	G	255	PASS	AC=1;AN=10	GT	0|0	0|1	0	0|0	0|0
+chr20	10005695	rs6087099	A	G	255	PASS	AC=8;AN=10	GT	1|1	1|1	1	0|1	0|1
+chr20	10005758	rs6057072	C	T	255	PASS	AC=4;AN=10	GT	0|1	0|0	1	0|0	0|1
+chr20	10005834	rs56242456	T	C	255	PASS	AC=1;AN=10	GT	0|0	0|1	0	0|0	0|0
+chr20	10006207	rs78434004	C	A	255	PASS	AC=2;AN=10	GT	0|0	0|0	1	0|0	0|1
+chr20	10006628	rs8123132	C	T	255	PASS	AC=3;AN=10	GT	1|0	1|0	0	0|1	0|0
+chr20	10006761	rs6118852	A	G	255	PASS	AC=4;AN=10	GT	0|0	0|1	1	0|0	0|1
+chr20	10006826	rs4816197	A	G	255	PASS	AC=4;AN=10	GT	0|0	0|1	1	0|0	0|1
+chr20	10007565	rs8124931	C	A	255	PASS	AC=1;AN=10	GT	0|1	0|0	0	0|0	0|0
+chr20	10008603	rs4816199	G	A	255	PASS	AC=8;AN=10	GT	1|1	1|1	1	0|1	0|1
+chr20	10009048	rs7343217	G	A	255	PASS	AC=3;AN=10	GT	0|0	0|0	1	0|0	0|1
+chr20	10009051	rs881767	A	G	255	PASS	AC=4;AN=10	GT	1|1	1|0	0	0|1	0|0
+chr20	10009180	rs2327258	C	T	255	PASS	AC=8;AN=10	GT	1|1	1|1	1	0|1	0|1
+chr20	10009286	rs4078321	G	A	255	PASS	AC=1;AN=10	GT	0|0	0|1	0	0|0	0|0
+chr20	10009481	rs724674	G	A	255	PASS	AC=8;AN=10	GT	1|1	1|1	1	0|1	0|1
+chr20	10009685	rs35232282	CTA	C	255	PASS	AC=3;AN=10	GT	0|0	0|0	1	0|0	0|1
+chr20	10009927	rs12480468	C	T	255	PASS	AC=1;AN=10	GT	0|0	0|1	0	0|0	0|0


=====================================
test/input/tar_panel.vcf
=====================================
@@ -0,0 +1,36 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS,Description="All filters passed">
+##contig=<ID=chr20,length=64444167>
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Alternate Allele Counts">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total Number Allele Counts">
+##phasing=phased
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	HG01879	HG00551	HG00403	HG00096	HG01583
+chr20	10000775	rs13042390	A	G	255	PASS	AC=4;AN=10	GT	0|0	0|1	1|0	0|1	1|0
+chr20	10001327	rs141420999	A	C	255	PASS	AC=1;AN=10	GT	0|0	0|0	0|0	0|0	0|1
+chr20	10002077	rs6057071	G	A	255	PASS	AC=8;AN=10	GT	1|1	0|1	1|1	1|1	0|1
+chr20	10002161	rs11906973	A	T	255	PASS	AC=1;AN=10	GT	0|0	0|0	0|0	1|0	0|0
+chr20	10002243	rs60292786	T	C	255	PASS	AC=1;AN=10	GT	0|0	0|0	0|0	1|0	0|0
+chr20	10002468	rs6118851	G	A	255	PASS	AC=2;AN=10	GT	0|0	0|0	1|1	0|0	0|0
+chr20	10002652	rs6039649	T	C	255	PASS	AC=7;AN=10	GT	1|1	0|0	1|1	1|0	1|1
+chr20	10002751	rs74623966	A	G	255	PASS	AC=1;AN=10	GT	0|0	0|0	0|0	1|0	0|0
+chr20	10004393	rs17507263	A	G	255	PASS	AC=4;AN=10	GT	0|1	0|0	1|1	0|0	0|1
+chr20	10005015	rs6108374	G	A	255	PASS	AC=5;AN=10	GT	0|1	0|1	1|1	0|0	0|1
+chr20	10005281	rs4816196	T	C	255	PASS	AC=3;AN=10	GT	0|0	0|0	1|1	0|0	0|1
+chr20	10005437	rs8115116	A	G	255	PASS	AC=1;AN=10	GT	0|0	0|1	0|0	0|0	0|0
+chr20	10005695	rs6087099	A	G	255	PASS	AC=8;AN=10	GT	1|1	1|1	1|1	0|1	0|1
+chr20	10005758	rs6057072	C	T	255	PASS	AC=4;AN=10	GT	0|1	0|0	1|1	0|0	0|1
+chr20	10005834	rs56242456	T	C	255	PASS	AC=1;AN=10	GT	0|0	0|1	0|0	0|0	0|0
+chr20	10006207	rs78434004	C	A	255	PASS	AC=2;AN=10	GT	0|0	0|0	0|1	0|0	0|1
+chr20	10006628	rs8123132	C	T	255	PASS	AC=3;AN=10	GT	1|0	1|0	0|0	0|1	0|0
+chr20	10006761	rs6118852	A	G	255	PASS	AC=4;AN=10	GT	0|0	0|1	1|1	0|0	0|1
+chr20	10006826	rs4816197	A	G	255	PASS	AC=4;AN=10	GT	0|0	0|1	1|1	0|0	0|1
+chr20	10007565	rs8124931	C	A	255	PASS	AC=1;AN=10	GT	0|1	0|0	0|0	0|0	0|0
+chr20	10008603	rs4816199	G	A	255	PASS	AC=8;AN=10	GT	1|1	1|1	1|1	0|1	0|1
+chr20	10009048	rs7343217	G	A	255	PASS	AC=3;AN=10	GT	0|0	0|0	1|1	0|0	0|1
+chr20	10009051	rs881767	A	G	255	PASS	AC=4;AN=10	GT	1|1	1|0	0|0	0|1	0|0
+chr20	10009180	rs2327258	C	T	255	PASS	AC=8;AN=10	GT	1|1	1|1	1|1	0|1	0|1
+chr20	10009286	rs4078321	G	A	255	PASS	AC=1;AN=10	GT	0|0	0|1	0|0	0|0	0|0
+chr20	10009481	rs724674	G	A	255	PASS	AC=8;AN=10	GT	1|1	1|1	1|1	0|1	0|1
+chr20	10009685	rs35232282	CTA	C	255	PASS	AC=3;AN=10	GT	0|0	0|0	1|1	0|0	0|1
+chr20	10009927	rs12480468	C	T	255	PASS	AC=1;AN=10	GT	0|0	0|1	0|0	0|0	0|0


=====================================
test/input/tar_panel_with_haploid_sample.vcf
=====================================
@@ -0,0 +1,36 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS,Description="All filters passed">
+##contig=<ID=chr20,length=64444167>
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Alternate Allele Counts">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total Number Allele Counts">
+##phasing=phased
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	HG01879	HG00551	HG00403	HG00096	HG01583
+chr20	10000775	rs13042390	A	G	255	PASS	AC=4;AN=10	GT	0|0	0|1	0	0|1	1|0
+chr20	10001327	rs141420999	A	C	255	PASS	AC=1;AN=10	GT	0|0	0|0	0	0|0	0|1
+chr20	10002077	rs6057071	G	A	255	PASS	AC=8;AN=10	GT	1|1	0|1	1	1|1	0|1
+chr20	10002161	rs11906973	A	T	255	PASS	AC=1;AN=10	GT	0|0	0|0	0	1|0	0|0
+chr20	10002243	rs60292786	T	C	255	PASS	AC=1;AN=10	GT	0|0	0|0	0	1|0	0|0
+chr20	10002468	rs6118851	G	A	255	PASS	AC=2;AN=10	GT	0|0	0|0	1	0|0	0|0
+chr20	10002652	rs6039649	T	C	255	PASS	AC=7;AN=10	GT	1|1	0|0	1	1|0	1|1
+chr20	10002751	rs74623966	A	G	255	PASS	AC=1;AN=10	GT	0|0	0|0	0	1|0	0|0
+chr20	10004393	rs17507263	A	G	255	PASS	AC=4;AN=10	GT	0|1	0|0	1	0|0	0|1
+chr20	10005015	rs6108374	G	A	255	PASS	AC=5;AN=10	GT	0|1	0|1	1	0|0	0|1
+chr20	10005281	rs4816196	T	C	255	PASS	AC=3;AN=10	GT	0|0	0|0	1	0|0	0|1
+chr20	10005437	rs8115116	A	G	255	PASS	AC=1;AN=10	GT	0|0	0|1	0	0|0	0|0
+chr20	10005695	rs6087099	A	G	255	PASS	AC=8;AN=10	GT	1|1	1|1	1	0|1	0|1
+chr20	10005758	rs6057072	C	T	255	PASS	AC=4;AN=10	GT	0|1	0|0	1	0|0	0|1
+chr20	10005834	rs56242456	T	C	255	PASS	AC=1;AN=10	GT	0|0	0|1	0	0|0	0|0
+chr20	10006207	rs78434004	C	A	255	PASS	AC=2;AN=10	GT	0|0	0|0	1	0|0	0|1
+chr20	10006628	rs8123132	C	T	255	PASS	AC=3;AN=10	GT	1|0	1|0	0	0|1	0|0
+chr20	10006761	rs6118852	A	G	255	PASS	AC=4;AN=10	GT	0|0	0|1	1	0|0	0|1
+chr20	10006826	rs4816197	A	G	255	PASS	AC=4;AN=10	GT	0|0	0|1	1	0|0	0|1
+chr20	10007565	rs8124931	C	A	255	PASS	AC=1;AN=10	GT	0|1	0|0	0	0|0	0|0
+chr20	10008603	rs4816199	G	A	255	PASS	AC=8;AN=10	GT	1|1	1|1	1	0|1	0|1
+chr20	10009048	rs7343217	G	A	255	PASS	AC=3;AN=10	GT	0|0	0|0	1	0|0	0|1
+chr20	10009051	rs881767	A	G	255	PASS	AC=4;AN=10	GT	1|1	1|0	0	0|1	0|0
+chr20	10009180	rs2327258	C	T	255	PASS	AC=8;AN=10	GT	1|1	1|1	1	0|1	0|1
+chr20	10009286	rs4078321	G	A	255	PASS	AC=1;AN=10	GT	0|0	0|1	0	0|0	0|0
+chr20	10009481	rs724674	G	A	255	PASS	AC=8;AN=10	GT	1|1	1|1	1	0|1	0|1
+chr20	10009685	rs35232282	CTA	C	255	PASS	AC=3;AN=10	GT	0|0	0|0	1	0|0	0|1
+chr20	10009927	rs12480468	C	T	255	PASS	AC=1;AN=10	GT	0|0	0|1	0	0|0	0|0


=====================================
test/simple-test.sh
=====================================
@@ -0,0 +1,25 @@
+#!/bin/bash
+set -euo pipefail
+
+m4=$1
+d=$2
+mkdir -p $d
+
+
+ref_vcf=$d/$(basename $3).gz
+ref_msav=$d/$(basename $3 .vcf).msav
+tar_vcf=$d/$(basename $4).gz
+imputed_vcf=$d/imputed.vcf.gz
+
+which bcftools || (>&2 echo "Error: bcftools is required to run tests. On debian run 'apt install bcftools'"; exit 1)
+
+bcftools view $3 -Oz -o $ref_vcf
+bcftools index $ref_vcf
+bcftools view $4 -Oz -o $tar_vcf
+bcftools index $tar_vcf
+$m4 --compress-reference $ref_vcf > $ref_msav
+$m4 $ref_msav $tar_vcf -f GT -O vcf.gz > $imputed_vcf
+
+gzip -cd $imputed_vcf | grep -v "^#" | cut -f9- > $d/imputed_gt_matrix.tsv
+gzip -cd $ref_vcf | grep -v "^#" | cut -f9- > $d/ref_gt_matrix.tsv
+diff $d/ref_gt_matrix.tsv $d/imputed_gt_matrix.tsv
\ No newline at end of file



View it on GitLab: https://salsa.debian.org/med-team/minimac4/-/compare/4bf08043478e303c5d21ccea25e20000b06183f7...9566da074d2ce69ed70dd5b9cae6db342d1cd493

-- 
View it on GitLab: https://salsa.debian.org/med-team/minimac4/-/compare/4bf08043478e303c5d21ccea25e20000b06183f7...9566da074d2ce69ed70dd5b9cae6db342d1cd493
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20230124/d8804cdd/attachment-0001.htm>


More information about the debian-med-commit mailing list