[med-svn] [Git][med-team/minimac4][upstream] New upstream version 4.1.4

Étienne Mollier (@emollier) gitlab at salsa.debian.org
Wed Jul 19 23:46:30 BST 2023



Étienne Mollier pushed to branch upstream at Debian Med / minimac4


Commits:
6fe4c281 by Étienne Mollier at 2023-07-20T00:42:06+02:00
New upstream version 4.1.4
- - - - -


16 changed files:

- + CMakeGet.cmake
- CMakeLists.txt
- README.md
- − dep/libstatgen.cmake
- + dependencies.cmake
- − install.sh
- package-linux.sh
- packaging-dockerfile-ubuntu20 → packaging-dockerfile-alpine3.17
- src/dosage_writer.cpp
- src/hidden_markov_model.cpp
- src/hidden_markov_model.hpp
- src/input_prep.cpp
- src/main.cpp
- src/prog_args.hpp
- src/recombination.hpp
- test/simple-test.sh


Changes:

=====================================
CMakeGet.cmake
=====================================
@@ -0,0 +1,473 @@
+
+set(BUILD_DEPS On CACHE BOOL "Build dependencies")
+
+include(ProcessorCount)
+
+if(CMAKE_VERSION VERSION_LESS "3.4")
+function(cget_parse_arguments prefix _optionNames _singleArgNames _multiArgNames)
+    # first set all result variables to empty/FALSE
+    foreach(arg_name ${_singleArgNames} ${_multiArgNames})
+        set(${prefix}_${arg_name})
+    endforeach()
+
+    foreach(option ${_optionNames})
+        set(${prefix}_${option} FALSE)
+    endforeach()
+
+    set(${prefix}_UNPARSED_ARGUMENTS)
+
+    set(insideValues FALSE)
+    set(currentArgName)
+
+    # now iterate over all arguments and fill the result variables
+    foreach(currentArg ${ARGN})
+        list(FIND _optionNames "${currentArg}" optionIndex)  # ... then this marks the end of the arguments belonging to this keyword
+        list(FIND _singleArgNames "${currentArg}" singleArgIndex)  # ... then this marks the end of the arguments belonging to this keyword
+        list(FIND _multiArgNames "${currentArg}" multiArgIndex)  # ... then this marks the end of the arguments belonging to this keyword
+
+        if(${optionIndex} EQUAL -1  AND  ${singleArgIndex} EQUAL -1  AND  ${multiArgIndex} EQUAL -1)
+            if(insideValues)
+                if("${insideValues}" STREQUAL "SINGLE")
+                    set(${prefix}_${currentArgName} ${currentArg})
+                    set(insideValues FALSE)
+                elseif("${insideValues}" STREQUAL "MULTI")
+                    list(APPEND ${prefix}_${currentArgName} ${currentArg})
+                endif()
+            else()
+                list(APPEND ${prefix}_UNPARSED_ARGUMENTS ${currentArg})
+            endif()
+        else()
+            if(NOT ${optionIndex} EQUAL -1)
+                set(${prefix}_${currentArg} TRUE)
+                set(insideValues FALSE)
+            elseif(NOT ${singleArgIndex} EQUAL -1)
+                set(currentArgName ${currentArg})
+                set(${prefix}_${currentArgName})
+                set(insideValues "SINGLE")
+            elseif(NOT ${multiArgIndex} EQUAL -1)
+                set(currentArgName ${currentArg})
+                set(insideValues "MULTI")
+            endif()
+        endif()
+
+    endforeach()
+
+    # propagate the result variables to the caller:
+    foreach(arg_name ${_singleArgNames} ${_multiArgNames} ${_optionNames})
+        set(${prefix}_${arg_name}  ${${prefix}_${arg_name}} PARENT_SCOPE)
+    endforeach()
+    set(${prefix}_UNPARSED_ARGUMENTS ${${prefix}_UNPARSED_ARGUMENTS} PARENT_SCOPE)
+
+endfunction()
+else()
+    macro(cget_parse_arguments prefix _optionNames _singleArgNames _multiArgNames)
+        cmake_parse_arguments(${prefix} "${_optionNames}" "${_singleArgNames}" "${_multiArgNames}" ${ARGN})
+    endmacro()
+endif()
+
+set(_cget_tmp_dir "${CMAKE_CURRENT_LIST_DIR}/tmp")
+if(CMAKE_HOST_UNIX)
+    foreach(dir "$ENV{TMP}" "$ENV{TMPDIR}" "/tmp")
+        if(EXISTS "${dir}" AND NOT "${dir}" STREQUAL "")
+            file(TO_CMAKE_PATH "${dir}" _cget_tmp_dir)
+            break()
+        endif()
+    endforeach()
+endif()
+set(_tmp_dir_count 0)
+
+macro(cget_mktemp_dir OUT)
+    string(TIMESTAMP cget_mktemp_dir_STAMP "%H-%M-%S")
+    string(RANDOM cget_mktemp_dir_RAND)
+    set(cget_mktemp_dir_PREFIX "${_cget_tmp_dir}/cget-${cget_mktemp_dir_STAMP}-${cget_mktemp_dir_RAND}")
+    math(EXPR _tmp_dir_count "${_tmp_dir_count} + 1")
+    set(${OUT} "${cget_mktemp_dir_PREFIX}-${_tmp_dir_count}")
+    file(MAKE_DIRECTORY ${${OUT}})
+endmacro()
+
+macro(cget_set_parse_flag VAR OPT)
+    unset(${VAR}_${OPT})
+    foreach(FLAG ${ARGN})
+        if(${VAR}_private_${FLAG})
+            set(${VAR}_${OPT} ${${VAR}_private_${FLAG}})
+        endif()
+        unset(${VAR}_private_${FLAG})
+    endforeach()
+endmacro()
+
+macro(cget_parse_requirement VAR)
+    unset(${VAR}_PKG)
+    set(${VAR}_private_options --build -b --test -t)
+    set(${VAR}_private_oneValueArgs -H --hash -X --cmake --file -f)
+    set(${VAR}_private_multiValueArgs -D --define)
+
+    set(cget_parse_requirement_args)
+    foreach(ARG ${ARGN})
+        if(ARG MATCHES "^-([^-])(.+)")
+            list(APPEND cget_parse_requirement_args -${CMAKE_MATCH_1})
+            list(APPEND cget_parse_requirement_args ${CMAKE_MATCH_2})
+        else()
+            list(APPEND cget_parse_requirement_args ${ARG})
+        endif()
+    endforeach()
+
+    cget_parse_arguments(${VAR}_private "${${VAR}_private_options}" "${${VAR}_private_oneValueArgs}" "${${VAR}_private_multiValueArgs}" ${cget_parse_requirement_args})
+
+    if(${VAR}_private_UNPARSED_ARGUMENTS)
+        list(GET ${VAR}_private_UNPARSED_ARGUMENTS 0 ${VAR}_PKG)
+        list(LENGTH ${VAR}_private_UNPARSED_ARGUMENTS ${VAR}_private_UNPARSED_ARGUMENTS_SIZE)
+        if(${VAR}_private_UNPARSED_ARGUMENTS_SIZE GREATER 1)
+            list(REMOVE_AT ${VAR}_private_UNPARSED_ARGUMENTS 0)
+            message(WARNING "Unknown keywords given in requirements file: \"${${VAR}_private_UNPARSED_ARGUMENTS}\"")
+        endif()
+    endif()
+
+    cget_set_parse_flag(${VAR} BUILD --build -b)
+    cget_set_parse_flag(${VAR} TEST --test -t)
+    cget_set_parse_flag(${VAR} CMAKE --cmake -X)
+    cget_set_parse_flag(${VAR} HASH --hash -H)
+    cget_set_parse_flag(${VAR} DEFINE --define -D)
+    cget_set_parse_flag(${VAR} FILE --file -f)
+    set(${VAR}_CMAKE_ARGS)
+    foreach(DEFINE ${${VAR}_DEFINE})
+        list(APPEND ${VAR}_CMAKE_ARGS "-D${DEFINE}")
+    endforeach()
+endmacro()
+
+function(cget_exec)
+    execute_process(${ARGN} RESULT_VARIABLE RESULT)
+    if(NOT RESULT EQUAL 0)
+        string(REPLACE ";" " " CMD "${ARGN}")
+        string(REPLACE "COMMAND " "" CMD "${CMD}")
+        message(FATAL_ERROR "Process failed: ${CMD}")
+    endif()
+endfunction()
+
+function(cget_download)
+    file(DOWNLOAD ${ARGN} STATUS RESULT_LIST)
+    list(GET RESULT_LIST 0 RESULT)
+    list(GET RESULT_LIST 1 RESULT_MESSAGE)
+    if(NOT RESULT EQUAL 0)
+        message(FATAL_ERROR "Download failed: ${RESULT_MESSAGE}: ${ARGN}")
+    endif()
+endfunction()
+
+set(_cget_install_dir_count 0)
+set_property(GLOBAL PROPERTY _cget_install_dir_count 0)
+function(cget_install_dir DIR)
+    set(options)
+    set(oneValueArgs PREFIX BUILD_DIR)
+    set(multiValueArgs CMAKE_ARGS)
+
+    cget_parse_arguments(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+    if(PARSE_UNPARSED_ARGUMENTS)
+        message(FATAL_ERROR "Unknown keywords given to cget_install_dir(): \"${PARSE_UNPARSED_ARGUMENTS}\"")
+    endif()
+
+    if(NOT EXISTS ${DIR}/CMakeLists.txt)
+        message(FATAL_ERROR "cget_install_dir(): ${DIR} is not a cmake package")
+    endif()
+
+    set(PREFIX ${PARSE_PREFIX})
+    set(BUILD_DIR ${PARSE_BUILD_DIR})
+    if(NOT EXISTS ${BUILD_DIR})
+        file(MAKE_DIRECTORY ${BUILD_DIR})
+    endif()
+    cget_exec(COMMAND ${CMAKE_COMMAND}
+        -DCMAKE_PREFIX_PATH=${PREFIX} 
+        -DCMAKE_INSTALL_PREFIX=${PREFIX}
+        ${PARSE_CMAKE_ARGS}
+        ${DIR}
+        WORKING_DIRECTORY ${BUILD_DIR}
+    )
+    set(CONFIG "Release")
+    foreach(ARG ${PARSE_CMAKE_ARGS})
+        string(TOUPPER ${ARG} ARG_UPPER)
+        if(ARG_UPPER MATCHES "-DCMAKE_BUILD_TYPE")
+            string(SUBSTRING ${ARG_UPPER} 19 -1 BUILD_TYPE)
+            if(BUILD_TYPE STREQUAL "DEBUG")
+                set(CONFIG "Debug")
+            endif()
+        endif()
+    endforeach()
+    set(BUILD_ARGS)
+    if(EXISTS ${BUILD_DIR}/Makefile)
+        ProcessorCount(N)
+        set(BUILD_ARGS -- -j ${N})
+    endif()
+    cget_exec(COMMAND ${CMAKE_COMMAND} --build ${BUILD_DIR} --config ${CONFIG} ${BUILD_ARGS})
+    cget_exec(COMMAND ${CMAKE_COMMAND} --build ${BUILD_DIR} --target install --config ${CONFIG} ${BUILD_ARGS})
+
+    get_property(_tmp_count GLOBAL PROPERTY _cget_install_dir_count)
+    math(EXPR _tmp_count "${_tmp_count} + 1")
+    set_property(GLOBAL PROPERTY _cget_install_dir_count ${_tmp_count})
+
+    file(REMOVE_RECURSE ${BUILD_DIR})
+endfunction()
+
+function(cget_parse_src_name URL VARIANT SRC)
+    if(SRC MATCHES "@")
+        string(REPLACE "@" ";" SRC_LIST ${SRC})
+        list(GET SRC_LIST 0 _URL)
+        list(GET SRC_LIST 1 _VARIANT)
+        set(${URL} ${_URL} PARENT_SCOPE)
+        set(${VARIANT} ${_VARIANT} PARENT_SCOPE)
+    else()
+        set(${URL} ${SRC} PARENT_SCOPE)
+        set(${VARIANT} ${ARGN} PARENT_SCOPE)
+    endif()
+endfunction()
+
+function(cget_find_recipe RECIPE_DIR SRC)
+    cget_parse_src_name(NAME VARIANT ${SRC})
+    foreach(RECIPE ${ARGN})
+        if(EXISTS ${RECIPE}/${NAME}/package.txt OR EXISTS ${RECIPE}/${NAME}/requirements.txt)
+            # TODO: Check variant
+            set(${RECIPE_DIR} ${RECIPE}/${NAME} PARENT_SCOPE)
+            break()
+        endif()
+    endforeach()
+endfunction()
+
+function(cget_validate_gh_src NAME)
+    if(NAME MATCHES "[^A-Za-z0-9_./@-]")
+        message(FATAL_ERROR "Not a valid name: ${NAME}")
+    endif()
+endfunction()
+
+function(cget_parse_pkg NAME URL PKG)
+    string(REPLACE "," ";" PKG_NAMES ${PKG})
+    list(GET PKG_NAMES -1 PKG_SRC)
+    list(GET PKG_NAMES 0 PKG_NAME)
+    set(${NAME} ${PKG_NAME} PARENT_SCOPE)
+    if(PKG_SRC MATCHES "://")
+        set(${URL} ${PKG_SRC} PARENT_SCOPE)
+    else()
+        get_filename_component(PKG_SRC_FULL ${PKG_SRC} ABSOLUTE)
+        if(EXISTS ${PKG_SRC_FULL})
+            set(${URL} file://${PKG_SRC_FULL} PARENT_SCOPE)
+        else()
+            # Parse recipe dir
+            cget_find_recipe(RECIPE_DIR ${PKG_SRC} ${ARGN})
+            if(EXISTS ${RECIPE_DIR})
+                set(${URL} recipe://${RECIPE_DIR} PARENT_SCOPE)
+                set(${NAME} ${PKG_SRC} ${PKG_NAME} PARENT_SCOPE)
+            else()
+                cget_validate_gh_src(${PKG_SRC})
+                # Parse github url
+                cget_parse_src_name(GH_NAME GH_BRANCH ${PKG_SRC} HEAD)
+                cget_validate_gh_src(${GH_NAME})
+                cget_validate_gh_src(${GH_BRANCH})
+                set(${NAME} ${GH_NAME} ${PKG_NAME} PARENT_SCOPE)
+                if(GH_NAME MATCHES "/")
+                    set(${URL} "https://github.com/${GH_NAME}/archive/${GH_BRANCH}.tar.gz" PARENT_SCOPE)
+                else()
+                    set(${URL} "https://github.com/${GH_NAME}/${GH_NAME}/archive/${GH_BRANCH}.tar.gz" PARENT_SCOPE)
+                endif()
+            endif()
+        endif()
+    endif()
+endfunction()
+
+function(cget_unpack FILENAME DIR)
+    file(MAKE_DIRECTORY ${DIR})
+    cget_exec(COMMAND ${CMAKE_COMMAND} -E tar xzf ${FILENAME}
+        WORKING_DIRECTORY ${DIR}
+    )
+endfunction()
+
+function(cget_fetch DIR DOWNLOAD_DIR URL)
+    if("${URL}" MATCHES "file://")
+        string(REPLACE "file://" "" LOCAL_PATH ${URL})
+        if(IS_DIRECTORY ${LOCAL_PATH})
+            file(COPY ${LOCAL_PATH} DESTINATION ${DOWNLOAD_DIR}/)
+        else()
+            cget_unpack(${LOCAL_PATH} ${DOWNLOAD_DIR})
+        endif()
+    else()
+        string(REPLACE "/" ";" PATH_LIST ${URL})
+        
+        list(GET PATH_LIST -1 FILENAME)
+        message("Downloading ${URL}")
+        cget_download(${URL} ${DOWNLOAD_DIR}/${FILENAME} ${ARGN})
+        cget_unpack("${DOWNLOAD_DIR}/${FILENAME}" "${DOWNLOAD_DIR}")
+        file(REMOVE ${DOWNLOAD_DIR}/${FILENAME})
+    endif()
+    set(${DIR} ${DOWNLOAD_DIR} PARENT_SCOPE)
+    file(GLOB FILES LIST_DIRECTORIES true RELATIVE ${DOWNLOAD_DIR} ${DOWNLOAD_DIR}/*)
+    list(LENGTH FILES NFILES)
+    if(NFILES EQUAL 0)
+        # No files found, so this is an error
+        message(FATAL_ERROR "Failed to fetch: ${URL}")
+    elseif(NFILES EQUAL 1)
+        list(GET FILES 0 _DIR)
+        if(IS_DIRECTORY ${DOWNLOAD_DIR}/${_DIR})
+            # If there is just one directory, then adjust the path
+            set(${DIR} ${DOWNLOAD_DIR}/${_DIR} PARENT_SCOPE)
+        endif()
+    endif()
+endfunction()
+
+set(_cget_cmake_original_file "__cget_original_cmake_file__.cmake")
+function(cget_find_cmake FILE DIR)
+    if(EXISTS ${DIR}/CMakeLists.txt)
+        file(RENAME ${DIR}/CMakeLists.txt ${DIR}/${_cget_cmake_original_file})
+    endif()
+    get_filename_component(BASENAME ${FILE} NAME)
+    if(EXISTS ${FILE})
+        file(COPY ${FILE} DESTINATION ${DIR}/)
+        file(RENAME ${DIR}/${BASENAME} ${DIR}/CMakeLists.txt)
+    else()
+        string(REPLACE ".cmake" "" REMOTE_CMAKE ${BASENAME})
+        cget_download(https://raw.githubusercontent.com/pfultz2/cget/master/cget/cmake/${REMOTE_CMAKE}.cmake ${DIR}/CMakeLists.txt)
+    endif()
+endfunction()
+
+function(cget_check_pkg_install FOUND)
+    get_property(INSTALLED_PKGS GLOBAL PROPERTY CGET_INSTALLED_PACKAGES)
+    set(FOUND 0 PARENT_SCOPE)
+    foreach(NAME ${ARGN})
+        list(FIND INSTALLED_PKGS ${NAME} IDX)
+        if(NOT IDX EQUAL "-1")
+            set(FOUND 1 PARENT_SCOPE)
+        endif()
+    endforeach()
+endfunction()
+
+function(cget_add_pkg_install)
+    get_property(INSTALLED_PKGS GLOBAL PROPERTY CGET_INSTALLED_PACKAGES)
+    foreach(NAME ${ARGN})
+        list(FIND INSTALLED_PKGS ${NAME} IDX)
+        if(IDX EQUAL "-1")
+            set_property(GLOBAL APPEND PROPERTY CGET_INSTALLED_PACKAGES ${NAME})
+        endif()
+    endforeach()
+endfunction()
+
+function(cget_get_absolute_path VAR PATH FILENAME)
+    get_filename_component(FILE_DIR ${FILENAME} DIRECTORY)
+    if(NOT IS_ABSOLUTE "${PATH}")
+        set(${VAR} "${FILE_DIR}/${PATH}" PARENT_SCOPE)
+    else()
+        set(${VAR} "${PATH}" PARENT_SCOPE)
+    endif()
+endfunction()
+
+function(cmake_get PKG)
+if(BUILD_DEPS)
+    set(options NO_RECIPE)
+    set(oneValueArgs PREFIX HASH CMAKE_FILE)
+    set(multiValueArgs CMAKE_ARGS)
+    
+    cget_parse_arguments(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+    if(PARSE_UNPARSED_ARGUMENTS)
+        message(FATAL_ERROR "Unknown keywords given to cmake_get(): \"${PARSE_UNPARSED_ARGUMENTS}\"")
+    endif()
+
+    if(PARSE_NO_RECIPE)
+        cget_parse_pkg(NAMES URL ${PKG})
+    else()
+        cget_parse_pkg(NAMES URL ${PKG} ${PARSE_PREFIX}/etc/cget/recipes)
+    endif()
+    cget_check_pkg_install(FOUND ${NAMES})
+    if(NOT FOUND)
+
+        if(URL MATCHES "recipe://")
+            string(REPLACE "recipe://" "" RECIPE ${URL})
+            if(EXISTS ${RECIPE}/requirements.txt)
+                cmake_get_from(${RECIPE}/requirements.txt PREFIX ${PARSE_PREFIX} CMAKE_ARGS ${PARSE_CMAKE_ARGS})
+            endif()
+            if(EXISTS ${RECIPE}/package.txt)
+                cmake_get_from(${RECIPE}/package.txt PREFIX ${PARSE_PREFIX} CMAKE_ARGS ${PARSE_CMAKE_ARGS} NO_RECIPE)
+            endif()
+        else()
+            cget_mktemp_dir(TMP_DIR)
+
+            if(PREFIX_HASH)
+                string(TOUPPER ${PREFIX_HASH} _HASH)
+                string(REPLACE ":" "=" _HASH ${_HASH})
+                set(HASH EXPECTED_HASH ${_HASH})
+            endif()
+
+            cget_fetch(DIR ${TMP_DIR}/download ${URL} ${HASH} SHOW_PROGRESS)
+            if(EXISTS ${DIR}/requirements.txt)
+                cmake_get_from(${DIR}/requirements.txt ${BASE_DIR_ARG} PREFIX ${PARSE_PREFIX} CMAKE_ARGS ${PARSE_CMAKE_ARGS})
+            endif()
+            if(PARSE_CMAKE_FILE)
+                cget_find_cmake(${PARSE_CMAKE_FILE} ${DIR})
+            endif()
+            cget_install_dir(${DIR} BUILD_DIR ${TMP_DIR}/build PREFIX ${PARSE_PREFIX} CMAKE_ARGS -DCGET_CMAKE_ORIGINAL_SOURCE_FILE=${DIR}/${_cget_cmake_original_file} ${PARSE_CMAKE_ARGS})
+
+            file(REMOVE_RECURSE ${TMP_DIR})
+        endif()
+        cget_add_pkg_install(${NAMES})
+    endif()
+endif()
+endfunction()
+
+set(_cmake_get_configure_reqs 0)
+function(cmake_get_from FILENAME)
+if(BUILD_DEPS)
+    set(options NO_RECIPE)
+    set(oneValueArgs PREFIX)
+    set(multiValueArgs CMAKE_ARGS)
+    cget_parse_arguments(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+    if(NOT EXISTS ${FILENAME})
+        message(FATAL_ERROR "File ${FILENAME} does not exists")
+    endif()
+
+    if(PARSE_UNPARSED_ARGUMENTS)
+        message(FATAL_ERROR "Unknown keywords given to cmake_get_from(): \"${PARSE_UNPARSED_ARGUMENTS}\"")
+    endif()
+
+    file(STRINGS ${FILENAME} LINES)
+    foreach(LINE ${LINES})
+        separate_arguments(WORDS UNIX_COMMAND "${LINE}")
+        set(REQ)
+        foreach(WORD ${WORDS})
+            if(WORD MATCHES "^#")
+                break()
+            endif()
+            list(APPEND REQ ${WORD})
+        endforeach()
+        list(LENGTH REQ REQ_LEN)
+        if(REQ_LEN GREATER 0)
+            cget_parse_requirement(PARSE_REQ ${REQ})
+            if(_cmake_get_configure_reqs)
+                string(CONFIGURE ${PARSE_REQ_PKG} PARSE_REQ_PKG @ONLY)
+            endif()
+            if(PARSE_NO_RECIPE)
+                set(NO_RECIPE "NO_RECIPE")
+            endif()
+            set(REQ_CMAKE)
+            if(PARSE_REQ_CMAKE)
+                cget_get_absolute_path(REQ_CMAKE ${PARSE_REQ_CMAKE} ${FILENAME})
+            endif()
+            if(PARSE_REQ_FILE)
+                cget_get_absolute_path(REQ_FILE ${PARSE_REQ_FILE} ${FILENAME})
+                cmake_get_from(${REQ_FILE}
+                    ${NO_RECIPE}
+                    PREFIX ${PARSE_PREFIX} 
+                    CMAKE_ARGS ${PARSE_CMAKE_ARGS} ${PARSE_REQ_CMAKE_ARGS}
+                )
+            else()
+                cget_get_absolute_path(REQ_PKG ${PARSE_REQ_PKG} ${FILENAME})
+                if(NOT EXISTS ${REQ_PKG})
+                    set(REQ_PKG ${PARSE_REQ_PKG})
+                endif()
+                cmake_get(${REQ_PKG}
+                    ${NO_RECIPE}
+                    PREFIX ${PARSE_PREFIX} 
+                    HASH ${PARSE_REQ_HASH}
+                    CMAKE_FILE ${REQ_CMAKE}
+                    CMAKE_ARGS ${PARSE_CMAKE_ARGS} ${PARSE_REQ_CMAKE_ARGS}
+                )
+            endif()
+        endif()
+    endforeach()
+endif()
+endfunction()
+


=====================================
CMakeLists.txt
=====================================
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.2)
-project(minimac4 VERSION 4.1.2)
+project(minimac4 VERSION 4.1.4)
 
 if(NOT CMAKE_BUILD_TYPE)
     set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose build type (Debug|Release|RelWithDebInfo|MinSizeRel)" FORCE)
@@ -32,6 +32,9 @@ add_executable(minimac4
 
 target_link_libraries(minimac4 savvy ${OMPP_LIBRARY} Threads::Threads)
 
+add_custom_target(manuals
+                  COMMAND help2man --output "${CMAKE_BINARY_DIR}/minimac4.1" "${CMAKE_BINARY_DIR}/minimac4")
+
 if(BUILD_TESTS)
     enable_testing()
     add_test(simple_test ${CMAKE_SOURCE_DIR}/test/simple-test.sh ${CMAKE_BINARY_DIR}/minimac4 m4_simple_test_output ${CMAKE_SOURCE_DIR}/test/input/ref_panel.vcf ${CMAKE_SOURCE_DIR}/test/input/tar_panel.vcf)
@@ -39,6 +42,9 @@ if(BUILD_TESTS)
 endif()
 
 install(TARGETS minimac4 RUNTIME DESTINATION bin)
+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/minimac4.1
+        DESTINATION share/man/man1
+        OPTIONAL)
 
 set(CPACK_PACKAGE_VERSION_MAJOR ${PROJECT_VERSION_MAJOR})
 set(CPACK_PACKAGE_VERSION_MINOR ${PROJECT_VERSION_MINOR})


=====================================
README.md
=====================================
@@ -4,18 +4,19 @@ Minimac4 is a lower memory and more computationally efficient
 implementation of the genotype imputation algorithms in 
 minimac/mininac2/minimac3.
 
-<<< SEE http://genome.sph.umich.edu/wiki/Minimac4 FOR DOCUMENTATION >>>
+## Installation
+A prebuilt Linux x86 executable is available at https://github.com/statgen/Minimac4/releases/latest by downloading and running minimac4-{version}-Linux-x86_64.sh. Alternatively, you can build from source.
 
-## Prerequisites
-Automatic installation of Minimac4 requires [cget](http://cget.readthedocs.io/en/latest/src/intro.html#installing-cget) and cmake >= v3.2.
+## Prerequisites for building from source
+Minimac4 requires GCC >= v5 or a compiler with full C++11 support. Automatic installation of Minimac4 requires [cget](http://cget.readthedocs.io/en/latest/src/intro.html#installing-cget) and CMake >= v3.2.
 
-## Installation
-The easiest way to install Minimac4 and its dependencies is to use cget:
+## Building from source
+The simplest way to build the latest Minimac4 and its dependencies is to use cget:
 ```bash
 cget install --prefix <install_prefix> statgen/Minimac4
 ```
 
-Alternatively, you can install manually:
+Alternatively, you can build manually:
 ```bash
 cd Minimac4
 cget install -f ./requirements.txt                      # Install dependencies locally.
@@ -33,6 +34,15 @@ make
 make CTEST_OUTPUT_ON_FAILURE=1 test
 ```
 
+Since some users have reported issues with installing cget with pip, a cmake-only alternative for installing dependencies is available:
+```shell
+cmake -P dependencies.cmake deps/
+mkdir build; cd build
+cmake -DCMAKE_PREFIX_PATH=$(pwd)/../deps/ -DCMAKE_CXX_FLAGS="-I$(pwd)/../deps/include" ..
+make
+make install
+```
+
 
 ## Usage
 See `minimac4 --help` for detailed usage.
@@ -42,7 +52,7 @@ A typical Minimac4 command line for imputation is as follows
 minimac4 reference.msav target.vcf.gz > imputed.sav
 ```
 
-Here reference.msav is a reference panel (e.g. 1000 Genomes) compressed with MVCF encoding, 
+Here reference.msav is a reference panel (e.g. [1000 Genomes download](ftp://share.sph.umich.edu/minimac4/panels/g1k_p3_msav_files_with_estimates.tar.gz)) compressed with MVCF encoding, 
 target.vcf.gz is an indexed VCF containing phased genotype array data, 
 and imputed.sav is the imputed output.
 
@@ -61,3 +71,15 @@ Meta-imputation with MetaMinimac2 requires `--empirical-output` (or `-e`) to be
 ```bash
 minimac4 reference.msav target.bcf -o imputed.dose.sav -e imputed.empirical_dose.sav
 ```
+
+## Reference Panel Creation
+If an M3VCF file is already available, it can be converted to the new MVCF format with:
+```
+minimac4 --update-m3vcf reference.m3vcf.gz > reference.msav
+```
+
+Otherwise, phased VCFs containing the reference haplotypes can be compressed into an MVCF with:
+```
+minimac4 --compress-reference reference.{sav,bcf,vcf.gz} > reference.msav
+``` 
+


=====================================
dep/libstatgen.cmake deleted
=====================================
@@ -1,15 +0,0 @@
-cmake_minimum_required(VERSION 3.2)
-project(libStatGen VERSION 1.0.0)
-
-#execute_process(COMMAND ./configure --prefix=${CMAKE_INSTALL_PREFIX} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
-
-add_custom_target(libStatGen ALL COMMAND ${CMAKE_COMMAND} -E env CPATH=${CGET_PREFIX}/include make WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMENT "Builing libStatGen ...")
-
-file(GLOB_RECURSE LSG_HEADER_LIST "bam/*.h" "fastq/*.h" "general/*.h" "glf/*.h" "samtools/*.h" "vcf/*.h")
-install(FILES ${LSG_HEADER_LIST} DESTINATION include)
-
-if (BUILD_SHARED_LIBS)
-    install(FILES ${CMAKE_SHARED_LIBRARY_PREFIX}StatGen${CMAKE_SHARED_LIBRARY_SUFFIX} DESTINATION lib)
-else()
-    install(FILES ${CMAKE_STATIC_LIBRARY_PREFIX}StatGen${CMAKE_STATIC_LIBRARY_SUFFIX} DESTINATION lib)
-endif()


=====================================
dependencies.cmake
=====================================
@@ -0,0 +1,17 @@
+#!/usr/bin/env cmake -P
+
+set(ARGS)
+foreach(i RANGE 4 ${CMAKE_ARGC})
+    list(APPEND ARGS ${CMAKE_ARGV${i}})
+endforeach()
+
+set(_PREFIX ${CMAKE_ARGV3})
+
+# Make sure this is in the module path
+list(APPEND CMAKE_MODULE_PATH ${CMAKEGET_MODULE_PATH})
+include(${CMAKE_CURRENT_LIST_DIR}/CMakeGet.cmake)
+
+get_filename_component(PREFIX ${_PREFIX} ABSOLUTE)
+# Install recipes
+#cmake_get(pfultz2/cget-recipes PREFIX ${PREFIX} CMAKE_ARGS ${ARGS})
+cmake_get_from(${CMAKE_CURRENT_LIST_DIR}/requirements.txt PREFIX ${PREFIX} CMAKE_ARGS ${ARGS})


=====================================
install.sh deleted
=====================================
@@ -1,12 +0,0 @@
-#!/bin/sh
-
-rm -rf cget/ release-build/ install.log
-echo -e "Installing Dependencies - Libstatgen ..."
-cget install -f requirements.txt &> install.log
-mkdir release-build
-cd release-build/
-echo -e "Generating MakeFiles ..."
-cmake -DCMAKE_TOOLCHAIN_FILE=../cget/cget/cget.cmake -DCMAKE_BUILD_TYPE=Release ..
-make
-echo "Binary created at /release-build/minimac4"
-


=====================================
package-linux.sh
=====================================
@@ -1,10 +1,10 @@
 #!/bin/bash
-# Usage: docker build -t packaging-ubuntu20 -f packaging-dockerfile-ubuntu20 . 
-#        docker run -v `pwd`:/app -w /app packaging-ubuntu20 ./package-linux.sh
+# Usage: docker build -t packaging-alpine3.17 -f packaging-dockerfile-alpine3.17 . 
+#        docker run -v `pwd`:/app -w /app packaging-alpine3.17 ./package-linux.sh
 set -euo pipefail
 
 src_dir=`pwd`
-build_dir=${src_dir}/pkg-build
+build_dir=${src_dir}/pkg-build-alpine
 
 rm -rf ${build_dir}
 mkdir ${build_dir}
@@ -12,28 +12,33 @@ cd ${build_dir}
 
 export CFLAGS="-fPIC"
 export CXXFLAGS="-fPIC"
-cget ignore xz
-cget install zlib,http://zlib.net/fossils/zlib-1.2.11.tar.gz
-rm cget/lib/libz.so
-cget install -f ${src_dir}/requirements.txt
+
+cmake -P ../dependencies.cmake deps/ -DSHRINKWRAP_PREFER_STATIC=ON
+rm deps/lib/libz.so
 unset CFLAGS
 unset CXXFLAGS
 
+arc=`uname -m`
 
 cmake \
   -DCMAKE_BUILD_TYPE=Release \
-  -DCMAKE_TOOLCHAIN_FILE=cget/cget/cget.cmake \
+  -DCMAKE_PREFIX_PATH=$(pwd)/deps \
+  -DCMAKE_CXX_FLAGS="-I$(pwd)/deps/include" \
   -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
-  -DCMAKE_EXE_LINKER_FLAGS="-static -static-libgcc -static-libstdc++" \
-  -DCPACK_GENERATOR="STGZ;DEB;RPM" \
+  -DCMAKE_EXE_LINKER_FLAGS="-static" \
+  -DCPACK_GENERATOR="STGZ" \
+  -DCPACK_SYSTEM_NAME="Linux-${arc}" \
+  -DCPACK_RESOURCE_FILE_LICENSE=${src_dir}/LICENSE \
   -DCPACK_PACKAGE_CONTACT="csg-devel at umich.edu" \
   ${src_dir}
 
+#-DCMAKE_EXE_LINKER_FLAGS="-static -static-libgcc -static-libstdc++" 
+
 make
-#make manuals
+make manuals
 make package
 
 mkdir -p ${src_dir}/pkg/
-cp minimac4-*.{sh,deb,rpm} ${src_dir}/pkg/
+cp minimac4-*.sh ${src_dir}/pkg/
 cd ${src_dir}
 rm -r ${build_dir}


=====================================
packaging-dockerfile-ubuntu20 → packaging-dockerfile-alpine3.17
=====================================
@@ -1,17 +1,9 @@
-FROM ubuntu:20.04
+FROM alpine:3.17
 
 ENV TZ=Etc/UTC
 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
 
-RUN apt-get update && apt-get install -y \
-    build-essential \
+RUN apk update && apk add --no-cache binutils bash cmake make libgcc musl-dev gcc g++ \
     curl \
-    cmake \
     git \
-    help2man \
-    lsb-release \
-    python3 \
-    python3-pip \
-    rpm
-
-RUN pip install cget
+    help2man


=====================================
src/dosage_writer.cpp
=====================================
@@ -58,7 +58,7 @@ std::vector<std::pair<std::string, std::string>> dosage_writer::gen_headers(cons
       {"source", "Minimac v" + std::string(VERSION)},
       {"phasing", "full"},
       {"contig", "<ID=" + std::string(chromosome) + ">"},
-      {"INFO", "<ID=AF,Number=1,Type=Float,Description=\"Estimated Alternate Allele Frequency\">"},
+      {"INFO", "<ID=AF,Number=A,Type=Float,Description=\"Estimated Alternate Allele Frequency\">"},
       {"INFO", "<ID=MAF,Number=1,Type=Float,Description=\"Estimated Minor Allele Frequency\">"},
       {"INFO", "<ID=AVG_CS,Number=1,Type=Float,Description=\"Average Call Score\">"},
       {"INFO", "<ID=R2,Number=1,Type=Float,Description=\"Estimated Imputation Accuracy (R-square)\">"},
@@ -73,11 +73,11 @@ std::vector<std::pair<std::string, std::string>> dosage_writer::gen_headers(cons
     if (f == "GT")
       headers.emplace_back("FORMAT", "<ID=GT,Number=1,Type=String,Description=\"Genotype\">");
     else if (f == "DS")
-      headers.emplace_back("FORMAT", "<ID=DS,Number=1,Type=Float,Description=\"Estimated Alternate Allele Dosage : [P(0/1)+2*P(1/1)]\">");
+      headers.emplace_back("FORMAT", "<ID=DS,Number=A,Type=Float,Description=\"Estimated Alternate Allele Dosage : [P(0/1)+2*P(1/1)]\">");
     else if (f == "HDS")
-      headers.emplace_back("FORMAT", "<ID=HDS,Number=2,Type=Float,Description=\"Estimated Haploid Alternate Allele Dosage \">");
+      headers.emplace_back("FORMAT", "<ID=HDS,Number=.,Type=Float,Description=\"Estimated Haploid Alternate Allele Dosage \">");
     else if (f == "GP")
-      headers.emplace_back("FORMAT", "<ID=GP,Number=3,Type=Float,Description=\"Estimated Posterior Probabilities for Genotypes 0/0, 0/1 and 1/1 \">");
+      headers.emplace_back("FORMAT", "<ID=GP,Number=G,Type=Float,Description=\"Estimated Posterior Probabilities for Genotypes 0/0, 0/1 and 1/1 \">");
     else if (f == "SD")
       headers.emplace_back("FORMAT", "<ID=SD,Number=1,Type=Float,Description=\"Variance of Posterior Genotype Probabilities\">");
   }
@@ -103,7 +103,7 @@ std::vector<std::pair<std::string, std::string>> dosage_writer::gen_emp_headers(
     {"INFO", "<ID=IMPUTED,Number=0,Type=Flag,Description=\"Marker was imputed\">"},
     {"INFO", "<ID=TYPED,Number=0,Type=Flag,Description=\"Marker was genotyped\">"},
     {"FORMAT","<ID=GT,Number=1,Type=String,Description=\"Genotyped alleles from Array\">"},
-    {"FORMAT","<ID=LDS,Number=2,Type=Float,Description=\"Leave-one-out Imputed Dosage : Estimated Haploid Alternate Allele Dosage assuming site was NOT genotyped\">"}};
+    {"FORMAT","<ID=LDS,Number=.,Type=Float,Description=\"Leave-one-out Imputed Dosage : Estimated Haploid Alternate Allele Dosage assuming site was NOT genotyped\">"}};
 
   // TODO: command string
 
@@ -112,21 +112,30 @@ std::vector<std::pair<std::string, std::string>> dosage_writer::gen_emp_headers(
 
 savvy::file::format dosage_writer::format_from_filename(const std::string& filename, savvy::file::format default_format)
 {
-  if (filename.size() >= 3)
+  if (filename.size() >= 4)
   {
-    const std::string ext = filename.substr(filename.size() - 3);
-    if (ext == "sav")
+    const std::string ext = filename.substr(filename.size() - 4);
+    if (ext == ".sav")
       return savvy::file::format::sav;
-    else if (ext == "bcf")
+    else if (ext == ".bcf")
       return savvy::file::format::bcf;
-    else if (ext == "vcf")
+    else if (ext == ".vcf")
       return savvy::file::format::vcf;
   }
 
-  if (filename.size() >= 6)
+  if (filename.size() >= 5)
+  {
+    const std::string ext = filename.substr(filename.size() - 5);
+    if (ext == ".usav")
+      return savvy::file::format::sav;
+    else if (ext == ".ubcf")
+      return savvy::file::format::bcf;
+  }
+
+  if (filename.size() >= 7)
   {
-    const std::string ext = filename.substr(filename.size() - 6);
-    if (ext == "vcf.gz")
+    const std::string ext = filename.substr(filename.size() - 7);
+    if (ext == ".vcf.gz")
       return savvy::file::format::vcf;
   }
 
@@ -138,12 +147,30 @@ int dosage_writer::clevel_from_filename(const std::string& filename, int default
   if (filename.size() >= 4)
   {
     const std::string ext = filename.substr(filename.size() - 4);
-    if (ext == ".sav" || ext == ".bcf" || ext == "f.gz")
+    if (ext == ".sav")
+      return 6;
+    else if (ext == ".bcf")
       return 6;
-    else if (ext == "usav" || ext == "ubcf" || ext == ".vcf")
+    else if (ext == ".vcf")
+      return 0;
+  }
+
+  if (filename.size() >= 5)
+  {
+    const std::string ext = filename.substr(filename.size() - 5);
+    if (ext == ".usav")
+      return 0;
+    else if (ext == ".ubcf")
       return 0;
   }
 
+  if (filename.size() >= 7)
+  {
+    const std::string ext = filename.substr(filename.size() - 7);
+    if (ext == ".vcf.gz")
+      return 6;
+  }
+
   return default_clevel;
 }
 
@@ -181,6 +208,7 @@ bool dosage_writer::merge_temp_files(std::list<savvy::reader>& temp_files, std::
   std::vector<float> partial_lds;
   std::vector<std::int8_t> pasted_gt;
   std::vector<std::int8_t> partial_gt;
+  std::size_t max_ploidy = 0;
 
   int good_count = temp_files.size();
   while (good_count == temp_files.size())
@@ -194,8 +222,16 @@ bool dosage_writer::merge_temp_files(std::list<savvy::reader>& temp_files, std::
     good_count = 0;
     for (auto it = temp_files.begin(); it != temp_files.end(); ++it)
     {
-      good_count += (int)it->read(out_var).good();
+      bool read_succeeded = it->read(out_var).good();
+      good_count += (int)read_succeeded;
       out_var.get_format("HDS", partial_hds);
+
+      // verify max ploidy is consistent across all temp files
+      if (max_ploidy == 0)
+        max_ploidy = partial_hds.size() / it->samples().size();
+      if (read_succeeded && max_ploidy != (partial_hds.size() / it->samples().size()))
+        return std::cerr << "Error: max ploidy is not consistent across temp files. This should never happen. Please report." << std::endl, false;
+
       std::size_t old_size = pasted_hds.size();
       pasted_hds.resize(old_size + partial_hds.size());
       for (auto jt = partial_hds.begin(); jt != partial_hds.end(); ++jt)
@@ -292,6 +328,9 @@ bool dosage_writer::merge_temp_files(std::list<savvy::reader>& temp_files, std::
             if (good_count_emp < temp_emp_files.size())
               return std::cerr << "Error: record mismatch in empirical temp files" << std::endl, false;
 
+            if (pasted_hds.size() != pasted_gt.size() || pasted_lds.size() != pasted_gt.size())
+              return std::cerr << "Error: Merged HDS, LDS, and GT are not consistent. This should never happen. Please report." << std::endl, false;
+
             out_var_emp.set_format("GT", pasted_gt);
             out_var_emp.set_format("LDS", pasted_lds);
             emp_out_file_->write(out_var_emp);


=====================================
src/hidden_markov_model.cpp
=====================================
@@ -8,11 +8,12 @@
 constexpr float hidden_markov_model::jump_fix;
 constexpr float hidden_markov_model::jump_threshold;
 
-hidden_markov_model::hidden_markov_model(float s3_prob_threshold, float s1_prob_threshold, float diff_threshold, float background_error) :
+hidden_markov_model::hidden_markov_model(float s3_prob_threshold, float s1_prob_threshold, float diff_threshold, float background_error, float decay) :
   prob_threshold_(s3_prob_threshold),
   s1_prob_threshold_(s1_prob_threshold),
   diff_threshold_(diff_threshold),
-  background_error_(background_error)
+  background_error_(background_error),
+  decay_(decay)
 {
 }
 
@@ -466,7 +467,7 @@ void hidden_markov_model::impute(double& prob_sum, std::size_t& prev_best_typed_
   assert(row == 0 || tar_variants[row].pos >= tar_variants[row - 1].pos);
   std::size_t mid_point = row == 0 ? 1 : std::max<std::int32_t>(1, std::int32_t(tar_variants[row].pos) - std::int32_t(tar_variants[row].pos - tar_variants[row - 1].pos) / 2);
   if (full_ref_ritr == full_ref_rend || full_ref_ritr->pos < mid_point) // TODO: stop traverse_backward at beginning of region.
-    return;
+    return; // We are outside the impute region (in the extended region)
 
   float typed_dose = std::numeric_limits<float>::quiet_NaN();
   float typed_loo_dose = std::numeric_limits<float>::quiet_NaN();
@@ -544,19 +545,19 @@ void hidden_markov_model::impute(double& prob_sum, std::size_t& prev_best_typed_
         an += card;
       }
 
-      if (p_alt > 1. || p_alt < 0.)
-      {
-        auto a = 0;
-      }
-
       assert(full_ref_ritr->ac >= ac);
       assert(n_templates >= an);
       if (n_templates - an > 0)
         p_alt += (1. - best_sum) * (double(full_ref_ritr->ac - ac) / double(n_templates - an));
 
-      if (p_alt > 1.0f)
+      if (decay_ > 0. && (row == 0 || row + 1 == tar_variants.size()))
       {
-        auto a = 0;
+        // Either the first or last typed site, so decay.
+
+        // We are estimating cM with physical distance for now. TODO: add tar_variants[row].cm
+        double cm_estimate = std::abs(std::int64_t(full_ref_ritr->pos) - std::int64_t(tar_variants[row].pos)) / 1e6;
+        double decay = recombination::cm_to_switch_prob(cm_estimate, decay_);
+        p_alt = p_alt * (1. - decay) + (full_ref_ritr->ac / double(n_templates)) * decay;
       }
 
       p_alt = std::max(0., std::min(1., p_alt));


=====================================
src/hidden_markov_model.hpp
=====================================
@@ -20,6 +20,15 @@ public:
     loo_dosages_.resize(n_loo_rows, std::vector<float>(n_columns, savvy::typed_value::end_of_vector_value<float>()));
   }
 
+  void fill_eov()
+  {
+    for (auto& v : dosages_)
+      std::fill(v.begin(), v.end(),savvy::typed_value::end_of_vector_value<float>());
+
+    for (auto& v : loo_dosages_)
+      std::fill(v.begin(), v.end(),savvy::typed_value::end_of_vector_value<float>());
+  }
+
   std::array<std::size_t, 2> dimensions() const { return {dosages_.size(), dosages_.empty() ? 0 : dosages_[0].size()}; }
   std::array<std::size_t, 2> dimensions_loo() const { return {dosages_.size(), dosages_.empty() ? 0 : dosages_[0].size()}; }
 
@@ -40,6 +49,7 @@ private:
   float s1_prob_threshold_ = -1.f;
   float diff_threshold_ = 0.01f;
   float background_error_ = 1e-5f;
+  double decay_ = 0.;
   static constexpr float jump_fix = 1e15f;
   static constexpr float jump_threshold = 1e-10f;
   const std::int16_t bin_scalar_ = 1000;
@@ -54,7 +64,7 @@ private:
   std::vector<float> best_s3_probs_;
 
 public:
-  hidden_markov_model(float s3_prob_threshold, float s1_prob_threshold, float diff_threshold, float background_error);
+  hidden_markov_model(float s3_prob_threshold, float s1_prob_threshold, float diff_threshold, float background_error, float decay);
 
   void traverse_forward(const std::deque<unique_haplotype_block>& ref_haps,
     const std::vector<target_variant>& tar_variant,


=====================================
src/input_prep.cpp
=====================================
@@ -680,8 +680,8 @@ bool convert_old_m3vcf(const std::string& input_path, const std::string& output_
   headers.insert(headers.begin(), {"fileformat","VCFv4.2"});
   if (!phasing_header_present)
     headers.emplace_back("phasing","full");
-  headers.emplace_back("INFO", "<ID=AC,Number=1,Type=Integer,Description=\"Total number of alternate alleles in called genotypes\">");
-  headers.emplace_back("INFO", "<ID=AN,Number=1,Type=Float,Description=\"Total number of alleles in called genotypes\">");
+  headers.emplace_back("INFO", "<ID=AC,Number=A,Type=Integer,Description=\"Total number of alternate alleles in called genotypes\">");
+  headers.emplace_back("INFO", "<ID=AN,Number=1,Type=Integer,Description=\"Total number of alleles in called genotypes\">");
   headers.emplace_back("INFO","<ID=REPS,Number=1,Type=Integer,Description=\"Number of distinct haplotypes in block\">");
   headers.emplace_back("INFO","<ID=VARIANTS,Number=1,Type=Integer,Description=\"Number of variants in block\">");
   headers.emplace_back("INFO","<ID=ERR,Number=1,Type=Float,Description=\"Error parameter for HMM\">");
@@ -760,8 +760,8 @@ bool compress_reference_panel(const std::string& input_path, const std::string&
     {"subfileformat","MVCFv3.0"},
     {"fileformat","VCFv4.2"},
     {"phasing","full"},
-    {"INFO", "<ID=AC,Number=1,Type=Integer,Description=\"Total number of alternate alleles in called genotypes\">"},
-    {"INFO", "<ID=AN,Number=1,Type=Float,Description=\"Total number of alleles in called genotypes\">"},
+    {"INFO", "<ID=AC,Number=A,Type=Integer,Description=\"Total number of alternate alleles in called genotypes\">"},
+    {"INFO", "<ID=AN,Number=1,Type=Integer,Description=\"Total number of alleles in called genotypes\">"},
     {"INFO","<ID=REPS,Number=1,Type=Integer,Description=\"Number of distinct haplotypes in block\">"},
     {"INFO","<ID=VARIANTS,Number=1,Type=Integer,Description=\"Number of variants in block\">"},
     {"INFO","<ID=ERR,Number=1,Type=Float,Description=\"Error parameter for HMM\">"},


=====================================
src/main.cpp
=====================================
@@ -107,7 +107,7 @@ public:
 
       std::cerr << "Running HMM with " << args.threads() << " threads ..." << std::endl;
       start_time = std::time(nullptr);
-      std::vector<hidden_markov_model> hmms(tpool.thread_count(), {args.prob_threshold(), args.prob_threshold_s1(), args.diff_threshold(), 1e-5f});
+      std::vector<hidden_markov_model> hmms(tpool.thread_count(), {args.prob_threshold(), args.prob_threshold_s1(), args.diff_threshold(), 1e-5f, args.decay()});
 
       std::size_t ploidy = target_sites[0].gt.size() / sample_ids.size();
       std::size_t haplotype_buffer_size = args.temp_buffer() * ploidy;
@@ -126,6 +126,9 @@ public:
             hmm_results.loo_dosages_[j].resize(group_size);
         }
 
+        if (i > 0)
+          hmm_results.fill_eov();
+
         start_time = std::time(nullptr);
         omp::parallel_for_exp(
           omp::static_schedule(), omp::sequence_iterator(i), omp::sequence_iterator(i + group_size), [&](int& i, const omp::iteration_context& ctx)
@@ -146,14 +149,14 @@ public:
         if (target_sites[0].gt.size() > haplotype_buffer_size)
         {
           std::string out_emp_path;
-          std::string out_path = "/tmp/m4_" + std::to_string(i / haplotype_buffer_size) + "_XXXXXX";
+          std::string out_path = args.temp_prefix() + std::to_string(i / haplotype_buffer_size) + "_XXXXXX";
           tmp_fd = mkstemp(&out_path[0]);
           if (tmp_fd < 0)
             return std::cerr << "Error: could not open temp file (" << out_path << ")" << std::endl, false;
 
           if (args.emp_out_path().size())
           {
-            out_emp_path = "/tmp/m4_" + std::to_string(i / haplotype_buffer_size) + "_emp_XXXXXX";
+            out_emp_path =  args.temp_prefix() + std::to_string(i / haplotype_buffer_size) + "_emp_XXXXXX";
             tmp_emp_fd = mkstemp(&out_emp_path[0]);
             if (tmp_emp_fd < 0)
               return std::cerr << "Error: could not open temp file (" << out_emp_path << ")" << std::endl, false;


=====================================
src/prog_args.hpp
=====================================
@@ -9,6 +9,7 @@
 #include <vector>
 #include <unordered_set>
 #include <cstdint>
+#include <cstdlib>
 #include <fstream>
 #include <iterator>
 
@@ -19,6 +20,7 @@ private:
   std::string tar_path_;
   std::string map_path_;
   std::string out_path_ = "/dev/stdout";
+  std::string temp_prefix_;
   std::string prefix_; // deprecated
   std::string emp_out_path_;
   std::string sites_out_path_;
@@ -31,6 +33,7 @@ private:
   std::int64_t chunk_size_ = 20000000;
   std::int64_t overlap_ = 3000000;
   std::int16_t threads_ = 1;
+  float decay_ = 0.f;
   float min_r2_ = -1.f;
   float min_ratio_ = 1e-5f;
   float prob_threshold_ = 0.01f;
@@ -56,6 +59,7 @@ public:
   const std::string& out_path() const { return out_path_; }
   const std::string& emp_out_path() const { return emp_out_path_; }
   const std::string& sites_out_path() const { return sites_out_path_; }
+  const std::string& temp_prefix() const { return temp_prefix_; }
   savvy::file::format out_format() const { return out_format_; }
   std::uint8_t out_compression() const { return out_compression_; }
   const std::vector<std::string>& fmt_fields() const { return fmt_fields_; }
@@ -65,6 +69,7 @@ public:
   std::int64_t overlap() const { return overlap_; }
   std::int16_t threads() const { return threads_; }
   std::size_t temp_buffer() const { return temp_buffer_ ; }
+  float decay() const { return decay_; }
   float min_r2() const { return min_r2_; }
   float min_ratio() const { return min_ratio_; }
   float prob_threshold() const { return prob_threshold_; }
@@ -79,7 +84,9 @@ public:
 
   prog_args() :
     getopt_wrapper(
-      "Usage: minimac4 [opts ...] <reference.msav> <target.{sav,bcf,vcf.gz}>",
+      "Usage: minimac4 [opts ...] <reference.msav> <target.{sav,bcf,vcf.gz}>\n"
+      "       minimac4 [opts ...] --update-m3vcf <reference.m3vcf.gz>\n"
+      "       minimac4 [opts ...] --compress-reference <reference.{sav,bcf,vcf.gz}>",
       {
         {"all-typed-sites", no_argument, 0, 'a', "Include in the output sites that exist only in target VCF"},
         {"temp-buffer", required_argument, 0, 'b', "Number of samples to impute before writing to temporary files (default: 200)"},
@@ -89,24 +96,26 @@ public:
         {"format", required_argument, 0, 'f', "Comma-separated list of format fields to generate (GT, HDS, DS, GP, or SD; default: HDS)"},
         {"map", required_argument, 0, 'm', "Genetic map file"},
         {"output", required_argument, 0, 'o', "Output path (default: /dev/stdout)"},
-        {"output-format", required_argument, 0, 'O', "Output file format (bcf, sav, vcf.gz, ubcf, usav, or vcf; default: bcf)"},
+        {"output-format", required_argument, 0, 'O', "Default output file format used for ambiguous filenames (bcf, sav, vcf.gz, ubcf, usav, or vcf; default: sav)"},
         //{"pass-only", no_argument, 0, 'p', "Only imports variants with FILTER column set to PASS"},
         {"region", required_argument, 0, 'r', "Genomic region to impute"},
         {"sites", required_argument, 0, 's', "Output path for sites-only file"},
         {"threads", required_argument, 0, 't', "Number of threads (default: 1)"},
         {"version", no_argument, 0, 'v', "Print version"},
         {"overlap", required_argument, 0, 'w', "Size (in base pairs) of overlap before and after impute region to use as input to HMM (default: 3000000)"},
+        {"decay", required_argument, 0, '\x02', "Decay rate for dosages in flanking regions (default: disabled with 0)"},
         {"min-r2", required_argument, 0, '\x02', "Minimum estimated r-square for output variants"},
         {"min-ratio", required_argument, 0, '\x02', "Minimum ratio of number of target sites to reference sites (default: 0.00001)"},
         {"match-error", required_argument, 0, '\x02', "Error parameter for HMM match probabilities (default: 0.01)"},
         {"min-recom", required_argument, 0, '\x02', "Minimum recombination probability (default: 0.00001)"},
         {"prob-threshold", required_argument, 0, '\x02', "Probability threshold used for template selection"},
-        {"prob-threshold-s1", required_argument, 0, '\x02', ""}, // "Probability threshold used for template selection in original state space"},
+        {"prob-threshold-s1", required_argument, 0, '\x02', "Probability threshold used for template selection in original state space"},
         {"diff-threshold", required_argument, 0, '\x02', "Probability diff threshold used in template selection"},
         {"sample-ids", required_argument, 0, '\x02', "Comma-separated list of sample IDs to subset from reference panel"},
         {"sample-ids-file", required_argument, 0, '\x02', "Text file containing sample IDs to subset from reference panel (one ID per line)"},
-        {"update-m3vcf", no_argument, 0, '\x01', nullptr},
-        {"compress-reference", no_argument, 0, '\x01', nullptr},
+        {"temp-prefix", required_argument, 0, '\x02', "Prefix path for temporary output files (default: ${TMPDIR}/m4_)"},
+        {"update-m3vcf", no_argument, 0, '\x01', "Converts M3VCF to MVCF (default output: /dev/stdout)"},
+        {"compress-reference", no_argument, 0, '\x01', "Compresses VCF to MVCF (default output: /dev/stdout)"},
         // vvvv deprecated vvvv //
         {"allTypedSites", no_argument, 0, '\x01', nullptr},
         {"rsid", no_argument, 0, '\x01', nullptr},
@@ -275,7 +284,12 @@ public:
       case '\x02':
         {
           std::string long_opt_str = std::string(long_options_[long_index].name);
-          if (long_opt_str == "min-r2")
+          if (long_opt_str == "decay")
+          {
+            decay_ = std::atof(optarg ? optarg : "");
+            break;
+          }
+          else if (long_opt_str == "min-r2")
           {
             min_r2_ = std::atof(optarg ? optarg : "");
             break;
@@ -305,6 +319,11 @@ public:
             prob_threshold_s1_ = std::min(1., std::atof(optarg ? optarg : ""));
             break;
           }
+          else if (long_opt_str == "temp-prefix")
+          {
+            temp_prefix_ = optarg ? optarg : "";
+            break;
+          }
           else if (long_opt_str == "diff-threshold")
           {
             diff_threshold_ = std::max(0., std::atof(optarg ? optarg : ""));
@@ -446,6 +465,22 @@ public:
         emp_out_path_ = prefix_ + ".empiricalDose." + suffix;
     }
 
+    if (temp_prefix_.empty())
+    {
+      char* tmpdir = std::getenv("TMPDIR");
+      if (tmpdir && strlen(tmpdir))
+      {
+        std::string p(tmpdir);
+        if (p.back() != '/')
+          p += '/';
+        temp_prefix_ = p + "m4_";
+      }
+      else
+      {
+        temp_prefix_ = "/tmp/m4_";
+      }
+    }
+
     if (!emp_out_path_.empty() && std::find(fmt_fields_.begin(), fmt_fields_.end(), "HDS") == fmt_fields_.end())
       fmt_fields_.emplace_back("HDS");
 


=====================================
src/recombination.hpp
=====================================
@@ -26,6 +26,7 @@ public:
   static bool parse_map_file(const std::string& map_file_path, std::vector<target_variant>& sites, float recom_min);
   static double haldane(double cm) { return (1. - std::exp(-cm/50.))/2.;}
   static double cm_to_switch_prob(double cm) { return 1. - std::exp(-cm/100.);}
+  static double cm_to_switch_prob(double cm, double decay_rate) { return 1. - std::exp(-decay_rate * cm/100.);}
   static double haldane_inverse(double recom_prob) { return 50. * std::log(1. / (1. - 2. * recom_prob)); }
   static double switch_prob_to_cm(double recom_prob) { return 100. * std::log(1. / (1. - recom_prob)); }
 private:


=====================================
test/simple-test.sh
=====================================
@@ -18,7 +18,7 @@ bcftools index $ref_vcf
 bcftools view $4 -Oz -o $tar_vcf
 bcftools index $tar_vcf
 $m4 --compress-reference $ref_vcf > $ref_msav
-$m4 $ref_msav $tar_vcf -f GT -O vcf.gz > $imputed_vcf
+$m4 $ref_msav $tar_vcf -f GT -O vcf.gz --temp-buffer 2 > $imputed_vcf
 
 gzip -cd $imputed_vcf | grep -v "^#" | cut -f9- > $d/imputed_gt_matrix.tsv
 gzip -cd $ref_vcf | grep -v "^#" | cut -f9- > $d/ref_gt_matrix.tsv



View it on GitLab: https://salsa.debian.org/med-team/minimac4/-/commit/6fe4c28160597587ede46846bbca4bef4009e0d8

-- 
View it on GitLab: https://salsa.debian.org/med-team/minimac4/-/commit/6fe4c28160597587ede46846bbca4bef4009e0d8
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20230719/dce3db60/attachment-0001.htm>


More information about the debian-med-commit mailing list