[med-svn] [Git][med-team/minimac4][upstream] New upstream version 4.1.4
Étienne Mollier (@emollier)
gitlab at salsa.debian.org
Wed Jul 19 23:46:30 BST 2023
Étienne Mollier pushed to branch upstream at Debian Med / minimac4
Commits:
6fe4c281 by Étienne Mollier at 2023-07-20T00:42:06+02:00
New upstream version 4.1.4
- - - - -
16 changed files:
- + CMakeGet.cmake
- CMakeLists.txt
- README.md
- − dep/libstatgen.cmake
- + dependencies.cmake
- − install.sh
- package-linux.sh
- packaging-dockerfile-ubuntu20 → packaging-dockerfile-alpine3.17
- src/dosage_writer.cpp
- src/hidden_markov_model.cpp
- src/hidden_markov_model.hpp
- src/input_prep.cpp
- src/main.cpp
- src/prog_args.hpp
- src/recombination.hpp
- test/simple-test.sh
Changes:
=====================================
CMakeGet.cmake
=====================================
@@ -0,0 +1,473 @@
+
+set(BUILD_DEPS On CACHE BOOL "Build dependencies")
+
+include(ProcessorCount)
+
+if(CMAKE_VERSION VERSION_LESS "3.4")
+function(cget_parse_arguments prefix _optionNames _singleArgNames _multiArgNames)
+ # first set all result variables to empty/FALSE
+ foreach(arg_name ${_singleArgNames} ${_multiArgNames})
+ set(${prefix}_${arg_name})
+ endforeach()
+
+ foreach(option ${_optionNames})
+ set(${prefix}_${option} FALSE)
+ endforeach()
+
+ set(${prefix}_UNPARSED_ARGUMENTS)
+
+ set(insideValues FALSE)
+ set(currentArgName)
+
+ # now iterate over all arguments and fill the result variables
+ foreach(currentArg ${ARGN})
+ list(FIND _optionNames "${currentArg}" optionIndex) # ... then this marks the end of the arguments belonging to this keyword
+ list(FIND _singleArgNames "${currentArg}" singleArgIndex) # ... then this marks the end of the arguments belonging to this keyword
+ list(FIND _multiArgNames "${currentArg}" multiArgIndex) # ... then this marks the end of the arguments belonging to this keyword
+
+ if(${optionIndex} EQUAL -1 AND ${singleArgIndex} EQUAL -1 AND ${multiArgIndex} EQUAL -1)
+ if(insideValues)
+ if("${insideValues}" STREQUAL "SINGLE")
+ set(${prefix}_${currentArgName} ${currentArg})
+ set(insideValues FALSE)
+ elseif("${insideValues}" STREQUAL "MULTI")
+ list(APPEND ${prefix}_${currentArgName} ${currentArg})
+ endif()
+ else()
+ list(APPEND ${prefix}_UNPARSED_ARGUMENTS ${currentArg})
+ endif()
+ else()
+ if(NOT ${optionIndex} EQUAL -1)
+ set(${prefix}_${currentArg} TRUE)
+ set(insideValues FALSE)
+ elseif(NOT ${singleArgIndex} EQUAL -1)
+ set(currentArgName ${currentArg})
+ set(${prefix}_${currentArgName})
+ set(insideValues "SINGLE")
+ elseif(NOT ${multiArgIndex} EQUAL -1)
+ set(currentArgName ${currentArg})
+ set(insideValues "MULTI")
+ endif()
+ endif()
+
+ endforeach()
+
+ # propagate the result variables to the caller:
+ foreach(arg_name ${_singleArgNames} ${_multiArgNames} ${_optionNames})
+ set(${prefix}_${arg_name} ${${prefix}_${arg_name}} PARENT_SCOPE)
+ endforeach()
+ set(${prefix}_UNPARSED_ARGUMENTS ${${prefix}_UNPARSED_ARGUMENTS} PARENT_SCOPE)
+
+endfunction()
+else()
+ macro(cget_parse_arguments prefix _optionNames _singleArgNames _multiArgNames)
+ cmake_parse_arguments(${prefix} "${_optionNames}" "${_singleArgNames}" "${_multiArgNames}" ${ARGN})
+ endmacro()
+endif()
+
+set(_cget_tmp_dir "${CMAKE_CURRENT_LIST_DIR}/tmp")
+if(CMAKE_HOST_UNIX)
+ foreach(dir "$ENV{TMP}" "$ENV{TMPDIR}" "/tmp")
+ if(EXISTS "${dir}" AND NOT "${dir}" STREQUAL "")
+ file(TO_CMAKE_PATH "${dir}" _cget_tmp_dir)
+ break()
+ endif()
+ endforeach()
+endif()
+set(_tmp_dir_count 0)
+
+macro(cget_mktemp_dir OUT)
+ string(TIMESTAMP cget_mktemp_dir_STAMP "%H-%M-%S")
+ string(RANDOM cget_mktemp_dir_RAND)
+ set(cget_mktemp_dir_PREFIX "${_cget_tmp_dir}/cget-${cget_mktemp_dir_STAMP}-${cget_mktemp_dir_RAND}")
+ math(EXPR _tmp_dir_count "${_tmp_dir_count} + 1")
+ set(${OUT} "${cget_mktemp_dir_PREFIX}-${_tmp_dir_count}")
+ file(MAKE_DIRECTORY ${${OUT}})
+endmacro()
+
+macro(cget_set_parse_flag VAR OPT)
+ unset(${VAR}_${OPT})
+ foreach(FLAG ${ARGN})
+ if(${VAR}_private_${FLAG})
+ set(${VAR}_${OPT} ${${VAR}_private_${FLAG}})
+ endif()
+ unset(${VAR}_private_${FLAG})
+ endforeach()
+endmacro()
+
+macro(cget_parse_requirement VAR)
+ unset(${VAR}_PKG)
+ set(${VAR}_private_options --build -b --test -t)
+ set(${VAR}_private_oneValueArgs -H --hash -X --cmake --file -f)
+ set(${VAR}_private_multiValueArgs -D --define)
+
+ set(cget_parse_requirement_args)
+ foreach(ARG ${ARGN})
+ if(ARG MATCHES "^-([^-])(.+)")
+ list(APPEND cget_parse_requirement_args -${CMAKE_MATCH_1})
+ list(APPEND cget_parse_requirement_args ${CMAKE_MATCH_2})
+ else()
+ list(APPEND cget_parse_requirement_args ${ARG})
+ endif()
+ endforeach()
+
+ cget_parse_arguments(${VAR}_private "${${VAR}_private_options}" "${${VAR}_private_oneValueArgs}" "${${VAR}_private_multiValueArgs}" ${cget_parse_requirement_args})
+
+ if(${VAR}_private_UNPARSED_ARGUMENTS)
+ list(GET ${VAR}_private_UNPARSED_ARGUMENTS 0 ${VAR}_PKG)
+ list(LENGTH ${VAR}_private_UNPARSED_ARGUMENTS ${VAR}_private_UNPARSED_ARGUMENTS_SIZE)
+ if(${VAR}_private_UNPARSED_ARGUMENTS_SIZE GREATER 1)
+ list(REMOVE_AT ${VAR}_private_UNPARSED_ARGUMENTS 0)
+ message(WARNING "Unknown keywords given in requirements file: \"${${VAR}_private_UNPARSED_ARGUMENTS}\"")
+ endif()
+ endif()
+
+ cget_set_parse_flag(${VAR} BUILD --build -b)
+ cget_set_parse_flag(${VAR} TEST --test -t)
+ cget_set_parse_flag(${VAR} CMAKE --cmake -X)
+ cget_set_parse_flag(${VAR} HASH --hash -H)
+ cget_set_parse_flag(${VAR} DEFINE --define -D)
+ cget_set_parse_flag(${VAR} FILE --file -f)
+ set(${VAR}_CMAKE_ARGS)
+ foreach(DEFINE ${${VAR}_DEFINE})
+ list(APPEND ${VAR}_CMAKE_ARGS "-D${DEFINE}")
+ endforeach()
+endmacro()
+
+function(cget_exec)
+ execute_process(${ARGN} RESULT_VARIABLE RESULT)
+ if(NOT RESULT EQUAL 0)
+ string(REPLACE ";" " " CMD "${ARGN}")
+ string(REPLACE "COMMAND " "" CMD "${CMD}")
+ message(FATAL_ERROR "Process failed: ${CMD}")
+ endif()
+endfunction()
+
+function(cget_download)
+ file(DOWNLOAD ${ARGN} STATUS RESULT_LIST)
+ list(GET RESULT_LIST 0 RESULT)
+ list(GET RESULT_LIST 1 RESULT_MESSAGE)
+ if(NOT RESULT EQUAL 0)
+ message(FATAL_ERROR "Download failed: ${RESULT_MESSAGE}: ${ARGN}")
+ endif()
+endfunction()
+
+set(_cget_install_dir_count 0)
+set_property(GLOBAL PROPERTY _cget_install_dir_count 0)
+function(cget_install_dir DIR)
+ set(options)
+ set(oneValueArgs PREFIX BUILD_DIR)
+ set(multiValueArgs CMAKE_ARGS)
+
+ cget_parse_arguments(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+ if(PARSE_UNPARSED_ARGUMENTS)
+ message(FATAL_ERROR "Unknown keywords given to cget_install_dir(): \"${PARSE_UNPARSED_ARGUMENTS}\"")
+ endif()
+
+ if(NOT EXISTS ${DIR}/CMakeLists.txt)
+ message(FATAL_ERROR "cget_install_dir(): ${DIR} is not a cmake package")
+ endif()
+
+ set(PREFIX ${PARSE_PREFIX})
+ set(BUILD_DIR ${PARSE_BUILD_DIR})
+ if(NOT EXISTS ${BUILD_DIR})
+ file(MAKE_DIRECTORY ${BUILD_DIR})
+ endif()
+ cget_exec(COMMAND ${CMAKE_COMMAND}
+ -DCMAKE_PREFIX_PATH=${PREFIX}
+ -DCMAKE_INSTALL_PREFIX=${PREFIX}
+ ${PARSE_CMAKE_ARGS}
+ ${DIR}
+ WORKING_DIRECTORY ${BUILD_DIR}
+ )
+ set(CONFIG "Release")
+ foreach(ARG ${PARSE_CMAKE_ARGS})
+ string(TOUPPER ${ARG} ARG_UPPER)
+ if(ARG_UPPER MATCHES "-DCMAKE_BUILD_TYPE")
+ string(SUBSTRING ${ARG_UPPER} 19 -1 BUILD_TYPE)
+ if(BUILD_TYPE STREQUAL "DEBUG")
+ set(CONFIG "Debug")
+ endif()
+ endif()
+ endforeach()
+ set(BUILD_ARGS)
+ if(EXISTS ${BUILD_DIR}/Makefile)
+ ProcessorCount(N)
+ set(BUILD_ARGS -- -j ${N})
+ endif()
+ cget_exec(COMMAND ${CMAKE_COMMAND} --build ${BUILD_DIR} --config ${CONFIG} ${BUILD_ARGS})
+ cget_exec(COMMAND ${CMAKE_COMMAND} --build ${BUILD_DIR} --target install --config ${CONFIG} ${BUILD_ARGS})
+
+ get_property(_tmp_count GLOBAL PROPERTY _cget_install_dir_count)
+ math(EXPR _tmp_count "${_tmp_count} + 1")
+ set_property(GLOBAL PROPERTY _cget_install_dir_count ${_tmp_count})
+
+ file(REMOVE_RECURSE ${BUILD_DIR})
+endfunction()
+
+function(cget_parse_src_name URL VARIANT SRC)
+ if(SRC MATCHES "@")
+ string(REPLACE "@" ";" SRC_LIST ${SRC})
+ list(GET SRC_LIST 0 _URL)
+ list(GET SRC_LIST 1 _VARIANT)
+ set(${URL} ${_URL} PARENT_SCOPE)
+ set(${VARIANT} ${_VARIANT} PARENT_SCOPE)
+ else()
+ set(${URL} ${SRC} PARENT_SCOPE)
+ set(${VARIANT} ${ARGN} PARENT_SCOPE)
+ endif()
+endfunction()
+
+function(cget_find_recipe RECIPE_DIR SRC)
+ cget_parse_src_name(NAME VARIANT ${SRC})
+ foreach(RECIPE ${ARGN})
+ if(EXISTS ${RECIPE}/${NAME}/package.txt OR EXISTS ${RECIPE}/${NAME}/requirements.txt)
+ # TODO: Check variant
+ set(${RECIPE_DIR} ${RECIPE}/${NAME} PARENT_SCOPE)
+ break()
+ endif()
+ endforeach()
+endfunction()
+
+function(cget_validate_gh_src NAME)
+ if(NAME MATCHES "[^A-Za-z0-9_./@-]")
+ message(FATAL_ERROR "Not a valid name: ${NAME}")
+ endif()
+endfunction()
+
+function(cget_parse_pkg NAME URL PKG)
+ string(REPLACE "," ";" PKG_NAMES ${PKG})
+ list(GET PKG_NAMES -1 PKG_SRC)
+ list(GET PKG_NAMES 0 PKG_NAME)
+ set(${NAME} ${PKG_NAME} PARENT_SCOPE)
+ if(PKG_SRC MATCHES "://")
+ set(${URL} ${PKG_SRC} PARENT_SCOPE)
+ else()
+ get_filename_component(PKG_SRC_FULL ${PKG_SRC} ABSOLUTE)
+ if(EXISTS ${PKG_SRC_FULL})
+ set(${URL} file://${PKG_SRC_FULL} PARENT_SCOPE)
+ else()
+ # Parse recipe dir
+ cget_find_recipe(RECIPE_DIR ${PKG_SRC} ${ARGN})
+ if(EXISTS ${RECIPE_DIR})
+ set(${URL} recipe://${RECIPE_DIR} PARENT_SCOPE)
+ set(${NAME} ${PKG_SRC} ${PKG_NAME} PARENT_SCOPE)
+ else()
+ cget_validate_gh_src(${PKG_SRC})
+ # Parse github url
+ cget_parse_src_name(GH_NAME GH_BRANCH ${PKG_SRC} HEAD)
+ cget_validate_gh_src(${GH_NAME})
+ cget_validate_gh_src(${GH_BRANCH})
+ set(${NAME} ${GH_NAME} ${PKG_NAME} PARENT_SCOPE)
+ if(GH_NAME MATCHES "/")
+ set(${URL} "https://github.com/${GH_NAME}/archive/${GH_BRANCH}.tar.gz" PARENT_SCOPE)
+ else()
+ set(${URL} "https://github.com/${GH_NAME}/${GH_NAME}/archive/${GH_BRANCH}.tar.gz" PARENT_SCOPE)
+ endif()
+ endif()
+ endif()
+ endif()
+endfunction()
+
+function(cget_unpack FILENAME DIR)
+ file(MAKE_DIRECTORY ${DIR})
+ cget_exec(COMMAND ${CMAKE_COMMAND} -E tar xzf ${FILENAME}
+ WORKING_DIRECTORY ${DIR}
+ )
+endfunction()
+
+function(cget_fetch DIR DOWNLOAD_DIR URL)
+ if("${URL}" MATCHES "file://")
+ string(REPLACE "file://" "" LOCAL_PATH ${URL})
+ if(IS_DIRECTORY ${LOCAL_PATH})
+ file(COPY ${LOCAL_PATH} DESTINATION ${DOWNLOAD_DIR}/)
+ else()
+ cget_unpack(${LOCAL_PATH} ${DOWNLOAD_DIR})
+ endif()
+ else()
+ string(REPLACE "/" ";" PATH_LIST ${URL})
+
+ list(GET PATH_LIST -1 FILENAME)
+ message("Downloading ${URL}")
+ cget_download(${URL} ${DOWNLOAD_DIR}/${FILENAME} ${ARGN})
+ cget_unpack("${DOWNLOAD_DIR}/${FILENAME}" "${DOWNLOAD_DIR}")
+ file(REMOVE ${DOWNLOAD_DIR}/${FILENAME})
+ endif()
+ set(${DIR} ${DOWNLOAD_DIR} PARENT_SCOPE)
+ file(GLOB FILES LIST_DIRECTORIES true RELATIVE ${DOWNLOAD_DIR} ${DOWNLOAD_DIR}/*)
+ list(LENGTH FILES NFILES)
+ if(NFILES EQUAL 0)
+ # No files found, so this is an error
+ message(FATAL_ERROR "Failed to fetch: ${URL}")
+ elseif(NFILES EQUAL 1)
+ list(GET FILES 0 _DIR)
+ if(IS_DIRECTORY ${DOWNLOAD_DIR}/${_DIR})
+ # If there is just one directory, then adjust the path
+ set(${DIR} ${DOWNLOAD_DIR}/${_DIR} PARENT_SCOPE)
+ endif()
+ endif()
+endfunction()
+
+set(_cget_cmake_original_file "__cget_original_cmake_file__.cmake")
+function(cget_find_cmake FILE DIR)
+ if(EXISTS ${DIR}/CMakeLists.txt)
+ file(RENAME ${DIR}/CMakeLists.txt ${DIR}/${_cget_cmake_original_file})
+ endif()
+ get_filename_component(BASENAME ${FILE} NAME)
+ if(EXISTS ${FILE})
+ file(COPY ${FILE} DESTINATION ${DIR}/)
+ file(RENAME ${DIR}/${BASENAME} ${DIR}/CMakeLists.txt)
+ else()
+ string(REPLACE ".cmake" "" REMOTE_CMAKE ${BASENAME})
+ cget_download(https://raw.githubusercontent.com/pfultz2/cget/master/cget/cmake/${REMOTE_CMAKE}.cmake ${DIR}/CMakeLists.txt)
+ endif()
+endfunction()
+
+function(cget_check_pkg_install FOUND)
+ get_property(INSTALLED_PKGS GLOBAL PROPERTY CGET_INSTALLED_PACKAGES)
+ set(FOUND 0 PARENT_SCOPE)
+ foreach(NAME ${ARGN})
+ list(FIND INSTALLED_PKGS ${NAME} IDX)
+ if(NOT IDX EQUAL "-1")
+ set(FOUND 1 PARENT_SCOPE)
+ endif()
+ endforeach()
+endfunction()
+
+function(cget_add_pkg_install)
+ get_property(INSTALLED_PKGS GLOBAL PROPERTY CGET_INSTALLED_PACKAGES)
+ foreach(NAME ${ARGN})
+ list(FIND INSTALLED_PKGS ${NAME} IDX)
+ if(IDX EQUAL "-1")
+ set_property(GLOBAL APPEND PROPERTY CGET_INSTALLED_PACKAGES ${NAME})
+ endif()
+ endforeach()
+endfunction()
+
+function(cget_get_absolute_path VAR PATH FILENAME)
+ get_filename_component(FILE_DIR ${FILENAME} DIRECTORY)
+ if(NOT IS_ABSOLUTE "${PATH}")
+ set(${VAR} "${FILE_DIR}/${PATH}" PARENT_SCOPE)
+ else()
+ set(${VAR} "${PATH}" PARENT_SCOPE)
+ endif()
+endfunction()
+
+function(cmake_get PKG)
+if(BUILD_DEPS)
+ set(options NO_RECIPE)
+ set(oneValueArgs PREFIX HASH CMAKE_FILE)
+ set(multiValueArgs CMAKE_ARGS)
+
+ cget_parse_arguments(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+ if(PARSE_UNPARSED_ARGUMENTS)
+ message(FATAL_ERROR "Unknown keywords given to cmake_get(): \"${PARSE_UNPARSED_ARGUMENTS}\"")
+ endif()
+
+ if(PARSE_NO_RECIPE)
+ cget_parse_pkg(NAMES URL ${PKG})
+ else()
+ cget_parse_pkg(NAMES URL ${PKG} ${PARSE_PREFIX}/etc/cget/recipes)
+ endif()
+ cget_check_pkg_install(FOUND ${NAMES})
+ if(NOT FOUND)
+
+ if(URL MATCHES "recipe://")
+ string(REPLACE "recipe://" "" RECIPE ${URL})
+ if(EXISTS ${RECIPE}/requirements.txt)
+ cmake_get_from(${RECIPE}/requirements.txt PREFIX ${PARSE_PREFIX} CMAKE_ARGS ${PARSE_CMAKE_ARGS})
+ endif()
+ if(EXISTS ${RECIPE}/package.txt)
+ cmake_get_from(${RECIPE}/package.txt PREFIX ${PARSE_PREFIX} CMAKE_ARGS ${PARSE_CMAKE_ARGS} NO_RECIPE)
+ endif()
+ else()
+ cget_mktemp_dir(TMP_DIR)
+
+ if(PREFIX_HASH)
+ string(TOUPPER ${PREFIX_HASH} _HASH)
+ string(REPLACE ":" "=" _HASH ${_HASH})
+ set(HASH EXPECTED_HASH ${_HASH})
+ endif()
+
+ cget_fetch(DIR ${TMP_DIR}/download ${URL} ${HASH} SHOW_PROGRESS)
+ if(EXISTS ${DIR}/requirements.txt)
+ cmake_get_from(${DIR}/requirements.txt ${BASE_DIR_ARG} PREFIX ${PARSE_PREFIX} CMAKE_ARGS ${PARSE_CMAKE_ARGS})
+ endif()
+ if(PARSE_CMAKE_FILE)
+ cget_find_cmake(${PARSE_CMAKE_FILE} ${DIR})
+ endif()
+ cget_install_dir(${DIR} BUILD_DIR ${TMP_DIR}/build PREFIX ${PARSE_PREFIX} CMAKE_ARGS -DCGET_CMAKE_ORIGINAL_SOURCE_FILE=${DIR}/${_cget_cmake_original_file} ${PARSE_CMAKE_ARGS})
+
+ file(REMOVE_RECURSE ${TMP_DIR})
+ endif()
+ cget_add_pkg_install(${NAMES})
+ endif()
+endif()
+endfunction()
+
+set(_cmake_get_configure_reqs 0)
+function(cmake_get_from FILENAME)
+if(BUILD_DEPS)
+ set(options NO_RECIPE)
+ set(oneValueArgs PREFIX)
+ set(multiValueArgs CMAKE_ARGS)
+ cget_parse_arguments(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+ if(NOT EXISTS ${FILENAME})
+ message(FATAL_ERROR "File ${FILENAME} does not exists")
+ endif()
+
+ if(PARSE_UNPARSED_ARGUMENTS)
+ message(FATAL_ERROR "Unknown keywords given to cmake_get_from(): \"${PARSE_UNPARSED_ARGUMENTS}\"")
+ endif()
+
+ file(STRINGS ${FILENAME} LINES)
+ foreach(LINE ${LINES})
+ separate_arguments(WORDS UNIX_COMMAND "${LINE}")
+ set(REQ)
+ foreach(WORD ${WORDS})
+ if(WORD MATCHES "^#")
+ break()
+ endif()
+ list(APPEND REQ ${WORD})
+ endforeach()
+ list(LENGTH REQ REQ_LEN)
+ if(REQ_LEN GREATER 0)
+ cget_parse_requirement(PARSE_REQ ${REQ})
+ if(_cmake_get_configure_reqs)
+ string(CONFIGURE ${PARSE_REQ_PKG} PARSE_REQ_PKG @ONLY)
+ endif()
+ if(PARSE_NO_RECIPE)
+ set(NO_RECIPE "NO_RECIPE")
+ endif()
+ set(REQ_CMAKE)
+ if(PARSE_REQ_CMAKE)
+ cget_get_absolute_path(REQ_CMAKE ${PARSE_REQ_CMAKE} ${FILENAME})
+ endif()
+ if(PARSE_REQ_FILE)
+ cget_get_absolute_path(REQ_FILE ${PARSE_REQ_FILE} ${FILENAME})
+ cmake_get_from(${REQ_FILE}
+ ${NO_RECIPE}
+ PREFIX ${PARSE_PREFIX}
+ CMAKE_ARGS ${PARSE_CMAKE_ARGS} ${PARSE_REQ_CMAKE_ARGS}
+ )
+ else()
+ cget_get_absolute_path(REQ_PKG ${PARSE_REQ_PKG} ${FILENAME})
+ if(NOT EXISTS ${REQ_PKG})
+ set(REQ_PKG ${PARSE_REQ_PKG})
+ endif()
+ cmake_get(${REQ_PKG}
+ ${NO_RECIPE}
+ PREFIX ${PARSE_PREFIX}
+ HASH ${PARSE_REQ_HASH}
+ CMAKE_FILE ${REQ_CMAKE}
+ CMAKE_ARGS ${PARSE_CMAKE_ARGS} ${PARSE_REQ_CMAKE_ARGS}
+ )
+ endif()
+ endif()
+ endforeach()
+endif()
+endfunction()
+
=====================================
CMakeLists.txt
=====================================
@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.2)
-project(minimac4 VERSION 4.1.2)
+project(minimac4 VERSION 4.1.4)
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose build type (Debug|Release|RelWithDebInfo|MinSizeRel)" FORCE)
@@ -32,6 +32,9 @@ add_executable(minimac4
target_link_libraries(minimac4 savvy ${OMPP_LIBRARY} Threads::Threads)
+add_custom_target(manuals
+ COMMAND help2man --output "${CMAKE_BINARY_DIR}/minimac4.1" "${CMAKE_BINARY_DIR}/minimac4")
+
if(BUILD_TESTS)
enable_testing()
add_test(simple_test ${CMAKE_SOURCE_DIR}/test/simple-test.sh ${CMAKE_BINARY_DIR}/minimac4 m4_simple_test_output ${CMAKE_SOURCE_DIR}/test/input/ref_panel.vcf ${CMAKE_SOURCE_DIR}/test/input/tar_panel.vcf)
@@ -39,6 +42,9 @@ if(BUILD_TESTS)
endif()
install(TARGETS minimac4 RUNTIME DESTINATION bin)
+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/minimac4.1
+ DESTINATION share/man/man1
+ OPTIONAL)
set(CPACK_PACKAGE_VERSION_MAJOR ${PROJECT_VERSION_MAJOR})
set(CPACK_PACKAGE_VERSION_MINOR ${PROJECT_VERSION_MINOR})
=====================================
README.md
=====================================
@@ -4,18 +4,19 @@ Minimac4 is a lower memory and more computationally efficient
implementation of the genotype imputation algorithms in
minimac/mininac2/minimac3.
-<<< SEE http://genome.sph.umich.edu/wiki/Minimac4 FOR DOCUMENTATION >>>
+## Installation
+A prebuilt Linux x86 executable is available at https://github.com/statgen/Minimac4/releases/latest by downloading and running minimac4-{version}-Linux-x86_64.sh. Alternativbely, you can build from source.
-## Prerequisites
-Automatic installation of Minimac4 requires [cget](http://cget.readthedocs.io/en/latest/src/intro.html#installing-cget) and cmake >= v3.2.
+## Prerequisites for building from source
+Minimac4 requires GCC >= v5 or a compiler with full C++11 support. Automatic installation of Minimac4 requires [cget](http://cget.readthedocs.io/en/latest/src/intro.html#installing-cget) and CMake >= v3.2.
-## Installation
-The easiest way to install Minimac4 and its dependencies is to use cget:
+## Building from source
+The simplest way to build the latest Minimac4 and its dependencies is to use cget:
```bash
cget install --prefix <install_prefix> statgen/Minimac4
```
-Alternatively, you can install manually:
+Alternatively, you can build manually:
```bash
cd Minimac4
cget install -f ./requirements.txt # Install dependencies locally.
@@ -33,6 +34,15 @@ make
make CTEST_OUTPUT_ON_FAILURE=1 test
```
+Since some users have reported issues with installing cget with pip, a cmake-only alternative for installing dependencies is available:
+```shell
+cmake -P dependencies.cmake deps/
+mkdir build; cd build
+cmake -DCMAKE_PREFIX_PATH=$(pwd)/../deps/ -DCMAKE_CXX_FLAGS="-I$(pwd)/../deps/include" ..
+make
+make install
+```
+
## Usage
See `minimac4 --help` for detailed usage.
@@ -42,7 +52,7 @@ A typical Minimac4 command line for imputation is as follows
minimac4 reference.msav target.vcf.gz > imputed.sav
```
-Here reference.msav is a reference panel (e.g. 1000 Genomes) compressed with MVCF encoding,
+Here reference.msav is a reference panel (e.g. [1000 Genomes download](ftp://share.sph.umich.edu/minimac4/panels/g1k_p3_msav_files_with_estimates.tar.gz)) compressed with MVCF encoding,
target.vcf.gz is an indexed VCF containing phased genotype array data,
and imputed.sav is the imputed output.
@@ -61,3 +71,15 @@ Meta-imputation with MetaMinimac2 requires `--empirical-output` (or `-e`) to be
```bash
minimac4 reference.msav target.bcf -o imputed.dose.sav -e imputed.empirical_dose.sav
```
+
+## Reference Panel Creation
+If an M3VCF file is already available, it can be converted to the new MVCF format with:
+```
+minimac4 --update-m3vcf reference.m3vcf.gz > reference.msav
+```
+
+Otherwise, phased VCFs containing the reference haplotypes can be compressed into an MVCF with:
+```
+minimac4 --compress-reference reference.{sav,bcf,vcf.gz} > reference.msav
+```
+
=====================================
dep/libstatgen.cmake deleted
=====================================
@@ -1,15 +0,0 @@
-cmake_minimum_required(VERSION 3.2)
-project(libStatGen VERSION 1.0.0)
-
-#execute_process(COMMAND ./configure --prefix=${CMAKE_INSTALL_PREFIX} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
-
-add_custom_target(libStatGen ALL COMMAND ${CMAKE_COMMAND} -E env CPATH=${CGET_PREFIX}/include make WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMENT "Builing libStatGen ...")
-
-file(GLOB_RECURSE LSG_HEADER_LIST "bam/*.h" "fastq/*.h" "general/*.h" "glf/*.h" "samtools/*.h" "vcf/*.h")
-install(FILES ${LSG_HEADER_LIST} DESTINATION include)
-
-if (BUILD_SHARED_LIBS)
- install(FILES ${CMAKE_SHARED_LIBRARY_PREFIX}StatGen${CMAKE_SHARED_LIBRARY_SUFFIX} DESTINATION lib)
-else()
- install(FILES ${CMAKE_STATIC_LIBRARY_PREFIX}StatGen${CMAKE_STATIC_LIBRARY_SUFFIX} DESTINATION lib)
-endif()
=====================================
dependencies.cmake
=====================================
@@ -0,0 +1,17 @@
+#!/usr/bin/env cmake -P
+
+set(ARGS)
+foreach(i RANGE 4 ${CMAKE_ARGC})
+ list(APPEND ARGS ${CMAKE_ARGV${i}})
+endforeach()
+
+set(_PREFIX ${CMAKE_ARGV3})
+
+# Make sure this is in the module path
+list(APPEND CMAKE_MODULE_PATH ${CMAKEGET_MODULE_PATH})
+include(${CMAKE_CURRENT_LIST_DIR}/CMakeGet.cmake)
+
+get_filename_component(PREFIX ${_PREFIX} ABSOLUTE)
+# Install recipes
+#cmake_get(pfultz2/cget-recipes PREFIX ${PREFIX} CMAKE_ARGS ${ARGS})
+cmake_get_from(${CMAKE_CURRENT_LIST_DIR}/requirements.txt PREFIX ${PREFIX} CMAKE_ARGS ${ARGS})
=====================================
install.sh deleted
=====================================
@@ -1,12 +0,0 @@
-#!/bin/sh
-
-rm -rf cget/ release-build/ install.log
-echo -e "Installing Dependencies - Libstatgen ..."
-cget install -f requirements.txt &> install.log
-mkdir release-build
-cd release-build/
-echo -e "Generating MakeFiles ..."
-cmake -DCMAKE_TOOLCHAIN_FILE=../cget/cget/cget.cmake -DCMAKE_BUILD_TYPE=Release ..
-make
-echo "Binary created at /release-build/minimac4"
-
=====================================
package-linux.sh
=====================================
@@ -1,10 +1,10 @@
#!/bin/bash
-# Usage: docker build -t packaging-ubuntu20 -f packaging-dockerfile-ubuntu20 .
-# docker run -v `pwd`:/app -w /app packaging-ubuntu20 ./package-linux.sh
+# Usage: docker build -t packaging-alpine3.17 -f packaging-dockerfile-alpine3.17 .
+# docker run -v `pwd`:/app -w /app packaging-alpine3.17 ./package-linux.sh
set -euo pipefail
src_dir=`pwd`
-build_dir=${src_dir}/pkg-build
+build_dir=${src_dir}/pkg-build-alpine
rm -rf ${build_dir}
mkdir ${build_dir}
@@ -12,28 +12,33 @@ cd ${build_dir}
export CFLAGS="-fPIC"
export CXXFLAGS="-fPIC"
-cget ignore xz
-cget install zlib,http://zlib.net/fossils/zlib-1.2.11.tar.gz
-rm cget/lib/libz.so
-cget install -f ${src_dir}/requirements.txt
+
+cmake -P ../dependencies.cmake deps/ -DSHRINKWRAP_PREFER_STATIC=ON
+rm deps/lib/libz.so
unset CFLAGS
unset CXXFLAGS
+arc=`uname -m`
cmake \
-DCMAKE_BUILD_TYPE=Release \
- -DCMAKE_TOOLCHAIN_FILE=cget/cget/cget.cmake \
+ -DCMAKE_PREFIX_PATH=$(pwd)/deps \
+ -DCMAKE_CXX_FLAGS="-I$(pwd)/deps/include" \
-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
- -DCMAKE_EXE_LINKER_FLAGS="-static -static-libgcc -static-libstdc++" \
- -DCPACK_GENERATOR="STGZ;DEB;RPM" \
+ -DCMAKE_EXE_LINKER_FLAGS="-static" \
+ -DCPACK_GENERATOR="STGZ" \
+ -DCPACK_SYSTEM_NAME="Linux-${arc}" \
+ -DCPACK_RESOURCE_FILE_LICENSE=${src_dir}/LICENSE \
-DCPACK_PACKAGE_CONTACT="csg-devel at umich.edu" \
${src_dir}
+#-DCMAKE_EXE_LINKER_FLAGS="-static -static-libgcc -static-libstdc++"
+
make
-#make manuals
+make manuals
make package
mkdir -p ${src_dir}/pkg/
-cp minimac4-*.{sh,deb,rpm} ${src_dir}/pkg/
+cp minimac4-*.sh ${src_dir}/pkg/
cd ${src_dir}
rm -r ${build_dir}
=====================================
packaging-dockerfile-ubuntu20 → packaging-dockerfile-alpine3.17
=====================================
@@ -1,17 +1,9 @@
-FROM ubuntu:20.04
+FROM alpine:3.17
ENV TZ=Etc/UTC
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
-RUN apt-get update && apt-get install -y \
- build-essential \
+RUN apk update && apk add --no-cache binutils bash cmake make libgcc musl-dev gcc g++ \
curl \
- cmake \
git \
- help2man \
- lsb-release \
- python3 \
- python3-pip \
- rpm
-
-RUN pip install cget
+ help2man
=====================================
src/dosage_writer.cpp
=====================================
@@ -58,7 +58,7 @@ std::vector<std::pair<std::string, std::string>> dosage_writer::gen_headers(cons
{"source", "Minimac v" + std::string(VERSION)},
{"phasing", "full"},
{"contig", "<ID=" + std::string(chromosome) + ">"},
- {"INFO", "<ID=AF,Number=1,Type=Float,Description=\"Estimated Alternate Allele Frequency\">"},
+ {"INFO", "<ID=AF,Number=A,Type=Float,Description=\"Estimated Alternate Allele Frequency\">"},
{"INFO", "<ID=MAF,Number=1,Type=Float,Description=\"Estimated Minor Allele Frequency\">"},
{"INFO", "<ID=AVG_CS,Number=1,Type=Float,Description=\"Average Call Score\">"},
{"INFO", "<ID=R2,Number=1,Type=Float,Description=\"Estimated Imputation Accuracy (R-square)\">"},
@@ -73,11 +73,11 @@ std::vector<std::pair<std::string, std::string>> dosage_writer::gen_headers(cons
if (f == "GT")
headers.emplace_back("FORMAT", "<ID=GT,Number=1,Type=String,Description=\"Genotype\">");
else if (f == "DS")
- headers.emplace_back("FORMAT", "<ID=DS,Number=1,Type=Float,Description=\"Estimated Alternate Allele Dosage : [P(0/1)+2*P(1/1)]\">");
+ headers.emplace_back("FORMAT", "<ID=DS,Number=A,Type=Float,Description=\"Estimated Alternate Allele Dosage : [P(0/1)+2*P(1/1)]\">");
else if (f == "HDS")
- headers.emplace_back("FORMAT", "<ID=HDS,Number=2,Type=Float,Description=\"Estimated Haploid Alternate Allele Dosage \">");
+ headers.emplace_back("FORMAT", "<ID=HDS,Number=.,Type=Float,Description=\"Estimated Haploid Alternate Allele Dosage \">");
else if (f == "GP")
- headers.emplace_back("FORMAT", "<ID=GP,Number=3,Type=Float,Description=\"Estimated Posterior Probabilities for Genotypes 0/0, 0/1 and 1/1 \">");
+ headers.emplace_back("FORMAT", "<ID=GP,Number=G,Type=Float,Description=\"Estimated Posterior Probabilities for Genotypes 0/0, 0/1 and 1/1 \">");
else if (f == "SD")
headers.emplace_back("FORMAT", "<ID=SD,Number=1,Type=Float,Description=\"Variance of Posterior Genotype Probabilities\">");
}
@@ -103,7 +103,7 @@ std::vector<std::pair<std::string, std::string>> dosage_writer::gen_emp_headers(
{"INFO", "<ID=IMPUTED,Number=0,Type=Flag,Description=\"Marker was imputed\">"},
{"INFO", "<ID=TYPED,Number=0,Type=Flag,Description=\"Marker was genotyped\">"},
{"FORMAT","<ID=GT,Number=1,Type=String,Description=\"Genotyped alleles from Array\">"},
- {"FORMAT","<ID=LDS,Number=2,Type=Float,Description=\"Leave-one-out Imputed Dosage : Estimated Haploid Alternate Allele Dosage assuming site was NOT genotyped\">"}};
+ {"FORMAT","<ID=LDS,Number=.,Type=Float,Description=\"Leave-one-out Imputed Dosage : Estimated Haploid Alternate Allele Dosage assuming site was NOT genotyped\">"}};
// TODO: command string
@@ -112,21 +112,30 @@ std::vector<std::pair<std::string, std::string>> dosage_writer::gen_emp_headers(
savvy::file::format dosage_writer::format_from_filename(const std::string& filename, savvy::file::format default_format)
{
- if (filename.size() >= 3)
+ if (filename.size() >= 4)
{
- const std::string ext = filename.substr(filename.size() - 3);
- if (ext == "sav")
+ const std::string ext = filename.substr(filename.size() - 4);
+ if (ext == ".sav")
return savvy::file::format::sav;
- else if (ext == "bcf")
+ else if (ext == ".bcf")
return savvy::file::format::bcf;
- else if (ext == "vcf")
+ else if (ext == ".vcf")
return savvy::file::format::vcf;
}
- if (filename.size() >= 6)
+ if (filename.size() >= 5)
+ {
+ const std::string ext = filename.substr(filename.size() - 5);
+ if (ext == ".usav")
+ return savvy::file::format::sav;
+ else if (ext == ".ubcf")
+ return savvy::file::format::bcf;
+ }
+
+ if (filename.size() >= 7)
{
- const std::string ext = filename.substr(filename.size() - 6);
- if (ext == "vcf.gz")
+ const std::string ext = filename.substr(filename.size() - 7);
+ if (ext == ".vcf.gz")
return savvy::file::format::vcf;
}
@@ -138,12 +147,30 @@ int dosage_writer::clevel_from_filename(const std::string& filename, int default
if (filename.size() >= 4)
{
const std::string ext = filename.substr(filename.size() - 4);
- if (ext == ".sav" || ext == ".bcf" || ext == "f.gz")
+ if (ext == ".sav")
+ return 6;
+ else if (ext == ".bcf")
return 6;
- else if (ext == "usav" || ext == "ubcf" || ext == ".vcf")
+ else if (ext == ".vcf")
+ return 0;
+ }
+
+ if (filename.size() >= 5)
+ {
+ const std::string ext = filename.substr(filename.size() - 5);
+ if (ext == ".usav")
+ return 0;
+ else if (ext == ".ubcf")
return 0;
}
+ if (filename.size() >= 7)
+ {
+ const std::string ext = filename.substr(filename.size() - 7);
+ if (ext == ".vcf.gz")
+ return 6;
+ }
+
return default_clevel;
}
@@ -181,6 +208,7 @@ bool dosage_writer::merge_temp_files(std::list<savvy::reader>& temp_files, std::
std::vector<float> partial_lds;
std::vector<std::int8_t> pasted_gt;
std::vector<std::int8_t> partial_gt;
+ std::size_t max_ploidy = 0;
int good_count = temp_files.size();
while (good_count == temp_files.size())
@@ -194,8 +222,16 @@ bool dosage_writer::merge_temp_files(std::list<savvy::reader>& temp_files, std::
good_count = 0;
for (auto it = temp_files.begin(); it != temp_files.end(); ++it)
{
- good_count += (int)it->read(out_var).good();
+ bool read_succeeded = it->read(out_var).good();
+ good_count += (int)read_succeeded;
out_var.get_format("HDS", partial_hds);
+
+ // verify max ploidy is consistent across all temp files
+ if (max_ploidy == 0)
+ max_ploidy = partial_hds.size() / it->samples().size();
+ if (read_succeeded && max_ploidy != (partial_hds.size() / it->samples().size()))
+ return std::cerr << "Error: max ploidy is not consistent across temp files. This should never happen. Please report." << std::endl, false;
+
std::size_t old_size = pasted_hds.size();
pasted_hds.resize(old_size + partial_hds.size());
for (auto jt = partial_hds.begin(); jt != partial_hds.end(); ++jt)
@@ -292,6 +328,9 @@ bool dosage_writer::merge_temp_files(std::list<savvy::reader>& temp_files, std::
if (good_count_emp < temp_emp_files.size())
return std::cerr << "Error: record mismatch in empirical temp files" << std::endl, false;
+ if (pasted_hds.size() != pasted_gt.size() || pasted_lds.size() != pasted_gt.size())
+ return std::cerr << "Error: Merged HDS, LDS, and GT are not consistent. This should never happen. Please report." << std::endl, false;
+
out_var_emp.set_format("GT", pasted_gt);
out_var_emp.set_format("LDS", pasted_lds);
emp_out_file_->write(out_var_emp);
=====================================
src/hidden_markov_model.cpp
=====================================
@@ -8,11 +8,12 @@
constexpr float hidden_markov_model::jump_fix;
constexpr float hidden_markov_model::jump_threshold;
-hidden_markov_model::hidden_markov_model(float s3_prob_threshold, float s1_prob_threshold, float diff_threshold, float background_error) :
+hidden_markov_model::hidden_markov_model(float s3_prob_threshold, float s1_prob_threshold, float diff_threshold, float background_error, float decay) :
prob_threshold_(s3_prob_threshold),
s1_prob_threshold_(s1_prob_threshold),
diff_threshold_(diff_threshold),
- background_error_(background_error)
+ background_error_(background_error),
+ decay_(decay)
{
}
@@ -466,7 +467,7 @@ void hidden_markov_model::impute(double& prob_sum, std::size_t& prev_best_typed_
assert(row == 0 || tar_variants[row].pos >= tar_variants[row - 1].pos);
std::size_t mid_point = row == 0 ? 1 : std::max<std::int32_t>(1, std::int32_t(tar_variants[row].pos) - std::int32_t(tar_variants[row].pos - tar_variants[row - 1].pos) / 2);
if (full_ref_ritr == full_ref_rend || full_ref_ritr->pos < mid_point) // TODO: stop traverse_backward at beginning of region.
- return;
+ return; // We are outside the impute region (in the extended region)
float typed_dose = std::numeric_limits<float>::quiet_NaN();
float typed_loo_dose = std::numeric_limits<float>::quiet_NaN();
@@ -544,19 +545,19 @@ void hidden_markov_model::impute(double& prob_sum, std::size_t& prev_best_typed_
an += card;
}
- if (p_alt > 1. || p_alt < 0.)
- {
- auto a = 0;
- }
-
assert(full_ref_ritr->ac >= ac);
assert(n_templates >= an);
if (n_templates - an > 0)
p_alt += (1. - best_sum) * (double(full_ref_ritr->ac - ac) / double(n_templates - an));
- if (p_alt > 1.0f)
+ if (decay_ > 0. && (row == 0 || row + 1 == tar_variants.size()))
{
- auto a = 0;
+ // Either the first or last typed site, so decay.
+
+ // We are estimating cM with physical distance for now. TODO: add tar_variants[row].cm
+ double cm_estimate = std::abs(std::int64_t(full_ref_ritr->pos) - std::int64_t(tar_variants[row].pos)) / 1e6;
+ double decay = recombination::cm_to_switch_prob(cm_estimate, decay_);
+ p_alt = p_alt * (1. - decay) + (full_ref_ritr->ac / double(n_templates)) * decay;
}
p_alt = std::max(0., std::min(1., p_alt));
=====================================
src/hidden_markov_model.hpp
=====================================
@@ -20,6 +20,15 @@ public:
loo_dosages_.resize(n_loo_rows, std::vector<float>(n_columns, savvy::typed_value::end_of_vector_value<float>()));
}
+ void fill_eov()
+ {
+ for (auto& v : dosages_)
+ std::fill(v.begin(), v.end(),savvy::typed_value::end_of_vector_value<float>());
+
+ for (auto& v : loo_dosages_)
+ std::fill(v.begin(), v.end(),savvy::typed_value::end_of_vector_value<float>());
+ }
+
std::array<std::size_t, 2> dimensions() const { return {dosages_.size(), dosages_.empty() ? 0 : dosages_[0].size()}; }
std::array<std::size_t, 2> dimensions_loo() const { return {dosages_.size(), dosages_.empty() ? 0 : dosages_[0].size()}; }
@@ -40,6 +49,7 @@ private:
float s1_prob_threshold_ = -1.f;
float diff_threshold_ = 0.01f;
float background_error_ = 1e-5f;
+ double decay_ = 0.;
static constexpr float jump_fix = 1e15f;
static constexpr float jump_threshold = 1e-10f;
const std::int16_t bin_scalar_ = 1000;
@@ -54,7 +64,7 @@ private:
std::vector<float> best_s3_probs_;
public:
- hidden_markov_model(float s3_prob_threshold, float s1_prob_threshold, float diff_threshold, float background_error);
+ hidden_markov_model(float s3_prob_threshold, float s1_prob_threshold, float diff_threshold, float background_error, float decay);
void traverse_forward(const std::deque<unique_haplotype_block>& ref_haps,
const std::vector<target_variant>& tar_variant,
=====================================
src/input_prep.cpp
=====================================
@@ -680,8 +680,8 @@ bool convert_old_m3vcf(const std::string& input_path, const std::string& output_
headers.insert(headers.begin(), {"fileformat","VCFv4.2"});
if (!phasing_header_present)
headers.emplace_back("phasing","full");
- headers.emplace_back("INFO", "<ID=AC,Number=1,Type=Integer,Description=\"Total number of alternate alleles in called genotypes\">");
- headers.emplace_back("INFO", "<ID=AN,Number=1,Type=Float,Description=\"Total number of alleles in called genotypes\">");
+ headers.emplace_back("INFO", "<ID=AC,Number=A,Type=Integer,Description=\"Total number of alternate alleles in called genotypes\">");
+ headers.emplace_back("INFO", "<ID=AN,Number=1,Type=Integer,Description=\"Total number of alleles in called genotypes\">");
headers.emplace_back("INFO","<ID=REPS,Number=1,Type=Integer,Description=\"Number of distinct haplotypes in block\">");
headers.emplace_back("INFO","<ID=VARIANTS,Number=1,Type=Integer,Description=\"Number of variants in block\">");
headers.emplace_back("INFO","<ID=ERR,Number=1,Type=Float,Description=\"Error parameter for HMM\">");
@@ -760,8 +760,8 @@ bool compress_reference_panel(const std::string& input_path, const std::string&
{"subfileformat","MVCFv3.0"},
{"fileformat","VCFv4.2"},
{"phasing","full"},
- {"INFO", "<ID=AC,Number=1,Type=Integer,Description=\"Total number of alternate alleles in called genotypes\">"},
- {"INFO", "<ID=AN,Number=1,Type=Float,Description=\"Total number of alleles in called genotypes\">"},
+ {"INFO", "<ID=AC,Number=A,Type=Integer,Description=\"Total number of alternate alleles in called genotypes\">"},
+ {"INFO", "<ID=AN,Number=1,Type=Integer,Description=\"Total number of alleles in called genotypes\">"},
{"INFO","<ID=REPS,Number=1,Type=Integer,Description=\"Number of distinct haplotypes in block\">"},
{"INFO","<ID=VARIANTS,Number=1,Type=Integer,Description=\"Number of variants in block\">"},
{"INFO","<ID=ERR,Number=1,Type=Float,Description=\"Error parameter for HMM\">"},
=====================================
src/main.cpp
=====================================
@@ -107,7 +107,7 @@ public:
std::cerr << "Running HMM with " << args.threads() << " threads ..." << std::endl;
start_time = std::time(nullptr);
- std::vector<hidden_markov_model> hmms(tpool.thread_count(), {args.prob_threshold(), args.prob_threshold_s1(), args.diff_threshold(), 1e-5f});
+ std::vector<hidden_markov_model> hmms(tpool.thread_count(), {args.prob_threshold(), args.prob_threshold_s1(), args.diff_threshold(), 1e-5f, args.decay()});
std::size_t ploidy = target_sites[0].gt.size() / sample_ids.size();
std::size_t haplotype_buffer_size = args.temp_buffer() * ploidy;
@@ -126,6 +126,9 @@ public:
hmm_results.loo_dosages_[j].resize(group_size);
}
+ if (i > 0)
+ hmm_results.fill_eov();
+
start_time = std::time(nullptr);
omp::parallel_for_exp(
omp::static_schedule(), omp::sequence_iterator(i), omp::sequence_iterator(i + group_size), [&](int& i, const omp::iteration_context& ctx)
@@ -146,14 +149,14 @@ public:
if (target_sites[0].gt.size() > haplotype_buffer_size)
{
std::string out_emp_path;
- std::string out_path = "/tmp/m4_" + std::to_string(i / haplotype_buffer_size) + "_XXXXXX";
+ std::string out_path = args.temp_prefix() + std::to_string(i / haplotype_buffer_size) + "_XXXXXX";
tmp_fd = mkstemp(&out_path[0]);
if (tmp_fd < 0)
return std::cerr << "Error: could not open temp file (" << out_path << ")" << std::endl, false;
if (args.emp_out_path().size())
{
- out_emp_path = "/tmp/m4_" + std::to_string(i / haplotype_buffer_size) + "_emp_XXXXXX";
+ out_emp_path = args.temp_prefix() + std::to_string(i / haplotype_buffer_size) + "_emp_XXXXXX";
tmp_emp_fd = mkstemp(&out_emp_path[0]);
if (tmp_emp_fd < 0)
return std::cerr << "Error: could not open temp file (" << out_emp_path << ")" << std::endl, false;
=====================================
src/prog_args.hpp
=====================================
@@ -9,6 +9,7 @@
#include <vector>
#include <unordered_set>
#include <cstdint>
+#include <cstdlib>
#include <fstream>
#include <iterator>
@@ -19,6 +20,7 @@ private:
std::string tar_path_;
std::string map_path_;
std::string out_path_ = "/dev/stdout";
+ std::string temp_prefix_;
std::string prefix_; // deprecated
std::string emp_out_path_;
std::string sites_out_path_;
@@ -31,6 +33,7 @@ private:
std::int64_t chunk_size_ = 20000000;
std::int64_t overlap_ = 3000000;
std::int16_t threads_ = 1;
+ float decay_ = 0.f;
float min_r2_ = -1.f;
float min_ratio_ = 1e-5f;
float prob_threshold_ = 0.01f;
@@ -56,6 +59,7 @@ public:
const std::string& out_path() const { return out_path_; }
const std::string& emp_out_path() const { return emp_out_path_; }
const std::string& sites_out_path() const { return sites_out_path_; }
+ const std::string& temp_prefix() const { return temp_prefix_; }
savvy::file::format out_format() const { return out_format_; }
std::uint8_t out_compression() const { return out_compression_; }
const std::vector<std::string>& fmt_fields() const { return fmt_fields_; }
@@ -65,6 +69,7 @@ public:
std::int64_t overlap() const { return overlap_; }
std::int16_t threads() const { return threads_; }
std::size_t temp_buffer() const { return temp_buffer_ ; }
+ float decay() const { return decay_; }
float min_r2() const { return min_r2_; }
float min_ratio() const { return min_ratio_; }
float prob_threshold() const { return prob_threshold_; }
@@ -79,7 +84,9 @@ public:
prog_args() :
getopt_wrapper(
- "Usage: minimac4 [opts ...] <reference.msav> <target.{sav,bcf,vcf.gz}>",
+ "Usage: minimac4 [opts ...] <reference.msav> <target.{sav,bcf,vcf.gz}>\n"
+ " minimac4 [opts ...] --update-m3vcf <reference.m3vcf.gz>\n"
+ " minimac4 [opts ...] --compress-reference <reference.{sav,bcf,vcf.gz}>",
{
{"all-typed-sites", no_argument, 0, 'a', "Include in the output sites that exist only in target VCF"},
{"temp-buffer", required_argument, 0, 'b', "Number of samples to impute before writing to temporary files (default: 200)"},
@@ -89,24 +96,26 @@ public:
{"format", required_argument, 0, 'f', "Comma-separated list of format fields to generate (GT, HDS, DS, GP, or SD; default: HDS)"},
{"map", required_argument, 0, 'm', "Genetic map file"},
{"output", required_argument, 0, 'o', "Output path (default: /dev/stdout)"},
- {"output-format", required_argument, 0, 'O', "Output file format (bcf, sav, vcf.gz, ubcf, usav, or vcf; default: bcf)"},
+ {"output-format", required_argument, 0, 'O', "Default output file format used for ambiguous filenames (bcf, sav, vcf.gz, ubcf, usav, or vcf; default: sav)"},
//{"pass-only", no_argument, 0, 'p', "Only imports variants with FILTER column set to PASS"},
{"region", required_argument, 0, 'r', "Genomic region to impute"},
{"sites", required_argument, 0, 's', "Output path for sites-only file"},
{"threads", required_argument, 0, 't', "Number of threads (default: 1)"},
{"version", no_argument, 0, 'v', "Print version"},
{"overlap", required_argument, 0, 'w', "Size (in base pairs) of overlap before and after impute region to use as input to HMM (default: 3000000)"},
+ {"decay", required_argument, 0, '\x02', "Decay rate for dosages in flanking regions (default: disabled with 0)"},
{"min-r2", required_argument, 0, '\x02', "Minimum estimated r-square for output variants"},
{"min-ratio", required_argument, 0, '\x02', "Minimum ratio of number of target sites to reference sites (default: 0.00001)"},
{"match-error", required_argument, 0, '\x02', "Error parameter for HMM match probabilities (default: 0.01)"},
{"min-recom", required_argument, 0, '\x02', "Minimum recombination probability (default: 0.00001)"},
{"prob-threshold", required_argument, 0, '\x02', "Probability threshold used for template selection"},
- {"prob-threshold-s1", required_argument, 0, '\x02', ""}, // "Probability threshold used for template selection in original state space"},
+ {"prob-threshold-s1", required_argument, 0, '\x02', "Probability threshold used for template selection in original state space"},
{"diff-threshold", required_argument, 0, '\x02', "Probability diff threshold used in template selection"},
{"sample-ids", required_argument, 0, '\x02', "Comma-separated list of sample IDs to subset from reference panel"},
{"sample-ids-file", required_argument, 0, '\x02', "Text file containing sample IDs to subset from reference panel (one ID per line)"},
- {"update-m3vcf", no_argument, 0, '\x01', nullptr},
- {"compress-reference", no_argument, 0, '\x01', nullptr},
+ {"temp-prefix", required_argument, 0, '\x02', "Prefix path for temporary output files (default: ${TMPDIR}/m4_)"},
+ {"update-m3vcf", no_argument, 0, '\x01', "Converts M3VCF to MVCF (default output: /dev/stdout)"},
+ {"compress-reference", no_argument, 0, '\x01', "Compresses VCF to MVCF (default output: /dev/stdout)"},
// vvvv deprecated vvvv //
{"allTypedSites", no_argument, 0, '\x01', nullptr},
{"rsid", no_argument, 0, '\x01', nullptr},
@@ -275,7 +284,12 @@ public:
case '\x02':
{
std::string long_opt_str = std::string(long_options_[long_index].name);
- if (long_opt_str == "min-r2")
+ if (long_opt_str == "decay")
+ {
+ decay_ = std::atof(optarg ? optarg : "");
+ break;
+ }
+ else if (long_opt_str == "min-r2")
{
min_r2_ = std::atof(optarg ? optarg : "");
break;
@@ -305,6 +319,11 @@ public:
prob_threshold_s1_ = std::min(1., std::atof(optarg ? optarg : ""));
break;
}
+ else if (long_opt_str == "temp-prefix")
+ {
+ temp_prefix_ = optarg ? optarg : "";
+ break;
+ }
else if (long_opt_str == "diff-threshold")
{
diff_threshold_ = std::max(0., std::atof(optarg ? optarg : ""));
@@ -446,6 +465,22 @@ public:
emp_out_path_ = prefix_ + ".empiricalDose." + suffix;
}
+ if (temp_prefix_.empty())
+ {
+ char* tmpdir = std::getenv("TMPDIR");
+ if (tmpdir && strlen(tmpdir))
+ {
+ std::string p(tmpdir);
+ if (p.back() != '/')
+ p += '/';
+ temp_prefix_ = p + "m4_";
+ }
+ else
+ {
+ temp_prefix_ = "/tmp/m4_";
+ }
+ }
+
if (!emp_out_path_.empty() && std::find(fmt_fields_.begin(), fmt_fields_.end(), "HDS") == fmt_fields_.end())
fmt_fields_.emplace_back("HDS");
=====================================
src/recombination.hpp
=====================================
@@ -26,6 +26,7 @@ public:
static bool parse_map_file(const std::string& map_file_path, std::vector<target_variant>& sites, float recom_min);
static double haldane(double cm) { return (1. - std::exp(-cm/50.))/2.;}
static double cm_to_switch_prob(double cm) { return 1. - std::exp(-cm/100.);}
+ static double cm_to_switch_prob(double cm, double decay_rate) { return 1. - std::exp(-decay_rate * cm/100.);}
static double haldane_inverse(double recom_prob) { return 50. * std::log(1. / (1. - 2. * recom_prob)); }
static double switch_prob_to_cm(double recom_prob) { return 100. * std::log(1. / (1. - recom_prob)); }
private:
=====================================
test/simple-test.sh
=====================================
@@ -18,7 +18,7 @@ bcftools index $ref_vcf
bcftools view $4 -Oz -o $tar_vcf
bcftools index $tar_vcf
$m4 --compress-reference $ref_vcf > $ref_msav
-$m4 $ref_msav $tar_vcf -f GT -O vcf.gz > $imputed_vcf
+$m4 $ref_msav $tar_vcf -f GT -O vcf.gz --temp-buffer 2 > $imputed_vcf
gzip -cd $imputed_vcf | grep -v "^#" | cut -f9- > $d/imputed_gt_matrix.tsv
gzip -cd $ref_vcf | grep -v "^#" | cut -f9- > $d/ref_gt_matrix.tsv
View it on GitLab: https://salsa.debian.org/med-team/minimac4/-/commit/6fe4c28160597587ede46846bbca4bef4009e0d8
--
View it on GitLab: https://salsa.debian.org/med-team/minimac4/-/commit/6fe4c28160597587ede46846bbca4bef4009e0d8
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20230719/dce3db60/attachment-0001.htm>
More information about the debian-med-commit
mailing list