[med-svn] [Git][med-team/kissplice][master] 7 commits: Fix debian/watch to newest gitlab urls
François Gindraud (@fgindraud)
gitlab at salsa.debian.org
Thu May 23 17:05:15 BST 2024
François Gindraud pushed to branch master at Debian Med / kissplice
Commits:
9fe9e789 by Francois Gindraud at 2024-05-23T16:18:07+02:00
Fix debian/watch to newest gitlab urls
uscan was not able to detect new releases.
Updated debian/watch to the recommendations for gitlab from https://wiki.debian.org/debian/watch.
- - - - -
8cb2aef9 by Francois Gindraud at 2024-05-23T17:39:12+02:00
New upstream version 2.6.4
- - - - -
5fb7330a by Francois Gindraud at 2024-05-23T17:39:12+02:00
routine-update: New upstream version
- - - - -
93c75fae by Francois Gindraud at 2024-05-23T17:39:15+02:00
Update upstream source from tag 'upstream/2.6.4'
Update to upstream version '2.6.4'
with Debian dir 40c663019b9a2a1b9732004903f856f4c340dea5
- - - - -
419b01e0 by Francois Gindraud at 2024-05-23T17:47:36+02:00
Adapt build system changes for 2.6.4
Kissplice 2.6.4 removes the redirect script to bcalm (used in the system bcalm install mode),
in favor of a toggle in the main script.
Thus remove the debian overrides that replaced the script with a symlink.
Also force the user guide to be built.
- - - - -
740d5d65 by Francois Gindraud at 2024-05-23T17:51:42+02:00
Fix dh_auto_clean for __pycache__ directories
- - - - -
a094828d by Francois Gindraud at 2024-05-23T17:58:26+02:00
Forgot Changelog entries
- - - - -
18 changed files:
- CMakeLists.txt
- ChangeLog
- README.md
- debian/changelog
- debian/links
- debian/rules
- debian/watch
- doc/CMakeLists.txt
- doc/user_guide.in.tex
- kissplice.in.py
- + readme.cmake.md
- tests/CMakeLists.txt
- − thirdparty/bcalm_redirect.in
- thirdparty/kissreads/src/CMakeLists.txt
- thirdparty/kissreads/src/extension_algorithm.cpp
- thirdparty/kissreads/src/fragment_index.cpp
- thirdparty/kissreads/src/libchash.cpp
- thirdparty/kissreads/src/outputs.cpp
Changes:
=====================================
CMakeLists.txt
=====================================
@@ -1,9 +1,9 @@
-# 3.1 required by: zlib targets, Threads (OpenMP target fallback)
-cmake_minimum_required(VERSION 3.1)
+# 3.9 required by: zlib targets, OpenMP target
+cmake_minimum_required(VERSION 3.9)
project(
kissplice
- VERSION 2.6.2 # Definition which is propagated through PROJECT_VERSION
+ VERSION 2.6.3 # Definition which is propagated through PROJECT_VERSION
LANGUAGES CXX
)
@@ -20,8 +20,6 @@ endif()
# Cmake already adds -g in Debug and -O3 in Release, so no need to set them manually.
# Enables common warnings for ALL targets.
add_compile_options(-Wall)
-# Profiling with gprof requires -pg which is better to add manually with -DCMAKE_(C|CXX)_FLAGS=-pg
-# `perf` is a more modern replacement anyway and does not require special compilation.
# Date for manpage and pdf doc
string(TIMESTAMP CONFIGURE_DATE "%Y-%m-%d" UTC)
@@ -30,31 +28,24 @@ string(TIMESTAMP CONFIGURE_DATE "%Y-%m-%d" UTC)
# structure and paths
# kissplice is composed of:
# - internal binaries which should go to <prefix>/libexec/kissplice
-# - a main frontend "binary" which is a python script which should go to <prefix>/bin
-# The frontend script expects this architecture to hold (with relative paths) to find the internal binaries.
-# For tests to work, this architecture is replicated in the temporary build directory wih <prefix>=<build_dir>
+# - an entry point "binary" which is a python script which should go to <prefix>/bin
+# The entry point script locates internal binaries using relative paths from itself.
+# For tests to work, this architecture is replicated in the temporary build directory with <prefix>=<build_dir>.
include(GNUInstallDirs) # GNU standard installation directories definitions (bin, libexec, and definitions for doc/man)
# Path to internal binaries relative to prefix. This is used in install() for internal binaries, to let the cmake prefix handling work.
set(RELATIVE_INTERNAL_BINDIR "${CMAKE_INSTALL_LIBEXECDIR}/kissplice")
# Absolute path to build location of internal binaries. This is used to define RUNTIME_OUTPUT_DIRECTORY of internal binaries (build dir).
set(BUILD_INTERNAL_BINDIR "${PROJECT_BINARY_DIR}/${RELATIVE_INTERNAL_BINDIR}")
-# Absolute path to build location of frontend binaries (kissplice script only for now)
+# Absolute path to build location of main entry point (kissplice script only for now)
set(BUILD_BINDIR "${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}")
# Relative path from script to internal binaries directory, written to kissplice script
file(RELATIVE_PATH KISSPLICE_BINDIR_TO_INTERNAL_BINDIR "${BUILD_BINDIR}" "${BUILD_INTERNAL_BINDIR}")
-# Complete the main script template to include the relative directory
-configure_file("${PROJECT_SOURCE_DIR}/kissplice.in.py" "${BUILD_BINDIR}/kissplice" @ONLY)
-install(PROGRAMS "${BUILD_BINDIR}/kissplice" DESTINATION "${CMAKE_INSTALL_BINDIR}")
-
###############################################################################
-# bcalm dependency
-# bcalm is a binary program needed for tests and after install.
+# bcalm dependency : bcalm is a binary program needed for tests and after install.
# It is packaged on debian, so default to using the system wide version (best practice for packaging kissplice).
-# The USE_BUNDLED_BCALM option can be used to locally build and ship an install of bcalm, placed in the internal binaries directory.
-# In either case a "bcalm" executable will be placed in internal binaries (bundled binary or a redirect script).
-# This simplifies kissplice.py which can always look in libexec/kissplice/bcalm.
-# This "bcalm" is placed in both build dir and install dir.
+# The USE_BUNDLED_BCALM option can be used to locally build and ship an install of bcalm, placed in the internal binaries directory (build and install).
+# The BCALM_PACKAGING_MODE string variable propagates this choice to the kissplice script (with known values unlike USE_BUNDLED_BCALM).
option(
USE_BUNDLED_BCALM
"by default kissplice requires an already installed bcalm program ; enable this option to bundle a local bcalm with kissplice."
@@ -64,31 +55,34 @@ if(NOT USE_BUNDLED_BCALM)
# Check system bcalm
find_program(SYSTEM_BCALM_PATH NAMES bcalm)
mark_as_advanced(SYSTEM_BCALM_PATH)
- if(NOT SYSTEM_BCALM_PATH)
- message(
- FATAL_ERROR "bcalm not found. install it with system packages,\
- or provide path to a custom install with -DCMAKE_PROGRAM_PATH=<prefix>,\
- or use bundled bcalm version with -DUSE_BUNDLED_BCALM=TRUE"
- )
+ if(SYSTEM_BCALM_PATH)
+ message(STATUS "Found bcalm tool: ${SYSTEM_BCALM_PATH}")
+ else()
+ message(WARNING "bcalm not found in PATH. install it with system packages, or use bundled bcalm version with -DUSE_BUNDLED_BCALM=TRUE")
endif()
- message(STATUS "Found bcalm tool: ${SYSTEM_BCALM_PATH}")
- # Add a simple redirection to system bcalm in internal binaries, as kissplice expects bcalm there.
- configure_file("${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/bcalm_redirect.in" "${BUILD_INTERNAL_BINDIR}/bcalm" @ONLY)
+ set(BCALM_PACKAGING_MODE "system")
else()
message(STATUS "using bundled version of bcalm (will be compiled at make time)")
include(ExternalProject)
ExternalProject_Add(
bundled_bcalm
PREFIX bundled_bcalm
- GIT_REPOSITORY "https://github.com/GATB/bcalm.git"
- GIT_TAG v2.2.3
+ # Using commit with Mac fix from a fork awaiting merge. FIXME replace by upstream (GATB user) when merged.
+ GIT_REPOSITORY "https://github.com/fgindraud/bcalm.git"
+ GIT_TAG cf371b697c537ca492da877f2531ebd494a4c79e
UPDATE_COMMAND "" # Prevent rebuilding bcalm everytime
- # Prevent install which is broken (vendored hdf5 ignores CMAKE_INSTALL_PREFIX).
- # Manually extract the bcalm binary to our internal binary build dir. This is run inside bcalm build dir.
+ # Manually extract the bcalm binary to our internal binary build dir, instead of broken install target.
+ # This is run inside bcalm build dir.
INSTALL_COMMAND "${CMAKE_COMMAND}" -E copy bcalm "${BUILD_INTERNAL_BINDIR}"
)
+ install(PROGRAMS "${BUILD_INTERNAL_BINDIR}/bcalm" DESTINATION "${RELATIVE_INTERNAL_BINDIR}")
+ set(BCALM_PACKAGING_MODE "bundled")
endif()
-install(PROGRAMS "${BUILD_INTERNAL_BINDIR}/bcalm" DESTINATION "${RELATIVE_INTERNAL_BINDIR}")
+
+###############################################################################
+# Complete the main script template to include the relative directory and bcalm location.
+configure_file("${PROJECT_SOURCE_DIR}/kissplice.in.py" "${BUILD_BINDIR}/kissplice" @ONLY)
+install(PROGRAMS "${BUILD_BINDIR}/kissplice" DESTINATION "${CMAKE_INSTALL_BINDIR}")
###############################################################################
# subdirs
=====================================
ChangeLog
=====================================
@@ -1,6 +1,24 @@
-UNRELEASED
+2024-05-23 Francois Gindraud <francois.gindraud at inria.fr>
+ * 2.6.3 -> 2.6.4
+ Bug fix for building kissplice and bcalm on ARM MacOS.
+ Improvements to switch between bundled and system mode for bcalm.
+
+2024-04-04 Francois Gindraud <francois.gindraud at inria.fr>
+ * 2.6.2 -> 2.6.3
+ Improvements to install process and documentation.
+ Introducing a debian/ubuntu binary build for convenience, useful for students with no root access.
+
+2022-04-29 Audric Cologne <audric.cologne at univ-lyon1.fr>
+ * 2.6.1 -> 2.6.2
Improvements:
- manpage and user_guide.pdf build process should now be reproducible
+ Method:
+ - Most of duplicated bubbles are now compressed in KisSplice (note the case for too large IR), improving speed and output clarity
+ - Remove paths with too low Shannon entropy, improving speed
+ - These two new filters can be disabled with --keep-redundancy and --keep-low-complexity
+ - Information on removed or compressed bubbles can be output with --get-redundance-info and --get-low-complexity-info
+ Option:
+ - Added --type1-only parameter to only quantify and output Type_1 bubbles (Alternative Splicing Events). Major speed-up for b > 10
2022-04-05 Francois Gindraud <francois.gindraud at inria.fr>
* 2.6.0 -> 2.6.1
=====================================
README.md
=====================================
@@ -2,53 +2,59 @@
Main page: http://kissplice.prabi.fr/
-[User guide](https://gitlab.inria.fr/erable/kissplice/-/jobs/artifacts/master/file/prefix/share/doc/kissplice/user_guide.pdf?job=check_install_and_user_guide) from latest build.
+[User guide](https://gitlab.inria.fr/erable/kissplice/-/jobs/artifacts/master/file/prefix/share/doc/kissplice/user_guide.pdf?job=with_bundled_bcalm_and_guide) from latest build.
-# Pre-packaged versions
+# How to install
+Install options by decreasing order of convenience
+
+## Debian/Ubuntu packages
+Kissplice is available as a system package in Debian/Ubuntu
+```bash
+apt install kissplice
+kissplice --help
+```
+
+## Conda
+Kissplice is available in conda in the [bioconda channel](https://bioconda.github.io/).
+Bioconda creates packages for Linux (x86_64 and arm64) and MacOS (x86_64 = intel macs before M1).
## Docker
You can find the latest version of KisSplice, KisSplice2RefGenome and KissDE [on Docker Hub](https://hub.docker.com/repository/docker/dwishsan/kissplice-pipeline).
We also propose a [stand-alone Docker image for the KissDE Shiny App](https://hub.docker.com/repository/docker/dwishsan/kde-shiny) for KisSplice results exploration.
-## Linux
-A package for kissplice is available on Debian / Ubuntu, but it may lag the current version.
-
-# Build from source
+## Binary package (Debian/Ubuntu)
+A precompiled standalone binary package for ubuntu is available in the [release page](https://gitlab.inria.fr/erable/kissplice/-/releases).
+```bash
+tar xf kissplice-binary-ubuntu-<version>.tar.gz
+kissplice-binary-ubuntu-<version>/bin/kissplice --help
+```
-## Dependencies
-Required:
-- cmake >= 3.1
+## Build from source
+Required dependencies:
+- cmake >= 3.9
- C/C++11 compiler toolchain (binutils, gcc or clang)
- python3 to run kissplice
-Recommended but optional:
-- [bcalm](https://github.com/GATB/bcalm) >= v2.2.2 ; as a fallback a locally compiled version of bcalm can be used by passing `-DUSE_BUNDLED_BCALM=TRUE` to cmake. A debian/ubuntu package is available.
-- latex toolchain with standard packages to build the user_guide ; this can be disabled by passing `-DUSER_GUIDE=OFF` to cmake
+Optional dependencies:
+- [bcalm](https://github.com/GATB/bcalm) >= v2.2.2. A locally compiled version of bcalm can instead be used by passing `-DUSE_BUNDLED_BCALM=ON` to cmake for convenience, but requires git.
+- latex toolchain : only if you request to build the user guide by passing `-DUSER_GUIDE=ON` to cmake
+
+The following commands assume you are a user that is not familiar with cmake, and wants a local install from source with only the required dependencies (useful for targets like Arm MacOS where no package exists yet).
+If you are a developer or a maintainer of a package, see the [detailed cmake documentation](readme.cmake.md).
-## Build
-Download from the latest [release](https://gitlab.inria.fr/erable/kissplice/-/releases) and uncompress it.
+Download a *source code* archive from the latest [release](https://gitlab.inria.fr/erable/kissplice/-/releases) and uncompress it.
You can also clone the git repository, but the latest code may be broken.
-Then compile using :
-```
-cd kissplice/ # Replace by the directory where the release tar.gz was uncompressed, or where the git was cloned
-mkdir build
-cd build
-cmake -DCMAKE_BUILD_TYPE=Release ..
-make -j 4
-```
+```bash
+cd kissplice/ # directory where the release tar.gz was uncompressed, or where the git was cloned
-After build the user guide (if not disabled) can be found in `build/doc/user_guide.pdf`.
+# Replace install_directory by the path where you want your install to be.
+# If you have latex installed you can add -DUSER_GUIDE=ON to generate the user guide at install_directory/doc/user_guide.pdf
+cmake -S . -B build/ -DCMAKE_BUILD_TYPE=Release -DUSE_BUNDLED_BCALM=ON -DCMAKE_INSTALL_PREFIX=install_directory
-The tool can be installed to a custom directory (like `~/.local/`) with:
-```
-cmake -DCMAKE_INSTALL_PREFIX=<directory> ..
-make
-make install
-```
+cmake --build build/ # add -jN to use up to N cpu cores in parallel
+cmake --install build/
-# Running on the sample test
-```
-cd kissplice/build
-./bin/kissplice -r ../sample_example/mock1.fq -r ../sample_example/mock2.fq -r ../sample_example/virus1.fq -r ../sample_example/virus2.fq
+# Example of use
+install_directory/bin/kissplice -r sample_example/mock1.fq -r sample_example/mock2.fq -r sample_example/virus1.fq -r sample_example/virus2.fq
```
=====================================
debian/changelog
=====================================
@@ -1,3 +1,11 @@
+kissplice (2.6.4-1) UNRELEASED; urgency=medium
+
+ * New upstream version
+ * Propagate upstream build system changes for bcalm and guide
+ * Fix dh_auto_clean
+
+ -- François Gindraud <francois.gindraud at inria.fr> Thu, 23 May 2024 17:39:12 +0200
+
kissplice (2.6.2-3) unstable; urgency=medium
* Add build support for loongarch64
=====================================
debian/links
=====================================
@@ -1,2 +1 @@
usr/share/kissplice/kissplice.py usr/bin/kissplice
-usr/bin/bcalm usr/lib/kissplice/bcalm
=====================================
debian/rules
=====================================
@@ -19,17 +19,16 @@ include /usr/share/dpkg/default.mk
dh $@
override_dh_auto_configure:
- dh_auto_configure -- -DCMAKE_INSTALL_LIBEXECDIR=lib/
+ dh_auto_configure -- -DCMAKE_INSTALL_LIBEXECDIR=lib/ -DUSER_GUIDE=ON
override_dh_compress:
dh_compress --exclude=.pdf
override_dh_install:
rm -rf debian/$(DEB_SOURCE)/usr/README.md debian/$(DEB_SOURCE)/build debian/$(DEB_SOURCE)/builds
- rm -f debian/$(DEB_SOURCE)/usr/lib/kissplice/bcalm
mkdir -p debian/$(DEB_SOURCE)/usr/share/$(DEB_SOURCE)
mv debian/$(DEB_SOURCE)/usr/bin/kissplice debian/$(DEB_SOURCE)/usr/share/$(DEB_SOURCE)/kissplice.py
override_dh_auto_clean:
dh_auto_clean
- find . -name __pycache__/ -type d | xargs rm -rf
+ find . -name __pycache__ -type d | xargs rm -rf
=====================================
debian/watch
=====================================
@@ -1,4 +1,4 @@
version=4
-opts="repacksuffix=+dfsg,dversionmangle=auto" \
- https://gitlab.inria.fr/erable/kissplice/-/tags .*/@PACKAGE@-@ANY_VERSION@\.tar\.gz
+opts="searchmode=plain" \
+ https://gitlab.inria.fr/erable/@PACKAGE@/tags?sort=updated_desc -/archive/v?\d[\d.]+/@PACKAGE@-@ANY_VERSION@@ARCHIVE_EXT@
=====================================
doc/CMakeLists.txt
=====================================
@@ -1,7 +1,7 @@
option(
USER_GUIDE
"compile and install a pdf user_guide ; requires pdflatex and standard latex packages"
- ON
+ OFF
)
if (USER_GUIDE)
find_package(LATEX)
=====================================
doc/user_guide.in.tex
=====================================
@@ -15,7 +15,7 @@
%\usepackage{dsfont}
\usepackage{amsthm}
\usepackage{enumerate}
-\usepackage{url}
+\usepackage{hyperref}
\usepackage{fancyvrb}
\usepackage{listings}
\lstset{
@@ -64,81 +64,79 @@ Lopez-Maestre et al., SNP calling from RNA-seq data without a reference genome:
If you use \soft, kissplice2refgenome and kissDE, please cite:\\
Benoit-Pilven et al. Complementarity of assembly-first and mapping-first approaches for alternative splicing annotation and differential analysis from RNAseq data. Sci Rep 8, 4307 (2018). https://doi.org/10.1038/s41598-018-21770-7
-\subsection{Forum}
-If you have questions, please use primarily the biostars forum: \url{www.biostars.org/t/kissplice/}
-
-\subsection{Mailing list}
-To be informed about new releases, bugs or updates, please subscribe to the mailing list
-\texttt{kissplice-users at lists.gforge.inria.fr}.\\
-To do so, please subscribe at :
-\url{http://lists.gforge.inria.fr/cgi-bin/mailman/listinfo/kissplice-users}.
-Do not hesitate to write to the list for any comment or questions on \soft.
-
-\subsection{Requirements}
-\esoft uses the \texttt{CMake} building tool (at least 3.1 CMake version is required). If it is not directly available on your system, you can download it from \url{http://www.cmake.org/cmake/resources/software.html}. \esoft relies on \texttt{zlib} that is included in most systems, however it is also available from \url{http://zlib.net}.
+\subsection{Online resources}
+The main repository is at \url{https://gitlab.inria.fr/erable/kissplice}.
+It contains releases, an issue-tracker for questions or bug reports, and allows people to contribute.
+Biostars forum: \url{www.biostars.org/t/kissplice/} (seems unused).
\subsection{Installation}
-\esoft is written in C/C\texttt{++} and is running on Mac OS X and Linux 64 bits platforms..\\
-In order to install \soft, you need to:
-\begin{enumerate}
-\item Uncompress the archive file:
-\begin{small}
-\begin{Verbatim}[frame=single]
-tar zxvf kissplice-x.y.z.tar.gz
-\end{Verbatim}
-\end{small}
-\item Go into the corresponding directory:
-\begin{small}\begin{Verbatim}[frame=single]
-cd kissplice-x.y.z
-\end{Verbatim}
-\end{small}
-\item Launch cmake:
-\begin{small}
-\begin{Verbatim}[frame=single]
-cmake .
+\esoft is written in C/C\texttt{++}/Python and is running on Mac OS X and Linux 64 bits platforms.
+It can be installed using the following methods, by decreasing order of convenience.
+
+\subsubsection{Debian/Ubuntu package}
+\esoft is available as a system package in Debian/Ubuntu.
+\begin{Verbatim}
+apt install kissplice
+kissplice --help
\end{Verbatim}
-\end{small}
-If \texttt{zlib} library is present in your system in an uncommon path, launch cmake with the following options:
-\begin{small}
-\begin{Verbatim}[frame=single]
-cmake . -DCMAKE_LIBRARY_PATH=/path/to/zlib -DCMAKE_INCLUDE_PATH=/path/to/zlib
-\end{Verbatim}
-\end{small}
-\item Compile the package:
-\begin{small}
-\begin{Verbatim}[frame=single]
-make
-\end{Verbatim}
-\end{small}
-\item Run KisSplice:
-\begin{small}
-\begin{Verbatim}[frame=single]
-./bin kissplice
+
+\subsubsection{Conda}
+Kissplice is available in conda in the \href{https://bioconda.github.io/}{bioconda channel}.
+Bioconda creates packages for Linux (x86\_64 and arm64) and MacOS (x86\_64 = intel macs before M1).
+
+\subsubsection{Docker}
+You can find the latest version of KisSplice, KisSplice2RefGenome and KissDE on \href{https://hub.docker.com/repository/docker/dwishsan/kissplice-pipeline}{Docker Hub}.
+
+We also propose a stand-alone \href{https://hub.docker.com/repository/docker/dwishsan/kde-shiny}{Docker image} for the KissDE Shiny App for KisSplice results exploration.
+
+\subsubsection{Binary release for Debian/Ubuntu}
+A precompiled standalone binary package for ubuntu is available in the \href{https://gitlab.inria.fr/erable/kissplice/-/releases}{release page}.
+\begin{Verbatim}
+# Decompress downloaded archive
+tar xf kissplice-binary-ubuntu-<version>.tar.gz
+# Access the kissplice tool
+kissplice-binary-ubuntu-<version>/bin/kissplice --help
\end{Verbatim}
-\end{small}
-%\item Run the functional and integration tests:
-%\begin{small}
-%\begin{Verbatim}[frame=single]
-%make test
-%\end{Verbatim}
-%\end{small}
-%\item Install the package on your system:
-%\begin{small}
-%\begin{Verbatim}[frame=single]
-%sudo make install
-%\end{Verbatim}
-%\end{small}
-\item If you want to locally install the package in a particular directory \texttt{/path\_to\_install/} (like \texttt{$\mathtt{\sim}$/.local/}), then do :
-\begin{small}\begin{Verbatim}[frame=single]
-cmake . -DCMAKE_INSTALL_PREFIX=/path_to_install/
-make
-make install
+
+\subsubsection{Compile from source}
+Required dependencies :
+\begin{itemize}
+\item cmake >= 3.9
+\item C/C++11 compiler toolchain (binutils, gcc or clang)
+\item python3 to run kissplice
+\item git
+\end{itemize}
+
+Download a source code archive from the latest \href{https://gitlab.inria.fr/erable/kissplice/-/releases}{release} and uncompress it.
+\begin{Verbatim}
+# directory where the release tar.gz was uncompressed
+cd kissplice/
+
+# Replace install_directory by the path where you want your install to be.
+# If you have latex installed you can add -DUSER_GUIDE=ON to generate
+# the user guide at install_directory/doc/user_guide.pdf
+cmake -S . -B build/ -DCMAKE_BUILD_TYPE=Release -DUSE_BUNDLED_BCALM=ON
+ -DCMAKE_INSTALL_PREFIX=install_directory
+
+cmake --build build/ # add -jN to use up to N cpu cores in parallel
+cmake --install build/
+
+# Example of use
+install_directory/bin/kissplice -r sample_example/mock1.fq -r sample_example/mock2.fq
+ -r sample_example/virus1.fq -r sample_example/virus2.fq
\end{Verbatim}
-\end{small}
-\end{enumerate}
-%If you do not install the software, the program is available in the repertory \textbf{bin}.
+If you want to build from source and install to a local directory (like \texttt{~/.local/}):
+\begin{Verbatim}
+# directory where the release tar.gz was uncompressed
+# or where the git was cloned
+cd kissplice/
+cmake -S . -B build/ -DCMAKE_BUILD_TYPE=Release
+ -DCMAKE_INSTALL_PREFIX=<install directory>
+cmake --build build/
+cmake --install build/
+\end{Verbatim}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Usage}
=====================================
kissplice.in.py
=====================================
@@ -1,58 +1,58 @@
#!/usr/bin/env python3
- # ***************************************************************************
- #
- # KisSplice
- # de-novo calling alternative splicing events from RNA-seq data.
- #
- # ***************************************************************************
- #
- # Copyright INRIA
- # contributors : Vincent Lacroix
- # Pierre Peterlongo
- # Gustavo Sacomoto
- # Alice Julien-Laferriere
- # David Parsons
- # Vincent Miele
- # Leandro Lima
- # Audric Cologne
- #
- # pierre.peterlongo at inria.fr
- # vincent.lacroix at univ-lyon1.fr
- #
- # This software is a computer program whose purpose is to detect alternative
- # splicing events from RNA-seq data.
- #
- # This software is governed by the CeCILL license under French law and
- # abiding by the rules of distribution of free software. You can use,
- # modify and/ or redistribute the software under the terms of the CeCILL
- # license as circulated by CEA, CNRS and INRIA at the following URL
- # "http://www.cecill.info".
-
- # As a counterpart to the access to the source code and rights to copy,
- # modify and redistribute granted by the license, users are provided only
- # with a limited warranty and the software's author, the holder of the
- # economic rights, and the successive licensors have only limited
- # liability.
-
- # In this respect, the user's attention is drawn to the risks associated
- # with loading, using, modifying and/or developing or reproducing the
- # software by the user in light of its specific status of free software,
- # that may mean that it is complicated to manipulate, and that also
- # therefore means that it is reserved for developers and experienced
- # professionals having in-depth computer knowledge. Users are therefore
- # encouraged to load and test the software's suitability as regards their
- # requirements in conditions enabling the security of their systems and/or
- # data to be ensured and, more generally, to use and operate it in the
- # same conditions as regards security.
- #
- # The fact that you are presently reading this means that you have had
- # knowledge of the CeCILL license and that you accept its terms.
+# ***************************************************************************
+#
+# KisSplice
+# de-novo calling alternative splicing events from RNA-seq data.
+#
+# ***************************************************************************
+#
+# Copyright INRIA
+# contributors : Vincent Lacroix
+# Pierre Peterlongo
+# Gustavo Sacomoto
+# Alice Julien-Laferriere
+# David Parsons
+# Vincent Miele
+# Leandro Lima
+# Audric Cologne
+#
+# pierre.peterlongo at inria.fr
+# vincent.lacroix at univ-lyon1.fr
+#
+# This software is a computer program whose purpose is to detect alternative
+# splicing events from RNA-seq data.
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
import os
import re
import sys
import time
import shlex
-import struct
+import logging
import shutil
import os.path
import tempfile
@@ -60,7 +60,6 @@ import argparse
import threading
import multiprocessing
from random import randint
-from operator import itemgetter
from subprocess import Popen, PIPE, STDOUT
@@ -92,6 +91,14 @@ WLEN=41 # Window length on sequence to compute entropy
WSLIDE=41 # Right slide of the window on the sequence
###############################################################
+###############################################################
+# SCRIPT_BINDIR : absolute path to the main executable (this file), computed at runtime
+# @KISSPLICE_BINDIR_TO_INTERNAL_BINDIR@ : relative path from main script to internal binaries, set by cmake
+# INTERNAL_BINDIR : absolute path to the secondary executables (eg ks_kissreads)
+SCRIPT_BINDIR = os.path.dirname(os.path.abspath(__file__))
+INTERNAL_BINDIR = os.path.realpath(os.path.join(SCRIPT_BINDIR, "@KISSPLICE_BINDIR_TO_INTERNAL_BINDIR@"))
+# Where to find bcalm ; BCALM_PACKAGING_MODE is set by cmake to either 'bundled' or 'system'.
+BCALM_PATH = os.path.join(INTERNAL_BINDIR, "bcalm") if "@BCALM_PACKAGING_MODE@" == "bundled" else "bcalm"
############### NEW FUNCTIONS -> ENTROPY ###############
# Natural log aproximation using ln(x)=lim(x->inf) n(x**(1/n)-1)
@@ -599,7 +606,7 @@ def redundancyAndLowComplexityRemoval(workdir, mainFileName, keep_rd=False, keep
if not keep_lc or not keep_rd:
toMove.append(t1fileName)
toRm.append(t234fileName)
- splitT1T234("/".join([workdir,mainFileName]), "/".join([workdir,t1fileName]), "/".join([workdir,t234fileName]))
+ splitT1T234(os.path.join(workdir,mainFileName), os.path.join(workdir,t1fileName), os.path.join(workdir,t234fileName))
if not keep_rd or not keep_lc:
# 2) Define some dictionnaries...
@@ -611,12 +618,12 @@ def redundancyAndLowComplexityRemoval(workdir, mainFileName, keep_rd=False, keep
dBCC2lc={} # d[bcc]=[ [removed cycle due to low complexity, entropy value], ...]
# ... and an output file
t1fileNameComp="all_bcc_type1_compressed.fa"
- fComp=open("/".join([workdir,t1fileNameComp]), "w")
+ fComp=open(os.path.join(workdir,t1fileNameComp), "w")
if not keep_rd:
toMove.append(t1fileNameComp)
# 3) Open and read first line of Type_1 file
- f=open("/".join([workdir,t1fileName]),"r")
+ f=open(os.path.join(workdir,t1fileName),"r")
lFasta="KO"
while lFasta and lFasta=="KO":
lFasta=readFasta4(f, k=kval, rmEntropy=not keep_lc, entropy_threshold=lc_ent, dBCC2lc=dBCC2lc) # [ [bcc, cycle, type, length_up, length_low], [seq_up, seq_low, seq_var] ]
@@ -654,25 +661,25 @@ def redundancyAndLowComplexityRemoval(workdir, mainFileName, keep_rd=False, keep
fNameSummary="get_redundancy_info_summary.tsv"
fNameRd="get_redundancy_info_compressed_bubbles.tsv"
toMove.extend([fNameSummary,fNameRd])
- makeSummaryRd("/".join([workdir,fNameSummary]), "/".join([workdir,fNameRd]), dBcc2cycleComp, dBccLen2nCompress)
+ makeSummaryRd(os.path.join(workdir,fNameSummary), os.path.join(workdir,fNameRd), dBcc2cycleComp, dBccLen2nCompress)
if get_lc_info:
fNameLc="get_low-complexity_info.tsv"
toMove.append(fNameLc)
- makeSummaryLc("/".join([workdir,fNameLc]), dBCC2lc)
+ makeSummaryLc(os.path.join(workdir,fNameLc), dBCC2lc)
# 6) Write a new mainFile, combining filtered type 1 and type 234 or type 1 only
if t1o:
if not keep_rd or not keep_lc:
- os.system("cat "+"/".join([workdir,t1fileNameComp])+" > "+"/".join([workdir,mainFileName]))
+ shutil.copy(os.path.join(workdir,t1fileNameComp), os.path.join(workdir,mainFileName))
else:
- os.system("cat "+"/".join([workdir,t1fileName])+" > "+"/".join([workdir,mainFileName]))
+ shutil.copy(os.path.join(workdir,t1fileName), os.path.join(workdir,mainFileName))
else:
- os.system("cat "+"/".join([workdir,t1fileNameComp])+" "+"/".join([workdir,t234fileName])+" > "+"/".join([workdir,mainFileName]))
+ os.system("cat "+os.path.join(workdir,t1fileNameComp)+" "+os.path.join(workdir,t234fileName)+" > "+os.path.join(workdir,mainFileName))
# 7) Remove some files
for fRm in toRm:
- os.system("rm "+"/".join([workdir,fRm]))
+ os.remove(os.path.join(workdir,fRm))
print(getTimestamp() + "--> Done!")
printlg(getTimestamp() + "--> Done!")
@@ -736,9 +743,8 @@ def printlg (*args):
global logFile
print(''.join(str(arg) for arg in args), file=logFile)
-# get the timestamp as string
-def getTimestamp():
- return "["+time.strftime("%H:%M:%S")+" "+time.strftime("%d/%m/%Y")+"] "
+def getTimestamp() -> str:
+ return f"[{time.strftime('%H:%M:%S %d/%m/%Y')}] "
class Command(object): # deprecated in the future with Python3
@@ -804,10 +810,6 @@ def mkdirTmp(tmpdir=None):
workdir = tempfile.mkdtemp(prefix="kissplice.", dir=tmpdir)
return workdir
-def cleanTmp(workdir):
- shutil.rmtree(workdir)
-
-
def subprocessLauncher(command_line, out_file = "", mode = 'w', verbose = False, timeout = MAXTIMEOUT):
command = Command()
command.run(command_line, out_file, mode, verbose, timeout)
@@ -874,7 +876,7 @@ def BCALMUnitigs2DotAbundance(inputFileName, outputFileName):
# Run debruijn graph construction
-def build_graph(internal_bindir, workdir, readfiles, kval, graphfile, min_cov, nbCores, verbose = False):
+def build_graph(workdir, readfiles, kval, graphfile, min_cov, nbCores, verbose = False):
print(getTimestamp() + "--> Building de Bruijn graph...")
printlg(getTimestamp() + "--> Building de Bruijn graph...")
print("Graph will be written in "+graphfile+".[edges/nodes]")
@@ -892,9 +894,7 @@ def build_graph(internal_bindir, workdir, readfiles, kval, graphfile, min_cov, n
previousWD = os.getcwd()
os.chdir(workdir+"/bcalm")
- #execute BCALM
- command_line = "%s/bcalm -in %s -kmer-size %d -abundance-min %d -nb-cores %d -out bcalm_out"%(internal_bindir, all_read_files, kval, min_cov, nbCores)
- subprocessLauncher(command_line, verbose=verbose)
+ subprocessLauncher(f"{BCALM_PATH} -in {all_read_files} -kmer-size {kval} -abundance-min {min_cov} -nb-cores {nbCores} -out bcalm_out", verbose=verbose)
os.chdir(previousWD)
@@ -911,7 +911,7 @@ def build_graph(internal_bindir, workdir, readfiles, kval, graphfile, min_cov, n
printlg(getTimestamp() + "--> Done!")
#Run error_removal for the graph (overwrite edge file)
-def error_removal(internal_bindir, graphfile, nobuild, cutoff, verbose = False):
+def error_removal(graphfile, nobuild, cutoff, verbose = False):
print("\n" + getTimestamp() + "--> Removing sequencing errors...")
printlg("\n" + getTimestamp() + "--> Removing sequencing errors...")
@@ -928,7 +928,7 @@ def error_removal(internal_bindir, graphfile, nobuild, cutoff, verbose = False):
#Run the error-removal
- command_line = internal_bindir+"/ks_error_removal "+graphfile+".edges "+graphfile+".abundance "+str(cutoff)+" "+graphfile+"_C"+str(cutoff)
+ command_line = INTERNAL_BINDIR+"/ks_error_removal "+graphfile+".edges "+graphfile+".abundance "+str(cutoff)+" "+graphfile+"_C"+str(cutoff)
subprocessLauncher(command_line, verbose=verbose)
print(getTimestamp() + "--> Done!")
printlg(getTimestamp() + "--> Done!")
@@ -936,7 +936,7 @@ def error_removal(internal_bindir, graphfile, nobuild, cutoff, verbose = False):
#Run the modules on the graph
-def run_modules(internal_bindir, workdir, graphfile, kval, cutoff, verbose = False, output_context = False, exec_error_removal = False):
+def run_modules(workdir, graphfile, kval, cutoff, verbose = False, output_context = False, exec_error_removal = False):
if not os.path.exists(workdir+"/bcc"):
os.mkdir(workdir+"/bcc")
print("\n" + getTimestamp() + "--> Finding the BCCs...")
@@ -946,7 +946,7 @@ def run_modules(internal_bindir, workdir, graphfile, kval, cutoff, verbose = Fal
if exec_error_removal:
edge_suffix = "_C"+str(cutoff)+".edges"
- command_line = internal_bindir+"/ks_run_modules "+graphfile+edge_suffix+" "+graphfile+".nodes "+str(kval)+" "+workdir+"/bcc/graph"
+ command_line = INTERNAL_BINDIR+"/ks_run_modules "+graphfile+edge_suffix+" "+graphfile+".nodes "+str(kval)+" "+workdir+"/bcc/graph"
if output_context:
command_line += " --output-context"
@@ -973,7 +973,7 @@ def find_bcc_ids_ordered_by_size(workdir, min_length = 4):
f.close()
return (bccnum2size, bccnumorderedbysize)
-def enumerate_all_bubbles(internal_bindir, workdir, outdir, kval, bval, output_snps, min_edit_dist, max_cycles, UL_MAX, LL_MAX, LL_MIN, timeout, nbprocs=1, verbose = False, output_context = False, output_path = False, output_branch_count = False, experimental = False, max_memory = 0):
+def enumerate_all_bubbles(workdir, outdir, kval, bval, output_snps, min_edit_dist, max_cycles, UL_MAX, LL_MAX, LL_MIN, timeout, nbprocs=1, verbose = False, output_context = False, output_path = False, output_branch_count = False, experimental = False, max_memory = 0):
print("\n" + getTimestamp() + "--> Enumerating all bubbles...")
printlg("\n" + getTimestamp() + "--> Enumerating all bubbles...")
@@ -988,7 +988,7 @@ def enumerate_all_bubbles(internal_bindir, workdir, outdir, kval, bval, output_s
file2size = {}
# filling num_snps
- count_alreadyfoundSNPs(workdir);
+ count_alreadyfoundSNPs(workdir)
# ordering bcc by decreasing size and filtering if <4 nodes
bccnum2size, bccnumorderedbysize = find_bcc_ids_ordered_by_size(workdir, 4)
@@ -1000,7 +1000,7 @@ def enumerate_all_bubbles(internal_bindir, workdir, outdir, kval, bval, output_s
pool = multiprocessing.Pool(nbprocs)
TASKS = []
for i in bccnumorderedbysize:
- TASKS += [(enumerate_bubbles_core, i, internal_bindir, workdir, outdir, kval, bval, output_snps, min_edit_dist, max_cycles, UL_MAX, LL_MAX, LL_MIN, timeout, verbose, output_context, output_path, output_branch_count, experimental, max_memory)]
+ TASKS += [(enumerate_bubbles_core, i, workdir, outdir, kval, bval, output_snps, min_edit_dist, max_cycles, UL_MAX, LL_MAX, LL_MIN, timeout, verbose, output_context, output_path, output_branch_count, experimental, max_memory)]
imap_unordered_it = pool.imap_unordered(eval_func_tuple, TASKS, 1)
@@ -1045,7 +1045,7 @@ def enumerate_all_bubbles(internal_bindir, workdir, outdir, kval, bval, output_s
-def enumerate_bubbles_core(i, internal_bindir, workdir, outdir, kval, bval, output_snps, min_edit_dist, max_cycles, UL_MAX, LL_MAX, LL_MIN, timeout, verbose = False, output_context = False, output_path = False, output_branch_count = False, experimental = False, max_memory = 0):
+def enumerate_bubbles_core(i, workdir, outdir, kval, bval, output_snps, min_edit_dist, max_cycles, UL_MAX, LL_MAX, LL_MIN, timeout, verbose = False, output_context = False, output_path = False, output_branch_count = False, experimental = False, max_memory = 0):
if verbose:
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
print("Enumerating bubbles in biconnected component "+str(i))
@@ -1062,7 +1062,7 @@ def enumerate_bubbles_core(i, internal_bindir, workdir, outdir, kval, bval, outp
num_snps_bcc = 0
if str(i) in num_snps:
num_snps_bcc = num_snps[str(i)]
- command_line = internal_bindir+"/ks_bubble_enumeration "+ infofile+" "+ contents_file_edges+" "+ contents_file_nodes+" "+ basename_edges+" "+ basename_nodes\
+ command_line = INTERNAL_BINDIR+"/ks_bubble_enumeration "+ infofile+" "+ contents_file_edges+" "+ contents_file_nodes+" "+ basename_edges+" "+ basename_nodes\
+" "+str(i) \
+" "+str(kval)+" "+workdir+"/bcc/tmp_bcc_sequences_"+str(kval)+"_"+multiprocessing.current_process().name+" "+str(min_edit_dist) \
+" bcc_"+str(i) + " " + str(num_snps_bcc) + " -u "+str(UL_MAX) \
@@ -1086,8 +1086,8 @@ def enumerate_bubbles_core(i, internal_bindir, workdir, outdir, kval, bval, outp
flag = i
# Always append the results if the enumeration, even when it's incomplete.
- command_line_type0 = internal_bindir+"/ks_clean_duplicates " + workdir + "/bcc/tmp_bcc_sequences_" + str(kval) +"_"+multiprocessing.current_process().name+ "_type0.fa"
- command_line_type1234 = internal_bindir+"/ks_clean_duplicates " + workdir + "/bcc/tmp_bcc_sequences_" + str(kval) +"_"+multiprocessing.current_process().name+ "_type1234.fa"
+ command_line_type0 = INTERNAL_BINDIR+"/ks_clean_duplicates " + workdir + "/bcc/tmp_bcc_sequences_" + str(kval) +"_"+multiprocessing.current_process().name+ "_type0.fa"
+ command_line_type1234 = INTERNAL_BINDIR+"/ks_clean_duplicates " + workdir + "/bcc/tmp_bcc_sequences_" + str(kval) +"_"+multiprocessing.current_process().name+ "_type1234.fa"
subprocessLauncher(command_line_type0, workdir+"/tmp_all_bcc_type0_"+str(kval)+"_"+multiprocessing.current_process().name, 'a', verbose=verbose) # append ALL BCC IN THE SAME FILE
subprocessLauncher(command_line_type1234, workdir+"/tmp_all_bcc_type1234_"+str(kval)+"_"+multiprocessing.current_process().name, 'a', verbose=verbose) # append ALL BCC IN THE SAME FILE
@@ -1128,14 +1128,14 @@ def concatenate_graph_all_log_bcc_to_all_bcc_type0(workdir, kval, output_snps):
destinationSNPS.close()
-def check_read_coverage_and_sort_all_bubbles(internal_bindir, readfiles, workdir, outdir, kval, output_snps, infix_name,
+def check_read_coverage_and_sort_all_bubbles(readfiles, workdir, outdir, kval, output_snps, infix_name,
countsMethods, minOverlap, substitutions, substitutionsSNP, getMappingInfo, stranded, strandedAbsoluteThreshold, strandedRelativeThreshold, nbprocs, verbose = False):
# Two KisSreads executions, one for type 0 one for type 1234
# Du to the k extension, anchor should be of size k+1 for SNP
commandLineType0=""
if output_snps > 0:
- commandLineType0 = internal_bindir+"/ks_kissreadsSNPS "+workdir+"/all_bcc_type0_"+str(kval)+" "+readfiles+" -i 5 -S 25 -O "+str(kval+minOverlap)+" -o "+ workdir+"/coherentType0.fa -u "+workdir+"/uncoherentType0.fa -d " + str(substitutionsSNP) + " -c 1 -n -t "+str(nbprocs)
+ commandLineType0 = INTERNAL_BINDIR+"/ks_kissreadsSNPS "+workdir+"/all_bcc_type0_"+str(kval)+" "+readfiles+" -i 5 -S 25 -O "+str(kval+minOverlap)+" -o "+ workdir+"/coherentType0.fa -u "+workdir+"/uncoherentType0.fa -d " + str(substitutionsSNP) + " -c 1 -n -t "+str(nbprocs)
if stranded:
commandLineType0+=" -x -a " + str(strandedAbsoluteThreshold) + " -r " + str(strandedRelativeThreshold) + " "
if getMappingInfo:
@@ -1154,7 +1154,7 @@ def check_read_coverage_and_sort_all_bubbles(internal_bindir, readfiles, workdir
printlg("\n" + getTimestamp() + "--> Computing read coherence and coverage...")
# no n options anymore
- commandLineType1234 = internal_bindir+"/ks_kissreadsSplice "+workdir+"/all_bcc_type1234_"+str(kval)+" "+readfiles+" -i 5 -k "+str(kval)+" -S 25 -O "+str(kval+minOverlap)+" -o "+workdir+"/coherentType1234.fa -u "+workdir+"/uncoherentType1234.fa -d " + str(substitutions) + " -c 1 -j " + countsMethods +" -l " + str(minOverlap) +" -t "+str(nbprocs)
+ commandLineType1234 = INTERNAL_BINDIR+"/ks_kissreadsSplice "+workdir+"/all_bcc_type1234_"+str(kval)+" "+readfiles+" -i 5 -k "+str(kval)+" -S 25 -O "+str(kval+minOverlap)+" -o "+workdir+"/coherentType1234.fa -u "+workdir+"/uncoherentType1234.fa -d " + str(substitutions) + " -c 1 -j " + countsMethods +" -l " + str(minOverlap) +" -t "+str(nbprocs)
if stranded:
commandLineType1234+=" -x -a " + str(strandedAbsoluteThreshold) + " -r " + str(strandedRelativeThreshold) + " "
if getMappingInfo:
@@ -1233,7 +1233,7 @@ def check_read_coverage_and_sort_all_bubbles(internal_bindir, readfiles, workdir
return nb
-def sort_all_bubbles(internal_bindir, readfiles, workdir, outdir, kval, output_snps, infix_name, shouldDoReadCoherence, verbose = False):
+def sort_all_bubbles(readfiles, workdir, outdir, kval, output_snps, infix_name, shouldDoReadCoherence, verbose = False):
print("\n" + getTimestamp() + "--> Starting Bubble Output Module")
printlg("\n" + getTimestamp() + "--> Starting Bubble Output Module")
if shouldDoReadCoherence:
@@ -1296,7 +1296,7 @@ def sort_all_bubbles(internal_bindir, readfiles, workdir, outdir, kval, output_s
return nb
-def save_bccs_from_list(bcc_list, dir_name, internal_bindir, workdir, outdir, verbose = False):
+def save_bccs_from_list(bcc_list, dir_name, workdir, outdir, verbose = False):
if not os.path.exists(outdir + dir_name):
os.mkdir(outdir + dir_name)
infofile = workdir+"/bcc/graph_info_bcc"
@@ -1306,7 +1306,7 @@ def save_bccs_from_list(bcc_list, dir_name, internal_bindir, workdir, outdir, ve
basename_nodes = workdir+"/bcc/graph_all_nodes_bcc"
for i in bcc_list:
- command_line = internal_bindir+"/ks_print_bcc "+ infofile+" "+ contents_file_edges+" "+ contents_file_nodes+" "+ basename_edges+" "+ basename_nodes\
+ command_line = INTERNAL_BINDIR+"/ks_print_bcc "+ infofile+" "+ contents_file_edges+" "+ contents_file_nodes+" "+ basename_edges+" "+ basename_nodes\
+" "+str(i)+" "\
+ outdir+ dir_name + "/graph_bcc_"+str(i)+".edges "\
+ outdir+ dir_name + "/graph_bcc_"+str(i)+".nodes"
@@ -1337,12 +1337,6 @@ def dieToFatalError (msg):
# Main
# ############################################################################
def main():
- # script_bin_dir : absolute path to the main executable (this file), computed at runtime
- # @KISSPLICE_BINDIR_TO_INTERNAL_BINDIR@ : relative path from main script to internal binaries, set by cmake
- # internal_bindir : absolute path to the secondary executables (eg ks_kissreads)
- script_bindir = os.path.dirname(os.path.abspath(sys.argv[0]))
- internal_bindir = os.path.realpath(os.path.join(script_bindir, '@KISSPLICE_BINDIR_TO_INTERNAL_BINDIR@'))
-
# ========================================================================
# Manage command line arguments
# ========================================================================
@@ -1536,7 +1530,7 @@ def main():
# ------------------------------------------------------------------------
if not nobuild:
t = time.time()
- build_graph(internal_bindir, workdir, readfiles, kval, graphfile, min_cov, options.nbprocs, options.verbose)
+ build_graph(workdir, readfiles, kval, graphfile, min_cov, options.nbprocs, options.verbose)
print("Elapsed time: ", round(time.time() - t,1), " seconds")
printlg("Elapsed time: ", round(time.time() - t,1), " seconds")
@@ -1545,7 +1539,7 @@ def main():
# ------------------------------------------------------------------------
t = time.time()
if float(options.min_relative_cov) > 0.0001:
- exec_error_removal = error_removal(internal_bindir, graphfile, nobuild, options.min_relative_cov, options.verbose)
+ exec_error_removal = error_removal(graphfile, nobuild, options.min_relative_cov, options.verbose)
else:
exec_error_removal = False
@@ -1557,7 +1551,7 @@ def main():
# Decompose and simplify DBG
# ------------------------------------------------------------------------
t = time.time()
- run_modules(internal_bindir, workdir, graphfile, kval, options.min_relative_cov, options.verbose, options.output_context, exec_error_removal)
+ run_modules(workdir, graphfile, kval, options.min_relative_cov, options.verbose, options.output_context, exec_error_removal)
print("Elapsed time: ", round(time.time() - t,1), " seconds")
printlg("Elapsed time: ", round(time.time() - t,1), " seconds")
@@ -1566,7 +1560,7 @@ def main():
# Enumerate Bubbles
# ------------------------------------------------------------------------
t = time.time()
- enumerate_all_bubbles(internal_bindir, workdir, outdir, kval, options.bval, output_snps, min_edit_dist, max_cycles, \
+ enumerate_all_bubbles(workdir, outdir, kval, options.bval, output_snps, min_edit_dist, max_cycles, \
UL_MAX, LL_MAX, LL_MIN, float(options.timeout), options.nbprocs, options.verbose, \
options.output_context, options.output_path, options.output_branch_count, options.experimental, options.max_memory)
print("Elapsed time: ", round(time.time() - t,1), " seconds")
@@ -1577,10 +1571,10 @@ def main():
# Sort and remove redundancy/low-complexity bubbles (optionnal), only keep type_1 events (optionnal)
# ------------------------------------------------------------------------
t = time.time()
- nb = sort_all_bubbles(internal_bindir, readfiles, workdir, outdir, kval, output_snps, infix_name, not only_graph, options.verbose)
+ nb = sort_all_bubbles(readfiles, workdir, outdir, kval, output_snps, infix_name, not only_graph, options.verbose)
filesToMove=redundancyAndLowComplexityRemoval(workdir, "all_bcc_type1234_"+str(kval), options.keep_rd, options.keep_lc, options.lc_ent, options.get_rd_info, options.get_lc_info, options.t1o, kval)
for fToMove in filesToMove:
- shutil.move("/".join([workdir,fToMove]), "/".join([outdir,fToMove]))
+ shutil.move(os.path.join(workdir,fToMove), os.path.join(outdir,fToMove))
print("Elapsed time: "+str(round(time.time() - t,1))+" seconds")
printlg("Elapsed time: "+str(round(time.time() - t,1))+" seconds")
@@ -1589,7 +1583,7 @@ def main():
# ------------------------------------------------------------------------
t = time.time()
if not only_graph:
- nb = check_read_coverage_and_sort_all_bubbles(internal_bindir, readfiles, workdir, outdir, kval, output_snps, infix_name, countsMethod, minOverlap, options.mism, options.mismSNP, options.get_mapping_info, options.stranded, options.strandedAbsoluteThreshold, options.strandedRelativeThreshold, options.nbprocs, options.verbose)
+ nb = check_read_coverage_and_sort_all_bubbles(readfiles, workdir, outdir, kval, output_snps, infix_name, countsMethod, minOverlap, options.mism, options.mismSNP, options.get_mapping_info, options.stranded, options.strandedAbsoluteThreshold, options.strandedRelativeThreshold, options.nbprocs, options.verbose)
print("Elapsed time: ", round(time.time() - t,1), " seconds\n")
printlg("Elapsed time: ", round(time.time() - t,1), " seconds\n")
@@ -1618,21 +1612,19 @@ def main():
bcc_dir = "/unfinished_bcc"
print("\t\t Backup files for the unfinished BCCs are in " + outdir + bcc_dir + "\n")
printlg("\t\t Backup files for the unfinished BCCs are in " + outdir + bcc_dir + "\n")
- save_bccs_from_list(unfinished_bccs, bcc_dir, internal_bindir, workdir, outdir, options.verbose)
+ save_bccs_from_list(unfinished_bccs, bcc_dir, workdir, outdir, options.verbose)
if options.keep_all_bccs:
bcc_dir = "/bcc"
print("\t\t Edge and node files of all BCCs are in " + outdir + bcc_dir + "\n")
printlg("\t\t Edge and node files of all BCCs are in " + outdir + bcc_dir + "\n")
all_bccs = find_bcc_ids_ordered_by_size(workdir)[1]
- save_bccs_from_list(all_bccs, bcc_dir, internal_bindir, workdir, outdir, options.verbose)
+ save_bccs_from_list(all_bccs, bcc_dir, workdir, outdir, options.verbose)
if options.output_path: # move paths file to outdir
- shutil.move(workdir+"/all_paths_k"+str(kval), outdir + "/all_paths_k"+str(kval))
+ shutil.move(f"{workdir}/all_paths_k{kval}", f"{outdir}/all_paths_k{kval}")
-
-
# ------------------------------------------------------------------------
# Output number of events of each type
# ------------------------------------------------------------------------
@@ -1667,7 +1659,7 @@ def main():
# Clean tmp directory
# ------------------------------------------------------------------------
logFile.close()
- cleanTmp(workdir)
+ shutil.rmtree(workdir)
if __name__ == '__main__':
main()
=====================================
readme.cmake.md
=====================================
@@ -0,0 +1,76 @@
+Some useful information for developers about CMake.
+
+CMake has some tutorials, such as https://cmake.org/cmake/help/latest/guide/tutorial/index.html, but they are not great.
+`man cmake` has some overviews.
+
+Cmake steps are usually :
+- *configure* : setup a temporary *build directory* with a specific build configuration, perform platform checks (for dependencies)
+- *build* : compile the code / doc / ... in the given configuration
+- optional *test* : run tests to check if the build is ok
+- *install* : copy the generated files from the temporary *build* directory to the final install directory
+
+# Configure
+Configure command
+```bash
+cmake -S source_directory/ -B build_directory/ <build options...>
+# source directory is the cloned git root
+# build directory can be placed anywhere, but very often found at <git root>/build/
+```
+
+Options are usually all given at once to the configure command.
+They are *cached* in the build directory, so they can later be updated one at a time or several at once.
+```bash
+cmake -S . -B build/ -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=~/.local/
+cmake -S . -B build/ -DCMAKE_BUILD_TYPE=Release # updates CMAKE_BUILD_TYPE but keeps the other from before
+```
+
+Cmake options:
+- `-DCMAKE_BUILD_TYPE=Debug|Release|...` : presets of debug / optimisation / warning compiler flags
+- `-DCMAKE_INSTALL_PREFIX=path/` : during install, install to `path/{bin,lib,share}` instead of `/usr/{bin,lib,share}`. CMake also searches this path when checking for library or executable dependencies.
+- `-DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++` : override compiler
+
+Kissplice defined options
+- `USER_GUIDE=ON|OFF` : optionally build a pdf guide (requires latex distribution).
+- `USE_BUNDLED_BCALM=ON|OFF` : by default, requires `bcalm` to be installed (system-wide or in `CMAKE_INSTALL_PREFIX`). For convenience you can set this to `ON` to compile a local bcalm and install it alongside kissplice.
+
+You can reset the configuration / build cache by deleting the build directory.
+
+# Build
+```bash
+cmake --build build_directory/
+# Useful option : -jN to use up to N cpu cores in parallel.
+```
+
+# Test
+```bash
+# No "modern" cmake command, must run make in build dir directly
+cd build_directory/
+make test
+# Alternative that prints test output on failure :
+ctest --output-on-failure
+```
+
+Kissplice also has a sample dataset :
+```bash
+build_directory/bin/kissplice -r sample_example/reads1.fa -r sample_example/reads2.fa -v
+```
+
+# Install
+```bash
+# Install to the directory selected by CMAKE_INSTALL_PREFIX at configure time
+cmake --install build_directory/
+# Alternative : override the target directory, but less reliable for some projects
+cmake --install build_directory/ --prefix override_target/
+```
+
+# Other stuff
+## Generating documentation for developers
+```bash
+cd doc/dev/
+make
+firefox index.html
+```
+
+## Profiling
+Use https://perf.wiki.kernel.org/ on a binary with debug symbols (build type = Debug or RelWithDebInfo).
+The Profiling build type has been removed because gprof is inconvenient to use; if you still want to use it, manually add `-pg` to the C/CXX flags.
=====================================
tests/CMakeLists.txt
=====================================
@@ -1,4 +1,2 @@
ADD_SUBDIRECTORY(integration_tests)
-ADD_SUBDIRECTORY(functional_tests)
-
-add_test(IsBcalmStartable "${BUILD_INTERNAL_BINDIR}/bcalm" -help)
\ No newline at end of file
+ADD_SUBDIRECTORY(functional_tests)
\ No newline at end of file
=====================================
thirdparty/bcalm_redirect.in deleted
=====================================
@@ -1,3 +0,0 @@
-#!/usr/bin/env bash
-# Redirect to system bcalm found during cmake configuration time
-exec "@SYSTEM_BCALM_PATH@" "$@"
=====================================
thirdparty/kissreads/src/CMakeLists.txt
=====================================
@@ -1,13 +1,6 @@
# kissreads requirements
find_package(ZLIB REQUIRED) # imports the ZLIB::ZLIB target
find_package(OpenMP) # for CMake 3.9+, import targets OpenMP::OpenMP_<lang>
-if(DEFINED OpenMP_CXX_FLAGS AND NOT TARGET OpenMP::OpenMP_CXX)
- # For older CMake emulate target (https://cliutils.gitlab.io/modern-cmake/chapters/packages/OpenMP.html)
- find_package(Threads REQUIRED)
- add_library(OpenMP::OpenMP_CXX IMPORTED INTERFACE)
- set_property(TARGET OpenMP::OpenMP_CXX PROPERTY INTERFACE_COMPILE_OPTIONS ${OpenMP_CXX_FLAGS})
- set_property(TARGET OpenMP::OpenMP_CXX PROPERTY INTERFACE_LINK_LIBRARIES ${OpenMP_CXX_FLAGS} Threads::Threads)
-endif()
# Get parent directory for a nicer alternative to "../include"
get_property(PARENT_DIRECTORY DIRECTORY PROPERTY PARENT_DIRECTORY)
=====================================
thirdparty/kissreads/src/extension_algorithm.cpp
=====================================
@@ -370,7 +370,7 @@ float read_coherence (gzFile reads_file,
uint64_t nb_seeds;
// working variables
- int read_len, i, ii, starter_id, pwi, stop, read_coherence;
+ int read_len, starter_id, pwi, stop, read_coherence;
long int read_number=0;
// map of starter -> position (for each read and direction, stores the starter and position already tested.)
@@ -424,7 +424,7 @@ float read_coherence (gzFile reads_file,
// char validSeed;
for(direction=0;direction<2;direction++){ // try the two possible directions of the read
toinit=1; // we have to init a new seed
- for (i=0;i<stop;i++){ // for all possible seed on the read
+ for (int i=0;i<stop;i++){ // for all possible seed on the read
if(toinit) {
coded_seed=codeSeed(read+i); // init the seed
toinit=0;
@@ -436,7 +436,7 @@ float read_coherence (gzFile reads_file,
// for each occurrence of this seed on the starter:
- for (ii=offset_seed; ii<offset_seed+nb_seeds; ii++) {
+ for (uint64_t ii=offset_seed; ii<offset_seed+nb_seeds; ii++) {
couple * value = &(seed_table[ii]);
// printf("value->a = %d\n", value->a); //DEB
starter = all_starters[value->a]->w;
=====================================
thirdparty/kissreads/src/fragment_index.cpp
=====================================
@@ -92,11 +92,10 @@ void index_one_seed(const char * seed, const int fragment_id, const int position
char * strdup_upper_case(char * in){
// count number of upper case letters in "in"
int count =0;
- int i;
- for(i=0;i<strlen(in);i++) if(in[i]>='A' && in[i]<='Z') count++;
+ for(size_t i=0;i<strlen(in);i++) if(in[i]>='A' && in[i]<='Z') count++;
char * temp = (char *) malloc(sizeof(char)*(count+1)); test_alloc(temp);
int j=0;
- for(i=0;i<strlen(in);i++) if(in[i]>='A' && in[i]<='Z') temp[j++]=in[i];
+ for(size_t i=0;i<strlen(in);i++) if(in[i]>='A' && in[i]<='Z') temp[j++]=in[i];
temp[j]='\0';
return temp;
}
@@ -105,13 +104,12 @@ char * strdup_upper_case(char * in){
char * strdup_first_lower(char * in){
// count number of first lower case letters in "in"
int count =0;
- int i;
- for(i=0;i<strlen(in);i++)
+ for(size_t i=0;i<strlen(in);i++)
if(in[i]>='a' && in[i]<='z') count++;
else break;
char * temp = (char *) malloc(sizeof(char)*(count+1)); test_alloc(temp);
int j=0;
- for(i=0;i<strlen(in);i++)
+ for(size_t i=0;i<strlen(in);i++)
if(in[i]>='a' && in[i]<='z') temp[j++]=in[i];
else break;
temp[j]='\0';
@@ -122,8 +120,7 @@ char * strdup_first_lower(char * in){
char * strdup_last_lower(char * in){
// count number of first lower case letters in "in"
int count =0;
- int i;
- for(i=strlen(in)-1;i>=0;i--)
+ for(int i=strlen(in)-1;i>=0;i--)
if(in[i]>='a' && in[i]<='z') {
count++;
}
@@ -131,7 +128,7 @@ char * strdup_last_lower(char * in){
char * temp = (char *) malloc(sizeof(char)*(count+1)); test_alloc(temp);
int j=count-1;
- for(i=strlen(in)-1;i>=0;i--)
+ for(int i=strlen(in)-1;i>=0;i--)
if(in[i]>='a' && in[i]<='z'){
temp[j--]=in[i];
}
@@ -152,8 +149,7 @@ p_fragment_info * index_starters_from_input_file (const int k, int nb_events_per
test_alloc(temp_fragment2);
int witness; // is a fragment was read ?
kmer_type coded_seed;
- int i,j,z,stop;
- char validSeed;
+ int i,stop;
char * line = (char*) malloc(sizeof(char)*1048576);
uint64_t total_seeds = 0 ;
@@ -225,9 +221,9 @@ p_fragment_info * index_starters_from_input_file (const int k, int nb_events_per
all_starters[fragment_id]->sum_quality_per_position[i] = (int *) malloc (strlen(all_starters[fragment_id]->w)*sizeof(int)); test_alloc(all_starters[fragment_id]->sum_quality_per_position[i]);
#endif
- for(z=0;z<strlen(all_starters[fragment_id]->w); z++) all_starters[fragment_id]->read_coherent_positions[i][z]=0;
- for(z=0;z<strlen(all_starters[fragment_id]->w); z++) all_starters[fragment_id]->read_coherent_positions[i][z]=0;
- for(z=0;z<strlen(all_starters[fragment_id]->w); z++) all_starters[fragment_id]->sum_quality_per_position[i][z]=0;
+ for(size_t z=0;z<strlen(all_starters[fragment_id]->w); z++) all_starters[fragment_id]->read_coherent_positions[i][z]=0;
+ for(size_t z=0;z<strlen(all_starters[fragment_id]->w); z++) all_starters[fragment_id]->read_coherent_positions[i][z]=0;
+ for(size_t z=0;z<strlen(all_starters[fragment_id]->w); z++) all_starters[fragment_id]->sum_quality_per_position[i][z]=0;
all_starters[fragment_id]->number_mapped_reads[i]=0;
=====================================
thirdparty/kissreads/src/libchash.cpp
=====================================
@@ -577,7 +577,7 @@ static SparseBucket *SparseNextBucket(SparseIterator *iter)
return iter->binSparse[iter->posGroup].binSparse + iter->posOffset;
iter->posOffset = 0; /* start the next group */
- for ( iter->posGroup++; iter->posGroup < SPARSE_GROUPS(iter->cBuckets);
+ for ( iter->posGroup++; iter->posGroup < static_cast<long>(SPARSE_GROUPS(iter->cBuckets));
iter->posGroup++ )
if ( iter->binSparse[iter->posGroup].cOccupied > 0 )
return iter->binSparse[iter->posGroup].binSparse; /* + 0 */
=====================================
thirdparty/kissreads/src/outputs.cpp
=====================================
@@ -570,7 +570,6 @@ void print_couple_i(const char * comment, FILE* out, const p_fragment_info * res
* prints a couple using the reads starting position instead of coverage per position
*/
void print_quadruplet_i(FILE* out, const p_fragment_info * results_against_set, int cycle_id, int number_of_read_sets, int qual){
- int j;
int cov_1[number_of_read_sets] ; // coverage path 1 au
int cov_2[number_of_read_sets] ; // coverage path 2 vb
int cov_3[number_of_read_sets] ; // coverage path 3 av'
@@ -592,7 +591,7 @@ void print_quadruplet_i(FILE* out, const p_fragment_info * results_against_set,
qual_2[read_set_id] = 0;
qual_3[read_set_id] = 0;
qual_4[read_set_id] = 0;
- for (j=kmer_size-1;j<=strlen(results_against_set[cycle_id]->w)-kmer_size;j++){
+ for (size_t j=kmer_size-1;j<=strlen(results_against_set[cycle_id]->w)-kmer_size;j++){
#ifdef CHARQUAL // FIXME: IT SHOULKD BE THE OPOSIT NO ? (PIERRE APRL 2013)
if(results_against_set[cycle_id]->read_coherent_positions[read_set_id][j]) qual_1[read_set_id] = qual_1[read_set_id] + results_against_set[cycle_id]->sum_quality_per_position[read_set_id][j];
if(results_against_set[cycle_id+1]->read_coherent_positions[read_set_id][j]) qual_2[read_set_id] = qual_2[read_set_id] + results_against_set[cycle_id+1]->sum_quality_per_position[read_set_id][j];
@@ -851,13 +850,12 @@ void print_sequence_i(FILE* out, const p_fragment_info * results_against_set, in
int sum[number_of_read_sets];
int avg[number_of_read_sets];
int read_set_id;
- int j;
if( qual ){
// we are providing results for generic dataset
for(read_set_id=0;read_set_id<number_of_read_sets;read_set_id++){
avg[read_set_id] = 0;
- for (j=kmer_size-1;j<=strlen(results_against_set[cycle_id]->w)-kmer_size;j++){
+ for (size_t j=kmer_size-1;j<=strlen(results_against_set[cycle_id]->w)-kmer_size;j++){
if(results_against_set[cycle_id]->read_coherent_positions[read_set_id][j]){
#ifdef CHARQUAL
avg[read_set_id] = avg[read_set_id] + results_against_set[cycle_id]->sum_quality_per_position[read_set_id][j];
View it on GitLab: https://salsa.debian.org/med-team/kissplice/-/compare/76221547bcee69f6d360f668d7800c93d8a6bbf6...a094828d0124c16f97d09787a230e8b5fa6fec17
--
This project does not include diff previews in email notifications.
View it on GitLab: https://salsa.debian.org/med-team/kissplice/-/compare/76221547bcee69f6d360f668d7800c93d8a6bbf6...a094828d0124c16f97d09787a230e8b5fa6fec17
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20240523/a85f6b48/attachment-0001.htm>
More information about the debian-med-commit
mailing list