[med-svn] [libzstd] 01/04: Imported Upstream version 0.4.7

Kevin Murray daube-guest at moszumanska.debian.org
Tue Feb 2 06:39:15 UTC 2016


This is an automated email from the git hooks/post-receive script.

daube-guest pushed a commit to branch master
in repository libzstd.

commit a974e364ef0f6e589d052c23908e5713d0bf35e1
Author: Kevin Murray <spam at kdmurray.id.au>
Date:   Tue Feb 2 01:20:50 2016 +0200

    Imported Upstream version 0.4.7
---
 .gitignore                                         |   8 +-
 .travis.yml                                        |   9 +-
 Makefile                                           |   7 +-
 NEWS                                               |  12 +
 Makefile => contrib/cmake/CMakeLists.txt           | 100 ++---
 .../CMakeModules/AddExtraCompilationFlags.cmake    | 331 ++++++++++++++++
 contrib/cmake/cmake_uninstall.cmake.in             |  22 ++
 contrib/cmake/lib/CMakeLists.txt                   | 206 ++++++++++
 contrib/cmake/programs/CMakeLists.txt              |  84 ++++
 images/CSpeed.png                                  | Bin 36039 -> 35874 bytes
 images/DSpeed.png                                  | Bin 9676 -> 9143 bytes
 lib/Makefile                                       |  23 +-
 lib/bitstream.h                                    |   4 +-
 lib/{error.h => error_private.h}                   |  66 ++--
 lib/error_public.h                                 |  70 ++++
 lib/fse.c                                          |  97 ++---
 lib/fse.h                                          |  46 +--
 lib/fse_static.h                                   |  53 +--
 lib/huff0.c                                        |  79 ++--
 lib/huff0.h                                        |  16 +-
 lib/huff0_static.h                                 |  51 ++-
 lib/legacy/zstd_legacy.h                           |   6 +-
 lib/legacy/zstd_v01.c                              |  43 +-
 lib/legacy/zstd_v02.c                              |  61 +--
 lib/legacy/zstd_v03.c                              |  61 +--
 lib/zstd.h                                         |  20 +-
 lib/zstd_buffered.c                                |   7 +-
 lib/zstd_compress.c                                | 438 ++++++++++-----------
 lib/zstd_decompress.c                              |  64 ++-
 lib/zstd_internal.h                                |   2 +-
 lib/zstd_static.h                                  |  93 +++--
 programs/Makefile                                  |  13 +-
 programs/bench.c                                   |  38 +-
 programs/fileio.c                                  |  12 +-
 programs/fuzzer.c                                  | 135 ++++++-
 programs/xxhash.c                                  | 163 ++++----
 programs/xxhash.h                                  | 146 ++++---
 programs/zbufftest.c                               |   8 +-
 visual/2013/zstdlib/zstdlib.rc                     | Bin 4610 -> 5204 bytes
 visual/2013/zstdlib/zstdlib.vcxproj                |   1 -
 visual/2013/zstdlib/zstdlib.vcxproj.filters        | 155 ++++----
 41 files changed, 1876 insertions(+), 874 deletions(-)

diff --git a/.gitignore b/.gitignore
index 072fe39..8641d7f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -37,4 +37,10 @@ ipch/
 *.opendb
 *.opensdf
 *.sdf
-*.cachefile
\ No newline at end of file
+*.cachefile
+
+# IDEA solution files
+*.idea
+
+# Other files
+.directory
diff --git a/.travis.yml b/.travis.yml
index 2d5a73a..065e6f1 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,6 +1,5 @@
 language: c
-compiler: gcc
-script: make $ZSTD_TRAVIS_CI_ENV
+
 before_install:
   - sudo apt-get update  -qq
   - sudo apt-get install -qq gcc-arm-linux-gnueabi
@@ -11,6 +10,7 @@ before_install:
 
 env:
   - ZSTD_TRAVIS_CI_ENV=travis-install
+  - ZSTD_TRAVIS_CI_ENV=cmaketest
   - ZSTD_TRAVIS_CI_ENV=clangtest  
   - ZSTD_TRAVIS_CI_ENV=gpptest  
   - ZSTD_TRAVIS_CI_ENV=armtest  
@@ -21,6 +21,11 @@ env:
   - ZSTD_TRAVIS_CI_ENV=asan
   - ZSTD_TRAVIS_CI_ENV=asan32
   - ZSTD_TRAVIS_CI_ENV="-C programs valgrindTest"  
+  
+compiler: gcc
+
+script: 
+  - make $ZSTD_TRAVIS_CI_ENV
 
 matrix:
   fast_finish: true
diff --git a/Makefile b/Makefile
index a741034..88de5d0 100644
--- a/Makefile
+++ b/Makefile
@@ -31,8 +31,8 @@
 #  - Public forum : https://groups.google.com/forum/#!forum/lz4c
 # ################################################################
 
-# Version number
-export VERSION := 0.4.5
+# To force a version number, uncomment the export below (otherwise it defaults to the one declared in zstd.h)
+#export VERSION := 0.4.6
 
 PRGDIR  = programs
 ZSTDDIR = lib
@@ -79,6 +79,9 @@ travis-install:
 test:
 	$(MAKE) -C $(PRGDIR) $@
 
+cmaketest:
+	cd contrib/cmake && cmake . && $(MAKE)
+
 clangtest: clean
 	clang -v
 	$(MAKE) all CC=clang MOREFLAGS="-Werror -Wconversion -Wno-sign-conversion"
diff --git a/NEWS b/NEWS
index ee9a458..8aaf02d 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,15 @@
+v0.4.7
+Improved : small compression speed improvement in HC mode
+Changed : `zstd_decompress.c` has ZSTD_LEGACY_SUPPORT to 0 by default
+fix : bt search bug
+
+v0.4.6
+fix : fast compression mode on Windows
+New : cmake configuration file, thanks to Artyom Dymchenko
+Improved : high compression mode on repetitive data
+New : block-level API
+New : ZSTD_duplicateCCtx()
+
 v0.4.5
 new : -m/--multiple : compress/decompress multiple files
 
diff --git a/Makefile b/contrib/cmake/CMakeLists.txt
similarity index 52%
copy from Makefile
copy to contrib/cmake/CMakeLists.txt
index a741034..3687c9e 100644
--- a/Makefile
+++ b/contrib/cmake/CMakeLists.txt
@@ -31,75 +31,31 @@
 #  - Public forum : https://groups.google.com/forum/#!forum/lz4c
 # ################################################################
 
-# Version number
-export VERSION := 0.4.5
-
-PRGDIR  = programs
-ZSTDDIR = lib
-
-# Define nul output
-ifneq (,$(filter Windows%,$(OS)))
-VOID = nul
-else
-VOID = /dev/null
-endif
-
-.PHONY: default all zstdprogram clean install uninstall travis-install test clangtest gpptest armtest usan asan uasan
-
-default: zstdprogram
-
-all: 
-	$(MAKE) -C $(ZSTDDIR) $@
-	$(MAKE) -C $(PRGDIR) $@
-
-zstdprogram:
-	$(MAKE) -C $(PRGDIR)
-
-clean:
-	@$(MAKE) -C $(ZSTDDIR) $@ > $(VOID)
-	@$(MAKE) -C $(PRGDIR) $@ > $(VOID)
-	@echo Cleaning completed
-
-
-#------------------------------------------------------------------------
-#make install is validated only for Linux, OSX, kFreeBSD and Hurd targets
-ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU))
-
-install:
-	$(MAKE) -C $(ZSTDDIR) $@
-	$(MAKE) -C $(PRGDIR) $@
-
-uninstall:
-	$(MAKE) -C $(ZSTDDIR) $@
-	$(MAKE) -C $(PRGDIR) $@
-
-travis-install:
-	$(MAKE) install PREFIX=~/install_test_dir
-
-test:
-	$(MAKE) -C $(PRGDIR) $@
-
-clangtest: clean
-	clang -v
-	$(MAKE) all CC=clang MOREFLAGS="-Werror -Wconversion -Wno-sign-conversion"
-
-gpptest: clean
-	$(MAKE) all CC=g++ CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
-
-armtest: clean
-	$(MAKE) -C $(ZSTDDIR) all CC=arm-linux-gnueabi-gcc MOREFLAGS="-Werror"
-	$(MAKE) -C $(PRGDIR) CC=arm-linux-gnueabi-gcc MOREFLAGS="-Werror -static"
-
-usan: clean
-	$(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=undefined"
-
-asan: clean
-	$(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=address"
-
-asan32: clean
-	$(MAKE) -C $(PRGDIR) test32 CC=clang MOREFLAGS="-g -fsanitize=address"
-
-uasan: clean
-	$(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=address -fsanitize=undefined"
-
-endif
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.7) # must come before PROJECT() so policies and the version check apply first
+PROJECT(zstd)
+
+OPTION(ZSTD_LEGACY_SUPPORT "LEGACY SUPPORT" OFF)
+
+IF (ZSTD_LEGACY_SUPPORT)
+    MESSAGE(STATUS "ZSTD_LEGACY_SUPPORT defined!")
+    ADD_DEFINITIONS(-DZSTD_LEGACY_SUPPORT=1)
+ELSE (ZSTD_LEGACY_SUPPORT)
+    MESSAGE(STATUS "ZSTD_LEGACY_SUPPORT not defined!")
+    ADD_DEFINITIONS(-DZSTD_LEGACY_SUPPORT=0)
+ENDIF (ZSTD_LEGACY_SUPPORT)
+
+# TARGET_INCLUDE_DIRECTORIES is only available in CMake 2.8.11 and greater
+IF ((${CMAKE_MAJOR_VERSION} EQUAL 2) AND (${CMAKE_MINOR_VERSION} EQUAL 8) AND (${CMAKE_PATCH_VERSION} LESS 11))
+    SET(WORKAROUND_OUTDATED_CODE_STYLE TRUE)
+ELSE ()
+    SET(WORKAROUND_OUTDATED_CODE_STYLE FALSE)
+ENDIF ((${CMAKE_MAJOR_VERSION} EQUAL 2) AND (${CMAKE_MINOR_VERSION} EQUAL 8) AND (${CMAKE_PATCH_VERSION} LESS 11))
+
+ADD_SUBDIRECTORY(lib)
+ADD_SUBDIRECTORY(programs)
+
+#-----------------------------------------------------------------------------
+# Add extra compilation flags
+#-----------------------------------------------------------------------------
+INCLUDE(CMakeModules/AddExtraCompilationFlags.cmake)
+ADD_EXTRA_COMPILATION_FLAGS()
diff --git a/contrib/cmake/CMakeModules/AddExtraCompilationFlags.cmake b/contrib/cmake/CMakeModules/AddExtraCompilationFlags.cmake
new file mode 100644
index 0000000..2d59fab
--- /dev/null
+++ b/contrib/cmake/CMakeModules/AddExtraCompilationFlags.cmake
@@ -0,0 +1,331 @@
+MACRO(ADD_EXTRA_COMPILATION_FLAGS)
+    include(CheckCXXCompilerFlag)
+    include(CheckCCompilerFlag)
+    if (CMAKE_COMPILER_IS_GNUCXX OR MINGW) #Not only UNIX but also WIN32 for MinGW
+
+        set(POSITION_INDEPENDENT_CODE_FLAG "-fPIC")
+        CHECK_C_COMPILER_FLAG(${POSITION_INDEPENDENT_CODE_FLAG} POSITION_INDEPENDENT_CODE_FLAG_ALLOWED)
+        if (POSITION_INDEPENDENT_CODE_FLAG_ALLOWED)
+            MESSAGE("Compiler flag ${POSITION_INDEPENDENT_CODE_FLAG} allowed")
+            set(ACTIVATE_POSITION_INDEPENDENT_CODE_FLAG "ON" CACHE BOOL "activate -fPIC flag")
+        else ()
+            MESSAGE("Compiler flag ${POSITION_INDEPENDENT_CODE_FLAG} not allowed")
+        endif (POSITION_INDEPENDENT_CODE_FLAG_ALLOWED)
+
+        set(WARNING_UNDEF "-Wundef")
+        CHECK_C_COMPILER_FLAG(${WARNING_UNDEF} WARNING_UNDEF_ALLOWED)
+        if (WARNING_UNDEF_ALLOWED)
+            MESSAGE("Compiler flag ${WARNING_UNDEF} allowed")
+            set(ACTIVATE_WARNING_UNDEF "ON" CACHE BOOL "activate -Wundef flag")
+        else ()
+            MESSAGE("Compiler flag ${WARNING_UNDEF} not allowed")
+        endif (WARNING_UNDEF_ALLOWED)
+
+        set(WARNING_SHADOW "-Wshadow")
+        CHECK_C_COMPILER_FLAG(${WARNING_SHADOW} WARNING_SHADOW_ALLOWED)
+        if (WARNING_SHADOW_ALLOWED)
+            MESSAGE("Compiler flag ${WARNING_SHADOW} allowed")
+            set(ACTIVATE_WARNING_SHADOW "ON" CACHE BOOL "activate -Wshadow flag")
+        else ()
+            MESSAGE("Compiler flag ${WARNING_SHADOW} not allowed")
+        endif (WARNING_SHADOW_ALLOWED)
+
+        set(WARNING_CAST_ALIGN "-Wcast-align")
+        CHECK_C_COMPILER_FLAG(${WARNING_CAST_ALIGN} WARNING_CAST_ALIGN_ALLOWED)
+        if (WARNING_CAST_ALIGN_ALLOWED)
+            MESSAGE("Compiler flag ${WARNING_CAST_ALIGN} allowed")
+            set(ACTIVATE_WARNING_CAST_ALIGN "ON" CACHE BOOL "activate -Wcast-align flag")
+        else ()
+            MESSAGE("Compiler flag ${WARNING_CAST_ALIGN} not allowed")
+        endif (WARNING_CAST_ALIGN_ALLOWED)
+
+        set(WARNING_CAST_QUAL "-Wcast-qual")
+        CHECK_C_COMPILER_FLAG(${WARNING_CAST_QUAL} WARNING_CAST_QUAL_ALLOWED)
+        if (WARNING_CAST_QUAL_ALLOWED)
+            MESSAGE("Compiler flag ${WARNING_CAST_QUAL} allowed")
+            set(ACTIVATE_WARNING_CAST_QUAL "ON" CACHE BOOL "activate -Wcast-qual flag")
+        else ()
+            MESSAGE("Compiler flag ${WARNING_CAST_QUAL} not allowed")
+        endif (WARNING_CAST_QUAL_ALLOWED)
+
+        set(WARNING_STRICT_PROTOTYPES "-Wstrict-prototypes")
+        CHECK_C_COMPILER_FLAG(${WARNING_STRICT_PROTOTYPES} WARNING_STRICT_PROTOTYPES_ALLOWED)
+        if (WARNING_STRICT_PROTOTYPES_ALLOWED)
+            MESSAGE("Compiler flag ${WARNING_STRICT_PROTOTYPES} allowed")
+            set(ACTIVATE_WARNING_STRICT_PROTOTYPES "ON" CACHE BOOL "activate -Wstrict-prototypes flag")
+        else ()
+            MESSAGE("Compiler flag ${WARNING_STRICT_PROTOTYPES} not allowed")
+        endif (WARNING_STRICT_PROTOTYPES_ALLOWED)
+
+        set(WARNING_ALL "-Wall")
+        CHECK_C_COMPILER_FLAG(${WARNING_ALL} WARNING_ALL_ALLOWED)
+        if (WARNING_ALL_ALLOWED)
+            MESSAGE("Compiler flag ${WARNING_ALL} allowed")
+            set(ACTIVATE_WARNING_ALL "ON" CACHE BOOL "activate -Wall flag")
+        else ()
+            MESSAGE("Compiler flag ${WARNING_ALL} not allowed")
+        endif (WARNING_ALL_ALLOWED)
+
+        set(WARNING_EXTRA "-Wextra")
+        CHECK_C_COMPILER_FLAG(${WARNING_EXTRA} WARNING_EXTRA_ALLOWED)
+        if (WARNING_EXTRA_ALLOWED)
+            MESSAGE("Compiler flag ${WARNING_EXTRA} allowed")
+            set(ACTIVATE_WARNING_EXTRA "ON" CACHE BOOL "activate -Wextra flag")
+        else ()
+            MESSAGE("Compiler flag ${WARNING_EXTRA} not allowed")
+        endif (WARNING_EXTRA_ALLOWED)
+
+        set(WARNING_FLOAT_EQUAL "-Wfloat-equal")
+        CHECK_C_COMPILER_FLAG(${WARNING_FLOAT_EQUAL} WARNING_FLOAT_EQUAL_ALLOWED)
+        if (WARNING_FLOAT_EQUAL_ALLOWED)
+            MESSAGE("Compiler flag ${WARNING_FLOAT_EQUAL} allowed")
+            set(ACTIVATE_WARNING_FLOAT_EQUAL "OFF" CACHE BOOL "activate -Wfloat-equal flag")
+        else ()
+            MESSAGE("Compiler flag ${WARNING_FLOAT_EQUAL} not allowed")
+        endif (WARNING_FLOAT_EQUAL_ALLOWED)
+
+        set(WARNING_SIGN_CONVERSION "-Wsign-conversion")
+        CHECK_C_COMPILER_FLAG(${WARNING_SIGN_CONVERSION} WARNING_SIGN_CONVERSION_ALLOWED)
+        if (WARNING_SIGN_CONVERSION_ALLOWED)
+            MESSAGE("Compiler flag ${WARNING_SIGN_CONVERSION} allowed")
+            set(ACTIVATE_WARNING_SIGN_CONVERSION "OFF" CACHE BOOL "activate -Wsign-conversion flag")
+        else ()
+            MESSAGE("Compiler flag ${WARNING_SIGN_CONVERSION} not allowed")
+        endif (WARNING_SIGN_CONVERSION_ALLOWED)
+
+        if (ACTIVATE_POSITION_INDEPENDENT_CODE_FLAG)
+            list(APPEND CMAKE_C_FLAGS ${POSITION_INDEPENDENT_CODE_FLAG})
+        else () # option disabled: strip the flag from the existing C flags, keeping all others
+            string(REPLACE ${POSITION_INDEPENDENT_CODE_FLAG} "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
+        endif (ACTIVATE_POSITION_INDEPENDENT_CODE_FLAG)
+
+        if (ACTIVATE_WARNING_UNDEF)
+            list(APPEND CMAKE_CXX_FLAGS ${WARNING_UNDEF})
+            list(APPEND CMAKE_C_FLAGS ${WARNING_UNDEF})
+        else ()
+            string(REPLACE ${WARNING_UNDEF} "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+            string(REPLACE ${WARNING_UNDEF} "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
+        endif (ACTIVATE_WARNING_UNDEF)
+
+        if (ACTIVATE_WARNING_SHADOW)
+            list(APPEND CMAKE_CXX_FLAGS ${WARNING_SHADOW})
+            list(APPEND CMAKE_C_FLAGS ${WARNING_SHADOW})
+        else ()
+            string(REPLACE ${WARNING_SHADOW} "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+            string(REPLACE ${WARNING_SHADOW} "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
+        endif (ACTIVATE_WARNING_SHADOW)
+
+        if (ACTIVATE_WARNING_CAST_QUAL)
+            list(APPEND CMAKE_CXX_FLAGS ${WARNING_CAST_QUAL})
+            list(APPEND CMAKE_C_FLAGS ${WARNING_CAST_QUAL})
+        else ()
+            string(REPLACE ${WARNING_CAST_QUAL} "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+            string(REPLACE ${WARNING_CAST_QUAL} "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
+        endif (ACTIVATE_WARNING_CAST_QUAL)
+
+        if (ACTIVATE_WARNING_CAST_ALIGN)
+            list(APPEND CMAKE_CXX_FLAGS ${WARNING_CAST_ALIGN})
+            list(APPEND CMAKE_C_FLAGS ${WARNING_CAST_ALIGN})
+        else ()
+            string(REPLACE ${WARNING_CAST_ALIGN} "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+            string(REPLACE ${WARNING_CAST_ALIGN} "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
+        endif (ACTIVATE_WARNING_CAST_ALIGN)
+
+        if (ACTIVATE_WARNING_STRICT_PROTOTYPES)
+            list(APPEND CMAKE_CXX_FLAGS ${WARNING_STRICT_PROTOTYPES})
+            list(APPEND CMAKE_C_FLAGS ${WARNING_STRICT_PROTOTYPES})
+        else ()
+            string(REPLACE ${WARNING_STRICT_PROTOTYPES} "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+            string(REPLACE ${WARNING_STRICT_PROTOTYPES} "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
+        endif (ACTIVATE_WARNING_STRICT_PROTOTYPES)
+
+        if (ACTIVATE_WARNING_ALL)
+            list(APPEND CMAKE_CXX_FLAGS ${WARNING_ALL})
+            list(APPEND CMAKE_C_FLAGS ${WARNING_ALL})
+        else ()
+            string(REPLACE ${WARNING_ALL} "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+            string(REPLACE ${WARNING_ALL} "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
+        endif (ACTIVATE_WARNING_ALL)
+
+        if (ACTIVATE_WARNING_EXTRA)
+            list(APPEND CMAKE_CXX_FLAGS ${WARNING_EXTRA})
+            list(APPEND CMAKE_C_FLAGS ${WARNING_EXTRA})
+        else ()
+            string(REPLACE ${WARNING_EXTRA} "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+            string(REPLACE ${WARNING_EXTRA} "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
+        endif (ACTIVATE_WARNING_EXTRA)
+
+        if (ACTIVATE_WARNING_FLOAT_EQUAL)
+            list(APPEND CMAKE_CXX_FLAGS ${WARNING_FLOAT_EQUAL})
+            list(APPEND CMAKE_C_FLAGS ${WARNING_FLOAT_EQUAL})
+        else ()
+            string(REPLACE ${WARNING_FLOAT_EQUAL} "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+            string(REPLACE ${WARNING_FLOAT_EQUAL} "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
+        endif (ACTIVATE_WARNING_FLOAT_EQUAL)
+
+        if (ACTIVATE_WARNING_SIGN_CONVERSION)
+            list(APPEND CMAKE_CXX_FLAGS ${WARNING_SIGN_CONVERSION})
+            list(APPEND CMAKE_C_FLAGS ${WARNING_SIGN_CONVERSION})
+        else ()
+            string(REPLACE ${WARNING_SIGN_CONVERSION} "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+            string(REPLACE ${WARNING_SIGN_CONVERSION} "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
+        endif (ACTIVATE_WARNING_SIGN_CONVERSION)
+
+        #Set c++11 by default
+        list(APPEND CMAKE_CXX_FLAGS "-std=c++11")
+
+        #Set c99 by default
+        list(APPEND CMAKE_C_FLAGS "-std=c99")
+
+    elseif (MSVC)
+        # Add specific compilation flags for Windows Visual
+
+        set(WARNING_ALL "/Wall")
+        CHECK_C_COMPILER_FLAG(${WARNING_ALL} WARNING_ALL_ALLOWED)
+        if (WARNING_ALL_ALLOWED)
+            MESSAGE("Compiler flag ${WARNING_ALL} allowed")
+            set(ACTIVATE_WARNING_ALL "OFF" CACHE BOOL "activate /Wall flag")
+        else ()
+            MESSAGE("Compiler flag ${WARNING_ALL} not allowed")
+        endif (WARNING_ALL_ALLOWED)
+        	
+        set(RTC_FLAG "/RTC1")
+        CHECK_C_COMPILER_FLAG(${RTC_FLAG} RTC_FLAG_ALLOWED)
+        if (RTC_FLAG_ALLOWED)
+            MESSAGE("Compiler flag ${RTC_FLAG} allowed")
+            set(ACTIVATE_RTC_FLAG "ON" CACHE BOOL "activate /RTC1 flag")
+        else ()
+            MESSAGE("Compiler flag ${RTC_FLAG} not allowed")
+        endif (RTC_FLAG_ALLOWED)
+        	
+        set(ZC_FLAG "/Zc:forScope")
+        CHECK_C_COMPILER_FLAG(${ZC_FLAG} ZC_FLAG_ALLOWED)
+        if (ZC_FLAG_ALLOWED)
+            MESSAGE("Compiler flag ${ZC_FLAG} allowed")
+            set(ACTIVATE_ZC_FLAG "ON" CACHE BOOL "activate /Zc:forScope flag")
+        else ()
+            MESSAGE("Compiler flag ${ZC_FLAG} not allowed")
+        endif (ZC_FLAG_ALLOWED)
+        	
+        set(GD_FLAG "/Gd")
+        CHECK_C_COMPILER_FLAG(${GD_FLAG} GD_FLAG_ALLOWED)
+        if (GD_FLAG_ALLOWED)
+            MESSAGE("Compiler flag ${GD_FLAG} allowed")
+            set(ACTIVATE_GD_FLAG "ON" CACHE BOOL "activate /Gd flag")
+        else ()
+            MESSAGE("Compiler flag ${GD_FLAG} not allowed")
+        endif (GD_FLAG_ALLOWED)
+        	
+        set(ANALYZE_FLAG "/analyze:stacksize25000")
+        CHECK_C_COMPILER_FLAG(${ANALYZE_FLAG} ANALYZE_FLAG_ALLOWED)
+        if (ANALYZE_FLAG_ALLOWED)
+            MESSAGE("Compiler flag ${ANALYZE_FLAG} allowed")
+            set(ACTIVATE_ANALYZE_FLAG "ON" CACHE BOOL "activate /ANALYZE flag")
+        else ()
+            MESSAGE("Compiler flag ${ANALYZE_FLAG} not allowed")
+        endif (ANALYZE_FLAG_ALLOWED)
+
+        if (ACTIVATE_WARNING_ALL)
+            list(APPEND CMAKE_CXX_FLAGS ${WARNING_ALL})
+            list(APPEND CMAKE_C_FLAGS ${WARNING_ALL})
+        else ()
+            string(REPLACE ${WARNING_ALL} "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+            string(REPLACE ${WARNING_ALL} "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
+        endif (ACTIVATE_WARNING_ALL)
+        	
+        # Only for DEBUG version
+        if (ACTIVATE_RTC_FLAG)
+            list(APPEND CMAKE_CXX_FLAGS_DEBUG ${RTC_FLAG})
+            list(APPEND CMAKE_C_FLAGS_DEBUG ${RTC_FLAG})
+        else ()
+            string(REPLACE ${RTC_FLAG} "" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
+            string(REPLACE ${RTC_FLAG} "" CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}")
+        endif (ACTIVATE_RTC_FLAG)
+        	
+        if (ACTIVATE_ZC_FLAG)
+            list(APPEND CMAKE_CXX_FLAGS ${ZC_FLAG})
+            list(APPEND CMAKE_C_FLAGS ${ZC_FLAG})
+        else ()
+            string(REPLACE ${ZC_FLAG} "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+            string(REPLACE ${ZC_FLAG} "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
+        endif (ACTIVATE_ZC_FLAG)
+        	
+        if (ACTIVATE_GD_FLAG)
+            list(APPEND CMAKE_CXX_FLAGS ${GD_FLAG})
+            list(APPEND CMAKE_C_FLAGS ${GD_FLAG})
+        else ()
+            string(REPLACE ${GD_FLAG} "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+            string(REPLACE ${GD_FLAG} "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
+        endif (ACTIVATE_GD_FLAG)
+        	
+        if (ACTIVATE_ANALYZE_FLAG)
+            list(APPEND CMAKE_CXX_FLAGS ${ANALYZE_FLAG})
+            list(APPEND CMAKE_C_FLAGS ${ANALYZE_FLAG})
+        else ()
+            string(REPLACE ${ANALYZE_FLAG} "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+            string(REPLACE ${ANALYZE_FLAG} "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
+        endif (ACTIVATE_ANALYZE_FLAG)
+        	
+        if (MSVC80 OR MSVC90 OR MSVC10 OR MSVC11)
+            # To avoid compiler warning (level 4) C4571, compile with /EHa if you still want
+            # your catch(...) blocks to catch structured exceptions.
+            list(APPEND CMAKE_CXX_FLAGS "/EHa")
+        endif (MSVC80 OR MSVC90 OR MSVC10 OR MSVC11)
+
+        set(MULTITHREADED_COMPILATION "/MP")
+        MESSAGE("Compiler flag ${MULTITHREADED_COMPILATION} allowed")
+        set(ACTIVATE_MULTITHREADED_COMPILATION "ON" CACHE BOOL "activate /MP flag")
+
+        if (ACTIVATE_MULTITHREADED_COMPILATION)
+            list(APPEND CMAKE_CXX_FLAGS ${MULTITHREADED_COMPILATION})
+            list(APPEND CMAKE_C_FLAGS ${MULTITHREADED_COMPILATION})
+        else ()
+            string(REPLACE ${MULTITHREADED_COMPILATION} "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+            string(REPLACE ${MULTITHREADED_COMPILATION} "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
+        endif (ACTIVATE_MULTITHREADED_COMPILATION)
+
+        #For exceptions
+        list(APPEND CMAKE_CXX_FLAGS "/EHsc")
+        list(APPEND CMAKE_C_FLAGS "/EHsc")
+        
+        # UNICODE SUPPORT
+        list(APPEND CMAKE_CXX_FLAGS "/D_UNICODE /DUNICODE")
+        list(APPEND CMAKE_C_FLAGS "/D_UNICODE /DUNICODE")
+    endif ()
+
+    # Remove duplicate compilation flags
+	FOREACH (flag_var CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
+                CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO
+		        CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
+		                CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
+	    separate_arguments(${flag_var})
+	    list(REMOVE_DUPLICATES ${flag_var})
+	    string(REPLACE ";" " " ${flag_var} "${${flag_var}}")
+	    set(${flag_var} "${${flag_var}}" CACHE STRING "common build flags" FORCE)
+    ENDFOREACH (flag_var)  
+
+    if (MSVC)
+        # Replace /MT with /MD flag
+    	# Replace /O2 with /Ox flag
+        FOREACH (flag_var CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
+                CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO
+		        CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
+		                CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
+            STRING(REGEX REPLACE "/MT" "/MD" ${flag_var} "${${flag_var}}")
+        	STRING(REGEX REPLACE "/O2" "/Ox" ${flag_var} "${${flag_var}}")
+        ENDFOREACH (flag_var)      
+    endif ()
+
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "Updated flags" FORCE)
+    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}" CACHE STRING "Updated flags" FORCE)
+    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}" CACHE STRING "Updated flags" FORCE)
+    set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL}" CACHE STRING "Updated flags" FORCE)
+    set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}" CACHE STRING "Updated flags" FORCE)
+
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "Updated flags" FORCE)
+    set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}" CACHE STRING "Updated flags" FORCE)
+    set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}" CACHE STRING "Updated flags" FORCE)
+    set(CMAKE_C_FLAGS_MINSIZEREL "${CMAKE_C_FLAGS_MINSIZEREL}" CACHE STRING "Updated flags" FORCE)
+    set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO}" CACHE STRING "Updated flags" FORCE)
+
+ENDMACRO(ADD_EXTRA_COMPILATION_FLAGS)
diff --git a/contrib/cmake/cmake_uninstall.cmake.in b/contrib/cmake/cmake_uninstall.cmake.in
new file mode 100644
index 0000000..e3774dc
--- /dev/null
+++ b/contrib/cmake/cmake_uninstall.cmake.in
@@ -0,0 +1,22 @@
+ 
+if(NOT EXISTS "@CMAKE_BINARY_DIR@/install_manifest.txt")
+  message(FATAL_ERROR "Cannot find install manifest: @CMAKE_BINARY_DIR@/install_manifest.txt")
+endif(NOT EXISTS "@CMAKE_BINARY_DIR@/install_manifest.txt")
+
+file(READ "@CMAKE_BINARY_DIR@/install_manifest.txt" files)
+string(REGEX REPLACE "\n" ";" files "${files}")
+foreach(file ${files})
+  message(STATUS "Uninstalling $ENV{DESTDIR}${file}")
+  if(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}")
+    exec_program(
+      "@CMAKE_COMMAND@" ARGS "-E remove \"$ENV{DESTDIR}${file}\""
+      OUTPUT_VARIABLE rm_out
+      RETURN_VALUE rm_retval
+      )
+    if(NOT "${rm_retval}" STREQUAL 0)
+      message(FATAL_ERROR "Problem when removing $ENV{DESTDIR}${file}")
+    endif(NOT "${rm_retval}" STREQUAL 0)
+  else(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}")
+    message(STATUS "File $ENV{DESTDIR}${file} does not exist.")
+  endif(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}")
+endforeach(file)
diff --git a/contrib/cmake/lib/CMakeLists.txt b/contrib/cmake/lib/CMakeLists.txt
new file mode 100644
index 0000000..a8247dd
--- /dev/null
+++ b/contrib/cmake/lib/CMakeLists.txt
@@ -0,0 +1,206 @@
+# ################################################################
+# zstd - Makefile
+# Copyright (C) Yann Collet 2014-2015
+# All rights reserved.
+# 
+# BSD license
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+# 
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+# 
+# * Redistributions in binary form must reproduce the above copyright notice, this
+#   list of conditions and the following disclaimer in the documentation and/or
+#   other materials provided with the distribution.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# 
+# You can contact the author at :
+#  - zstd source repository : https://github.com/Cyan4973/zstd
+#  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+# ################################################################
+
+# Get library version based on information from input content (use regular exp)
+function(GetLibraryVersion _content _outputVar1 _outputVar2 _outputVar3)
+    string(REGEX MATCHALL ".*define ZSTD_VERSION_MAJOR+.* ([0-9]+).*define ZSTD_VERSION_MINOR+.* ([0-9]+).*define ZSTD_VERSION_RELEASE+.* ([0-9]+)" VERSION_REGEX "${_content}")
+    SET(${_outputVar1} ${CMAKE_MATCH_1} PARENT_SCOPE)
+    SET(${_outputVar2} ${CMAKE_MATCH_2} PARENT_SCOPE)
+    SET(${_outputVar3} ${CMAKE_MATCH_3} PARENT_SCOPE)
+endfunction()
+
+PROJECT(libzstd)
+
+SET(CMAKE_INCLUDE_CURRENT_DIR TRUE)
+
+# Define project root directory
+SET(ROOT_DIR ../../..)
+
+# Define library directory, where sources and header files are located
+SET(LIBRARY_DIR ${ROOT_DIR}/lib)
+INCLUDE_DIRECTORIES(${LIBRARY_DIR})
+
+# Read file content
+FILE(READ ${LIBRARY_DIR}/zstd.h HEADER_CONTENT)
+
+# Parse version
+GetLibraryVersion("${HEADER_CONTENT}" LIBVER_MAJOR LIBVER_MINOR LIBVER_RELEASE)
+MESSAGE("ZSTD VERSION ${LIBVER_MAJOR}.${LIBVER_MINOR}.${LIBVER_RELEASE}")
+
+SET(Sources
+        ${LIBRARY_DIR}/fse.c
+        ${LIBRARY_DIR}/huff0.c
+        ${LIBRARY_DIR}/zstd_buffered.c
+        ${LIBRARY_DIR}/zstd_compress.c
+        ${LIBRARY_DIR}/zstd_decompress.c)
+
+SET(Headers
+        ${LIBRARY_DIR}/bitstream.h
+        ${LIBRARY_DIR}/error_private.h
+        ${LIBRARY_DIR}/error_public.h
+        ${LIBRARY_DIR}/fse.h
+        ${LIBRARY_DIR}/fse_static.h
+        ${LIBRARY_DIR}/huff0.h
+        ${LIBRARY_DIR}/huff0_static.h
+        ${LIBRARY_DIR}/mem.h
+        ${LIBRARY_DIR}/zstd_buffered_static.h
+        ${LIBRARY_DIR}/zstd_buffered.h
+        ${LIBRARY_DIR}/zstd_internal.h
+        ${LIBRARY_DIR}/zstd_static.h
+        ${LIBRARY_DIR}/zstd.h)
+
+IF (ZSTD_LEGACY_SUPPORT)
+    SET(LIBRARY_LEGACY_DIR ${LIBRARY_DIR}/legacy)
+    INCLUDE_DIRECTORIES(${LIBRARY_LEGACY_DIR})
+
+    SET(Sources ${Sources}
+            ${LIBRARY_LEGACY_DIR}/zstd_v01.c
+            ${LIBRARY_LEGACY_DIR}/zstd_v02.c
+            ${LIBRARY_LEGACY_DIR}/zstd_v03.c)
+
+    SET(Headers ${Headers}
+            ${LIBRARY_LEGACY_DIR}/zstd_legacy.h
+            ${LIBRARY_LEGACY_DIR}/zstd_v01.h
+            ${LIBRARY_LEGACY_DIR}/zstd_v02.h
+            ${LIBRARY_LEGACY_DIR}/zstd_v03.h)
+ENDIF (ZSTD_LEGACY_SUPPORT)
+
+IF (MSVC)
+    SET(MSVC_RESOURCE_DIR ${ROOT_DIR}/visual/2013/zstdlib)
+    SET(PlatformDependResources ${MSVC_RESOURCE_DIR}/resource.h ${MSVC_RESOURCE_DIR}/zstdlib.rc)
+ENDIF (MSVC)
+
+# Split project to static and shared libraries build
+ADD_LIBRARY(libzstd_static STATIC ${Sources} ${Headers} ${PlatformDependResources})
+ADD_LIBRARY(libzstd_shared SHARED ${Sources} ${Headers} ${PlatformDependResources})
+
+# Add specific compile definitions for MSVC project
+IF (MSVC)
+    SET_TARGET_PROPERTIES(libzstd_static PROPERTIES COMPILE_DEFINITIONS "ZSTD_HEAPMODE=0;_CRT_SECURE_NO_WARNINGS")
+    SET_TARGET_PROPERTIES(libzstd_shared PROPERTIES COMPILE_DEFINITIONS "ZSTD_DLL_EXPORT=1;ZSTD_HEAPMODE=0;_CONSOLE;_CRT_SECURE_NO_WARNINGS")
+ENDIF (MSVC)
+
+# Define include directories
+IF (NOT WORKAROUND_OUTDATED_CODE_STYLE)
+    TARGET_INCLUDE_DIRECTORIES(libzstd_static PUBLIC ${LIBRARY_DIR})
+    TARGET_INCLUDE_DIRECTORIES(libzstd_shared PUBLIC ${LIBRARY_DIR})
+    IF (ZSTD_LEGACY_SUPPORT)
+        TARGET_INCLUDE_DIRECTORIES(libzstd_static PUBLIC ${LIBRARY_LEGACY_DIR})
+        TARGET_INCLUDE_DIRECTORIES(libzstd_shared PUBLIC ${LIBRARY_LEGACY_DIR})
+    ENDIF (ZSTD_LEGACY_SUPPORT)
+ENDIF (NOT WORKAROUND_OUTDATED_CODE_STYLE)
+
+# Define library base name
+IF (UNIX)
+    SET(LIBRARY_BASE_NAME libzstd)
+ELSEIF (MSVC)
+    SET(LIBRARY_BASE_NAME zstdlib)
+ELSE ()
+    MESSAGE(FATAL_ERROR "Unsupported build type")
+ENDIF (UNIX)
+
+# Define static and shared library names
+SET(STATIC_LIBRARY_OUTPUT_NAME ${LIBRARY_BASE_NAME})
+SET(SHARED_LIBRARY_OUTPUT_NAME ${LIBRARY_BASE_NAME}.${LIBVER_MAJOR}.${LIBVER_MINOR}.${LIBVER_RELEASE})
+
+IF (MSVC)
+    IF (CMAKE_SIZEOF_VOID_P MATCHES "8")
+        SET(STATIC_LIBRARY_OUTPUT_NAME ${STATIC_LIBRARY_OUTPUT_NAME}_x64)
+        SET(SHARED_LIBRARY_OUTPUT_NAME ${SHARED_LIBRARY_OUTPUT_NAME}_x64)
+    ELSE ()
+        SET(STATIC_LIBRARY_OUTPUT_NAME ${STATIC_LIBRARY_OUTPUT_NAME}_x86)
+        SET(SHARED_LIBRARY_OUTPUT_NAME ${SHARED_LIBRARY_OUTPUT_NAME}_x86)
+    ENDIF (CMAKE_SIZEOF_VOID_P MATCHES "8")
+ENDIF (MSVC)
+
+SET_TARGET_PROPERTIES(
+        libzstd_static
+        PROPERTIES
+        PREFIX ""
+        OUTPUT_NAME ${STATIC_LIBRARY_OUTPUT_NAME})
+
+SET_TARGET_PROPERTIES(
+        libzstd_shared
+        PROPERTIES
+        PREFIX ""
+        OUTPUT_NAME ${SHARED_LIBRARY_OUTPUT_NAME})
+
+IF (UNIX)
+    SET(PREFIX /usr/local)
+    SET(INSTALL_LIBRARY_DIR ${PREFIX}/lib)
+    SET(INSTALL_INCLUDE_DIR ${PREFIX}/include)
+
+    # install target
+    INSTALL(FILES ${LIBRARY_DIR}/zstd.h DESTINATION ${INSTALL_INCLUDE_DIR})
+    INSTALL(TARGETS libzstd_static DESTINATION ${INSTALL_LIBRARY_DIR})
+    INSTALL(TARGETS libzstd_shared LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR})
+
+    # Create symlinks and set up these files
+    SET(SHARED_LIBRARY_LINK ${SHARED_LIBRARY_OUTPUT_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
+    SET(SHARED_LIBRARY_SYMLINK1 ${LIBRARY_BASE_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
+    SET(SHARED_LIBRARY_SYMLINK2 ${LIBRARY_BASE_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}.${LIBVER_MAJOR})
+
+    SET(SHARED_LIBRARY_LINK_PATH ${CMAKE_CURRENT_BINARY_DIR}/${SHARED_LIBRARY_LINK})
+    SET(SHARED_LIBRARY_SYMLINK1_PATH ${CMAKE_CURRENT_BINARY_DIR}/${SHARED_LIBRARY_SYMLINK1})
+    SET(SHARED_LIBRARY_SYMLINK2_PATH ${CMAKE_CURRENT_BINARY_DIR}/${SHARED_LIBRARY_SYMLINK2})
+
+    if (EXISTS ${SHARED_LIBRARY_SYMLINK1_PATH})
+        FILE(REMOVE ${SHARED_LIBRARY_SYMLINK1_PATH})
+    endif (EXISTS ${SHARED_LIBRARY_SYMLINK1_PATH})
+
+    if (EXISTS ${SHARED_LIBRARY_SYMLINK2_PATH})
+        FILE(REMOVE ${SHARED_LIBRARY_SYMLINK2_PATH})
+    endif (EXISTS ${SHARED_LIBRARY_SYMLINK2_PATH})
+
+    ADD_CUSTOM_COMMAND(TARGET libzstd_shared POST_BUILD
+            COMMAND ln -sf ${SHARED_LIBRARY_LINK} ${SHARED_LIBRARY_SYMLINK1}
+            DEPENDS ${SHARED_LIBRARY_LINK_PATH}
+            COMMENT "Generating symbolic link")
+
+    ADD_CUSTOM_COMMAND(TARGET libzstd_shared POST_BUILD
+            COMMAND ln -sf ${SHARED_LIBRARY_LINK} ${SHARED_LIBRARY_SYMLINK2}
+            DEPENDS ${SHARED_LIBRARY_LINK_PATH}
+            COMMENT "Generating symbolic link")
+
+    INSTALL(FILES ${SHARED_LIBRARY_SYMLINK1_PATH} DESTINATION ${INSTALL_LIBRARY_DIR})
+    INSTALL(FILES ${SHARED_LIBRARY_SYMLINK2_PATH} DESTINATION ${INSTALL_LIBRARY_DIR})
+
+    # uninstall target
+    CONFIGURE_FILE(
+            "${CMAKE_SOURCE_DIR}/cmake_uninstall.cmake.in"
+            "${CMAKE_BINARY_DIR}/cmake_uninstall.cmake"
+            IMMEDIATE @ONLY)
+
+    ADD_CUSTOM_TARGET(uninstall
+            COMMAND ${CMAKE_COMMAND} -P ${CMAKE_BINARY_DIR}/cmake_uninstall.cmake)
+ENDIF (UNIX)
diff --git a/contrib/cmake/programs/CMakeLists.txt b/contrib/cmake/programs/CMakeLists.txt
new file mode 100644
index 0000000..af9a057
--- /dev/null
+++ b/contrib/cmake/programs/CMakeLists.txt
@@ -0,0 +1,84 @@
+# ################################################################
+# zstd - CMake build script (programs)
+# Copyright (C) Yann Collet 2014-2015
+# All rights reserved.
+#
+# BSD license
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice, this
+#   list of conditions and the following disclaimer in the documentation and/or
+#   other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# You can contact the author at :
+#  - zstd source repository : https://github.com/Cyan4973/zstd
+#  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+# ################################################################
+
+PROJECT(programs)
+
+SET(CMAKE_INCLUDE_CURRENT_DIR TRUE)
+
+# Define project root directory (relative path; this file lives in contrib/cmake/programs)
+SET(ROOT_DIR ../../..)
+
+# Define programs directory, where sources and header files are located
+SET(PROGRAMS_DIR ${ROOT_DIR}/programs)
+INCLUDE_DIRECTORIES(${PROGRAMS_DIR})
+
+IF (WORKAROUND_OUTDATED_CODE_STYLE)  # not defined in this file; expected from the including project
+    # Define library directory, where sources and header files are located
+    SET(LIBRARY_DIR ${ROOT_DIR}/lib)
+    INCLUDE_DIRECTORIES(${LIBRARY_DIR})
+ENDIF (WORKAROUND_OUTDATED_CODE_STYLE)
+
+IF (ZSTD_LEGACY_SUPPORT)  # not defined in this file; adds the legacy include paths and fileio_legacy.c
+    SET(PROGRAMS_LEGACY_DIR ${PROGRAMS_DIR}/legacy)
+    INCLUDE_DIRECTORIES(${PROGRAMS_LEGACY_DIR})
+
+    IF (WORKAROUND_OUTDATED_CODE_STYLE)
+        INCLUDE_DIRECTORIES(${LIBRARY_DIR}/legacy)
+    ENDIF (WORKAROUND_OUTDATED_CODE_STYLE)
+
+    SET(ZSTD_FILEIO_LEGACY ${PROGRAMS_LEGACY_DIR}/fileio_legacy.c)  # left unset (expands to nothing below) without legacy support
+ENDIF (ZSTD_LEGACY_SUPPORT)
+
+ADD_EXECUTABLE(zstd ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/fileio.c ${PROGRAMS_DIR}/bench.c ${PROGRAMS_DIR}/xxhash.c ${PROGRAMS_DIR}/datagen.c ${ZSTD_FILEIO_LEGACY})
+TARGET_LINK_LIBRARIES(zstd libzstd_static)  # every program in this file links the static library target
+
+ADD_EXECUTABLE(fullbench ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/fullbench.c)
+TARGET_LINK_LIBRARIES(fullbench libzstd_static)
+
+ADD_EXECUTABLE(fuzzer ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/xxhash.c ${PROGRAMS_DIR}/fuzzer.c)
+TARGET_LINK_LIBRARIES(fuzzer libzstd_static)
+
+IF (UNIX)  # the programs below are only built on UNIX platforms
+    ADD_EXECUTABLE(zstd-noBench ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/fileio.c ${ZSTD_FILEIO_LEGACY})
+    TARGET_LINK_LIBRARIES(zstd-noBench libzstd_static)
+    SET_TARGET_PROPERTIES(zstd-noBench PROPERTIES COMPILE_DEFINITIONS "ZSTD_NOBENCH")  # same CLI sources as zstd, minus bench.c/xxhash.c/datagen.c
+
+    ADD_EXECUTABLE(zbufftest ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/xxhash.c ${PROGRAMS_DIR}/zbufftest.c)
+    TARGET_LINK_LIBRARIES(zbufftest libzstd_static)
+
+    ADD_EXECUTABLE(paramgrill ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/xxhash.c ${PROGRAMS_DIR}/paramgrill.c)
+    TARGET_LINK_LIBRARIES(paramgrill libzstd_static m) #m is math library
+
+    ADD_EXECUTABLE(datagen ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/datagencli.c)
+    TARGET_LINK_LIBRARIES(datagen libzstd_static)
+ENDIF (UNIX)
diff --git a/images/CSpeed.png b/images/CSpeed.png
index fe54752..5ba0561 100644
Binary files a/images/CSpeed.png and b/images/CSpeed.png differ
diff --git a/images/DSpeed.png b/images/DSpeed.png
index 4db6437..1cd4713 100644
Binary files a/images/DSpeed.png and b/images/DSpeed.png differ
diff --git a/lib/Makefile b/lib/Makefile
index c621f63..a7172b6 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -32,17 +32,21 @@
 # ################################################################
 
 # Version numbers
-LIBVER_MAJOR=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < zstd.h`
-LIBVER_MINOR=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < zstd.h`
-LIBVER_PATCH=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < zstd.h`
-LIBVER  = $(LIBVER_MAJOR).$(LIBVER_MINOR).$(LIBVER_PATCH)
+LIBVER_MAJOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < zstd.h`
+LIBVER_MINOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < zstd.h`
+LIBVER_PATCH_SCRIPT:=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < zstd.h`
+LIBVER_SCRIPT:= $(LIBVER_MAJOR_SCRIPT).$(LIBVER_MINOR_SCRIPT).$(LIBVER_PATCH_SCRIPT)
+LIBVER_MAJOR := $(shell echo $(LIBVER_MAJOR_SCRIPT))
+LIBVER_MINOR := $(shell echo $(LIBVER_MINOR_SCRIPT))
+LIBVER_PATCH := $(shell echo $(LIBVER_PATCH_SCRIPT))
+LIBVER := $(shell echo $(LIBVER_SCRIPT))
 VERSION?= $(LIBVER)
 
 DESTDIR?=
 PREFIX ?= /usr/local
 CPPFLAGS= -I.
 CFLAGS ?= -O3
-CFLAGS += -std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-qual -Wcast-align -Wstrict-prototypes
+CFLAGS += -std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-qual -Wcast-align -Wstrict-prototypes -Wstrict-aliasing=1
 FLAGS   = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MOREFLAGS)
 
 LIBDIR ?= $(PREFIX)/lib
@@ -98,12 +102,13 @@ clean:
 #make install is validated only for Linux, OSX, kFreeBSD and Hurd targets
 ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU))
 
-libzstd.pc: libzstd.pc.in Makefile
+libzstd.pc:
+libzstd.pc: libzstd.pc.in
 	@echo creating pkgconfig
 	@sed -e 's|@PREFIX@|$(PREFIX)|' \
-            -e 's|@LIBDIR@|$(LIBDIR)|' \
-            -e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \
-            -e 's|@VERSION@|$(VERSION)|' \
+             -e 's|@LIBDIR@|$(LIBDIR)|' \
+             -e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \
+             -e 's|@VERSION@|$(VERSION)|' \
              $< >$@
 
 install: libzstd libzstd.pc
diff --git a/lib/bitstream.h b/lib/bitstream.h
index dcfe8b0..fbd0f3f 100644
--- a/lib/bitstream.h
+++ b/lib/bitstream.h
@@ -50,8 +50,8 @@ extern "C" {
 /******************************************
 *  Includes
 ******************************************/
-#include "mem.h"     /* unaligned access routines */
-#include "error.h"   /* error codes and messages */
+#include "mem.h"            /* unaligned access routines */
+#include "error_private.h"  /* error codes and messages */
 
 
 /********************************************
diff --git a/lib/error.h b/lib/error_private.h
similarity index 56%
rename from lib/error.h
rename to lib/error_private.h
index 3c39c39..e567538 100644
--- a/lib/error.h
+++ b/lib/error_private.h
@@ -1,6 +1,6 @@
 /* ******************************************************************
    Error codes and messages
-   Copyright (C) 2013-2015, Yann Collet
+   Copyright (C) 2013-2016, Yann Collet
 
    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 
@@ -28,9 +28,10 @@
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
    You can contact the author at :
-   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
-   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+   - Source repository : https://github.com/Cyan4973/zstd
 ****************************************************************** */
+/* Note : this module is expected to remain private, do not expose it */
+
 #ifndef ERROR_H_MODULE
 #define ERROR_H_MODULE
 
@@ -39,56 +40,65 @@ extern "C" {
 #endif
 
 
-/******************************************
+/* *****************************************
 *  Includes
 ******************************************/
-#include <stddef.h>    /* size_t, ptrdiff_t */
+#include <stddef.h>        /* size_t, ptrdiff_t */
+#include "error_public.h"  /* enum list */
 
 
-/******************************************
+/* *****************************************
 *  Compiler-specific
 ******************************************/
-#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#if defined(__GNUC__)
+#  define ERR_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
 #  define ERR_STATIC static inline
 #elif defined(_MSC_VER)
 #  define ERR_STATIC static __inline
-#elif defined(__GNUC__)
-#  define ERR_STATIC static __attribute__((unused))
 #else
 #  define ERR_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
 #endif
 
 
-/******************************************
-*  Error Management
+/* *****************************************
+*  Error Codes
 ******************************************/
 #define PREFIX(name) ZSTD_error_##name
 
+#ifdef ERROR
+#  undef ERROR   /* reported already defined on VS 2015 by Rich Geldreich */
+#endif
 #define ERROR(name) (size_t)-PREFIX(name)
 
-#define ERROR_LIST(ITEM) \
-        ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \
-        ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(frameParameter_unsupported)) ITEM(PREFIX(frameParameter_unsupportedBy32bitsImplementation)) \
-        ITEM(PREFIX(init_missing)) ITEM(PREFIX(memory_allocation)) \
-        ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \
-        ITEM(PREFIX(corruption_detected)) \
-        ITEM(PREFIX(tableLog_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooSmall)) \
-        ITEM(PREFIX(maxCode))
-
-#define ERROR_GENERATE_ENUM(ENUM) ENUM,
-typedef enum { ERROR_LIST(ERROR_GENERATE_ENUM) } ERR_codes;  /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
+ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
 
-#define ERROR_CONVERTTOSTRING(STRING) #STRING,
-#define ERROR_GENERATE_STRING(EXPR) ERROR_CONVERTTOSTRING(EXPR)
-static const char* ERR_strings[] = { ERROR_LIST(ERROR_GENERATE_STRING) };
 
-ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
+/* *****************************************
+*  Error Strings
+******************************************/
 
 ERR_STATIC const char* ERR_getErrorName(size_t code)
 {
     static const char* codeError = "Unspecified error code";
-    if (ERR_isError(code)) return ERR_strings[-(int)(code)];
-    return codeError;
+    switch( (size_t)(0-code) )   /* functions return (size_t)(0-enum) : negate to recover the enum value */
+    {
+    case ZSTD_error_No_Error: return "No error detected";
+    case ZSTD_error_GENERIC:  return "Error (generic)";
+    case ZSTD_error_prefix_unknown: return "Unknown frame descriptor";
+    case ZSTD_error_frameParameter_unsupported: return "Unsupported frame parameter";
+    case ZSTD_error_frameParameter_unsupportedBy32bitsImplementation: return "Frame parameter unsupported in 32-bits mode";
+    case ZSTD_error_init_missing: return "Context should be init first";
+    case ZSTD_error_memory_allocation: return "Allocation error : not enough memory";
+    case ZSTD_error_stage_wrong: return "Operation not authorized at current processing stage";
+    case ZSTD_error_dstSize_tooSmall: return "Destination buffer is too small";
+    case ZSTD_error_srcSize_wrong: return "Src size incorrect";
+    case ZSTD_error_corruption_detected: return "Corrupted block detected";
+    case ZSTD_error_tableLog_tooLarge: return "tableLog requires too much memory";
+    case ZSTD_error_maxSymbolValue_tooLarge: return "Unsupported max possible Symbol Value : too large";
+    case ZSTD_error_maxSymbolValue_tooSmall: return "Specified maxSymbolValue is too small";
+    default: return codeError;   /* ZSTD_error_maxCode and any value that is not a known error code */
+    }
 }
 
 
diff --git a/lib/error_public.h b/lib/error_public.h
new file mode 100644
index 0000000..78b0e80
--- /dev/null
+++ b/lib/error_public.h
@@ -0,0 +1,70 @@
+/* ******************************************************************
+   Error codes list
+   Copyright (C) 2016, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/zstd
+****************************************************************** */
+#ifndef ERROR_PUBLIC_H_MODULE
+#define ERROR_PUBLIC_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* ****************************************
+*  error list
+******************************************/
+enum {
+  ZSTD_error_No_Error,   /* first entry, implicit value 0 ; later entries count up in declaration order */
+  ZSTD_error_GENERIC,
+  ZSTD_error_prefix_unknown,
+  ZSTD_error_frameParameter_unsupported,
+  ZSTD_error_frameParameter_unsupportedBy32bitsImplementation,
+  ZSTD_error_init_missing,
+  ZSTD_error_memory_allocation,
+  ZSTD_error_stage_wrong,
+  ZSTD_error_dstSize_tooSmall,
+  ZSTD_error_srcSize_wrong,
+  ZSTD_error_corruption_detected,
+  ZSTD_error_tableLog_tooLarge,
+  ZSTD_error_maxSymbolValue_tooLarge,
+  ZSTD_error_maxSymbolValue_tooSmall,
+  ZSTD_error_maxCode   /* must stay last : used as the bound in ERR_isError() (error_private.h) */
+};
+
+/* note : functions provide error codes in reverse negative order,
+          so compare with (size_t)(0-enum) */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ERROR_PUBLIC_H_MODULE */
diff --git a/lib/fse.c b/lib/fse.c
index 38e40a5..e74c1e8 100644
--- a/lib/fse.c
+++ b/lib/fse.c
@@ -34,10 +34,10 @@
 
 #ifndef FSE_COMMONDEFS_ONLY
 
-/****************************************************************
+/* **************************************************************
 *  Tuning parameters
 ****************************************************************/
-/* MEMORY_USAGE :
+/*!MEMORY_USAGE :
 *  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
 *  Increasing memory usage improves compression ratio
 *  Reduced memory usage can improve speed, due to cache effect
@@ -45,26 +45,23 @@
 #define FSE_MAX_MEMORY_USAGE 14
 #define FSE_DEFAULT_MEMORY_USAGE 13
 
-/* FSE_MAX_SYMBOL_VALUE :
+/*!FSE_MAX_SYMBOL_VALUE :
 *  Maximum symbol value authorized.
 *  Required for proper stack allocation */
 #define FSE_MAX_SYMBOL_VALUE 255
 
 
-/****************************************************************
+/* **************************************************************
 *  template functions type & suffix
 ****************************************************************/
 #define FSE_FUNCTION_TYPE BYTE
 #define FSE_FUNCTION_EXTENSION
+#define FSE_DECODE_TYPE FSE_decode_t
 
 
-/****************************************************************
-*  Byte symbol type
-****************************************************************/
 #endif   /* !FSE_COMMONDEFS_ONLY */
 
-
-/****************************************************************
+/* **************************************************************
 *  Compiler specifics
 ****************************************************************/
 #ifdef _MSC_VER    /* Visual Studio */
@@ -82,7 +79,7 @@
 #endif
 
 
-/****************************************************************
+/* **************************************************************
 *  Includes
 ****************************************************************/
 #include <stdlib.h>     /* malloc, free, qsort */
@@ -92,7 +89,7 @@
 #include "fse_static.h"
 
 
-/****************************************************************
+/* ***************************************************************
 *  Constants
 *****************************************************************/
 #define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
@@ -107,20 +104,20 @@
 #endif
 
 
-/****************************************************************
+/* **************************************************************
 *  Error Management
 ****************************************************************/
 #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
 
 
-/****************************************************************
+/* **************************************************************
 *  Complex types
 ****************************************************************/
 typedef U32 CTable_max_t[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
 typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
 
 
-/****************************************************************
+/* **************************************************************
 *  Templates
 ****************************************************************/
 /*
@@ -144,8 +141,7 @@ typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
 
 
 /* Function templates */
-size_t FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION)
-(unsigned* count, unsigned* maxSymbolValuePtr, const FSE_FUNCTION_TYPE* source, size_t sourceSize, unsigned safe)
+size_t FSE_count_generic(unsigned* count, unsigned* maxSymbolValuePtr, const FSE_FUNCTION_TYPE* source, size_t sourceSize, unsigned safe)
 {
     const FSE_FUNCTION_TYPE* ip = source;
     const FSE_FUNCTION_TYPE* const iend = ip+sourceSize;
@@ -226,7 +222,7 @@ size_t FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION)
 size_t FSE_FUNCTION_NAME(FSE_countFast, FSE_FUNCTION_EXTENSION)
 (unsigned* count, unsigned* maxSymbolValuePtr, const FSE_FUNCTION_TYPE* source, size_t sourceSize)
 {
-    return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, maxSymbolValuePtr, source, sourceSize, 0);
+    return FSE_count_generic(count, maxSymbolValuePtr, source, sourceSize, 0);
 }
 
 size_t FSE_FUNCTION_NAME(FSE_count, FSE_FUNCTION_EXTENSION)
@@ -235,25 +231,26 @@ size_t FSE_FUNCTION_NAME(FSE_count, FSE_FUNCTION_EXTENSION)
     if ((sizeof(FSE_FUNCTION_TYPE)==1) && (*maxSymbolValuePtr >= 255))
     {
         *maxSymbolValuePtr = 255;
-        return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, maxSymbolValuePtr, source, sourceSize, 0);
+        return FSE_count_generic(count, maxSymbolValuePtr, source, sourceSize, 0);
     }
-    return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, maxSymbolValuePtr, source, sourceSize, 1);
+    return FSE_count_generic(count, maxSymbolValuePtr, source, sourceSize, 1);
 }
 
 
 static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
 
-size_t FSE_FUNCTION_NAME(FSE_buildCTable, FSE_FUNCTION_EXTENSION)
-(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
 {
     const unsigned tableSize = 1 << tableLog;
     const unsigned tableMask = tableSize - 1;
-    U16* tableU16 = ( (U16*) ct) + 2;
-    FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) (((U32*)ct) + 1 + (tableLog ? tableSize>>1 : 1) );
+    void* const ptr = ct;
+    U16* const tableU16 = ( (U16*) ptr) + 2;
+    void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
+    FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
     const unsigned step = FSE_tableStep(tableSize);
     unsigned cumul[FSE_MAX_SYMBOL_VALUE+2];
     U32 position = 0;
-    FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* init isn't necessary, even if static analyzer complain about it */
+    FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */
     U32 highThreshold = tableSize-1;
     unsigned symbol;
     unsigned i;
@@ -269,7 +266,7 @@ size_t FSE_FUNCTION_NAME(FSE_buildCTable, FSE_FUNCTION_EXTENSION)
     cumul[0] = 0;
     for (i=1; i<=maxSymbolValue+1; i++)
     {
-        if (normalizedCounter[i-1]==-1)   /* Low prob symbol */
+        if (normalizedCounter[i-1]==-1)   /* Low proba symbol */
         {
             cumul[i] = cumul[i-1] + 1;
             tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(i-1);
@@ -287,7 +284,7 @@ size_t FSE_FUNCTION_NAME(FSE_buildCTable, FSE_FUNCTION_EXTENSION)
         {
             tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
             position = (position + step) & tableMask;
-            while (position > highThreshold) position = (position + step) & tableMask;   /* Lowprob area */
+            while (position > highThreshold) position = (position + step) & tableMask;   /* Low proba area */
         }
     }
 
@@ -296,7 +293,7 @@ size_t FSE_FUNCTION_NAME(FSE_buildCTable, FSE_FUNCTION_EXTENSION)
     /* Build table */
     for (i=0; i<tableSize; i++)
     {
-        FSE_FUNCTION_TYPE s = tableSymbol[i];   /* static analyzer doesn't understand tableSymbol is properly initialized */
+        FSE_FUNCTION_TYPE s = tableSymbol[i];   /* note : static analyzer may not understand tableSymbol is properly initialized */
         tableU16[cumul[s]++] = (U16) (tableSize+i);   /* TableU16 : sorted by symbol order; gives next state value */
     }
 
@@ -332,24 +329,22 @@ size_t FSE_FUNCTION_NAME(FSE_buildCTable, FSE_FUNCTION_EXTENSION)
 }
 
 
-#define FSE_DECODE_TYPE FSE_TYPE_NAME(FSE_decode_t, FSE_FUNCTION_EXTENSION)
-
-FSE_DTable* FSE_FUNCTION_NAME(FSE_createDTable, FSE_FUNCTION_EXTENSION) (unsigned tableLog)
+FSE_DTable* FSE_createDTable (unsigned tableLog)
 {
     if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
     return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
 }
 
-void FSE_FUNCTION_NAME(FSE_freeDTable, FSE_FUNCTION_EXTENSION) (FSE_DTable* dt)
+void FSE_freeDTable (FSE_DTable* dt)
 {
     free(dt);
 }
 
-size_t FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION)
-(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
 {
-    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
-    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (dt+1);   /* because dt is unsigned, 32-bits aligned on 32-bits */
+    FSE_DTableHeader DTableH;
+    void* const tdPtr = dt+1;   /* because dt is unsigned, 32-bits aligned on 32-bits */
+    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
     const U32 tableSize = 1 << tableLog;
     const U32 tableMask = tableSize-1;
     const U32 step = FSE_tableStep(tableSize);
@@ -365,7 +360,7 @@ size_t FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION)
     if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
 
     /* Init, lay down lowprob symbols */
-    DTableH[0].tableLog = (U16)tableLog;
+    DTableH.tableLog = (U16)tableLog;
     for (s=0; s<=maxSymbolValue; s++)
     {
         if (normalizedCounter[s]==-1)
@@ -406,7 +401,8 @@ size_t FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION)
         }
     }
 
-    DTableH->fastMode = (U16)noLarge;
+    DTableH.fastMode = (U16)noLarge;
+    memcpy(dt, &DTableH, sizeof(DTableH));
     return 0;
 }
 
@@ -890,8 +886,10 @@ size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
     const unsigned tableSize = 1 << nbBits;
     const unsigned tableMask = tableSize - 1;
     const unsigned maxSymbolValue = tableMask;
-    U16* tableU16 = ( (U16*) ct) + 2;
-    FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) ((((U32*)ct)+1) + (tableSize>>1));
+    void* const ptr = ct;
+    U16* const tableU16 = ( (U16*) ptr) + 2;
+    void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1);   /* assumption : tableLog >= 1 */
+    FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
     unsigned s;
 
     /* Sanity checks */
@@ -918,8 +916,10 @@ size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
 /* fake FSE_CTable, for rle (100% always same symbol) input */
 size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
 {
-    U16* tableU16 = ( (U16*) ct) + 2;
-    FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) ((U32*)ct + 2);
+    void* ptr = ct;
+    U16* tableU16 = ( (U16*) ptr) + 2;
+    void* FSCTptr = (U32*)ptr + 2;
+    FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) FSCTptr;
 
     /* header */
     tableU16[-2] = (U16) 0;
@@ -1076,8 +1076,10 @@ size_t FSE_compress (void* dst, size_t dstSize, const void* src, size_t srcSize)
 *********************************************************/
 size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
 {
-    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
-    FSE_decode_t* const cell = (FSE_decode_t*)(dt + 1);   /* because dt is unsigned */
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    void* dPtr = dt + 1;
+    FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
 
     DTableH->tableLog = 0;
     DTableH->fastMode = 0;
@@ -1092,8 +1094,10 @@ size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
 
 size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
 {
-    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
-    FSE_decode_t* const dinfo = (FSE_decode_t*)(dt + 1);   /* because dt is unsigned */
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    void* dPtr = dt + 1;
+    FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
     const unsigned tableSize = 1 << nbBits;
     const unsigned tableMask = tableSize - 1;
     const unsigned maxSymbolValue = tableMask;
@@ -1189,7 +1193,8 @@ size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
                             const void* cSrc, size_t cSrcSize,
                             const FSE_DTable* dt)
 {
-    const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)dt;
+    const void* ptr = dt;
+    const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
     const U32 fastMode = DTableH->fastMode;
 
     /* select fast mode (static) */
diff --git a/lib/fse.h b/lib/fse.h
index 24d1d26..dd1190f 100644
--- a/lib/fse.h
+++ b/lib/fse.h
@@ -40,20 +40,20 @@ extern "C" {
 #endif
 
 
-/******************************************
+/* *****************************************
 *  Includes
 ******************************************/
 #include <stddef.h>    /* size_t, ptrdiff_t */
 
 
-/******************************************
+/* *****************************************
 *  FSE simple functions
 ******************************************/
 size_t FSE_compress(void* dst, size_t maxDstSize,
               const void* src, size_t srcSize);
 size_t FSE_decompress(void* dst,  size_t maxDstSize,
                 const void* cSrc, size_t cSrcSize);
-/*
+/*!
 FSE_compress():
     Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
     'dst' buffer must be already allocated. Compression runs faster is maxDstSize >= FSE_compressBound(srcSize)
@@ -74,7 +74,7 @@ FSE_decompress():
 */
 
 
-/******************************************
+/* *****************************************
 *  Tool functions
 ******************************************/
 size_t FSE_compressBound(size_t size);       /* maximum compressed size */
@@ -84,10 +84,10 @@ unsigned    FSE_isError(size_t code);        /* tells if a return value is an er
 const char* FSE_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
 
 
-/******************************************
+/* *****************************************
 *  FSE advanced functions
 ******************************************/
-/*
+/*!
 FSE_compress2():
     Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
     Both parameters can be defined as '0' to mean : use default value
@@ -99,10 +99,10 @@ FSE_compress2():
 size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
 
 
-/******************************************
+/* *****************************************
 *  FSE detailed API
 ******************************************/
-/*
+/*!
 FSE_compress() does the following:
 1. count symbol occurrence from source[] into table count[]
 2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
@@ -122,7 +122,7 @@ or to save and provide normalized distribution using external method.
 
 /* *** COMPRESSION *** */
 
-/*
+/*!
 FSE_count():
    Provides the precise count of each symbol within a table 'count'
    'count' is a table of unsigned int, of minimum size (maxSymbolValuePtr[0]+1).
@@ -132,14 +132,14 @@ FSE_count():
             if FSE_isError(return), it's an error code. */
 size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const unsigned char* src, size_t srcSize);
 
-/*
+/*!
 FSE_optimalTableLog():
    dynamically downsize 'tableLog' when conditions are met.
    It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
    return : recommended tableLog (necessarily <= initial 'tableLog') */
 unsigned FSE_optimalTableLog(unsigned tableLog, size_t srcSize, unsigned maxSymbolValue);
 
-/*
+/*!
 FSE_normalizeCount():
    normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
    'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
@@ -147,13 +147,13 @@ FSE_normalizeCount():
             or an errorCode, which can be tested using FSE_isError() */
 size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
 
-/*
+/*!
 FSE_NCountWriteBound():
    Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'
    Typically useful for allocation purpose. */
 size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
 
-/*
+/*!
 FSE_writeNCount():
    Compactly save 'normalizedCounter' into 'buffer'.
    return : size of the compressed table
@@ -161,21 +161,21 @@ FSE_writeNCount():
 size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
 
 
-/*
+/*!
 Constructor and Destructor of type FSE_CTable
     Note that its size depends on 'tableLog' and 'maxSymbolValue' */
-typedef unsigned FSE_CTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+typedef unsigned FSE_CTable;   /* don't allocate that. It's only meant to be more restrictive than void* */
 FSE_CTable* FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue);
 void        FSE_freeCTable (FSE_CTable* ct);
 
-/*
+/*!
 FSE_buildCTable():
    Builds 'ct', which must be already allocated, using FSE_createCTable()
    return : 0
             or an errorCode, which can be tested using FSE_isError() */
 size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
 
-/*
+/*!
 FSE_compress_usingCTable():
    Compress 'src' using 'ct' into 'dst' which must be already allocated
    return : size of compressed data (<= maxDstSize)
@@ -183,7 +183,7 @@ FSE_compress_usingCTable():
             or an errorCode, which can be tested using FSE_isError() */
 size_t FSE_compress_usingCTable (void* dst, size_t maxDstSize, const void* src, size_t srcSize, const FSE_CTable* ct);
 
-/*
+/*!
 Tutorial :
 ----------
 The first step is to count all symbols. FSE_count() does this job very fast.
@@ -229,7 +229,7 @@ If there is an error, the function will return an ErrorCode (which can be tested
 
 /* *** DECOMPRESSION *** */
 
-/*
+/*!
 FSE_readNCount():
    Read compactly saved 'normalizedCounter' from 'rBuffer'.
    return : size read from 'rBuffer'
@@ -237,21 +237,21 @@ FSE_readNCount():
             maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
 size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
 
-/*
+/*!
 Constructor and Destructor of type FSE_DTable
     Note that its size depends on 'tableLog' */
 typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
 FSE_DTable* FSE_createDTable(unsigned tableLog);
 void        FSE_freeDTable(FSE_DTable* dt);
 
-/*
+/*!
 FSE_buildDTable():
    Builds 'dt', which must be already allocated, using FSE_createDTable()
    return : 0,
             or an errorCode, which can be tested using FSE_isError() */
 size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
 
-/*
+/*!
 FSE_decompress_usingDTable():
    Decompress compressed source 'cSrc' of size 'cSrcSize' using 'dt'
    into 'dst' which must be already allocated.
@@ -259,7 +259,7 @@ FSE_decompress_usingDTable():
             or an errorCode, which can be tested using FSE_isError() */
 size_t FSE_decompress_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
 
-/*
+/*!
 Tutorial :
 ----------
 (Note : these functions only decompress FSE-compressed blocks.
diff --git a/lib/fse_static.h b/lib/fse_static.h
index 01e2566..a881e35 100644
--- a/lib/fse_static.h
+++ b/lib/fse_static.h
@@ -40,31 +40,31 @@ extern "C" {
 #endif
 
 
-/******************************************
-*  FSE API compatible with DLL
-******************************************/
+/* *****************************************
+*  Dependencies
+*******************************************/
 #include "fse.h"
 #include "bitstream.h"
 
 
-/******************************************
+/* *****************************************
 *  Static allocation
-******************************************/
+*******************************************/
 /* FSE buffer bounds */
 #define FSE_NCOUNTBOUND 512
 #define FSE_BLOCKBOUND(size) (size + (size>>7))
 #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
 
-/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
+/* It is possible to statically allocate an FSE CTable/DTable as a table of unsigned, using the macros below */
 #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
 #define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
 
 
-/******************************************
+/* *****************************************
 *  FSE advanced API
-******************************************/
+*******************************************/
 size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const unsigned char* src, size_t srcSize);
-/* same as FSE_count(), but blindly trust that all values within src are <= maxSymbolValuePtr[0] */
+/* same as FSE_count(), but blindly trusts that all values within src are <= *maxSymbolValuePtr */
 
 size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
 /* build a fake FSE_CTable, designed to not compress an input, where each symbol uses nbBits */
@@ -79,10 +79,10 @@ size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
 /* build a fake FSE_DTable, designed to always generate the same symbolValue */
 
 
-/******************************************
+/* *****************************************
 *  FSE symbol compression API
-******************************************/
-/*
+*******************************************/
+/*!
    This API consists of small unitary functions, which highly benefit from being inlined.
    You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
    Visual seems to do it automatically.
@@ -103,7 +103,7 @@ static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsig
 
 static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr);
 
-/*
+/*!
 These functions are inner components of FSE_compress_usingCTable().
 They allow the creation of custom streams, mixing multiple tables and bit sources.
 
@@ -147,9 +147,9 @@ If there is an error, it returns an errorCode (which can be tested using FSE_isE
 */
 
 
-/******************************************
+/* *****************************************
 *  FSE symbol decompression API
-******************************************/
+*******************************************/
 typedef struct
 {
     size_t      state;
@@ -163,7 +163,7 @@ static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bi
 
 static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
 
-/*
+/*!
 Let's now decompose FSE_decompress_usingDTable() into its unitary components.
 You will decode FSE-encoded symbols from the bitStream,
 and also any other bitFields you put in, **in reverse order**.
@@ -213,16 +213,16 @@ Check also the states. There might be some symbols left there, if some high prob
 */
 
 
-/******************************************
+/* *****************************************
 *  FSE unsafe API
-******************************************/
+*******************************************/
 static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
 /* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
 
 
-/******************************************
-*  Implementation of inline functions
-******************************************/
+/* *****************************************
+*  Implementation of inlined functions
+*******************************************/
 typedef struct
 {
     int deltaFindState;
@@ -231,10 +231,12 @@ typedef struct
 
 MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct)
 {
-    const U32 tableLog = ( (const U16*) ct) [0];
+    const void* ptr = ct;
+    const U16* u16ptr = (const U16*) ptr;
+    const U32 tableLog = MEM_read16(ptr);
     statePtr->value = (ptrdiff_t)1<<tableLog;
-    statePtr->stateTable = ((const U16*) ct) + 2;
-    statePtr->symbolTT = (const void*)((const U32*)ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1));
+    statePtr->stateTable = u16ptr+2;
+    statePtr->symbolTT = ((const U32*)ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1));
     statePtr->stateLog = tableLog;
 }
 
@@ -269,7 +271,8 @@ typedef struct
 
 MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
 {
-    const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)dt;
+    const void* ptr = dt;
+    const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
     DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
     BIT_reloadDStream(bitD);
     DStatePtr->table = dt + 1;
diff --git a/lib/huff0.c b/lib/huff0.c
index 3ddb7e5..26a7639 100644
--- a/lib/huff0.c
+++ b/lib/huff0.c
@@ -32,7 +32,7 @@
     - Public forum : https://groups.google.com/forum/#!forum/lz4c
 ****************************************************************** */
 
-/****************************************************************
+/* **************************************************************
 *  Compiler specifics
 ****************************************************************/
 #if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
@@ -57,7 +57,7 @@
 #endif
 
 
-/****************************************************************
+/* **************************************************************
 *  Includes
 ****************************************************************/
 #include <stdlib.h>     /* malloc, free, qsort */
@@ -68,7 +68,7 @@
 #include "fse.h"        /* header compression */
 
 
-/****************************************************************
+/* **************************************************************
 *  Constants
 ****************************************************************/
 #define HUF_ABSOLUTEMAX_TABLELOG  16   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
@@ -80,27 +80,21 @@
 #endif
 
 
-/****************************************************************
+/* **************************************************************
 *  Error Management
 ****************************************************************/
-#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
-
-
-/******************************************
-*  Helper functions
-******************************************/
 unsigned HUF_isError(size_t code) { return ERR_isError(code); }
-
 const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
+#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
 
 
-/*********************************************************
+/* *******************************************************
 *  Huff0 : Huffman block compression
 *********************************************************/
-typedef struct HUF_CElt_s {
+struct HUF_CElt_s {
   U16  val;
   BYTE nbBits;
-} HUF_CElt ;
+};   /* typedef'd to HUF_CElt within huff0_static.h */
 
 typedef struct nodeElt_s {
     U32 count;
@@ -320,7 +314,7 @@ size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U3
     /* sort, decreasing order */
     HUF_sort(huffNode, count, maxSymbolValue);
 
-    // init for parents
+    /* init for parents */
     nonNullRank = maxSymbolValue;
     while(huffNode[nonNullRank].count == 0) nonNullRank--;
     lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
@@ -330,7 +324,7 @@ size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U3
     for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
     huffNode0[0].count = (U32)(1U<<31);
 
-    // create parents
+    /* create parents */
     while (nodeNb <= nodeRoot)
     {
         U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
@@ -340,7 +334,7 @@ size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U3
         nodeNb++;
     }
 
-    // distribute weights (unlimited tree height)
+    /* distribute weights (unlimited tree height) */
     huffNode[nodeRoot].nbBits = 0;
     for (n=nodeRoot-1; n>=STARTNODE; n--)
         huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
@@ -368,9 +362,9 @@ size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U3
             }
         }
         for (n=0; n<=maxSymbolValue; n++)
-            tree[huffNode[n].byte].nbBits = huffNode[n].nbBits;   // push nbBits per symbol, symbol order
+            tree[huffNode[n].byte].nbBits = huffNode[n].nbBits;   /* push nbBits per symbol, symbol order */
         for (n=0; n<=maxSymbolValue; n++)
-            tree[n].val = valPerRank[tree[n].nbBits]++;   // assign value within rank, symbol order
+            tree[n].val = valPerRank[tree[n].nbBits]++;   /* assign value within rank, symbol order */
     }
 
     return maxNbBits;
@@ -636,12 +630,12 @@ size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
     BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
     U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];   /* large enough for values from 0 to 16 */
     U32 tableLog = 0;
-    const BYTE* ip = (const BYTE*) src;
-    size_t iSize = ip[0];
+    size_t iSize;
     U32 nbSymbols = 0;
     U32 n;
     U32 nextRankStart;
-    HUF_DEltX2* const dt = (HUF_DEltX2*)(DTable + 1);
+    void* const dtPtr = DTable + 1;
+    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
 
     HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U16));   /* if compilation fails here, assertion is false */
     //memset(huffWeight, 0, sizeof(huffWeight));   /* is not necessary, even though some analyzer complain ... */
@@ -730,7 +724,8 @@ size_t HUF_decompress1X2_usingDTable(
     BYTE* const oend = op + dstSize;
     size_t errorCode;
     const U32 dtLog = DTable[0];
-    const HUF_DEltX2* const dt = ((const HUF_DEltX2*)DTable) +1;
+    const void* dtPtr = DTable;
+    const HUF_DEltX2* const dt = ((const HUF_DEltX2*)dtPtr)+1;
     BIT_DStream_t bitD;
     errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
     if (HUF_isError(errorCode)) return errorCode;
@@ -770,8 +765,8 @@ size_t HUF_decompress4X2_usingDTable(
         const BYTE* const istart = (const BYTE*) cSrc;
         BYTE* const ostart = (BYTE*) dst;
         BYTE* const oend = ostart + dstSize;
-
-        const HUF_DEltX2* const dt = ((const HUF_DEltX2*)DTable) +1;
+        const void* const dtPtr = DTable;
+        const HUF_DEltX2* const dt = ((const HUF_DEltX2*)dtPtr) +1;
         const U32 dtLog = DTable[0];
         size_t errorCode;
 
@@ -978,9 +973,9 @@ size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
     rankVal_t rankVal;
     U32 tableLog, maxW, sizeOfSort, nbSymbols;
     const U32 memLog = DTable[0];
-    const BYTE* ip = (const BYTE*) src;
-    size_t iSize = ip[0];
-    HUF_DEltX4* const dt = ((HUF_DEltX4*)DTable) + 1;
+    size_t iSize;
+    void* dtPtr = DTable;
+    HUF_DEltX4* const dt = ((HUF_DEltX4*)dtPtr) + 1;
 
     HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32));   /* if compilation fails here, assertion is false */
     if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
@@ -1127,7 +1122,8 @@ size_t HUF_decompress1X4_usingDTable(
     BYTE* const oend = ostart + dstSize;
 
     const U32 dtLog = DTable[0];
-    const HUF_DEltX4* const dt = ((const HUF_DEltX4*)DTable) +1;
+    const void* const dtPtr = DTable;
+    const HUF_DEltX4* const dt = ((const HUF_DEltX4*)dtPtr) +1;
     size_t errorCode;
 
     /* Init */
@@ -1170,8 +1166,8 @@ size_t HUF_decompress4X4_usingDTable(
         const BYTE* const istart = (const BYTE*) cSrc;
         BYTE* const ostart = (BYTE*) dst;
         BYTE* const oend = ostart + dstSize;
-
-        const HUF_DEltX4* const dt = ((const HUF_DEltX4*)DTable) +1;
+        const void* const dtPtr = DTable;
+        const HUF_DEltX4* const dt = ((const HUF_DEltX4*)dtPtr) +1;
         const U32 dtLog = DTable[0];
         size_t errorCode;
 
@@ -1352,8 +1348,7 @@ size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
     U32 tableLog, maxW, sizeOfSort, nbSymbols;
     rankVal_t rankVal;
     const U32 memLog = DTable[0];
-    const BYTE* ip = (const BYTE*) src;
-    size_t iSize = ip[0];
+    size_t iSize;
 
     if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
     //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzer complain ... */
@@ -1418,8 +1413,10 @@ size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
 
     /* fill tables */
     {
-        HUF_DDescX6* DDescription = (HUF_DDescX6*)(DTable+1);
-        HUF_DSeqX6* DSequence = (HUF_DSeqX6*)(DTable + 1 + ((size_t)1<<(memLog-1)));
+        void* ddPtr = DTable+1;
+        HUF_DDescX6* DDescription = (HUF_DDescX6*)ddPtr;
+        void* dsPtr = DTable + 1 + ((size_t)1<<(memLog-1));
+        HUF_DSeqX6* DSequence = (HUF_DSeqX6*)dsPtr;
         HUF_DSeqX6 DSeq;
         HUF_DDescX6 DDesc;
         DSeq.sequence = 0;
@@ -1478,8 +1475,10 @@ static U32 HUF_decodeLastSymbolsX6(void* op, const U32 maxL, BIT_DStream_t* DStr
 
 static inline size_t HUF_decodeStreamX6(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const U32* DTable, const U32 dtLog)
 {
-    const HUF_DDescX6* dd = (const HUF_DDescX6*)(DTable+1);
-    const HUF_DSeqX6* ds = (const HUF_DSeqX6*)(DTable + 1 + ((size_t)1<<(dtLog-1)));
+    const void* const ddPtr = DTable+1;
+    const HUF_DDescX6* dd = (const HUF_DDescX6*)ddPtr;
+    const void* const dsPtr = DTable + 1 + ((size_t)1<<(dtLog-1));
+    const HUF_DSeqX6* ds = (const HUF_DSeqX6*)dsPtr;
     BYTE* const pStart = p;
 
     /* up to 16 symbols at a time */
@@ -1557,8 +1556,10 @@ size_t HUF_decompress4X6_usingDTable(
         BYTE* const oend = ostart + dstSize;
 
         const U32 dtLog = DTable[0];
-        const HUF_DDescX6* dd = (const HUF_DDescX6*)(DTable+1);
-        const HUF_DSeqX6* ds = (const HUF_DSeqX6*)(DTable + 1 + ((size_t)1<<(dtLog-1)));
+        const void* const ddPtr = DTable+1;
+        const HUF_DDescX6* dd = (const HUF_DDescX6*)ddPtr;
+        const void* const dsPtr = DTable + 1 + ((size_t)1<<(dtLog-1));
+        const HUF_DSeqX6* ds = (const HUF_DSeqX6*)dsPtr;
         size_t errorCode;
 
         /* Init */
diff --git a/lib/huff0.h b/lib/huff0.h
index 613d9b9..2ebd5cf 100644
--- a/lib/huff0.h
+++ b/lib/huff0.h
@@ -40,25 +40,25 @@ extern "C" {
 #endif
 
 
-/******************************************
+/* ****************************************
 *  Dependency
 ******************************************/
 #include <stddef.h>    /* size_t */
 
 
-/******************************************
+/* ****************************************
 *  Huff0 simple functions
 ******************************************/
 size_t HUF_compress(void* dst, size_t maxDstSize,
               const void* src, size_t srcSize);
 size_t HUF_decompress(void* dst,  size_t dstSize,
                 const void* cSrc, size_t cSrcSize);
-/*
+/*!
 HUF_compress():
     Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
     'dst' buffer must be already allocated. Compression runs faster if maxDstSize >= HUF_compressBound(srcSize).
     Note : srcSize must be <= 128 KB
-    return : size of compressed data (<= maxDstSize)
+    @return : size of compressed data (<= maxDstSize)
     Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
                      if return == 1, srcData is a single repeated byte symbol (RLE compression)
                      if HUF_isError(return), compression failed (more details using HUF_getErrorName())
@@ -68,12 +68,12 @@ HUF_decompress():
     into already allocated destination buffer 'dst', of size 'dstSize'.
     'dstSize' must be the exact size of original (uncompressed) data.
     Note : in contrast with FSE, HUF_decompress can regenerate RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, because it knows size to regenerate.
-    return : size of regenerated data (== dstSize)
-             or an error code, which can be tested using HUF_isError()
+    @return : size of regenerated data (== dstSize)
+              or an error code, which can be tested using HUF_isError()
 */
 
 
-/******************************************
+/* ****************************************
 *  Tool functions
 ******************************************/
 size_t HUF_compressBound(size_t size);       /* maximum compressed size */
@@ -83,7 +83,7 @@ unsigned    HUF_isError(size_t code);        /* tells if a return value is an er
 const char* HUF_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
 
 
-/******************************************
+/* ****************************************
 *  Advanced functions
 ******************************************/
 size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
diff --git a/lib/huff0_static.h b/lib/huff0_static.h
index e8af19e..5df0727 100644
--- a/lib/huff0_static.h
+++ b/lib/huff0_static.h
@@ -40,13 +40,13 @@ extern "C" {
 #endif
 
 
-/******************************************
+/* ****************************************
 *  Dependency
 ******************************************/
 #include "huff0.h"
 
 
-/******************************************
+/* ****************************************
 *  Static allocation macros
 ******************************************/
 /* Huff0 buffer bounds */
@@ -64,14 +64,57 @@ extern "C" {
         unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog) * 3 / 2] = { maxTableLog }
 
 
-/******************************************
-*  Advanced functions
+/* ****************************************
+*  Advanced decompression functions
 ******************************************/
 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
 size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbols decoder */
 size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* quad-symbols decoder */
 
 
+/* ****************************************
+*  Huff0 detailed API
+******************************************/
+/*!
+HUF_compress() does the following:
+1. count symbol occurrence from source[] into table count[] using FSE_count()
+2. build Huffman table from count using HUF_buildCTable()
+3. save Huffman table to memory buffer using HUF_writeCTable()
+4. encode the data stream using HUF_compress_usingCTable()
+
+The following API allows targeting specific sub-functions for advanced tasks.
+For example, it's possible to compress several blocks using the same 'CTable',
+or to save and regenerate 'CTable' using external methods.
+*/
+
+/* FSE_count() : find it within "fse.h" */
+
+typedef struct HUF_CElt_s HUF_CElt;   /* incomplete type */
+size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits);
+size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* tree, unsigned maxSymbolValue, unsigned huffLog);
+size_t HUF_compress_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+
+
+/*!
+HUF_decompress() does the following:
+1. select the decompression algorithm (X2, X4, X6) based on pre-computed heuristics
+2. rebuild the Huffman table from its saved form, using HUF_readDTableXn()
+3. decode 1 or 4 segments in parallel using HUF_decompressSXn_usingDTable() (S = 1 or 4, Xn = X2, X4 or X6)
+
+*/
+size_t HUF_readDTableX2 (unsigned short* DTable, const void* src, size_t srcSize);
+size_t HUF_readDTableX4 (unsigned* DTable, const void* src, size_t srcSize);
+size_t HUF_readDTableX6 (unsigned* DTable, const void* src, size_t srcSize);
+
+size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
+size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbol decoder */
+size_t HUF_decompress1X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* quad-symbol decoder */
+
+size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned short* DTable);
+size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
+size_t HUF_decompress4X6_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
+
+
 #if defined (__cplusplus)
 }
 #endif
diff --git a/lib/legacy/zstd_legacy.h b/lib/legacy/zstd_legacy.h
index 308d134..b3e5eb2 100644
--- a/lib/legacy/zstd_legacy.h
+++ b/lib/legacy/zstd_legacy.h
@@ -40,8 +40,8 @@ extern "C" {
 /* *************************************
 *  Includes
 ***************************************/
-#include "mem.h"        /* MEM_STATIC */
-#include "error.h"      /* ERROR */
+#include "mem.h"            /* MEM_STATIC */
+#include "error_private.h"  /* ERROR */
 #include "zstd_v01.h"
 #include "zstd_v02.h"
 #include "zstd_v03.h"
@@ -51,7 +51,7 @@ MEM_STATIC unsigned ZSTD_isLegacy (U32 magicNumberLE)
 	switch(magicNumberLE)
 	{
 		case ZSTDv01_magicNumberLE :
-		case ZSTDv02_magicNumber : 
+		case ZSTDv02_magicNumber :
 		case ZSTDv03_magicNumber : return 1;
 		default : return 0;
 	}
diff --git a/lib/legacy/zstd_v01.c b/lib/legacy/zstd_v01.c
index ccd57d5..2fcf188 100644
--- a/lib/legacy/zstd_v01.c
+++ b/lib/legacy/zstd_v01.c
@@ -418,8 +418,9 @@ typedef struct {
 static size_t FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION)
 (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
 {
-    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
-    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (dt+1);   /* because dt is unsigned, 32-bits aligned on 32-bits */
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)(ptr) + 1;   /* because dt is unsigned, 32-bits aligned on 32-bits */
     const U32 tableSize = 1 << tableLog;
     const U32 tableMask = tableSize-1;
     const U32 step = FSE_tableStep(tableSize);
@@ -615,8 +616,9 @@ static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsi
 *********************************************************/
 static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
 {
-    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
-    FSE_decode_t* const cell = (FSE_decode_t*)(dt + 1);   /* because dt is unsigned */
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
 
     DTableH->tableLog = 0;
     DTableH->fastMode = 0;
@@ -631,8 +633,9 @@ static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
 
 static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
 {
-    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
-    FSE_decode_t* const dinfo = (FSE_decode_t*)(dt + 1);   /* because dt is unsigned */
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
     const unsigned tableSize = 1 << nbBits;
     const unsigned tableMask = tableSize - 1;
     const unsigned maxSymbolValue = tableMask;
@@ -701,7 +704,7 @@ static size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t
 }
 
 
-/* FSE_lookBits
+/*!FSE_lookBits
  * Provides next n bits from the bitContainer.
  * bitContainer is not modified (bits are still present for next read/look)
  * On 32-bits, maxNbBits==25
@@ -726,7 +729,7 @@ static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits)
 }
 
 
-/* FSE_readBits
+/*!FSE_readBits
  * Read next n bits from the bitContainer.
  * On 32-bits, don't read more than maxNbBits==25
  * On 64-bits, don't read more than maxNbBits==57
@@ -782,7 +785,8 @@ static unsigned FSE_reloadDStream(FSE_DStream_t* bitD)
 
 static void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt)
 {
-    const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)dt;
+    const void* ptr = dt;
+    const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
     DStatePtr->state = FSE_readBits(bitD, DTableH->tableLog);
     FSE_reloadDStream(bitD);
     DStatePtr->table = dt + 1;
@@ -898,11 +902,11 @@ static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
                             const void* cSrc, size_t cSrcSize,
                             const FSE_DTable* dt)
 {
-    const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)dt;
-    const U32 fastMode = DTableH->fastMode;
+    FSE_DTableHeader DTableH;
+    memcpy(&DTableH, dt, sizeof(DTableH));   /* memcpy() into local variable, to avoid strict aliasing warning */
 
     /* select fast mode (static) */
-    if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+    if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
     return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
 }
 
@@ -935,7 +939,7 @@ static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, siz
 
 
 
-/*********************************************************
+/* *******************************************************
 *  Huff0 : Huffman block compression
 *********************************************************/
 #define HUF_MAX_SYMBOL_VALUE 255
@@ -959,7 +963,7 @@ typedef struct nodeElt_s {
 } nodeElt;
 
 
-/*********************************************************
+/* *******************************************************
 *  Huff0 : Huffman block decompression
 *********************************************************/
 typedef struct {
@@ -978,7 +982,8 @@ static size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize)
     size_t oSize;
     U32 n;
     U32 nextRankStart;
-    HUF_DElt* const dt = (HUF_DElt*)(DTable + 1);
+    void* ptr = DTable+1;
+    HUF_DElt* const dt = (HUF_DElt*)ptr;
 
     FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16));   /* if compilation fails here, assertion is false */
     //memset(huffWeight, 0, sizeof(huffWeight));   /* should not be necessary, but some analyzer complain ... */
@@ -1082,7 +1087,8 @@ static size_t HUF_decompress_usingDTable(   /* -3% slower when non static */
     BYTE* const omax = op + maxDstSize;
     BYTE* const olimit = omax-15;
 
-    const HUF_DElt* const dt = (const HUF_DElt*)(DTable+1);
+    const void* ptr = DTable;
+    const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
     const U32 dtLog = DTable[0];
     size_t errorCode;
     U32 reloadStatus;
@@ -1624,6 +1630,7 @@ static size_t ZSTD_decodeLiteralsBlock(void* ctx,
             ip += litcSize;
             break;
         }
+    case bt_end:
     default:
         return (size_t)-ZSTD_ERROR_GENERIC;
     }
@@ -1988,8 +1995,8 @@ static size_t ZSTD_decompressBlock(
 {
     /* blockType == blockCompressed, srcSize is trusted */
     const BYTE* ip = (const BYTE*)src;
-    const BYTE* litPtr;
-    size_t litSize;
+    const BYTE* litPtr = NULL;
+    size_t litSize = 0;
     size_t errorCode;
 
     /* Decode literals sub-block */
diff --git a/lib/legacy/zstd_v02.c b/lib/legacy/zstd_v02.c
index 67d726f..73136f8 100644
--- a/lib/legacy/zstd_v02.c
+++ b/lib/legacy/zstd_v02.c
@@ -891,8 +891,9 @@ typedef struct
 
 MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
 {
-    const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)dt;
-    DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
+    FSE_DTableHeader DTableH;
+    memcpy(&DTableH, dt, sizeof(DTableH));
+    DStatePtr->state = BIT_readBits(bitD, DTableH.tableLog);
     BIT_reloadDStream(bitD);
     DStatePtr->table = dt + 1;
 }
@@ -1272,8 +1273,9 @@ static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3)
 static size_t FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION)
 (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
 {
-    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
-    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (dt+1);   /* because dt is unsigned, 32-bits aligned on 32-bits */
+    void* ptr = dt+1;
+    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)ptr;
+    FSE_DTableHeader DTableH;
     const U32 tableSize = 1 << tableLog;
     const U32 tableMask = tableSize-1;
     const U32 step = FSE_tableStep(tableSize);
@@ -1289,7 +1291,7 @@ static size_t FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION)
     if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
 
     /* Init, lay down lowprob symbols */
-    DTableH[0].tableLog = (U16)tableLog;
+    DTableH.tableLog = (U16)tableLog;
     for (s=0; s<=maxSymbolValue; s++)
     {
         if (normalizedCounter[s]==-1)
@@ -1330,7 +1332,8 @@ static size_t FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION)
         }
     }
 
-    DTableH->fastMode = (U16)noLarge;
+    DTableH.fastMode = (U16)noLarge;
+    memcpy(dt, &DTableH, sizeof(DTableH));   /* memcpy(), to avoid strict aliasing warnings */
     return 0;
 }
 
@@ -1468,8 +1471,9 @@ static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsi
 *********************************************************/
 static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
 {
-    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
-    FSE_decode_t* const cell = (FSE_decode_t*)(dt + 1);   /* because dt is unsigned */
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
 
     DTableH->tableLog = 0;
     DTableH->fastMode = 0;
@@ -1484,8 +1488,9 @@ static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
 
 static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
 {
-    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
-    FSE_decode_t* const dinfo = (FSE_decode_t*)(dt + 1);   /* because dt is unsigned */
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
     const unsigned tableSize = 1 << nbBits;
     const unsigned tableMask = tableSize - 1;
     const unsigned maxSymbolValue = tableMask;
@@ -1581,11 +1586,11 @@ static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
                             const void* cSrc, size_t cSrcSize,
                             const FSE_DTable* dt)
 {
-    const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)dt;
-    const U32 fastMode = DTableH->fastMode;
+    FSE_DTableHeader DTableH;
+    memcpy(&DTableH, dt, sizeof(DTableH));
 
     /* select fast mode (static) */
-    if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+    if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
     return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
 }
 
@@ -1810,7 +1815,8 @@ static size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
     U32 nbSymbols = 0;
     U32 n;
     U32 nextRankStart;
-    HUF_DEltX2* const dt = (HUF_DEltX2*)(DTable + 1);
+    void* ptr = DTable+1;
+    HUF_DEltX2* const dt = (HUF_DEltX2*)ptr;
 
     HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U16));   /* if compilation fails here, assertion is false */
     //memset(huffWeight, 0, sizeof(huffWeight));   /* is not necessary, even though some analyzer complain ... */
@@ -1903,7 +1909,8 @@ static size_t HUF_decompress4X2_usingDTable(
         BYTE* const ostart = (BYTE*) dst;
         BYTE* const oend = ostart + dstSize;
 
-        const HUF_DEltX2* const dt = ((const HUF_DEltX2*)DTable) +1;
+        const void* ptr = DTable;
+        const HUF_DEltX2* const dt = ((const HUF_DEltX2*)ptr) +1;
         const U32 dtLog = DTable[0];
         size_t errorCode;
 
@@ -2112,7 +2119,8 @@ static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
     const U32 memLog = DTable[0];
     const BYTE* ip = (const BYTE*) src;
     size_t iSize = ip[0];
-    HUF_DEltX4* const dt = ((HUF_DEltX4*)DTable) + 1;
+    void* ptr = DTable;
+    HUF_DEltX4* const dt = ((HUF_DEltX4*)ptr) + 1;
 
     HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32));   /* if compilation fails here, assertion is false */
     if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
@@ -2262,7 +2270,8 @@ static size_t HUF_decompress4X4_usingDTable(
         BYTE* const ostart = (BYTE*) dst;
         BYTE* const oend = ostart + dstSize;
 
-        const HUF_DEltX4* const dt = ((const HUF_DEltX4*)DTable) +1;
+        const void* ptr = DTable;
+        const HUF_DEltX4* const dt = ((const HUF_DEltX4*)ptr) +1;
         const U32 dtLog = DTable[0];
         size_t errorCode;
 
@@ -2510,8 +2519,10 @@ static size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
 
     /* fill tables */
     {
-        HUF_DDescX6* DDescription = (HUF_DDescX6*)(DTable+1);
-        HUF_DSeqX6* DSequence = (HUF_DSeqX6*)(DTable + 1 + ((size_t)1<<(memLog-1)));
+        void* ptr = DTable+1;
+        HUF_DDescX6* DDescription = (HUF_DDescX6*)(ptr);
+        void* dSeqStart = DTable + 1 + ((size_t)1<<(memLog-1));
+        HUF_DSeqX6* DSequence = (HUF_DSeqX6*)(dSeqStart);
         HUF_DSeqX6 DSeq;
         HUF_DDescX6 DDesc;
         DSeq.sequence = 0;
@@ -2570,8 +2581,10 @@ static U32 HUF_decodeLastSymbolsX6(void* op, const U32 maxL, BIT_DStream_t* DStr
 
 static inline size_t HUF_decodeStreamX6(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const U32* DTable, const U32 dtLog)
 {
-    const HUF_DDescX6* dd = (const HUF_DDescX6*)(DTable+1);
-    const HUF_DSeqX6* ds = (const HUF_DSeqX6*)(DTable + 1 + ((size_t)1<<(dtLog-1)));
+    const void* ddPtr = DTable+1;
+    const HUF_DDescX6* dd = (const HUF_DDescX6*)(ddPtr);
+    const void* dsPtr = DTable + 1 + ((size_t)1<<(dtLog-1));
+    const HUF_DSeqX6* ds = (const HUF_DSeqX6*)(dsPtr);
     BYTE* const pStart = p;
 
     /* up to 16 symbols at a time */
@@ -2611,8 +2624,10 @@ static size_t HUF_decompress4X6_usingDTable(
         BYTE* const oend = ostart + dstSize;
 
         const U32 dtLog = DTable[0];
-        const HUF_DDescX6* dd = (const HUF_DDescX6*)(DTable+1);
-        const HUF_DSeqX6* ds = (const HUF_DSeqX6*)(DTable + 1 + ((size_t)1<<(dtLog-1)));
+        const void* ddPtr = DTable+1;
+        const HUF_DDescX6* dd = (const HUF_DDescX6*)(ddPtr);
+        const void* dsPtr = DTable + 1 + ((size_t)1<<(dtLog-1));
+        const HUF_DSeqX6* ds = (const HUF_DSeqX6*)(dsPtr);
         size_t errorCode;
 
         /* Init */
diff --git a/lib/legacy/zstd_v03.c b/lib/legacy/zstd_v03.c
index ae67941..5c75eb4 100644
--- a/lib/legacy/zstd_v03.c
+++ b/lib/legacy/zstd_v03.c
@@ -891,8 +891,9 @@ typedef struct
 
 MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
 {
-    const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)dt;
-    DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
+    FSE_DTableHeader DTableH;
+    memcpy(&DTableH, dt, sizeof(DTableH));
+    DStatePtr->state = BIT_readBits(bitD, DTableH.tableLog);
     BIT_reloadDStream(bitD);
     DStatePtr->table = dt + 1;
 }
@@ -1272,8 +1273,9 @@ static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3)
 static size_t FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION)
 (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
 {
-    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
-    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (dt+1);   /* because dt is unsigned, 32-bits aligned on 32-bits */
+    void* ptr = dt+1;
+    FSE_DTableHeader DTableH;
+    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)ptr;
     const U32 tableSize = 1 << tableLog;
     const U32 tableMask = tableSize-1;
     const U32 step = FSE_tableStep(tableSize);
@@ -1289,7 +1291,7 @@ static size_t FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION)
     if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
 
     /* Init, lay down lowprob symbols */
-    DTableH[0].tableLog = (U16)tableLog;
+    DTableH.tableLog = (U16)tableLog;
     for (s=0; s<=maxSymbolValue; s++)
     {
         if (normalizedCounter[s]==-1)
@@ -1330,7 +1332,8 @@ static size_t FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION)
         }
     }
 
-    DTableH->fastMode = (U16)noLarge;
+    DTableH.fastMode = (U16)noLarge;
+    memcpy(dt, &DTableH, sizeof(DTableH));
     return 0;
 }
 
@@ -1468,8 +1471,9 @@ static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsi
 *********************************************************/
 static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
 {
-    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
-    FSE_decode_t* const cell = (FSE_decode_t*)(dt + 1);   /* because dt is unsigned */
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1;
 
     DTableH->tableLog = 0;
     DTableH->fastMode = 0;
@@ -1484,8 +1488,9 @@ static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
 
 static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
 {
-    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
-    FSE_decode_t* const dinfo = (FSE_decode_t*)(dt + 1);   /* because dt is unsigned */
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1;
     const unsigned tableSize = 1 << nbBits;
     const unsigned tableMask = tableSize - 1;
     const unsigned maxSymbolValue = tableMask;
@@ -1581,11 +1586,11 @@ static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
                             const void* cSrc, size_t cSrcSize,
                             const FSE_DTable* dt)
 {
-    const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)dt;
-    const U32 fastMode = DTableH->fastMode;
+    FSE_DTableHeader DTableH;
+    memcpy(&DTableH, dt, sizeof(DTableH));
 
     /* select fast mode (static) */
-    if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+    if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
     return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
 }
 
@@ -1810,7 +1815,8 @@ static size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
     U32 nbSymbols = 0;
     U32 n;
     U32 nextRankStart;
-    HUF_DEltX2* const dt = (HUF_DEltX2*)(DTable + 1);
+    void* ptr = DTable+1;
+    HUF_DEltX2* const dt = (HUF_DEltX2*)(ptr);
 
     HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U16));   /* if compilation fails here, assertion is false */
     //memset(huffWeight, 0, sizeof(huffWeight));   /* is not necessary, even though some analyzer complain ... */
@@ -1903,7 +1909,8 @@ static size_t HUF_decompress4X2_usingDTable(
         BYTE* const ostart = (BYTE*) dst;
         BYTE* const oend = ostart + dstSize;
 
-        const HUF_DEltX2* const dt = ((const HUF_DEltX2*)DTable) +1;
+        const void* ptr = DTable;
+        const HUF_DEltX2* const dt = ((const HUF_DEltX2*)ptr) +1;
         const U32 dtLog = DTable[0];
         size_t errorCode;
 
@@ -2112,7 +2119,8 @@ static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
     const U32 memLog = DTable[0];
     const BYTE* ip = (const BYTE*) src;
     size_t iSize = ip[0];
-    HUF_DEltX4* const dt = ((HUF_DEltX4*)DTable) + 1;
+    void* ptr = DTable;
+    HUF_DEltX4* const dt = ((HUF_DEltX4*)ptr) + 1;
 
     HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32));   /* if compilation fails here, assertion is false */
     if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
@@ -2262,7 +2270,8 @@ static size_t HUF_decompress4X4_usingDTable(
         BYTE* const ostart = (BYTE*) dst;
         BYTE* const oend = ostart + dstSize;
 
-        const HUF_DEltX4* const dt = ((const HUF_DEltX4*)DTable) +1;
+        const void* ptr = DTable;
+        const HUF_DEltX4* const dt = ((const HUF_DEltX4*)ptr) +1;
         const U32 dtLog = DTable[0];
         size_t errorCode;
 
@@ -2510,8 +2519,10 @@ static size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
 
     /* fill tables */
     {
-        HUF_DDescX6* DDescription = (HUF_DDescX6*)(DTable+1);
-        HUF_DSeqX6* DSequence = (HUF_DSeqX6*)(DTable + 1 + ((size_t)1<<(memLog-1)));
+        void* ddPtr = DTable+1;
+        HUF_DDescX6* DDescription = (HUF_DDescX6*)(ddPtr);
+        void* dsPtr = DTable + 1 + ((size_t)1<<(memLog-1));
+        HUF_DSeqX6* DSequence = (HUF_DSeqX6*)(dsPtr);
         HUF_DSeqX6 DSeq;
         HUF_DDescX6 DDesc;
         DSeq.sequence = 0;
@@ -2570,8 +2581,10 @@ static U32 HUF_decodeLastSymbolsX6(void* op, const U32 maxL, BIT_DStream_t* DStr
 
 static inline size_t HUF_decodeStreamX6(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const U32* DTable, const U32 dtLog)
 {
-    const HUF_DDescX6* dd = (const HUF_DDescX6*)(DTable+1);
-    const HUF_DSeqX6* ds = (const HUF_DSeqX6*)(DTable + 1 + ((size_t)1<<(dtLog-1)));
+    const void* ddPtr = DTable+1;
+    const HUF_DDescX6* dd = (const HUF_DDescX6*)(ddPtr);
+    const void* dsPtr = DTable + 1 + ((size_t)1<<(dtLog-1));
+    const HUF_DSeqX6* ds = (const HUF_DSeqX6*)(dsPtr);
     BYTE* const pStart = p;
 
     /* up to 16 symbols at a time */
@@ -2611,8 +2624,10 @@ static size_t HUF_decompress4X6_usingDTable(
         BYTE* const oend = ostart + dstSize;
 
         const U32 dtLog = DTable[0];
-        const HUF_DDescX6* dd = (const HUF_DDescX6*)(DTable+1);
-        const HUF_DSeqX6* ds = (const HUF_DSeqX6*)(DTable + 1 + ((size_t)1<<(dtLog-1)));
+        const void* ddPtr = DTable+1;
+        const HUF_DDescX6* dd = (const HUF_DDescX6*)(ddPtr);
+        const void* dsPtr = DTable + 1 + ((size_t)1<<(dtLog-1));
+        const HUF_DSeqX6* ds = (const HUF_DSeqX6*)(dsPtr);
         size_t errorCode;
 
         /* Init */
diff --git a/lib/zstd.h b/lib/zstd.h
index d6eb0b5..695d26d 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -44,7 +44,7 @@ extern "C" {
 
 
 /* ***************************************************************
-*  Tuning parameters
+*  Export parameters
 *****************************************************************/
 /*!
 *  ZSTD_DLL_EXPORT :
@@ -62,7 +62,7 @@ extern "C" {
 ***************************************/
 #define ZSTD_VERSION_MAJOR    0    /* for breaking interface changes  */
 #define ZSTD_VERSION_MINOR    4    /* for new (non-breaking) interface capabilities */
-#define ZSTD_VERSION_RELEASE  5    /* for tweaks, bug-fixes, or development */
+#define ZSTD_VERSION_RELEASE  7    /* for tweaks, bug-fixes, or development */
 #define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
 ZSTDLIB_API unsigned ZSTD_versionNumber (void);
 
@@ -107,16 +107,24 @@ ZSTDLIB_API const char* ZSTD_getErrorName(size_t code);    /** provides error co
 /* *************************************
 *  Advanced functions
 ***************************************/
+/** Compression context management */
 typedef struct ZSTD_CCtx_s ZSTD_CCtx;   /* incomplete type */
 ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
 ZSTDLIB_API size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);
 
-/**
-ZSTD_compressCCtx() :
-    Same as ZSTD_compress(), but requires a ZSTD_CCtx working space already allocated
-*/
+/** ZSTD_compressCCtx() :
+    Same as ZSTD_compress(), but requires an already allocated ZSTD_CCtx (see ZSTD_createCCtx()) */
 ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize, int compressionLevel);
 
+/** Decompression context management */
+typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void);
+ZSTDLIB_API size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
+
+/** ZSTD_decompressDCtx
+*   Same as ZSTD_decompress(), but requires an already allocated ZSTD_DCtx (see ZSTD_createDCtx()) */
+ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+
 
 #if defined (__cplusplus)
 }
diff --git a/lib/zstd_buffered.c b/lib/zstd_buffered.c
index 99148c2..48721d6 100644
--- a/lib/zstd_buffered.c
+++ b/lib/zstd_buffered.c
@@ -39,7 +39,7 @@
 *  Includes
 ***************************************/
 #include <stdlib.h>
-#include "error.h"
+#include "error_private.h"
 #include "zstd_static.h"
 #include "zstd_buffered_static.h"
 
@@ -143,7 +143,7 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, ZSTD_parameters params)
         if (zbc->outBuff == NULL) return ERROR(memory_allocation);
     }
 
-    zbc->outBuffContentSize = ZSTD_compressBegin_advanced(zbc->zc, zbc->outBuff, zbc->outBuffSize, params);
+    zbc->outBuffContentSize = ZSTD_compressBegin_advanced(zbc->zc, params);
     if (ZSTD_isError(zbc->outBuffContentSize)) return zbc->outBuffContentSize;
 
     zbc->inToCompress = 0;
@@ -243,6 +243,8 @@ static size_t ZBUFF_compressContinue_generic(ZBUFF_CCtx* zbc,
                 zbc->stage = ZBUFFcs_load;
                 break;
             }
+        default:
+            return ERROR(GENERIC);   /* impossible */
         }
     }
 
@@ -534,6 +536,7 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDstSizePt
                 notDone = 0;
                 break;
             }
+        default: return ERROR(GENERIC);   /* impossible */
         }
     }
 
diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c
index c6d81eb..d01807b 100644
--- a/lib/zstd_compress.c
+++ b/lib/zstd_compress.c
@@ -39,7 +39,6 @@
 #  define FORCE_INLINE static __forceinline
 #  include <intrin.h>                    /* For Visual 2005 */
 #  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
-#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
 #else
 #  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
 #  ifdef __GNUC__
@@ -65,7 +64,7 @@
 /* *************************************
 *  Constants
 ***************************************/
-unsigned ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
+ZSTDLIB_API unsigned ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
 static const U32 g_searchStrength = 8;
 
 
@@ -109,10 +108,14 @@ struct ZSTD_CCtx_s
     U32   dictLimit;        /* below that point, need extDict */
     U32   lowLimit;         /* below that point, no more data */
     U32   nextToUpdate;     /* index from which to continue dictionary update */
+    U32   stage;
     ZSTD_parameters params;
     void* workSpace;
     size_t workSpaceSize;
     size_t blockSize;
+    size_t hbSize;
+    char headerBuffer[ZSTD_frameHeaderSize_max];
+
 
     seqStore_t seqStore;    /* sequences storage ptrs */
     U32* hashTable;
@@ -205,6 +208,8 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
     zc->seqStore.litLengthStart =  zc->seqStore.litStart + blockSize;
     zc->seqStore.matchLengthStart = zc->seqStore.litLengthStart + (blockSize>>2);
     zc->seqStore.dumpsStart = zc->seqStore.matchLengthStart + (blockSize>>2);
+    zc->hbSize = 0;
+    zc->stage = 0;
 
     return 0;
 }
@@ -761,7 +766,8 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc,
     const BYTE* const istart = (const BYTE*)src;
     const BYTE* ip = istart;
     const BYTE* anchor = istart;
-    const BYTE* const lowest = base + zc->dictLimit;
+    const U32 lowIndex = zc->dictLimit;
+    const BYTE* const lowest = base + lowIndex;
     const BYTE* const iend = istart + srcSize;
     const BYTE* const ilimit = iend - 8;
 
@@ -772,9 +778,9 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc,
     ZSTD_resetSeqStore(seqStorePtr);
     if (ip < lowest+4)
     {
-        hashTable[ZSTD_hashPtr(lowest+1, hBits, mls)] = zc->dictLimit+1;
-        hashTable[ZSTD_hashPtr(lowest+2, hBits, mls)] = zc->dictLimit+2;
-        hashTable[ZSTD_hashPtr(lowest+3, hBits, mls)] = zc->dictLimit+3;
+        hashTable[ZSTD_hashPtr(lowest+1, hBits, mls)] = lowIndex+1;
+        hashTable[ZSTD_hashPtr(lowest+2, hBits, mls)] = lowIndex+2;
+        hashTable[ZSTD_hashPtr(lowest+3, hBits, mls)] = lowIndex+3;
         ip = lowest+4;
     }
 
@@ -784,10 +790,12 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc,
         size_t mlCode;
         size_t offset;
         const size_t h = ZSTD_hashPtr(ip, hBits, mls);
-        const BYTE* match = base + hashTable[h];
-        hashTable[h] = (U32)(ip-base);
+        const U32 matchIndex = hashTable[h];
+        const BYTE* match = base + matchIndex;
+        const U32 current = (U32)(ip-base);
+        hashTable[h] = current;   /* update hash table */
 
-        if (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))   /* note : by construction, offset_1 <= (ip-base) */
+        if (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))   /* note : by construction, offset_1 <= current */
         {
             mlCode = ZSTD_count(ip+1+MINMATCH, ip+1+MINMATCH-offset_1, iend);
             ip++;
@@ -795,7 +803,7 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc,
         }
         else
         {
-            if ( (match <= lowest) ||
+            if ( (matchIndex <= lowIndex) ||
                  (MEM_read32(match) != MEM_read32(ip)) )
             {
                 ip += ((ip-anchor) >> g_searchStrength) + 1;
@@ -816,7 +824,7 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc,
         if (ip <= ilimit)
         {
             /* Fill Table */
-            hashTable[ZSTD_hashPtr(ip-(mlCode+MINMATCH)+2, hBits, mls)] = (U32)(ip-(mlCode+MINMATCH)+2-base);  /* here because ip-(mlCode+MINMATCH)+2 could be > iend-8 without ip <= ilimit check*/
+            hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2;  /* here because current+2 could be > iend-8 */
             hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
             /* check immediate repcode */
             while ( (ip <= ilimit)
@@ -1014,7 +1022,8 @@ size_t ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
 /** ZSTD_insertBt1 : add one or multiple positions to tree
 *   @ip : assumed <= iend-8
 *   @return : nb of positions added */
-static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares)
+static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares,
+                          U32 extDict)
 {
     U32* const hashTable = zc->hashTable;
     const U32 hashLog = zc->params.hashLog;
@@ -1025,217 +1034,52 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
     U32 matchIndex  = hashTable[h];
     size_t commonLengthSmaller=0, commonLengthLarger=0;
     const BYTE* const base = zc->base;
+    const BYTE* const dictBase = zc->dictBase;
+    const U32 dictLimit = zc->dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
     const BYTE* match = base + matchIndex;
     const U32 current = (U32)(ip-base);
     const U32 btLow = btMask >= current ? 0 : current - btMask;
     U32* smallerPtr = bt + 2*(current&btMask);
-    U32* largerPtr  = bt + 2*(current&btMask) + 1;
+    U32* largerPtr  = smallerPtr + 1;
     U32 dummy32;   /* to be nullified at the end */
     const U32 windowLow = zc->lowLimit;
-
-    if ( (current-matchIndex == 1)   /* RLE */
-        && (matchIndex > windowLow)
-        && (MEM_read64(match) == MEM_read64(ip)) )
-    {
-        size_t rleLength = ZSTD_count(ip+8, match+8, iend) + 8;
-        return (U32)(rleLength - mls);
-    }
+    U32 matchEndIdx = current+8;
+    U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
+    U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
+    predictedSmall += (predictedSmall>0);
+    predictedLarge += (predictedLarge>0);
 
     hashTable[h] = current;   /* Update Hash Table */
 
     while (nbCompares-- && (matchIndex > windowLow))
     {
         U32* nextPtr = bt + 2*(matchIndex & btMask);
+        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
 
-        match = base + matchIndex;
-        if (match[matchLength] == ip[matchLength])
-            matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
-
-        if (ip+matchLength == iend)   /* equal : no way to know if inf or sup */
-            break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */
-
-        if (match[matchLength] < ip[matchLength])
-        {
-            /* match is smaller than current */
-            *smallerPtr = matchIndex;             /* update smaller idx */
-            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+        if (matchIndex == predictedSmall)
+        {   /* no need to check length, result known */
+            *smallerPtr = matchIndex;
             if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
             smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
             matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
-        }
-        else
-        {
-            /* match is larger than current */
-            *largerPtr = matchIndex;
-            commonLengthLarger = matchLength;
-            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
-            largerPtr = nextPtr;
-            matchIndex = nextPtr[0];
-        }
-    }
-
-    *smallerPtr = *largerPtr = 0;
-    return 1;
-}
-
-
-FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
-size_t ZSTD_insertBtAndFindBestMatch (
-                        ZSTD_CCtx* zc,
-                        const BYTE* const ip, const BYTE* const iend,
-                        size_t* offsetPtr,
-                        U32 nbCompares, const U32 mls)
-{
-    U32* const hashTable = zc->hashTable;
-    const U32 hashLog = zc->params.hashLog;
-    const size_t h  = ZSTD_hashPtr(ip, hashLog, mls);
-    U32* const bt   = zc->contentTable;
-    const U32 btLog = zc->params.contentLog - 1;
-    const U32 btMask= (1 << btLog) - 1;
-    U32 matchIndex  = hashTable[h];
-    size_t commonLengthSmaller=0, commonLengthLarger=0;
-    const BYTE* const base = zc->base;
-    const U32 current = (U32)(ip-base);
-    const U32 btLow = btMask >= current ? 0 : current - btMask;
-    const U32 windowLow = zc->lowLimit;
-    U32* smallerPtr = bt + 2*(current&btMask);
-    U32* largerPtr  = bt + 2*(current&btMask) + 1;
-    size_t bestLength = 0;
-    U32 dummy32;   /* to be nullified at the end */
-
-    hashTable[h] = current;   /* Update Hash Table */
-
-    while (nbCompares-- && (matchIndex > windowLow))
-    {
-        U32* nextPtr = bt + 2*(matchIndex & btMask);
-        const BYTE* match = base + matchIndex;
-        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
-
-        if (match[matchLength] == ip[matchLength])
-            matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
-
-        if (matchLength > bestLength)
-        {
-            if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit(current-matchIndex+1) - ZSTD_highbit((U32)offsetPtr[0]+1)) )
-                bestLength = matchLength, *offsetPtr = current - matchIndex;
-            if (ip+matchLength == iend)   /* equal : no way to know if inf or sup */
-                break;   /* drop, to guarantee consistency (miss a little bit of compression) */
+            predictedSmall = predictPtr[1] + (predictPtr[1]>0);
+            continue;
         }
 
-        if (match[matchLength] < ip[matchLength])
+        if (matchIndex == predictedLarge)
         {
-            /* match is smaller than current */
-            *smallerPtr = matchIndex;             /* update smaller idx */
-            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
-            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
-            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
-            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
-        }
-        else
-        {
-            /* match is larger than current */
             *largerPtr = matchIndex;
-            commonLengthLarger = matchLength;
             if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
             largerPtr = nextPtr;
             matchIndex = nextPtr[0];
+            predictedLarge = predictPtr[0] + (predictPtr[0]>0);
+            continue;
         }
-    }
-
-    *smallerPtr = *largerPtr = 0;
 
-    zc->nextToUpdate = current+1;   /* current has been inserted */
-    return bestLength;
-}
-
-
-static const BYTE* ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
-{
-    const BYTE* const base = zc->base;
-    const U32 target = (U32)(ip - base);
-    U32 idx = zc->nextToUpdate;
-
-    for( ; idx < target ; )
-        idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares);
-
-    zc->nextToUpdate = idx;
-    return base + idx;
-}
-
-
-/** Tree updater, providing best match */
-FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
-size_t ZSTD_BtFindBestMatch (
-                        ZSTD_CCtx* zc,
-                        const BYTE* const ip, const BYTE* const iLimit,
-                        size_t* offsetPtr,
-                        const U32 maxNbAttempts, const U32 mls)
-{
-    const BYTE* nextToUpdate = ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
-    if (nextToUpdate > ip) /* RLE data */
-        { *offsetPtr = 1; return ZSTD_count(ip, ip-1, iLimit); }
-    return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls);
-}
-
-
-FORCE_INLINE size_t ZSTD_BtFindBestMatch_selectMLS (
-                        ZSTD_CCtx* zc,   /* Index table will be updated */
-                        const BYTE* ip, const BYTE* const iLimit,
-                        size_t* offsetPtr,
-                        const U32 maxNbAttempts, const U32 matchLengthSearch)
-{
-    switch(matchLengthSearch)
-    {
-    default :
-    case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
-    case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
-    case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
-    }
-}
-
-
-/** ZSTD_insertBt1_extDict : add one or multiple positions to tree
-*   @ip : assumed <= iend-8
-*   @return : nb of positions added */
-static U32 ZSTD_insertBt1_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares)
-{
-    U32* const hashTable = zc->hashTable;
-    const U32 hashLog = zc->params.hashLog;
-    const size_t h  = ZSTD_hashPtr(ip, hashLog, mls);
-    U32* const bt   = zc->contentTable;
-    const U32 btLog = zc->params.contentLog - 1;
-    const U32 btMask= (1 << btLog) - 1;
-    U32 matchIndex  = hashTable[h];
-    size_t commonLengthSmaller=0, commonLengthLarger=0;
-    const BYTE* const base = zc->base;
-    const BYTE* const dictBase = zc->dictBase;
-    const U32 dictLimit = zc->dictLimit;
-    const BYTE* const dictEnd = dictBase + dictLimit;
-    const BYTE* const prefixStart = base + dictLimit;
-    const BYTE* match = base + matchIndex;
-    const U32 current = (U32)(ip-base);
-    const U32 btLow = btMask >= current ? 0 : current - btMask;
-    U32* smallerPtr = bt + 2*(current&btMask);
-    U32* largerPtr  = bt + 2*(current&btMask) + 1;
-    U32 dummy32;   /* to be nullified at the end */
-    const U32 windowLow = zc->lowLimit;
-
-    if ( (current-matchIndex == 1)   /* RLE */
-        && (MEM_read64(match) == MEM_read64(ip)) )
-    {
-        size_t rleLength = ZSTD_count(ip+8, match+8, iend) + 8;
-        return (U32)(rleLength - mls);
-    }
-
-    hashTable[h] = current;   /* Update Hash Table */
-
-    while (nbCompares-- && (matchIndex > windowLow))
-    {
-        U32* nextPtr = bt + 2*(matchIndex & btMask);
-        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
-
-        if (matchIndex+matchLength >= dictLimit)
+        if ((!extDict) || (matchIndex+matchLength >= dictLimit))
         {
             match = base + matchIndex;
             if (match[matchLength] == ip[matchLength])
@@ -1249,6 +1093,9 @@ static U32 ZSTD_insertBt1_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const U32
 				match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
         }
 
+        if (matchLength > matchEndIdx - matchIndex)
+            matchEndIdx = matchIndex + (U32)matchLength;
+
         if (ip+matchLength == iend)   /* equal : no way to know if inf or sup */
             break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */
 
@@ -1273,30 +1120,27 @@ static U32 ZSTD_insertBt1_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const U32
     }
 
     *smallerPtr = *largerPtr = 0;
-    return 1;
+    return (matchEndIdx > current + 8) ? matchEndIdx - current - 8 : 1;
 }
 
 
-static const BYTE* ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
+static void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
 {
     const BYTE* const base = zc->base;
     const U32 target = (U32)(ip - base);
     U32 idx = zc->nextToUpdate;
 
     for( ; idx < target ; )
-        idx += ZSTD_insertBt1_extDict(zc, base+idx, mls, iend, nbCompares);
-
-    zc->nextToUpdate = idx;
-    return base + idx;
+        idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0);
 }
 
-
 FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
-size_t ZSTD_insertBtAndFindBestMatch_extDict (
+size_t ZSTD_insertBtAndFindBestMatch (
                         ZSTD_CCtx* zc,
                         const BYTE* const ip, const BYTE* const iend,
                         size_t* offsetPtr,
-                        U32 nbCompares, const U32 mls)
+                        U32 nbCompares, const U32 mls,
+                        U32 extDict)
 {
     U32* const hashTable = zc->hashTable;
     const U32 hashLog = zc->params.hashLog;
@@ -1317,6 +1161,7 @@ size_t ZSTD_insertBtAndFindBestMatch_extDict (
     U32* smallerPtr = bt + 2*(current&btMask);
     U32* largerPtr  = bt + 2*(current&btMask) + 1;
     size_t bestLength = 0;
+    U32 matchEndIdx = current+8;
     U32 dummy32;   /* to be nullified at the end */
 
     hashTable[h] = current;   /* Update Hash Table */
@@ -1327,7 +1172,7 @@ size_t ZSTD_insertBtAndFindBestMatch_extDict (
         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
         const BYTE* match;
 
-        if (matchIndex+matchLength >= dictLimit)
+        if ((!extDict) || (matchIndex+matchLength >= dictLimit))
         {
             match = base + matchIndex;
             if (match[matchLength] == ip[matchLength])
@@ -1343,6 +1188,8 @@ size_t ZSTD_insertBtAndFindBestMatch_extDict (
 
         if (matchLength > bestLength)
         {
+            if (matchLength > matchEndIdx - matchIndex)
+                matchEndIdx = matchIndex + (U32)matchLength;
             if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit(current-matchIndex+1) - ZSTD_highbit((U32)offsetPtr[0]+1)) )
                 bestLength = matchLength, *offsetPtr = current - matchIndex;
             if (ip+matchLength == iend)   /* equal : no way to know if inf or sup */
@@ -1371,10 +1218,52 @@ size_t ZSTD_insertBtAndFindBestMatch_extDict (
 
     *smallerPtr = *largerPtr = 0;
 
-    zc->nextToUpdate = current+1;   /* current has been inserted */
+    zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1;
     return bestLength;
 }
 
+
+/** Tree updater, providing best match (this variant passes extDict=0 to the tree functions) */
+FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
+size_t ZSTD_BtFindBestMatch (
+                        ZSTD_CCtx* zc,
+                        const BYTE* const ip, const BYTE* const iLimit,
+                        size_t* offsetPtr,
+                        const U32 maxNbAttempts, const U32 mls)
+{
+    if (ip < zc->base + zc->nextToUpdate) return 0;   /* skipped area : ip is before nextToUpdate, report no match (0) */
+    ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);   /* insert positions from nextToUpdate up to (excluding) ip */
+    return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0);   /* last argument : extDict = 0 */
+}
+
+
+FORCE_INLINE size_t ZSTD_BtFindBestMatch_selectMLS (   /* forwards to ZSTD_BtFindBestMatch() with a literal mls value */
+                        ZSTD_CCtx* zc,   /* Index table will be updated */
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr,
+                        const U32 maxNbAttempts, const U32 matchLengthSearch)
+{
+    switch(matchLengthSearch)   /* each case passes mls as a constant to the FORCE_INLINE callee */
+    {
+    default :   /* any value other than 5 or 6 is handled like 4 */
+    case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
+    case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
+    case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
+    }
+}
+
+
+static void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)   /* inserts every position from zc->nextToUpdate up to (excluding) ip */
+{
+    const BYTE* const base = zc->base;
+    const U32 target = (U32)(ip - base);   /* index of ip relative to base : insertion stops before it */
+    U32 idx = zc->nextToUpdate;   /* first index to insert */
+
+    for( ; idx < target ; )   /* note : zc->nextToUpdate is not written back by this function */
+        idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1);   /* advance by the returned count; last argument 1 is presumably the extDict flag - confirm against ZSTD_insertBt1() */
+}
+
+
 /** Tree updater, providing best match */
 FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
 size_t ZSTD_BtFindBestMatch_extDict (
@@ -1383,10 +1272,9 @@ size_t ZSTD_BtFindBestMatch_extDict (
                         size_t* offsetPtr,
                         const U32 maxNbAttempts, const U32 mls)
 {
-    const BYTE* nextToUpdate = ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
-    if (nextToUpdate > ip) /* RLE data */
-        { *offsetPtr = 1; return ZSTD_count(ip, ip-1, iLimit); }
-    return ZSTD_insertBtAndFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls);
+    if (ip < zc->base + zc->nextToUpdate) return 0;   /* skipped area */
+    ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
+    return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1);
 }
 
 
@@ -1930,7 +1818,7 @@ static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int
 }
 
 
-size_t ZSTD_compressBlock(ZSTD_CCtx* zc, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
 {
     ZSTD_blockCompressor blockCompressor = ZSTD_selectBlockCompressor(zc->params.strategy, zc->lowLimit < zc->dictLimit);
     if (srcSize < MIN_CBLOCK_SIZE+3) return 0;   /* don't even attempt compression below a certain srcSize */
@@ -1963,12 +1851,13 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* ctxPtr,
             if (ctxPtr->dictLimit < ctxPtr->lowLimit) ctxPtr->dictLimit = ctxPtr->lowLimit;
         }
 
-        cSize = ZSTD_compressBlock(ctxPtr, op+3, maxDstSize-3, ip, blockSize);
+        cSize = ZSTD_compressBlock_internal(ctxPtr, op+3, maxDstSize-3, ip, blockSize);
         if (ZSTD_isError(cSize)) return cSize;
 
         if (cSize == 0)
         {
             cSize = ZSTD_noCompressBlock(op, maxDstSize, ip, blockSize);   /* block is not compressible */
+            if (ZSTD_isError(cSize)) return cSize;
         }
         else
         {
@@ -1989,11 +1878,23 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* ctxPtr,
 }
 
 
-size_t ZSTD_compressContinue (ZSTD_CCtx* zc,
-                                 void* dst, size_t dstSize,
-                           const void* src, size_t srcSize)
+static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc,
+                              void* dst, size_t dstSize,
+                        const void* src, size_t srcSize,
+                               U32 frame)
 {
     const BYTE* const ip = (const BYTE*) src;
+    size_t hbSize = 0;
+
+    if (frame && (zc->stage==0))
+    {
+        hbSize = zc->hbSize;
+        if (dstSize <= hbSize) return ERROR(dstSize_tooSmall);
+        zc->stage = 1;
+        memcpy(dst, zc->headerBuffer, hbSize);
+        dstSize -= hbSize;
+        dst = (char*)dst + hbSize;
+    }
 
     /* Check if blocks follow each other */
     if (src != zc->nextSrc)
@@ -2024,7 +1925,7 @@ size_t ZSTD_compressContinue (ZSTD_CCtx* zc,
         else zc->nextToUpdate -= correction;
     }
 
-    /* input-dictionary overlap */
+    /* if input and dictionary overlap : reduce dictionary (presumed modified by input) */
     if ((ip+srcSize > zc->dictBase + zc->lowLimit) && (ip < zc->dictBase + zc->dictLimit))
     {
         zc->lowLimit = (U32)(ip + srcSize - zc->dictBase);
@@ -2032,10 +1933,31 @@ size_t ZSTD_compressContinue (ZSTD_CCtx* zc,
     }
 
     zc->nextSrc = ip + srcSize;
+    {
+        size_t cSize;
+        if (frame) cSize = ZSTD_compress_generic (zc, dst, dstSize, src, srcSize);
+        else cSize = ZSTD_compressBlock_internal (zc, dst, dstSize, src, srcSize);
+        if (ZSTD_isError(cSize)) return cSize;
+        return cSize + hbSize;
+    }
+}
 
-    return ZSTD_compress_generic (zc, dst, dstSize, src, srcSize);
+
+size_t ZSTD_compressContinue (ZSTD_CCtx* zc,   /* frame-mode wrapper around ZSTD_compressContinue_internal() */
+                              void* dst, size_t dstSize,
+                        const void* src, size_t srcSize)
+{
+    return ZSTD_compressContinue_internal(zc, dst, dstSize, src, srcSize, 1);   /* frame = 1 : pending frame header is emitted at stage 0, input goes through ZSTD_compress_generic() */
+}
 
+
+size_t ZSTD_compressBlock(ZSTD_CCtx* zc, void* dst, size_t maxDstSize, const void* src, size_t srcSize)   /* block-mode entry point : no frame header is written */
+{
+    if (srcSize > BLOCKSIZE) return ERROR(srcSize_wrong);   /* input larger than BLOCKSIZE is rejected */
+    return ZSTD_compressContinue_internal(zc, dst, maxDstSize, src, srcSize, 0);   /* frame = 0 : input goes straight to ZSTD_compressBlock_internal() */
+}
+
+
 size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* src, size_t srcSize)
 {
     const BYTE* const ip = (const BYTE*) src;
@@ -2065,6 +1987,7 @@ size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* src, size_t src
 
     case ZSTD_btlazy2:
         ZSTD_updateTree(zc, iend-8, iend, 1 << zc->params.searchLog, zc->params.searchLength);
+        zc->nextToUpdate = (U32)(iend - zc->base);
         break;
 
     default:
@@ -2075,24 +1998,56 @@ size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* src, size_t src
 }
 
 
+/*! ZSTD_duplicateCCtx
+*   Duplicate an existing context @srcCCtx into another one @dstCCtx.
+*   Only works during stage 0 (i.e. before first call to ZSTD_compressContinue())
+*   @return : 0, or an error code (stage_wrong, or any error reported by ZSTD_resetCCtx_advanced()) */
+size_t ZSTD_duplicateCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx)
+{
+    const U32 contentLog = (srcCCtx->params.strategy == ZSTD_fast) ? 1 : srcCCtx->params.contentLog;
+    const size_t tableSpace = ((1 << contentLog) + (1 << srcCCtx->params.hashLog)) * sizeof(U32);
+    size_t errorCode;
+
+    if (srcCCtx->stage!=0) return ERROR(stage_wrong);
+
+    errorCode = ZSTD_resetCCtx_advanced(dstCCtx, srcCCtx->params);
+    if (ZSTD_isError(errorCode)) return errorCode;   /* reset failed : dst tables must not be written to */
+
+    /* copy tables (tableSpace bytes, starting at hashTable) */
+    memcpy(dstCCtx->hashTable, srcCCtx->hashTable, tableSpace);
+
+    /* copy frame header ; NOTE(review): stage is not copied, presumably reset above - confirm */
+    dstCCtx->hbSize = srcCCtx->hbSize;
+    memcpy(dstCCtx->headerBuffer , srcCCtx->headerBuffer, srcCCtx->hbSize);
+
+    /* copy dictionary pointers */
+    dstCCtx->nextToUpdate= srcCCtx->nextToUpdate;
+    dstCCtx->nextSrc     = srcCCtx->nextSrc;
+    dstCCtx->base        = srcCCtx->base;
+    dstCCtx->dictBase    = srcCCtx->dictBase;
+    dstCCtx->dictLimit   = srcCCtx->dictLimit;
+    dstCCtx->lowLimit    = srcCCtx->lowLimit;
+    return 0;
+}
+
+
 /*! ZSTD_compressBegin_advanced
-*   Write frame header, according to params
-*   @return : nb of bytes written */
+*   @return : 0, or an error code */
 size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* ctx,
-                                   void* dst, size_t maxDstSize,
                                    ZSTD_parameters params)
 {
     size_t errorCode;
 
     ZSTD_validateParams(&params);
 
-    if (maxDstSize < ZSTD_frameHeaderSize_max) return ERROR(dstSize_tooSmall);
     errorCode = ZSTD_resetCCtx_advanced(ctx, params);
     if (ZSTD_isError(errorCode)) return errorCode;
 
-    MEM_writeLE32(dst, ZSTD_MAGICNUMBER); /* Write Header */
-    ((BYTE*)dst)[4] = (BYTE)(params.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN);
-    return ZSTD_frameHeaderSize_min;
+    MEM_writeLE32(ctx->headerBuffer, ZSTD_MAGICNUMBER);   /* Write Header */
+    ((BYTE*)ctx->headerBuffer)[4] = (BYTE)(params.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN);
+    ctx->hbSize = ZSTD_frameHeaderSize_min;
+    ctx->stage = 0;
+
+    return 0;
 }
 
 
@@ -2111,29 +2066,38 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, U64 srcSizeHint)
 }
 
 
-size_t ZSTD_compressBegin(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, int compressionLevel)
+size_t ZSTD_compressBegin(ZSTD_CCtx* ctx, int compressionLevel)
 {
-    return ZSTD_compressBegin_advanced(ctx, dst, maxDstSize, ZSTD_getParams(compressionLevel, 0));
+    return ZSTD_compressBegin_advanced(ctx, ZSTD_getParams(compressionLevel, 0));
 }
 
 
 /*! ZSTD_compressEnd
 *   Write frame epilogue
 *   @return : nb of bytes written into dst (or an error code) */
-size_t ZSTD_compressEnd(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize)
+size_t ZSTD_compressEnd(ZSTD_CCtx* zc, void* dst, size_t maxDstSize)
 {
     BYTE* op = (BYTE*)dst;
+    size_t hbSize = 0;
 
-    /* Sanity check */
-    (void)ctx;
-    if (maxDstSize < 3) return ERROR(dstSize_tooSmall);
+    /* empty frame */
+    if (zc->stage==0)
+    {
+        hbSize = zc->hbSize;
+        if (maxDstSize <= hbSize) return ERROR(dstSize_tooSmall);
+        zc->stage = 1;
+        memcpy(dst, zc->headerBuffer, hbSize);
+        maxDstSize -= hbSize;
+        op += hbSize;
+    }
 
-    /* End of frame */
+    /* frame epilogue */
+    if (maxDstSize < 3) return ERROR(dstSize_tooSmall);
     op[0] = (BYTE)(bt_end << 6);
     op[1] = 0;
     op[2] = 0;
 
-    return 3;
+    return 3+hbSize;
 }
 
 size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
@@ -2147,10 +2111,8 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
     size_t oSize;
 
     /* Header */
-    oSize = ZSTD_compressBegin_advanced(ctx, dst, maxDstSize, params);
+    oSize = ZSTD_compressBegin_advanced(ctx, params);
     if(ZSTD_isError(oSize)) return oSize;
-    op += oSize;
-    maxDstSize -= oSize;
 
     /* dictionary */
     if (dict)
@@ -2189,7 +2151,7 @@ size_t ZSTD_compress(void* dst, size_t maxDstSize, const void* src, size_t srcSi
     ZSTD_CCtx ctxBody;
     memset(&ctxBody, 0, sizeof(ctxBody));
     result = ZSTD_compressCCtx(&ctxBody, dst, maxDstSize, src, srcSize, compressionLevel);
-    free(ctxBody.workSpace);   /* can't free ctxBody, since it's on stack; free heap content */
+    free(ctxBody.workSpace);   /* can't free ctxBody, since it's on stack; just free heap content */
     return result;
 }
 
diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c
index 3431e32..4a026df 100644
--- a/lib/zstd_decompress.c
+++ b/lib/zstd_decompress.c
@@ -35,20 +35,19 @@
 *****************************************************************/
 /*!
  * HEAPMODE :
- * Select how default compression functions will allocate memory for their hash table,
- * in memory stack (0, fastest), or in memory heap (1, requires malloc())
- * Note that compression context is fairly large, as a consequence heap memory is recommended.
+ * Select how default decompression function ZSTD_decompress() will allocate memory,
+ * in memory stack (0), or in memory heap (1, requires malloc())
  */
 #ifndef ZSTD_HEAPMODE
 #  define ZSTD_HEAPMODE 1
-#endif /* ZSTD_HEAPMODE */
+#endif
 
 /*!
 *  LEGACY_SUPPORT :
-*  ZSTD_decompress() can decode older formats (starting from zstd 0.1+)
+*  ZSTD_decompress() can decode older formats (v0.1+) if set to 1
 */
 #ifndef ZSTD_LEGACY_SUPPORT
-#  define ZSTD_LEGACY_SUPPORT 1
+#  define ZSTD_LEGACY_SUPPORT 0
 #endif
 
 
@@ -658,8 +657,19 @@ static size_t ZSTD_decompressSequences(
 }
 
 
-static size_t ZSTD_decompressBlock(
-                            ZSTD_DCtx* dctx,
+static void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)   /* re-anchors dctx when dst does not directly follow the previous output */
+{
+    if (dst != dctx->previousDstEnd)   /* not contiguous */
+    {
+        dctx->dictEnd = dctx->previousDstEnd;   /* end of the previous segment is recorded as dictEnd */
+        dctx->vBase = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));   /* dst minus the length (previousDstEnd - base) of the previous segment */
+        dctx->base = dst;   /* new segment starts at dst */
+        dctx->previousDstEnd = dst;   /* base and previousDstEnd now both point at dst */
+    }
+}
+
+
+static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
                             void* dst, size_t maxDstSize,
                       const void* src, size_t srcSize)
 {
@@ -676,6 +686,15 @@ static size_t ZSTD_decompressBlock(
 }
 
 
+size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,   /* block-level entry point declared in zstd_static.h */
+                            void* dst, size_t maxDstSize,
+                      const void* src, size_t srcSize)
+{
+    ZSTD_checkContinuity(dctx, dst);   /* update dctx references first if dst is not contiguous with the previous output */
+    return ZSTD_decompressBlock_internal(dctx, dst, maxDstSize, src, srcSize);   /* then delegate to the internal block decoder */
+}
+
+
 size_t ZSTD_decompress_usingDict(ZSTD_DCtx* ctx,
                                  void* dst, size_t maxDstSize,
                                  const void* src, size_t srcSize,
@@ -736,7 +755,7 @@ size_t ZSTD_decompress_usingDict(ZSTD_DCtx* ctx,
         switch(blockProperties.blockType)
         {
         case bt_compressed:
-            decodedSize = ZSTD_decompressBlock(ctx, op, oend-op, ip, cBlockSize);
+            decodedSize = ZSTD_decompressBlock_internal(ctx, op, oend-op, ip, cBlockSize);
             break;
         case bt_raw :
             decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize);
@@ -770,8 +789,17 @@ size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const
 
 size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
 {
+#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE==1)
+    size_t regenSize;
+    ZSTD_DCtx* dctx = ZSTD_createDCtx();
+    if (dctx==NULL) return ERROR(memory_allocation);
+    regenSize = ZSTD_decompressDCtx(dctx, dst, maxDstSize, src, srcSize);
+    ZSTD_freeDCtx(dctx);
+    return regenSize;
+#else
     ZSTD_DCtx dctx;
     return ZSTD_decompressDCtx(&dctx, dst, maxDstSize, src, srcSize);
+#endif
 }
 
 
@@ -787,13 +815,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, con
 {
     /* Sanity check */
     if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
-    if (dst != ctx->previousDstEnd)   /* not contiguous */
-    {
-        ctx->dictEnd = ctx->previousDstEnd;
-        ctx->vBase = (const char*)dst - ((const char*)(ctx->previousDstEnd) - (const char*)(ctx->base));
-        ctx->base = dst;
-        ctx->previousDstEnd = dst;
-    }
+    ZSTD_checkContinuity(ctx, dst);
 
     /* Decompress : frame header; part 1 */
     switch (ctx->stage)
@@ -850,7 +872,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, con
             switch(ctx->bType)
             {
             case bt_compressed:
-                rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
+                rSize = ZSTD_decompressBlock_internal(ctx, dst, maxDstSize, src, srcSize);
                 break;
             case bt_raw :
                 rSize = ZSTD_copyRawBlock(dst, maxDstSize, src, srcSize);
@@ -875,10 +897,10 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, con
 }
 
 
-void ZSTD_decompress_insertDictionary(ZSTD_DCtx* ctx, const void* src, size_t srcSize)
+void ZSTD_decompress_insertDictionary(ZSTD_DCtx* ctx, const void* dict, size_t dictSize)
 {
     ctx->dictEnd = ctx->previousDstEnd;
-    ctx->vBase = (const char*)src - ((const char*)(ctx->previousDstEnd) - (const char*)(ctx->base));
-    ctx->base = src;
-    ctx->previousDstEnd = (const char*)src + srcSize;
+    ctx->vBase = (const char*)dict - ((const char*)(ctx->previousDstEnd) - (const char*)(ctx->base));
+    ctx->base = dict;
+    ctx->previousDstEnd = (const char*)dict + dictSize;
 }
diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h
index bddfc92..cae2cb8 100644
--- a/lib/zstd_internal.h
+++ b/lib/zstd_internal.h
@@ -41,7 +41,7 @@ extern "C" {
 *  Includes
 ***************************************/
 #include "mem.h"
-#include "error.h"
+#include "error_private.h"
 
 
 /* *************************************
diff --git a/lib/zstd_static.h b/lib/zstd_static.h
index f78d464..c60fa65 100644
--- a/lib/zstd_static.h
+++ b/lib/zstd_static.h
@@ -33,9 +33,9 @@
 #ifndef ZSTD_STATIC_H
 #define ZSTD_STATIC_H
 
-/* The objects defined into this file should be considered experimental.
- * They are not labelled stable, as their prototype may change in the future.
- * You can use them for tests, provide feedback, or if you can endure risk of future changes.
+/* The objects defined in this file shall be considered experimental.
+ * They are not considered stable, as their prototypes may change in the future.
+ * You can use them for tests, provide feedback, or if you can endure the risk of future changes.
  */
 
 #if defined (__cplusplus)
@@ -108,40 +108,33 @@ ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
                                      const void* dict,size_t dictSize,
                                            ZSTD_parameters params);
 
-/** Decompression context management */
-typedef struct ZSTD_DCtx_s ZSTD_DCtx;
-ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void);
-ZSTDLIB_API size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
-
-/** ZSTD_decompressDCtx
-*   Same as ZSTD_decompress, with pre-allocated DCtx structure */
-size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
-
 /** ZSTD_decompress_usingDict
 *   Same as ZSTD_decompressDCtx, using a Dictionary content as prefix
 *   Note : dict can be NULL, in which case, it's equivalent to ZSTD_decompressDCtx() */
-size_t ZSTD_decompress_usingDict(ZSTD_DCtx* ctx,
-                                 void* dst, size_t maxDstSize,
-                                 const void* src, size_t srcSize,
-                                 const void* dict, size_t dictSize);
+ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* ctx,
+                                             void* dst, size_t maxDstSize,
+                                       const void* src, size_t srcSize,
+                                       const void* dict,size_t dictSize);
 
 
 /* **************************************
 *  Streaming functions (direct mode)
 ****************************************/
-ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize, int compressionLevel);
-ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, ZSTD_parameters params);
+ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* ctx, ZSTD_parameters params);
+
 ZSTDLIB_API size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
+ZSTDLIB_API size_t ZSTD_duplicateCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx);
 
 ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
 ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize);
 
 /**
-  Streaming compression, direct mode (bufferless)
+  Streaming compression, synchronous mode (bufferless)
 
   A ZSTD_CCtx object is required to track streaming operations.
   Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage it.
-  A ZSTD_CCtx object can be re-used multiple times.
+  A ZSTD_CCtx object can be re-used multiple times for successive compression operations.
 
   First operation is to start a new frame.
   Use ZSTD_compressBegin().
@@ -151,15 +144,20 @@ ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t maxDstSiz
   Note that dictionary presence is a "hidden" information,
   the decoder needs to be aware that it is required for proper decoding, or decoding will fail.
 
+  If you want to compress a lot of messages using the same dictionary,
+  it can be beneficial to duplicate the compression context rather than reloading the dictionary each time.
+  In such a case, use ZSTD_duplicateCCtx(), which needs an already created ZSTD_CCtx
+  to duplicate the compression context into.
+
   Then, consume your input using ZSTD_compressContinue().
-  The interface is synchronous, so all input will be consumed.
+  The interface is synchronous, so all input will be consumed and produce a compressed output.
   You must ensure there is enough space in destination buffer to store compressed data under worst case scenario.
   Worst case evaluation is provided by ZSTD_compressBound().
 
   Finish a frame with ZSTD_compressEnd(), which will write the epilogue.
   Without it, the frame will be considered incomplete by decoders.
 
-  You can then reuse ZSTD_CCtx to compress new frames.
+  You can then reuse ZSTD_CCtx to compress some new frame.
 */
 
 
@@ -198,9 +196,36 @@ ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t ma
   It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
 
   A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
+  Context can then be reset to start a new decompression.
 */
 
 
+/* **************************************
+*  Block functions
+****************************************/
+
+/*!Block functions produce and decode raw zstd blocks, without frame metadata.
+   This saves the size of the associated headers.
+   But the user will have to save and supply the fields required to regenerate data, such as block sizes.
+
+   A few rules to respect :
+   - Uncompressed block size must be <= 128 KB
+   - Compressing or decompressing requires a context structure
+     + Use ZSTD_createXCtx() to create them
+   - It is necessary to init the context before starting
+     + compression : ZSTD_compressBegin(), which allows selection of compression level or parameters
+     + decompression : ZSTD_resetDCtx()
+     + If you compress multiple blocks without resetting, later blocks will create references to previous ones
+   - A dictionary can optionally be inserted, using ZSTD_de/compress_insertDictionary()
+   - When a block is considered not compressible enough, the result of ZSTD_compressBlock() will be zero.
+     + The user must test for such an outcome and be able to deal with uncompressed data
+     + ZSTD_decompressBlock() doesn't accept uncompressed data as input
+*/
+*/
+
+size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+
+
 /* *************************************
 *  Pre-defined compression levels
 ***************************************/
@@ -236,14 +261,14 @@ static const ZSTD_parameters ZSTD_defaultParameters[4][ZSTD_MAX_CLEVEL+1] = {
     {  0, 18, 13, 14,  1,  7, ZSTD_fast    },  /* level  0 - never used */
     {  0, 18, 14, 15,  1,  6, ZSTD_fast    },  /* level  1 */
     {  0, 18, 14, 15,  1,  5, ZSTD_fast    },  /* level  2 */
-    {  0, 18, 12, 15,  3,  7, ZSTD_greedy  },  /* level  3 */
-    {  0, 18, 13, 15,  4,  7, ZSTD_greedy  },  /* level  4 */
-    {  0, 18, 14, 15,  5,  7, ZSTD_greedy  },  /* level  5 */
-    {  0, 18, 13, 15,  4,  7, ZSTD_lazy    },  /* level  6 */
-    {  0, 18, 14, 16,  5,  7, ZSTD_lazy    },  /* level  7 */
-    {  0, 18, 15, 16,  6,  7, ZSTD_lazy    },  /* level  8 */
-    {  0, 18, 15, 15,  7,  7, ZSTD_lazy    },  /* level  9 */
-    {  0, 18, 16, 16,  7,  7, ZSTD_lazy    },  /* level 10 */
+    {  0, 18, 12, 15,  3,  4, ZSTD_greedy  },  /* level  3 */
+    {  0, 18, 13, 15,  4,  4, ZSTD_greedy  },  /* level  4 */
+    {  0, 18, 14, 15,  5,  4, ZSTD_greedy  },  /* level  5 */
+    {  0, 18, 13, 15,  4,  4, ZSTD_lazy    },  /* level  6 */
+    {  0, 18, 14, 16,  5,  4, ZSTD_lazy    },  /* level  7 */
+    {  0, 18, 15, 16,  6,  4, ZSTD_lazy    },  /* level  8 */
+    {  0, 18, 15, 15,  7,  4, ZSTD_lazy    },  /* level  9 */
+    {  0, 18, 16, 16,  7,  4, ZSTD_lazy    },  /* level 10 */
     {  0, 18, 16, 16,  8,  4, ZSTD_lazy    },  /* level 11 */
     {  0, 18, 17, 16,  8,  4, ZSTD_lazy    },  /* level 12 */
     {  0, 18, 17, 16,  9,  4, ZSTD_lazy    },  /* level 13 */
@@ -259,8 +284,8 @@ static const ZSTD_parameters ZSTD_defaultParameters[4][ZSTD_MAX_CLEVEL+1] = {
     /*    W,  C,  H,  S,  L, strat */
     { 0, 17, 12, 12,  1,  4, ZSTD_fast    },  /* level  0 - never used */
     { 0, 17, 12, 13,  1,  6, ZSTD_fast    },  /* level  1 */
-    { 0, 17, 15, 16,  1,  5, ZSTD_fast    },  /* level  2 */
-    { 0, 17, 16, 17,  1,  5, ZSTD_fast    },  /* level  3 */
+    { 0, 17, 14, 16,  1,  5, ZSTD_fast    },  /* level  2 */
+    { 0, 17, 15, 17,  1,  5, ZSTD_fast    },  /* level  3 */
     { 0, 17, 13, 15,  2,  4, ZSTD_greedy  },  /* level  4 */
     { 0, 17, 15, 17,  3,  4, ZSTD_greedy  },  /* level  5 */
     { 0, 17, 14, 17,  3,  4, ZSTD_lazy    },  /* level  6 */
@@ -283,7 +308,7 @@ static const ZSTD_parameters ZSTD_defaultParameters[4][ZSTD_MAX_CLEVEL+1] = {
     /*     W,  C,  H,  S,  L, strat */
     {  0,  0,  0,  0,  0,  0, ZSTD_fast    },  /* level  0 - never used */
     {  0, 14, 14, 14,  1,  4, ZSTD_fast    },  /* level  1 */
-    {  0, 14, 14, 16,  1,  4, ZSTD_fast    },  /* level  1 */
+    {  0, 14, 14, 16,  1,  4, ZSTD_fast    },  /* level  2 */
     {  0, 14, 14, 14,  5,  4, ZSTD_greedy  },  /* level  3 */
     {  0, 14, 14, 14,  8,  4, ZSTD_greedy  },  /* level  4 */
     {  0, 14, 11, 14,  6,  4, ZSTD_lazy    },  /* level  5 */
@@ -309,7 +334,7 @@ static const ZSTD_parameters ZSTD_defaultParameters[4][ZSTD_MAX_CLEVEL+1] = {
 /* *************************************
 *  Error management
 ***************************************/
-#include "error.h"
+#include "error_public.h"
 
 
 #if defined (__cplusplus)
diff --git a/programs/Makefile b/programs/Makefile
index 57fa87c..c64cbe6 100644
--- a/programs/Makefile
+++ b/programs/Makefile
@@ -30,13 +30,22 @@
 # fullbench32: Same as fullbench, but forced to compile in 32-bits mode
 # ##########################################################################
 
-VERSION?= 0.4.5
+# Version numbers
+LIBVER_MAJOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../lib/zstd.h`
+LIBVER_MINOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../lib/zstd.h`
+LIBVER_PATCH_SCRIPT:=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../lib/zstd.h`
+LIBVER_SCRIPT:= $(LIBVER_MAJOR_SCRIPT).$(LIBVER_MINOR_SCRIPT).$(LIBVER_PATCH_SCRIPT)
+LIBVER_MAJOR := $(shell echo $(LIBVER_MAJOR_SCRIPT))
+LIBVER_MINOR := $(shell echo $(LIBVER_MINOR_SCRIPT))
+LIBVER_PATCH := $(shell echo $(LIBVER_PATCH_SCRIPT))
+LIBVER := $(shell echo $(LIBVER_SCRIPT))
+VERSION?= $(LIBVER)
 
 DESTDIR?=
 PREFIX ?= /usr/local
 CPPFLAGS= -I../lib -DZSTD_VERSION=\"$(VERSION)\"
 CFLAGS ?= -O3  # -falign-loops=32   # not always beneficial
-CFLAGS += -std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-qual -Wcast-align -Wstrict-prototypes
+CFLAGS += -std=c99 -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wstrict-prototypes -Wundef
 FLAGS   = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MOREFLAGS)
 
 BINDIR  = $(PREFIX)/bin
diff --git a/programs/bench.c b/programs/bench.c
index b80d699..fcd674a 100644
--- a/programs/bench.c
+++ b/programs/bench.c
@@ -220,10 +220,12 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
 {
     const size_t blockSize = (g_blockSize ? g_blockSize : srcSize) + (!srcSize);   /* avoid div by 0 */
     const U32 maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles;
+    size_t largestBlockSize = 0;
     blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t));
     const size_t maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024);   /* add some room for safety */
     void* const compressedBuffer = malloc(maxCompressedSize);
     void* const resultBuffer = malloc(srcSize);
+    ZSTD_CCtx* refCtx = ZSTD_createCCtx();
     ZSTD_CCtx* ctx = ZSTD_createCCtx();
     ZSTD_DCtx* dctx = ZSTD_createDCtx();
     U64 crcOrig = XXH64(srcBuffer, srcSize, 0);
@@ -233,7 +235,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
     if (strlen(displayName)>17) displayName += strlen(displayName)-17;   /* can only display 17 characters */
 
     /* Memory allocation & restrictions */
-    if (!compressedBuffer || !resultBuffer || !blockTable || !ctx || !dctx)
+    if (!compressedBuffer || !resultBuffer || !blockTable || !refCtx || !ctx || !dctx)
         EXM_THROW(31, "not enough memory");
 
     /* Init blockTable data */
@@ -259,6 +261,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
                 cPtr += blockTable[nbBlocks].cRoom;
                 resPtr += thisBlockSize;
                 remaining -= thisBlockSize;
+                if (thisBlockSize > largestBlockSize) largestBlockSize = thisBlockSize;
             }
         }
     }
@@ -291,12 +294,27 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
             milliTime = BMK_GetMilliStart();
             while (BMK_GetMilliSpan(milliTime) < TIMELOOP)
             {
+                ZSTD_compressBegin_advanced(refCtx, ZSTD_getParams(cLevel, dictBufferSize+largestBlockSize));
+                ZSTD_compress_insertDictionary(refCtx, dictBuffer, dictBufferSize);
                 for (blockNb=0; blockNb<nbBlocks; blockNb++)
-                    blockTable[blockNb].cSize = ZSTD_compress_usingDict(ctx,
+                {
+                    ZSTD_duplicateCCtx(ctx, refCtx);
+                    size_t rSize = ZSTD_compressContinue(ctx,
+                                          blockTable[blockNb].cPtr,  blockTable[blockNb].cRoom,
+                                          blockTable[blockNb].srcPtr,blockTable[blockNb].srcSize);
+                    if (ZSTD_isError(rSize)) EXM_THROW(1, "ZSTD_compressContinue() failed : %s", ZSTD_getErrorName(rSize));
+                    blockTable[blockNb].cSize = rSize;
+                    rSize = ZSTD_compressEnd(ctx,
+                                          blockTable[blockNb].cPtr  + rSize,
+                                          blockTable[blockNb].cRoom - rSize);
+                    if (ZSTD_isError(rSize)) EXM_THROW(2, "ZSTD_compressEnd() failed : %s", ZSTD_getErrorName(rSize));
+                    blockTable[blockNb].cSize += rSize;
+                }
+                    /*blockTable[blockNb].cSize = ZSTD_compress_usingDict(ctx,
                                                               blockTable[blockNb].cPtr,  blockTable[blockNb].cRoom,
                                                               blockTable[blockNb].srcPtr,blockTable[blockNb].srcSize,
                                                               dictBuffer, dictBufferSize,
-                                                              cLevel);
+                                                              cLevel);*/
                 nbLoops++;
             }
             milliTime = BMK_GetMilliSpan(milliTime);
@@ -333,14 +351,21 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
             crcCheck = XXH64(resultBuffer, srcSize, 0);
             if (crcOrig!=crcCheck)
             {
-                unsigned u;
-                unsigned eBlockSize = (unsigned)(MIN(65536*2, blockSize));
+                size_t u;
                 DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", displayName, (unsigned)crcOrig, (unsigned)crcCheck);
                 for (u=0; u<srcSize; u++)
                 {
                     if (((const BYTE*)srcBuffer)[u] != ((const BYTE*)resultBuffer)[u])
                     {
-                        printf("Decoding error at pos %u (block %u, pos %u) \n", u, u / eBlockSize, u % eBlockSize);
+                        U32 bn;
+                        size_t bacc = 0;
+                        printf("Decoding error at pos %u ", (U32)u);
+                        for (bn = 0; bn < nbBlocks; bn++)
+                        {
+                            if (bacc + blockTable[bn].srcSize > u) break;
+                            bacc += blockTable[bn].srcSize;
+                        }
+                        printf("(block %u, pos %u) \n", bn, (U32)(u - bacc));
                         break;
                     }
                 }
@@ -358,6 +383,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
     /* clean up */
     free(compressedBuffer);
     free(resultBuffer);
+    ZSTD_freeCCtx(refCtx);
     ZSTD_freeCCtx(ctx);
     ZSTD_freeDCtx(dctx);
     return 0;
diff --git a/programs/fileio.c b/programs/fileio.c
index 3867301..0d49af2 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -225,7 +225,6 @@ static int FIO_getFiles(FILE** fileOutPtr, FILE** fileInPtr,
             if (*fileOutPtr != 0)
             {
                 /* prompt for overwrite authorization */
-                int ch = 'N';
                 fclose(*fileOutPtr);
                 DISPLAY("Warning : %s already exists \n", dstFileName);
                 if ((g_displayLevel <= 1) || (*fileInPtr == stdin))
@@ -235,11 +234,14 @@ static int FIO_getFiles(FILE** fileOutPtr, FILE** fileInPtr,
                     return 1;
                 }
                 DISPLAY("Overwrite ? (y/N) : ");
-                while((ch = getchar()) != '\n' && ch != EOF);   /* flush integrated */
-                if ((ch!='Y') && (ch!='y'))
                 {
-                    DISPLAY("No. Operation aborted : %s already exists \n", dstFileName);
-                    return 1;
+                    int ch = getchar();
+                    if ((ch!='Y') && (ch!='y'))
+                    {
+                        DISPLAY("No. Operation aborted : %s already exists \n", dstFileName);
+                        return 1;
+                    }
+                    while ((ch!=EOF) && (ch!='\n')) ch = getchar();  /* flush rest of input line */
                 }
             }
         }
diff --git a/programs/fuzzer.c b/programs/fuzzer.c
index f3fda45..4058ef2 100644
--- a/programs/fuzzer.c
+++ b/programs/fuzzer.c
@@ -185,20 +185,81 @@ static int basicUnitTests(U32 seed, double compressibility)
     DISPLAYLEVEL(4, "test%3i : decompress with 1 missing byte : ", testNb++);
     result = ZSTD_decompress(decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, compressedBuffer, cSize-1);
     if (!ZSTD_isError(result)) goto _output_error;
-    if (result != ERROR(srcSize_wrong)) goto _output_error;
+    if (result != (size_t)-ZSTD_error_srcSize_wrong) goto _output_error;
     DISPLAYLEVEL(4, "OK \n");
 
     DISPLAYLEVEL(4, "test%3i : decompress with 1 too much byte : ", testNb++);
     result = ZSTD_decompress(decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, compressedBuffer, cSize+1);
     if (!ZSTD_isError(result)) goto _output_error;
-    if (result != ERROR(srcSize_wrong)) goto _output_error;
+    if (result != (size_t)-ZSTD_error_srcSize_wrong) goto _output_error;
     DISPLAYLEVEL(4, "OK \n");
 
+    /* Dictionary and Duplication tests */
+    {
+        ZSTD_CCtx* ctxOrig = ZSTD_createCCtx();
+        ZSTD_CCtx* ctxDuplicated = ZSTD_createCCtx();
+        ZSTD_DCtx* dctx = ZSTD_createDCtx();
+        const size_t dictSize = 500;
+        size_t cSizeOrig;
+
+        DISPLAYLEVEL(4, "test%3i : load dictionary into context : ", testNb++);
+        result = ZSTD_compressBegin(ctxOrig, 2);
+        if (ZSTD_isError(result)) goto _output_error;
+        result = ZSTD_compress_insertDictionary(ctxOrig, CNBuffer, dictSize);
+        if (ZSTD_isError(result)) goto _output_error;
+        result = ZSTD_duplicateCCtx(ctxDuplicated, ctxOrig);
+        if (ZSTD_isError(result)) goto _output_error;
+        DISPLAYLEVEL(4, "OK \n");
+
+        DISPLAYLEVEL(4, "test%3i : compress with dictionary : ", testNb++);
+        cSize = 0;
+        result = ZSTD_compressContinue(ctxOrig, compressedBuffer, ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH), (const char*)CNBuffer + dictSize, COMPRESSIBLE_NOISE_LENGTH - dictSize);
+        if (ZSTD_isError(result)) goto _output_error;
+        cSize += result;
+        result = ZSTD_compressEnd(ctxOrig, (char*)compressedBuffer+cSize, ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH)-cSize);
+        if (ZSTD_isError(result)) goto _output_error;
+        cSize += result;
+        DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/COMPRESSIBLE_NOISE_LENGTH*100);
+
+        DISPLAYLEVEL(4, "test%3i : frame built with dictionary should be decompressible : ", testNb++);
+        result = ZSTD_decompress_usingDict(dctx,
+                                           decodedBuffer, COMPRESSIBLE_NOISE_LENGTH,
+                                           compressedBuffer, cSize,
+                                           CNBuffer, dictSize);
+        if (ZSTD_isError(result)) goto _output_error;
+        if (result != COMPRESSIBLE_NOISE_LENGTH - dictSize) goto _output_error;
+        ZSTD_freeCCtx(ctxOrig);   /* if ctxOrig is read, will produce segfault */
+        DISPLAYLEVEL(4, "OK \n");
+
+        DISPLAYLEVEL(4, "test%3i : compress with duplicated context : ", testNb++);
+        cSizeOrig = cSize;
+        cSize = 0;
+        result = ZSTD_compressContinue(ctxDuplicated, compressedBuffer, ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH), (const char*)CNBuffer + dictSize, COMPRESSIBLE_NOISE_LENGTH - dictSize);
+        if (ZSTD_isError(result)) goto _output_error;
+        cSize += result;
+        result = ZSTD_compressEnd(ctxDuplicated, (char*)compressedBuffer+cSize, ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH)-cSize);
+        if (ZSTD_isError(result)) goto _output_error;
+        cSize += result;
+        if (cSize != cSizeOrig) goto _output_error;   /* should be identical == have same size */
+        ZSTD_freeCCtx(ctxDuplicated);
+        DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/COMPRESSIBLE_NOISE_LENGTH*100);
+
+        DISPLAYLEVEL(4, "test%3i : frame built with duplicated context should be decompressible : ", testNb++);
+        result = ZSTD_decompress_usingDict(dctx,
+                                           decodedBuffer, COMPRESSIBLE_NOISE_LENGTH,
+                                           compressedBuffer, cSize,
+                                           CNBuffer, dictSize);
+        if (ZSTD_isError(result)) goto _output_error;
+        if (result != COMPRESSIBLE_NOISE_LENGTH - dictSize) goto _output_error;
+        ZSTD_freeDCtx(dctx);
+        DISPLAYLEVEL(4, "OK \n");
+    }
+
     /* Decompression defense tests */
     DISPLAYLEVEL(4, "test%3i : Check input length for magic number : ", testNb++);
     result = ZSTD_decompress(decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, CNBuffer, 3);
     if (!ZSTD_isError(result)) goto _output_error;
-    if (result != ERROR(srcSize_wrong)) goto _output_error;
+    if (result != (size_t)-ZSTD_error_srcSize_wrong) goto _output_error;
     DISPLAYLEVEL(4, "OK \n");
 
     DISPLAYLEVEL(4, "test%3i : Check magic Number : ", testNb++);
@@ -207,6 +268,52 @@ static int basicUnitTests(U32 seed, double compressibility)
     if (!ZSTD_isError(result)) goto _output_error;
     DISPLAYLEVEL(4, "OK \n");
 
+    /* block API tests */
+    {
+        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+        ZSTD_DCtx* const dctx = ZSTD_createDCtx();
+        const size_t blockSize = 100 KB;
+        const size_t dictSize = 16 KB;
+
+        /* basic block compression */
+        DISPLAYLEVEL(4, "test%3i : Block compression test : ", testNb++);
+        result = ZSTD_compressBegin(cctx, 5);
+        if (ZSTD_isError(result)) goto _output_error;
+        cSize = ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), CNBuffer, blockSize);
+        if (ZSTD_isError(cSize)) goto _output_error;
+        DISPLAYLEVEL(4, "OK \n");
+
+        DISPLAYLEVEL(4, "test%3i : Block decompression test : ", testNb++);
+        result = ZSTD_resetDCtx(dctx);
+        if (ZSTD_isError(result)) goto _output_error;
+        result = ZSTD_decompressBlock(dctx, decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, compressedBuffer, cSize);
+        if (ZSTD_isError(result)) goto _output_error;
+        if (result != blockSize) goto _output_error;
+        DISPLAYLEVEL(4, "OK \n");
+
+        /* dictionary block compression */
+        DISPLAYLEVEL(4, "test%3i : Dictionary Block compression test : ", testNb++);
+        result = ZSTD_compressBegin(cctx, 5);
+        if (ZSTD_isError(result)) goto _output_error;
+        result = ZSTD_compress_insertDictionary(cctx, CNBuffer, dictSize);
+        if (ZSTD_isError(result)) goto _output_error;
+        cSize = ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize, blockSize);
+        if (ZSTD_isError(cSize)) goto _output_error;
+        DISPLAYLEVEL(4, "OK \n");
+
+        DISPLAYLEVEL(4, "test%3i : Dictionary Block decompression test : ", testNb++);
+        result = ZSTD_resetDCtx(dctx);
+        if (ZSTD_isError(result)) goto _output_error;
+        ZSTD_decompress_insertDictionary(dctx, CNBuffer, dictSize);
+        result = ZSTD_decompressBlock(dctx, decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, compressedBuffer, cSize);
+        if (ZSTD_isError(result)) goto _output_error;
+        if (result != blockSize) goto _output_error;
+        DISPLAYLEVEL(4, "OK \n");
+
+        ZSTD_freeCCtx(cctx);
+        ZSTD_freeDCtx(dctx);
+    }
+
     /* long rle test */
     {
         size_t sampleSize = 0;
@@ -268,11 +375,13 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
     U32 result = 0;
     U32 testNb = 0;
     U32 coreSeed = seed, lseed = 0;
+    ZSTD_CCtx* refCtx;
     ZSTD_CCtx* ctx;
     ZSTD_DCtx* dctx;
     U32 startTime = FUZ_GetMilliStart();
 
     /* allocation */
+    refCtx = ZSTD_createCCtx();
     ctx = ZSTD_createCCtx();
     dctx= ZSTD_createDCtx();
     cNoiseBuffer[0] = (BYTE*)malloc (srcBufferSize);
@@ -284,7 +393,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
     mirrorBuffer = (BYTE*)malloc (dstBufferSize);
     cBuffer   = (BYTE*)malloc (cBufferSize);
     CHECK (!cNoiseBuffer[0] || !cNoiseBuffer[1] || !cNoiseBuffer[2] || !cNoiseBuffer[3] || !cNoiseBuffer[4]
-           || !dstBuffer || !mirrorBuffer || !cBuffer || !ctx || !dctx,
+           || !dstBuffer || !mirrorBuffer || !cBuffer || !refCtx || !ctx || !dctx,
            "Not enough memory, fuzzer tests cancelled");
 
     /* Create initial samples */
@@ -305,7 +414,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
         size_t sampleSize, sampleStart, maxTestSize, totalTestSize;
         size_t cSize, dSize, dSupSize, errorCode, totalCSize, totalGenSize;
         U32 sampleSizeLog, buffNb, cLevelMod, nbChunks, n;
-        XXH64_state_t crc64;
+        XXH64_CREATESTATE_STATIC(xxh64);
         U64 crcOrig, crcDest;
         int cLevel;
         BYTE* sampleBuffer;
@@ -447,7 +556,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
         }
 
         /* Streaming compression of scattered segments test */
-        XXH64_reset(&crc64, 0);
+        XXH64_reset(xxh64, 0);
         nbChunks = (FUZ_rand(&lseed) & 127) + 2;
         sampleSizeLog = FUZ_rand(&lseed) % maxSrcLog;
         maxTestSize = (size_t)1 << sampleSizeLog;
@@ -461,10 +570,13 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
         dict = srcBuffer + sampleStart;
         dictSize = sampleSize;
 
-        cSize = ZSTD_compressBegin(ctx, cBuffer, cBufferSize, (FUZ_rand(&lseed) % (20 - (sampleSizeLog/3))) + 1);
-        errorCode = ZSTD_compress_insertDictionary(ctx, dict, dictSize);
+        errorCode = ZSTD_compressBegin(refCtx, (FUZ_rand(&lseed) % (20 - (sampleSizeLog/3))) + 1);
+        CHECK (ZSTD_isError(errorCode), "start streaming error : %s", ZSTD_getErrorName(errorCode));
+        errorCode = ZSTD_compress_insertDictionary(refCtx, dict, dictSize);
         CHECK (ZSTD_isError(errorCode), "dictionary insertion error : %s", ZSTD_getErrorName(errorCode));
-        totalTestSize = 0;
+        errorCode = ZSTD_duplicateCCtx(ctx, refCtx);
+        CHECK (ZSTD_isError(errorCode), "context duplication error : %s", ZSTD_getErrorName(errorCode));
+        totalTestSize = 0; cSize = 0;
         for (n=0; n<nbChunks; n++)
         {
             sampleSizeLog = FUZ_rand(&lseed) % maxSampleLog;
@@ -481,14 +593,14 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
             CHECK (ZSTD_isError(errorCode), "multi-segments compression error : %s", ZSTD_getErrorName(errorCode));
             cSize += errorCode;
 
-            XXH64_update(&crc64, srcBuffer+sampleStart, sampleSize);
+            XXH64_update(xxh64, srcBuffer+sampleStart, sampleSize);
             memcpy(mirrorBuffer + totalTestSize, srcBuffer+sampleStart, sampleSize);
             totalTestSize += sampleSize;
         }
         errorCode = ZSTD_compressEnd(ctx, cBuffer+cSize, cBufferSize-cSize);
         CHECK (ZSTD_isError(errorCode), "multi-segments epilogue error : %s", ZSTD_getErrorName(errorCode));
         cSize += errorCode;
-        crcOrig = XXH64_digest(&crc64);
+        crcOrig = XXH64_digest(xxh64);
 
         /* streaming decompression test */
         errorCode = ZSTD_resetDCtx(dctx);
@@ -517,6 +629,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
     DISPLAY("\r%u fuzzer tests completed   \n", testNb-1);
 
 _cleanup:
+    ZSTD_freeCCtx(refCtx);
     ZSTD_freeCCtx(ctx);
     ZSTD_freeDCtx(dctx);
     free(cNoiseBuffer[0]);
diff --git a/programs/xxhash.c b/programs/xxhash.c
index 511d994..d33113f 100644
--- a/programs/xxhash.c
+++ b/programs/xxhash.c
@@ -1,6 +1,6 @@
 /*
 xxHash - Fast Hash algorithm
-Copyright (C) 2012-2015, Yann Collet
+Copyright (C) 2012-2016, Yann Collet
 
 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 
@@ -32,18 +32,18 @@ You can contact the author at :
 */
 
 
-/**************************************
+/* *************************************
 *  Tuning parameters
-**************************************/
-/* XXH_FORCE_MEMORY_ACCESS
+***************************************/
+/*!XXH_FORCE_MEMORY_ACCESS
  * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
  * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
  * The below switch allow to select different access method for improved performance.
  * Method 0 (default) : use `memcpy()`. Safe and portable.
  * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
  *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
- * Method 2 : direct access. This method is portable but violate C standard.
- *            It can generate buggy code on targets which generate assembly depending on alignment.
+ * Method 2 : direct access. This method doesn't depend on the compiler but violates the C standard.
+ *            It can generate buggy code on targets which do not support unaligned memory accesses.
  *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
  * See http://stackoverflow.com/a/32095106/646947 for details.
  * Prefer these methods in priority order (0 > 1 > 2)
@@ -57,14 +57,14 @@ You can contact the author at :
 #  endif
 #endif
 
-/* XXH_ACCEPT_NULL_INPUT_POINTER :
+/*!XXH_ACCEPT_NULL_INPUT_POINTER :
  * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
  * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
  * By default, this option is disabled. To enable it, uncomment below define :
  */
 /* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */
 
-/* XXH_FORCE_NATIVE_FORMAT :
+/*!XXH_FORCE_NATIVE_FORMAT :
  * By default, xxHash library provides endian-independant Hash values, based on little-endian convention.
  * Results are therefore identical for little-endian and big-endian CPU.
  * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
@@ -74,9 +74,9 @@ You can contact the author at :
  */
 #define XXH_FORCE_NATIVE_FORMAT 0
 
-/* XXH_USELESS_ALIGN_BRANCH :
+/*!XXH_USELESS_ALIGN_BRANCH :
  * This is a minor performance trick, only useful with lots of very small keys.
- * It means : don't make a test between aligned/unaligned, because performance will be the same.
+ * It means : don't check for aligned/unaligned input, because performance will be the same.
  * It saves one initial branch per hash.
  */
 #if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
@@ -84,7 +84,7 @@ You can contact the author at :
 #endif
 
 
-/**************************************
+/* *************************************
 *  Compiler Specific Options
 ***************************************/
 #ifdef _MSC_VER    /* Visual Studio */
@@ -103,10 +103,9 @@ You can contact the author at :
 #endif
 
 
-/**************************************
+/* *************************************
 *  Includes & Memory related functions
 ***************************************/
-#include "xxhash.h"
 /* Modify the local functions below should you wish to use some other memory routines */
 /* for malloc(), free() */
 #include <stdlib.h>
@@ -116,23 +115,28 @@ static void  XXH_free  (void* p)  { free(p); }
 #include <string.h>
 static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
 
+#include "xxhash.h"
 
-/**************************************
+
+/* *************************************
 *  Basic Types
 ***************************************/
-#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
-# include <stdint.h>
-  typedef uint8_t  BYTE;
-  typedef uint16_t U16;
-  typedef uint32_t U32;
-  typedef  int32_t S32;
-  typedef uint64_t U64;
-#else
-  typedef unsigned char      BYTE;
-  typedef unsigned short     U16;
-  typedef unsigned int       U32;
-  typedef   signed int       S32;
-  typedef unsigned long long U64;
+#ifndef MEM_MODULE
+# define MEM_MODULE
+# if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#   include <stdint.h>
+    typedef uint8_t  BYTE;
+    typedef uint16_t U16;
+    typedef uint32_t U32;
+    typedef  int32_t S32;
+    typedef uint64_t U64;
+#  else
+    typedef unsigned char      BYTE;
+    typedef unsigned short     U16;
+    typedef unsigned int       U32;
+    typedef   signed int       S32;
+    typedef unsigned long long U64;
+#  endif
 #endif
 
 
@@ -174,7 +178,7 @@ static U64 XXH_read64(const void* memPtr)
 #endif // XXH_FORCE_DIRECT_MEMORY_ACCESS
 
 
-/******************************************
+/* ****************************************
 *  Compiler-specific Functions and Macros
 ******************************************/
 #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
@@ -216,19 +220,19 @@ static U64 XXH_swap64 (U64 x)
 #endif
 
 
-/***************************************
+/* *************************************
 *  Architecture Macros
 ***************************************/
 typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
 
-/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example one the compiler command line */
+/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
 #ifndef XXH_CPU_LITTLE_ENDIAN
-    static const int one = 1;
-#   define XXH_CPU_LITTLE_ENDIAN   (*(const char*)(&one))
+    static const int g_one = 1;
+#   define XXH_CPU_LITTLE_ENDIAN   (*(const char*)(&g_one))
 #endif
 
 
-/*****************************
+/* ***************************
 *  Memory reads
 *****************************/
 typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
@@ -260,13 +264,13 @@ FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
 }
 
 
-/***************************************
+/* *************************************
 *  Macros
 ***************************************/
-#define XXH_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1/(!!(c)) }; }    /* use only *after* variable declarations */
+#define XXH_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1/(int)(!!(c)) }; }    /* use only *after* variable declarations */
 
 
-/***************************************
+/* *************************************
 *  Constants
 ***************************************/
 #define PRIME32_1   2654435761U
@@ -281,8 +285,10 @@ FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
 #define PRIME64_4  9650029242287828579ULL
 #define PRIME64_5  2870177450012600261ULL
 
+XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
 
-/*****************************
+
+/* ***************************
 *  Simple Hash Functions
 *****************************/
 FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
@@ -362,7 +368,7 @@ FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH
 }
 
 
-unsigned int XXH32 (const void* input, size_t len, unsigned int seed)
+XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed)
 {
 #if 0
     /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
@@ -502,7 +508,7 @@ FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH
 }
 
 
-unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
+XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
 {
 #if 0
     /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
@@ -530,12 +536,12 @@ unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed
 #endif
 }
 
-/****************************************************
+/* **************************************************
 *  Advanced Hash Functions
 ****************************************************/
 
 /*** Allocation ***/
-typedef struct
+struct XXH32_state_s
 {
     U64 total_len;
     U32 seed;
@@ -545,9 +551,9 @@ typedef struct
     U32 v4;
     U32 mem32[4];   /* defined as U32 for alignment */
     U32 memsize;
-} XXH_istate32_t;
+};   /* typedef'd to XXH32_state_t within xxhash.h */
 
-typedef struct
+struct XXH64_state_s
 {
     U64 total_len;
     U64 seed;
@@ -557,26 +563,26 @@ typedef struct
     U64 v4;
     U64 mem64[4];   /* defined as U64 for alignment */
     U32 memsize;
-} XXH_istate64_t;
+};   /* typedef'd to XXH64_state_t within xxhash.h */
 
 
-XXH32_state_t* XXH32_createState(void)
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
 {
-    XXH_STATIC_ASSERT(sizeof(XXH32_state_t) >= sizeof(XXH_istate32_t));   /* A compilation error here means XXH32_state_t is not large enough */
+    XXH_STATIC_ASSERT(sizeof(XXH32_stateBody_t) >= sizeof(XXH32_state_t));   /* A compilation error here means XXH32_stateBody_t is not large enough */
     return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
 }
-XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
+XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
 {
     XXH_free(statePtr);
     return XXH_OK;
 }
 
-XXH64_state_t* XXH64_createState(void)
+XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
 {
-    XXH_STATIC_ASSERT(sizeof(XXH64_state_t) >= sizeof(XXH_istate64_t));   /* A compilation error here means XXH64_state_t is not large enough */
+    XXH_STATIC_ASSERT(sizeof(XXH64_stateBody_t) >= sizeof(XXH64_state_t));   /* A compilation error here means XXH64_stateBody_t is not large enough */
     return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
 }
-XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
+XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
 {
     XXH_free(statePtr);
     return XXH_OK;
@@ -585,36 +591,36 @@ XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
 
 /*** Hash feed ***/
 
-XXH_errorcode XXH32_reset(XXH32_state_t* state_in, unsigned int seed)
+XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed)
 {
-    XXH_istate32_t* state = (XXH_istate32_t*) state_in;
-    state->seed = seed;
-    state->v1 = seed + PRIME32_1 + PRIME32_2;
-    state->v2 = seed + PRIME32_2;
-    state->v3 = seed + 0;
-    state->v4 = seed - PRIME32_1;
-    state->total_len = 0;
-    state->memsize = 0;
+    XXH32_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
+    memset(&state, 0, sizeof(state));
+    state.seed = seed;
+    state.v1 = seed + PRIME32_1 + PRIME32_2;
+    state.v2 = seed + PRIME32_2;
+    state.v3 = seed + 0;
+    state.v4 = seed - PRIME32_1;
+    memcpy(statePtr, &state, sizeof(state));
     return XXH_OK;
 }
 
-XXH_errorcode XXH64_reset(XXH64_state_t* state_in, unsigned long long seed)
+
+XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed)
 {
-    XXH_istate64_t* state = (XXH_istate64_t*) state_in;
-    state->seed = seed;
-    state->v1 = seed + PRIME64_1 + PRIME64_2;
-    state->v2 = seed + PRIME64_2;
-    state->v3 = seed + 0;
-    state->v4 = seed - PRIME64_1;
-    state->total_len = 0;
-    state->memsize = 0;
+    XXH64_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
+    memset(&state, 0, sizeof(state));
+    state.seed = seed;
+    state.v1 = seed + PRIME64_1 + PRIME64_2;
+    state.v2 = seed + PRIME64_2;
+    state.v3 = seed + 0;
+    state.v4 = seed - PRIME64_1;
+    memcpy(statePtr, &state, sizeof(state));
     return XXH_OK;
 }
 
 
-FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
+FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian)
 {
-    XXH_istate32_t* state = (XXH_istate32_t *) state_in;
     const BYTE* p = (const BYTE*)input;
     const BYTE* const bEnd = p + len;
 
@@ -701,7 +707,7 @@ FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const v
     return XXH_OK;
 }
 
-XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
+XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
 {
     XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
 
@@ -713,9 +719,8 @@ XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t l
 
 
 
-FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state_in, XXH_endianess endian)
+FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian)
 {
-    const XXH_istate32_t* state = (const XXH_istate32_t*) state_in;
     const BYTE * p = (const BYTE*)state->mem32;
     const BYTE* bEnd = (const BYTE*)(state->mem32) + state->memsize;
     U32 h32;
@@ -755,7 +760,7 @@ FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state_in, XXH_endiane
 }
 
 
-unsigned int XXH32_digest (const XXH32_state_t* state_in)
+XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in)
 {
     XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
 
@@ -766,9 +771,8 @@ unsigned int XXH32_digest (const XXH32_state_t* state_in)
 }
 
 
-FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
+FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian)
 {
-    XXH_istate64_t * state = (XXH_istate64_t *) state_in;
     const BYTE* p = (const BYTE*)input;
     const BYTE* const bEnd = p + len;
 
@@ -855,7 +859,7 @@ FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const v
     return XXH_OK;
 }
 
-XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
+XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
 {
     XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
 
@@ -867,9 +871,8 @@ XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t l
 
 
 
-FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state_in, XXH_endianess endian)
+FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian)
 {
-    const XXH_istate64_t * state = (const XXH_istate64_t *) state_in;
     const BYTE * p = (const BYTE*)state->mem64;
     const BYTE* bEnd = (const BYTE*)state->mem64 + state->memsize;
     U64 h64;
@@ -949,7 +952,7 @@ FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state_in, XXH_endiane
 }
 
 
-unsigned long long XXH64_digest (const XXH64_state_t* state_in)
+XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in)
 {
     XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
 
diff --git a/programs/xxhash.h b/programs/xxhash.h
index c60aa61..4b1e1dc 100644
--- a/programs/xxhash.h
+++ b/programs/xxhash.h
@@ -1,7 +1,7 @@
 /*
    xxHash - Extremely Fast Hash algorithm
    Header File
-   Copyright (C) 2012-2015, Yann Collet.
+   Copyright (C) 2012-2016, Yann Collet.
 
    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 
@@ -71,34 +71,56 @@ extern "C" {
 #endif
 
 
-/*****************************
+/* ****************************
 *  Definitions
-*****************************/
+******************************/
 #include <stddef.h>   /* size_t */
 typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
 
 
-/*****************************
-*  Namespace Emulation
-*****************************/
-/* Motivations :
+/* ****************************
+*  API modifier
+******************************/
+/*!XXH_PRIVATE_API
+*  Transforms all public symbols within `xxhash.c` into private ones.
+*  Methodology :
+*  instead of : #include "xxhash.h"
+*  do :
+*     #define XXH_PRIVATE_API
+*     #include "xxhash.c"   // note the .c , instead of .h
+*  also : don't compile and link xxhash.c separately
+*/
+#ifdef XXH_PRIVATE_API
+#  if defined(__GNUC__)
+#    define XXH_PUBLIC_API static __attribute__((unused))
+#  elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#    define XXH_PUBLIC_API static inline
+#  elif defined(_MSC_VER)
+#    define XXH_PUBLIC_API static __inline
+#  else
+#    define XXH_PUBLIC_API static   /* this version may generate warnings for unused static functions; disable the relevant warning */
+#  endif
+#else
+#  define XXH_PUBLIC_API   /* do nothing */
+#endif
 
-If you need to include xxHash into your library,
-but wish to avoid xxHash symbols to be present on your library interface
-in an effort to avoid potential name collision if another library also includes xxHash,
+/*!XXH_NAMESPACE, aka Namespace Emulation :
 
-you can use XXH_NAMESPACE, which will automatically prefix any symbol from xxHash
-with the value of XXH_NAMESPACE (so avoid to keep it NULL, and avoid numeric values).
+If you want to include _and expose_ xxHash functions from within your own library,
+but also want to avoid symbol collisions with another library which also includes xxHash,
 
-Note that no change is required within the calling program :
-it can still call xxHash functions using their regular name.
-They will be automatically translated by this header.
+you can use XXH_NAMESPACE to automatically prefix any public symbol from `xxhash.c`
+with the value of XXH_NAMESPACE (so avoid leaving it NULL, and avoid numeric values).
+
+Note that no change is required within the calling program as long as it also includes `xxhash.h` :
+regular symbol names will be automatically translated by this header.
 */
 #ifdef XXH_NAMESPACE
 #  define XXH_CAT(A,B) A##B
 #  define XXH_NAME2(A,B) XXH_CAT(A,B)
 #  define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
 #  define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
+#  define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
 #  define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
 #  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
 #  define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
@@ -112,78 +134,90 @@ They will be automatically translated by this header.
 #endif
 
 
-/*****************************
+/* *************************************
+*  Version
+***************************************/
+#define XXH_VERSION_MAJOR    0
+#define XXH_VERSION_MINOR    5
+#define XXH_VERSION_RELEASE  0
+#define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
+XXH_PUBLIC_API unsigned XXH_versionNumber (void);
+
+
+/* ****************************
 *  Simple Hash Functions
-*****************************/
+******************************/
 
-unsigned int       XXH32 (const void* input, size_t length, unsigned seed);
-unsigned long long XXH64 (const void* input, size_t length, unsigned long long seed);
+XXH_PUBLIC_API unsigned int       XXH32 (const void* input, size_t length, unsigned int seed);
+XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t length, unsigned long long seed);
 
-/*
+/*!
 XXH32() :
     Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input".
     The memory between input & input+length must be valid (allocated and read-accessible).
     "seed" can be used to alter the result predictably.
-    This function successfully passes all SMHasher tests.
     Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
 XXH64() :
     Calculate the 64-bits hash of sequence of length "len" stored at memory address "input".
-    Faster on 64-bits systems. Slower on 32-bits systems.
+    "seed" can be used to alter the result predictably.
+    This function runs faster on 64-bits systems, but slower on 32-bits systems (see benchmark).
 */
 
 
-
-/*****************************
+/* ****************************
 *  Advanced Hash Functions
-*****************************/
-typedef struct { long long ll[ 6]; } XXH32_state_t;
-typedef struct { long long ll[11]; } XXH64_state_t;
+******************************/
+typedef struct XXH32_state_s XXH32_state_t;   /* incomplete */
+typedef struct XXH64_state_s XXH64_state_t;   /* incomplete */
 
-/*
-These structures allow static allocation of XXH states.
-States must then be initialized using XXHnn_reset() before first use.
 
-If you prefer dynamic allocation, please refer to functions below.
-*/
+/*!Static allocation
+   For static linking only, do not use in the context of DLL ! */
+typedef struct { long long ll[ 6]; } XXH32_stateBody_t;
+typedef struct { long long ll[11]; } XXH64_stateBody_t;
 
-XXH32_state_t* XXH32_createState(void);
-XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
+#define XXH32_CREATESTATE_STATIC(name) XXH32_stateBody_t name##xxhbody; void* name##xxhvoid = &(name##xxhbody); XXH32_state_t* name = (XXH32_state_t*)(name##xxhvoid)   /* no final ; */
+#define XXH64_CREATESTATE_STATIC(name) XXH64_stateBody_t name##xxhbody; void* name##xxhvoid = &(name##xxhbody); XXH64_state_t* name = (XXH64_state_t*)(name##xxhvoid)   /* no final ; */
 
-XXH64_state_t* XXH64_createState(void);
-XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
 
-/*
-These functions create and release memory for XXH state.
-States must then be initialized using XXHnn_reset() before first use.
-*/
+/*!Dynamic allocation
+   To be preferred in the context of DLL */
 
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
+XXH_PUBLIC_API XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
 
-XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, unsigned seed);
-XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
-unsigned int  XXH32_digest (const XXH32_state_t* statePtr);
+XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
+XXH_PUBLIC_API XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
 
-XXH_errorcode      XXH64_reset  (XXH64_state_t* statePtr, unsigned long long seed);
-XXH_errorcode      XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
-unsigned long long XXH64_digest (const XXH64_state_t* statePtr);
 
-/*
-These functions calculate the xxHash of an input provided in multiple smaller packets,
-as opposed to an input provided as a single block.
+/* hash streaming */
+
+XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, unsigned int seed);
+XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
+XXH_PUBLIC_API unsigned int  XXH32_digest (const XXH32_state_t* statePtr);
+
+XXH_PUBLIC_API XXH_errorcode      XXH64_reset  (XXH64_state_t* statePtr, unsigned long long seed);
+XXH_PUBLIC_API XXH_errorcode      XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
+XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* statePtr);
+
+/*!
+These functions generate the xxHash of an input provided in multiple segments,
+as opposed to an input provided as a single block.
 
-XXH state space must first be allocated, using either static or dynamic method provided above.
+XXH state must first be allocated, using either static or dynamic method provided above.
 
 Start a new hash by initializing state with a seed, using XXHnn_reset().
 
 Then, feed the hash state by calling XXHnn_update() as many times as necessary.
-Obviously, input must be valid, meaning allocated and read accessible.
+Obviously, input must be valid, hence allocated and read accessible.
 The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
 
-Finally, you can produce a hash anytime, by using XXHnn_digest().
-This function returns the final nn-bits hash.
-You can nonetheless continue feeding the hash state with more input,
-and therefore get some new hashes, by calling again XXHnn_digest().
+Finally, a hash value can be produced anytime, by using XXHnn_digest().
+This function returns the nn-bits hash.
+It's nonetheless possible to continue inserting input into the hash state
+and later on generate some new hashes, by calling XXHnn_digest() again.
 
-When you are done, don't forget to free XXH state space, using typically XXHnn_freeState().
+When done, free XXH state space if it was allocated dynamically.
 */
 
 
diff --git a/programs/zbufftest.c b/programs/zbufftest.c
index ab8aa34..f9677f0 100644
--- a/programs/zbufftest.c
+++ b/programs/zbufftest.c
@@ -286,7 +286,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
         size_t maxTestSize, totalTestSize, readSize, totalCSize, genSize, totalGenSize;
         size_t errorCode;
         U32 sampleSizeLog, buffNb, n, nbChunks;
-        XXH64_state_t crc64;
+        XXH64_CREATESTATE_STATIC(xxh64);
         U64 crcOrig, crcDest;
 
         /* init */
@@ -313,7 +313,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
         srcBuffer = cNoiseBuffer[buffNb];
 
         /* Multi - segments compression test */
-        XXH64_reset(&crc64, 0);
+        XXH64_reset(xxh64, 0);
         nbChunks = (FUZ_rand(&lseed) & 127) + 2;
         sampleSizeLog = FUZ_rand(&lseed) % maxSrcLog;
         maxTestSize = (size_t)1 << sampleSizeLog;
@@ -347,7 +347,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
             errorCode = ZBUFF_compressContinue(zc, cBuffer+cSize, &genSize, srcBuffer+sampleStart, &readSize);
             CHECK (ZBUFF_isError(errorCode), "compression error : %s", ZBUFF_getErrorName(errorCode));
 
-            XXH64_update(&crc64, srcBuffer+sampleStart, readSize);
+            XXH64_update(xxh64, srcBuffer+sampleStart, readSize);
             memcpy(copyBuffer+totalTestSize, srcBuffer+sampleStart, readSize);
             cSize += genSize;
             totalTestSize += readSize;
@@ -371,7 +371,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
         CHECK (ZBUFF_isError(errorCode), "compression error : %s", ZBUFF_getErrorName(errorCode));
         CHECK (errorCode != 0, "frame epilogue not fully consumed");
         cSize += genSize;
-        crcOrig = XXH64_digest(&crc64);
+        crcOrig = XXH64_digest(xxh64);
 
         /* multi - fragments decompression test */
         ZBUFF_decompressInit(zd);
diff --git a/visual/2013/zstdlib/zstdlib.rc b/visual/2013/zstdlib/zstdlib.rc
index d5b107e..93e221d 100644
Binary files a/visual/2013/zstdlib/zstdlib.rc and b/visual/2013/zstdlib/zstdlib.rc differ
diff --git a/visual/2013/zstdlib/zstdlib.vcxproj b/visual/2013/zstdlib/zstdlib.vcxproj
index 1245955..a580048 100644
--- a/visual/2013/zstdlib/zstdlib.vcxproj
+++ b/visual/2013/zstdlib/zstdlib.vcxproj
@@ -27,7 +27,6 @@
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="..\..\..\lib\bitstream.h" />
-    <ClInclude Include="..\..\..\lib\error.h" />
     <ClInclude Include="..\..\..\lib\fse.h" />
     <ClInclude Include="..\..\..\lib\fse_static.h" />
     <ClInclude Include="..\..\..\lib\huff0.h" />
diff --git a/visual/2013/zstdlib/zstdlib.vcxproj.filters b/visual/2013/zstdlib/zstdlib.vcxproj.filters
index 9e930d7..8600c5f 100644
--- a/visual/2013/zstdlib/zstdlib.vcxproj.filters
+++ b/visual/2013/zstdlib/zstdlib.vcxproj.filters
@@ -1,80 +1,77 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup>
-    <Filter Include="Source Files">
-      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
-      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
-    </Filter>
-    <Filter Include="Header Files">
-      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
-      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
-    </Filter>
-    <Filter Include="Resource Files">
-      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
-      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
-    </Filter>
-  </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="..\..\..\lib\fse.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\huff0.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\zstd_buffered.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\zstd_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\zstd_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-  </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="..\..\..\lib\fse.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\fse_static.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\zstd.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\zstd_static.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\huff0.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\huff0_static.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="resource.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\error.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\bitstream.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\zstd_internal.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\zstd_buffered.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\zstd_buffered_static.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\mem.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-  </ItemGroup>
-  <ItemGroup>
-    <ResourceCompile Include="zstdlib.rc">
-      <Filter>Resource Files</Filter>
-    </ResourceCompile>
-  </ItemGroup>
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\lib\fse.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\lib\huff0.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\lib\zstd_buffered.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\lib\zstd_compress.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\lib\zstd_decompress.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\lib\fse.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\fse_static.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\zstd.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\zstd_static.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\huff0.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\huff0_static.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="resource.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\bitstream.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\zstd_internal.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\zstd_buffered.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\zstd_buffered_static.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\mem.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+  <ItemGroup>
+    <ResourceCompile Include="zstdlib.rc">
+      <Filter>Resource Files</Filter>
+    </ResourceCompile>
+  </ItemGroup>
 </Project>
\ No newline at end of file

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/libzstd.git



More information about the debian-med-commit mailing list