[med-svn] [libzstd] 01/14: Imported Upstream version 0.4.0

Kevin Murray daube-guest at moszumanska.debian.org
Thu Dec 17 06:13:30 UTC 2015


This is an automated email from the git hooks/post-receive script.

daube-guest pushed a commit to branch master
in repository libzstd.

commit 11b400c275f86330c34e391cdf39f70d9b03d5fa
Author: Kevin Murray <spam at kdmurray.id.au>
Date:   Tue Dec 1 13:54:16 2015 +1100

    Imported Upstream version 0.4.0
---
 .gitattributes                                  |   15 +
 .gitignore                                      |   22 +
 .travis.yml                                     |   24 +
 Makefile                                        |  102 +
 NEWS                                            |   51 +
 README.md                                       |   44 +
 images/CSpeed.png                               |  Bin 0 -> 37278 bytes
 images/DSpeed.png                               |  Bin 0 -> 9676 bytes
 lib/LICENSE                                     |   26 +
 lib/Makefile                                    |  120 +
 lib/bitstream.h                                 |  384 +++
 lib/error.h                                     |   99 +
 lib/fse.c                                       | 1229 ++++++++
 lib/fse.h                                       |  295 ++
 lib/fse_static.h                                |  310 ++
 lib/huff0.c                                     | 1718 +++++++++++
 lib/huff0.h                                     |   96 +
 lib/huff0_static.h                              |   79 +
 lib/legacy/zstd_legacy.h                        |   85 +
 lib/legacy/zstd_v01.c                           | 2171 +++++++++++++
 lib/legacy/zstd_v01.h                           |  100 +
 lib/legacy/zstd_v02.c                           | 3730 +++++++++++++++++++++++
 lib/legacy/zstd_v02.h                           |   99 +
 lib/legacy/zstd_v03.c                           | 3730 +++++++++++++++++++++++
 lib/legacy/zstd_v03.h                           |   99 +
 lib/libzstd.pc.in                               |   14 +
 lib/mem.h                                       |  289 ++
 lib/zstd.c                                      | 1462 +++++++++
 lib/zstd.h                                      |  111 +
 lib/zstd_buffered.c                             |  551 ++++
 lib/zstd_buffered.h                             |  147 +
 lib/zstd_buffered_static.h                      |   62 +
 lib/zstd_compress.c                             | 2116 +++++++++++++
 lib/zstd_decompress.c                           |  877 ++++++
 lib/zstd_internal.h                             |  122 +
 lib/zstd_static.h                               |  234 ++
 programs/.gitignore                             |   31 +
 programs/COPYING                                |  339 ++
 programs/Makefile                               |  268 ++
 programs/bench.c                                |  529 ++++
 programs/bench.h                                |   35 +
 programs/datagen.c                              |  234 ++
 programs/datagen.h                              |   40 +
 programs/datagencli.c                           |  190 ++
 programs/fileio.c                               |  419 +++
 programs/fileio.h                               |   69 +
 programs/fullbench.c                            |  590 ++++
 programs/fuzzer.c                               |  596 ++++
 programs/legacy/fileio_legacy.c                 |  345 +++
 programs/legacy/fileio_legacy.h                 |   46 +
 programs/paramgrill.c                           | 1178 +++++++
 programs/xxhash.c                               |  962 ++++++
 programs/xxhash.h                               |  192 ++
 programs/zbufftest.c                            |  600 ++++
 programs/zstd.1                                 |   78 +
 programs/zstdcli.c                              |  386 +++
 visual/2012/fullbench/fullbench.vcxproj         |  188 ++
 visual/2012/fullbench/fullbench.vcxproj.filters |   81 +
 visual/2012/fuzzer/fuzzer.vcxproj               |  191 ++
 visual/2012/fuzzer/fuzzer.vcxproj.filters       |   90 +
 visual/2012/zstd.sln                            |   46 +
 visual/2012/zstd/zstd.vcxproj                   |  198 ++
 visual/2012/zstd/zstd.vcxproj.filters           |  111 +
 63 files changed, 28645 insertions(+)

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..b9d54fb
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,15 @@
+# Set the default behavior
+* text eol=lf
+
+# Explicitly declare source files
+*.c text eol=lf
+*.h text eol=lf
+
+# Denote files that should not be modified.
+*.odt binary
+*.png binary
+# Visual Studio
+*.sln binary
+*.suo binary
+*.vcxproj* binary
+
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c4c9282
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,22 @@
+# Object files
+*.o
+*.ko
+
+# Libraries
+*.lib
+*.a
+
+# Shared objects (inc. Windows DLLs)
+*.dll
+*.so
+*.so.*
+*.dylib
+
+# Executables
+*.exe
+*.out
+*.app
+
+# Visual solution files
+*.suo
+*.user
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..83a6f23
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,24 @@
+language: c
+compiler: gcc
+script: make $ZSTD_TRAVIS_CI_ENV
+before_install:
+  - sudo apt-get update  -qq
+  - sudo apt-get install -qq gcc-arm-linux-gnueabi
+  - sudo apt-get install -qq clang
+  - sudo apt-get install -qq g++-multilib
+  - sudo apt-get install -qq gcc-multilib
+  - sudo apt-get install -qq valgrind
+
+env:
+  - ZSTD_TRAVIS_CI_ENV=travis-install
+  - ZSTD_TRAVIS_CI_ENV=clangtest  
+  - ZSTD_TRAVIS_CI_ENV=gpptest  
+  - ZSTD_TRAVIS_CI_ENV=armtest  
+  - ZSTD_TRAVIS_CI_ENV=test  
+  - ZSTD_TRAVIS_CI_ENV="-C programs test32"  
+  - ZSTD_TRAVIS_CI_ENV=usan
+  - ZSTD_TRAVIS_CI_ENV=asan
+  - ZSTD_TRAVIS_CI_ENV="-C programs valgrindTest"  
+
+matrix:
+  fast_finish: true
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..4ec4a79
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,102 @@
+# ################################################################
+# zstd - Makefile
+# Copyright (C) Yann Collet 2014-2015
+# All rights reserved.
+# 
+# BSD license
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+# 
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+# 
+# * Redistributions in binary form must reproduce the above copyright notice, this
+#   list of conditions and the following disclaimer in the documentation and/or
+#   other materials provided with the distribution.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# 
+# You can contact the author at :
+#  - zstd source repository : https://github.com/Cyan4973/zstd
+#  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+# ################################################################
+
+# Version number
+export VERSION := 0.4.0
+
+PRGDIR  = programs
+ZSTDDIR = lib
+
+# Define nul output
+ifneq (,$(filter Windows%,$(OS)))
+VOID = nul
+else
+VOID = /dev/null
+endif
+
+.PHONY: default all zstdprogram clean install uninstall travis-install test clangtest gpptest armtest usan asan uasan
+
+default: zstdprogram
+
+all: 
+	$(MAKE) -C $(ZSTDDIR) $@
+	$(MAKE) -C $(PRGDIR) $@
+
+zstdprogram:
+	$(MAKE) -C $(PRGDIR)
+
+clean:
+	@$(MAKE) -C $(ZSTDDIR) $@ > $(VOID)
+	@$(MAKE) -C $(PRGDIR) $@ > $(VOID)
+	@echo Cleaning completed
+
+
+#------------------------------------------------------------------------
+#make install is validated only for Linux, OSX, kFreeBSD and Hurd targets
+ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU))
+
+install:
+	$(MAKE) -C $(ZSTDDIR) $@
+	$(MAKE) -C $(PRGDIR) $@
+
+uninstall:
+	$(MAKE) -C $(ZSTDDIR) $@
+	$(MAKE) -C $(PRGDIR) $@
+
+travis-install:
+	sudo $(MAKE) install
+
+test:
+	$(MAKE) -C $(PRGDIR) $@
+
+clangtest: clean
+	clang -v
+	$(MAKE) all CC=clang MOREFLAGS="-Werror -Wconversion -Wno-sign-conversion"
+
+gpptest: clean
+	$(MAKE) all CC=g++ CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
+
+armtest: clean
+	$(MAKE) -C $(ZSTDDIR) -e all CC=arm-linux-gnueabi-gcc MOREFLAGS="-Werror"
+	$(MAKE) -C $(PRGDIR) -e CC=arm-linux-gnueabi-gcc MOREFLAGS="-Werror"
+
+usan: clean
+	$(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=undefined"
+
+asan: clean
+	$(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=address"
+
+uasan: clean
+	$(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=address -fsanitize=undefined"
+
+endif
diff --git a/NEWS b/NEWS
new file mode 100644
index 0000000..75c9b87
--- /dev/null
+++ b/NEWS
@@ -0,0 +1,51 @@
+v0.4.0
+Command line utility is now compatible with high compression levels 
+Removed zstdhc => merged into zstd
+Added : ZBUFF API (see zstd_buffered.h)
+Rolling buffer support
+
+v0.3.6
+small blocks params
+
+v0.3.5
+minor generic compression improvements
+
+v0.3.4
+Faster fast cLevels
+
+v0.3.3
+Small compression ratio improvement
+
+v0.3.2
+Fixed Visual Studio
+
+v0.3.1 :
+Small compression ratio improvement
+
+v0.3
+HC mode : compression levels 2-26
+
+v0.2.2
+Fix : Visual Studio 2013 & 2015 release compilation, by Christophe Chevalier
+
+v0.2.1
+Fix : Read errors, advanced fuzzer tests, by Hanno Böck 
+
+v0.2.0
+**Breaking format change**
+Faster decompression speed
+Can still decode v0.1 format
+
+v0.1.3
+fix uninitialization warning, reported by Evan Nemerson
+
+v0.1.2
+frame concatenation support
+
+v0.1.1
+fix compression bug
+detects write-flush errors
+
+v0.1.0
+first release
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..4a339cc
--- /dev/null
+++ b/README.md
@@ -0,0 +1,44 @@
+ **Zstd**, short for Zstandard, is a fast lossless compression algorithm, targeting real-time compression scenarios at zlib-level compression ratio.
+
+It is provided as a BSD-license package, hosted on Github.
+
+|Branch      |Status   |
+|------------|---------|
+|master      | [![Build Status](https://travis-ci.org/Cyan4973/zstd.svg?branch=master)](https://travis-ci.org/Cyan4973/zstd) |
+|dev         | [![Build Status](https://travis-ci.org/Cyan4973/zstd.svg?branch=dev)](https://travis-ci.org/Cyan4973/zstd) |
+
+For a taste of its performance, here are a few benchmark numbers from a number of compression codecs suitable for real-time. The test was completed on a Core i7-5600U @ 2.6 GHz, using m^2's [fsbench 0.14.3](http://encode.ru/threads/1371-Filesystem-benchmark?p=34029&viewfull=1#post34029) compiled with gcc 4.8.4, on the [Silesia compression corpus](http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia).
+
+|Name             | Ratio | C.speed | D.speed |
+|-----------------|-------|--------:|--------:|
+|                 |       |   MB/s  |  MB/s   |
+| **zstd 0.4**    |**2.872**|**280**| **670** |
+| [zlib] 1.2.8 -1 | 2.730 |    70   |   300   | 
+| QuickLZ 1.5.1b6 | 2.237 |   370   |   415   |
+| LZO 2.06        | 2.106 |   400   |   580   |
+| [LZ4] r131      | 2.101 |   450   |  2100   |
+| Snappy 1.1.0    | 2.091 |   330   |  1100   |
+| LZF 3.6         | 2.077 |   200   |   560   |
+
+[zlib]:http://www.zlib.net/
+[LZ4]:http://www.lz4.org/
+
+Zstd can also offer stronger compression ratio at the cost of compression speed. Speed / Ratio trade-off is configurable by small increment, to fit different situations. Note however that decompression speed is preserved and remain roughly the same at all settings, a property shared by most LZ compression algorithms, such as [zlib]. The following test is run on a Core i7-3930K CPU @ 4.5GHz, using [lzbench], an open-source in-memory benchmark by inikep compiled with gcc 5.2.1, on the [Sil [...]
+
+[lzbench]:https://github.com/inikep/lzbench
+
+Compression Speed vs Ratio | Decompression Speed
+---------------------------|--------------------
+![Compression Speed vs Ratio](images/CSpeed.png "Compression Speed vs Ratio") | ![Decompression Speed](images/DSpeed.png "Decompression Speed")
+
+
+Zstd entropy stage is provided by [Huff0 and FSE, from Finite State Entropy library](https://github.com/Cyan4973/FiniteStateEntropy).
+
+Its memory requirement can be configured to fit into low-memory hardware configurations, or servers handling multiple connections/contexts in parallel.
+
+Zstd has not yet reached "stable format" status. It doesn't guarantee yet that its current compressed format will remain stable and supported in future versions. During this period, it can still change to adapt new optimizations still being investigated. "Stable Format" is projected sometimes early 2016. 
+
+That being said, the library is now fairly robust, able to withstand hazards situations, including invalid inputs. The library reliability has been tested using [Fuzz Testing](https://en.wikipedia.org/wiki/Fuzz_testing), with both [internal tools](programs/fuzzer.c) and [external ones](http://lcamtuf.coredump.cx/afl). Therefore, it seems now safe to test Zstandard even within production environments.
+
+### Branch Policy
+The "dev" branch is the one where all contributions will be merged before reaching "master". If you plan to propose a patch, please commit into the "dev" branch or its own feature branch. Direct commit to "master" are not permitted.
diff --git a/images/CSpeed.png b/images/CSpeed.png
new file mode 100644
index 0000000..0289e2d
Binary files /dev/null and b/images/CSpeed.png differ
diff --git a/images/DSpeed.png b/images/DSpeed.png
new file mode 100644
index 0000000..4db6437
Binary files /dev/null and b/images/DSpeed.png differ
diff --git a/lib/LICENSE b/lib/LICENSE
new file mode 100644
index 0000000..3549585
--- /dev/null
+++ b/lib/LICENSE
@@ -0,0 +1,26 @@
+ZSTD Library
+Copyright (c) 2014-2015, Yann Collet
+All rights reserved.
+
+BSD License
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice, this
+  list of conditions and the following disclaimer in the documentation and/or
+  other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/lib/Makefile b/lib/Makefile
new file mode 100644
index 0000000..3e34d89
--- /dev/null
+++ b/lib/Makefile
@@ -0,0 +1,120 @@
+# ################################################################
+# ZSTD library - Makefile
+# Copyright (C) Yann Collet 2015
+# All rights reserved.
+# 
+# BSD license
+
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+# 
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+# 
+# * Redistributions in binary form must reproduce the above copyright notice, this
+#   list of conditions and the following disclaimer in the documentation and/or
+#   other materials provided with the distribution.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# 
+# You can contact the author at :
+#  - ZSTD homepage : http://www.zstd.net
+#  - ZSTD source repository : https://github.com/Cyan4973/zstd
+# ################################################################
+
+# Version numbers
+LIBVER_MAJOR=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < zstd.h`
+LIBVER_MINOR=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < zstd.h`
+LIBVER_PATCH=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < zstd.h`
+LIBVER  = $(LIBVER_MAJOR).$(LIBVER_MINOR).$(LIBVER_PATCH)
+VERSION?= $(LIBVER)
+
+DESTDIR?=
+PREFIX ?= /usr/local
+CPPFLAGS= -I. -I./legacy -DZSTD_LEGACY_SUPPORT=1
+CFLAGS ?= -O3
+CFLAGS += -std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-qual -Wcast-align -Wstrict-prototypes
+FLAGS   = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MOREFLAGS)
+
+LIBDIR ?= $(PREFIX)/lib
+INCLUDEDIR=$(PREFIX)/include
+
+
+# OS X linker doesn't support -soname, and use different extension
+# see : https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html
+ifeq ($(shell uname), Darwin)
+	SHARED_EXT = dylib
+	SHARED_EXT_MAJOR = $(LIBVER_MAJOR).$(SHARED_EXT)
+	SHARED_EXT_VER = $(LIBVER).$(SHARED_EXT)
+	SONAME_FLAGS = -install_name $(PREFIX)/lib/libzstd.$(SHARED_EXT_MAJOR) -compatibility_version $(LIBVER_MAJOR) -current_version $(LIBVER)
+else
+	SONAME_FLAGS = -Wl,-soname=libzstd.$(SHARED_EXT).$(LIBVER_MAJOR)
+	SHARED_EXT = so
+	SHARED_EXT_MAJOR = $(SHARED_EXT).$(LIBVER_MAJOR)
+	SHARED_EXT_VER = $(SHARED_EXT).$(LIBVER)
+endif
+
+
+.PHONY: default all clean install uninstall
+
+default: clean libzstd
+
+all: clean libzstd
+
+libzstd: zstd_compress.c zstd_decompress.c huff0.c fse.c \
+         legacy/zstd_v01.c legacy/zstd_v02.c legacy/zstd_v03.c
+	@echo compiling static library
+	@$(CC) $(FLAGS) -c $^
+	@$(AR) rcs libzstd.a *.o
+	@echo compiling dynamic library $(LIBVER)
+	@$(CC) $(FLAGS) -shared $^ -fPIC $(SONAME_FLAGS) -o $@.$(SHARED_EXT_VER)
+	@echo creating versioned links
+	@ln -sf $@.$(SHARED_EXT_VER) $@.$(SHARED_EXT_MAJOR)
+	@ln -sf $@.$(SHARED_EXT_VER) $@.$(SHARED_EXT)
+
+clean:
+	@rm -f core *.o *.a *.$(SHARED_EXT) *.$(SHARED_EXT).* libzstd.pc
+	@echo Cleaning library completed
+
+
+#------------------------------------------------------------------------
+#make install is validated only for Linux, OSX, kFreeBSD and Hurd targets
+ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU))
+
+libzstd.pc: libzstd.pc.in Makefile
+	@echo creating pkgconfig
+	@sed -e 's|@PREFIX@|$(PREFIX)|' \
+            -e 's|@LIBDIR@|$(LIBDIR)|' \
+            -e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \
+            -e 's|@VERSION@|$(VERSION)|' \
+             $< >$@
+
+install: libzstd libzstd.pc
+	@install -d -m 755 $(DESTDIR)$(LIBDIR)/pkgconfig/ $(DESTDIR)$(INCLUDEDIR)/
+	@install -m 755 libzstd.$(SHARED_EXT_VER) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_VER)
+	@cp -a libzstd.$(SHARED_EXT_MAJOR) $(DESTDIR)$(LIBDIR)
+	@cp -a libzstd.$(SHARED_EXT) $(DESTDIR)$(LIBDIR)
+	@cp -a libzstd.pc $(DESTDIR)$(LIBDIR)/pkgconfig/
+	@install -m 644 libzstd.a $(DESTDIR)$(LIBDIR)/libzstd.a
+	@install -m 644 zstd.h $(DESTDIR)$(INCLUDEDIR)/zstd.h
+	@echo zstd static and shared library installed
+
+uninstall:
+	@rm -f $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT)
+	@rm -f $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR)
+	@rm -f $(DESTDIR)$(LIBDIR)/pkgconfig/libzstd.pc
+	@[ -x $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_VER) ] && rm -f $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_VER)
+	@[ -f $(DESTDIR)$(LIBDIR)/libzstd.a ] && rm -f $(DESTDIR)$(LIBDIR)/libzstd.a
+	@[ -f $(DESTDIR)$(INCLUDEDIR)/zstd.h ] && rm -f $(DESTDIR)$(INCLUDEDIR)/zstd.h
+	@echo zstd libraries successfully uninstalled
+
+endif
diff --git a/lib/bitstream.h b/lib/bitstream.h
new file mode 100644
index 0000000..dcfe8b0
--- /dev/null
+++ b/lib/bitstream.h
@@ -0,0 +1,384 @@
+/* ******************************************************************
+   bitstream
+   Part of NewGen Entropy library
+   header file (to include)
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef BITSTREAM_H_MODULE
+#define BITSTREAM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*
+*  This API consists of small unitary functions, which highly benefit from being inlined.
+*  Since link-time-optimization is not available for all compilers,
+*  these functions are defined into a .h to be included.
+*/
+
+/******************************************
+*  Includes
+******************************************/
+#include "mem.h"     /* unaligned access routines */
+#include "error.h"   /* error codes and messages */
+
+
+/********************************************
+*  bitStream compression API (write forward)
+********************************************/
+/*
+* bitStream can mix input from multiple sources.
+* A critical property of these streams is that they encode and decode in **reverse** direction.
+* So the first bit sequence you add will be the last to be read, like a LIFO stack.
+*/
+typedef struct
+{
+    size_t bitContainer;
+    int    bitPos;
+    char*  startPtr;
+    char*  ptr;
+    char*  endPtr;
+} BIT_CStream_t;
+
+MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t maxDstSize);
+MEM_STATIC void   BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
+MEM_STATIC void   BIT_flushBits(BIT_CStream_t* bitC);
+MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
+
+/*
+* Start by initCStream, providing the maximum size of write buffer to write into.
+* bitStream will never write outside of this buffer.
+* buffer must be at least as large as a size_t, otherwise function result will be an error code.
+*
+* bits are first added to a local register.
+* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
+* Writing data into memory is a manual operation, performed by the flushBits function.
+* Hence keep track how many bits are potentially stored into local register to avoid register overflow.
+* After a flushBits, a maximum of 7 bits might still be stored into local register.
+*
+* Avoid storing elements of more than 25 bits if you want compatibility with 32-bits bitstream readers.
+*
+* Last operation is to close the bitStream.
+* The function returns the final size of CStream in bytes.
+* If data couldn't fit into dstBuffer, it will return a 0 ( == not storable)
+*/
+
+
+/**********************************************
+*  bitStream decompression API (read backward)
+**********************************************/
+typedef struct
+{
+    size_t   bitContainer;
+    unsigned bitsConsumed;
+    const char* ptr;
+    const char* start;
+} BIT_DStream_t;
+
+typedef enum { BIT_DStream_unfinished = 0,
+               BIT_DStream_endOfBuffer = 1,
+               BIT_DStream_completed = 2,
+               BIT_DStream_overflow = 3 } BIT_DStream_status;  /* result of BIT_reloadDStream() */
+               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+
+MEM_STATIC size_t   BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
+MEM_STATIC size_t   BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
+
+
+/*
+* Start by invoking BIT_initDStream().
+* A chunk of the bitStream is then stored into a local register.
+* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+* You can then retrieve bitFields stored into the local register, **in reverse order**.
+* Local register is manually filled from memory by the BIT_reloadDStream() method.
+* A reload guarantee a minimum of ((8*sizeof(size_t))-7) bits when its result is BIT_DStream_unfinished.
+* Otherwise, it can be less than that, so proceed accordingly.
+* Checking if DStream has reached its end can be performed with BIT_endOfDStream()
+*/
+
+
+/******************************************
+*  unsafe API
+******************************************/
+MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
+/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
+
+MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
+/* unsafe version; does not check buffer overflow */
+
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
+/* faster, but works only if nbBits >= 1 */
+
+
+
+/****************************************************************
+*  Helper functions
+****************************************************************/
+MEM_STATIC unsigned BIT_highbit32 (register U32 val)
+{
+#   if defined(_MSC_VER)   /* Visual */
+    unsigned long r=0;
+    _BitScanReverse ( &r, val );
+    return (unsigned) r;
+#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
+    return 31 - __builtin_clz (val);
+#   else   /* Software version */
+    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+    U32 v = val;
+    unsigned r;
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
+    return r;
+#   endif
+}
+
+
+/****************************************************************
+*  bitStream encoding
+****************************************************************/
+
+MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* startPtr, size_t maxSize)
+{
+    bitC->bitContainer = 0;
+    bitC->bitPos = 0;
+    bitC->startPtr = (char*)startPtr;
+    bitC->ptr = bitC->startPtr;
+    bitC->endPtr = bitC->startPtr + maxSize - sizeof(bitC->ptr);
+    if (maxSize < sizeof(bitC->ptr)) return ERROR(dstSize_tooSmall);
+    return 0;
+}
+
+MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits)
+{
+    static const unsigned mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,  0xFFFFFF, 0x1FFFFFF };   /* up to 25 bits */
+    bitC->bitContainer |= (value & mask[nbBits]) << bitC->bitPos;
+    bitC->bitPos += nbBits;
+}
+
+/*! BIT_addBitsFast
+ *  works only if `value` is _clean_, meaning all high bits above nbBits are 0 */
+MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits)
+{
+    bitC->bitContainer |= value << bitC->bitPos;
+    bitC->bitPos += nbBits;
+}
+
+/*! BIT_flushBitsFast
+ *  unsafe version; does not check buffer overflow */
+MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
+{
+    size_t nbBytes = bitC->bitPos >> 3;
+    MEM_writeLEST(bitC->ptr, bitC->bitContainer);
+    bitC->ptr += nbBytes;
+    bitC->bitPos &= 7;
+    bitC->bitContainer >>= nbBytes*8;   /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
+}
+
+MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
+{
+    size_t nbBytes = bitC->bitPos >> 3;
+    MEM_writeLEST(bitC->ptr, bitC->bitContainer);
+    bitC->ptr += nbBytes;
+    if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
+    bitC->bitPos &= 7;
+    bitC->bitContainer >>= nbBytes*8;   /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
+}
+
+/*! BIT_closeCStream
+ *  @result : size of CStream, in bytes, or 0 if it cannot fit into dstBuffer */
+MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
+{
+    char* endPtr;
+
+    BIT_addBitsFast(bitC, 1, 1);   /* endMark */
+    BIT_flushBits(bitC);
+
+    if (bitC->ptr >= bitC->endPtr) /* too close to buffer's end */
+        return 0;   /* not storable */
+
+    endPtr = bitC->ptr;
+    endPtr += bitC->bitPos > 0;    /* remaining bits (incomplete byte) */
+
+    return (endPtr - bitC->startPtr);
+}
+
+
+/**********************************************************
+* bitStream decoding
+**********************************************************/
+
+/*!BIT_initDStream
+*  Initialize a BIT_DStream_t.
+*  @bitD : a pointer to an already allocated BIT_DStream_t structure
+*  @srcBuffer must point at the beginning of a bitStream
+*  @srcSize must be the exact size of the bitStream
+*  @result : size of stream (== srcSize) or an errorCode if a problem is detected
+*/
+MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+    if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
+
+    if (srcSize >=  sizeof(size_t))   /* normal case */
+    {
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(size_t);
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+        if (contain32 == 0) return ERROR(GENERIC);   /* endMark not present */
+        bitD->bitsConsumed = 8 - BIT_highbit32(contain32);
+    }
+    else
+    {
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = bitD->start;
+        bitD->bitContainer = *(const BYTE*)(bitD->start);
+        switch(srcSize)
+        {
+            case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
+            case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
+            case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
+            case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
+            case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
+            case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) <<  8;
+            default:;
+        }
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+        if (contain32 == 0) return ERROR(GENERIC);   /* endMark not present */
+        bitD->bitsConsumed = 8 - BIT_highbit32(contain32);
+        bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
+    }
+
+    return srcSize;
+}
+
+/*!BIT_lookBits
+ * Provides next n bits from local register
+ * local register is not modified (bits are still present for next read/look)
+ * On 32-bits, maxNbBits==25
+ * On 64-bits, maxNbBits==57
+ * @return : value extracted
+ */
+MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+    return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
+}
+
+/*! BIT_lookBitsFast :
+*   unsafe version; only works only if nbBits >= 1 */
+MEM_STATIC size_t BIT_lookBitsFast(BIT_DStream_t* bitD, U32 nbBits)
+{
+    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+    return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
+}
+
+MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    bitD->bitsConsumed += nbBits;
+}
+
+/*!BIT_readBits
+ * Read next n bits from local register.
+ * pay attention to not read more than nbBits contained into local register.
+ * @return : extracted value.
+ */
+MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    size_t value = BIT_lookBits(bitD, nbBits);
+    BIT_skipBits(bitD, nbBits);
+    return value;
+}
+
+/*!BIT_readBitsFast :
+*  unsafe version; only works only if nbBits >= 1 */
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
+{
+    size_t value = BIT_lookBitsFast(bitD, nbBits);
+    BIT_skipBits(bitD, nbBits);
+    return value;
+}
+
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
+{
+	if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should never happen */
+		return BIT_DStream_overflow;
+
+    if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
+    {
+        bitD->ptr -= bitD->bitsConsumed >> 3;
+        bitD->bitsConsumed &= 7;
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        return BIT_DStream_unfinished;
+    }
+    if (bitD->ptr == bitD->start)
+    {
+        if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
+        return BIT_DStream_completed;
+    }
+    {
+        U32 nbBytes = bitD->bitsConsumed >> 3;
+        BIT_DStream_status result = BIT_DStream_unfinished;
+        if (bitD->ptr - nbBytes < bitD->start)
+        {
+            nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
+            result = BIT_DStream_endOfBuffer;
+        }
+        bitD->ptr -= nbBytes;
+        bitD->bitsConsumed -= nbBytes*8;
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD) */
+        return result;
+    }
+}
+
+/*! BIT_endOfDStream
+*   @return Tells if DStream has reached its exact end
+*/
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
+{
+    return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* BITSTREAM_H_MODULE */
diff --git a/lib/error.h b/lib/error.h
new file mode 100644
index 0000000..3c39c39
--- /dev/null
+++ b/lib/error.h
@@ -0,0 +1,99 @@
+/* ******************************************************************
+   Error codes and messages
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef ERROR_H_MODULE
+#define ERROR_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/******************************************
+*  Includes
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+
+
+/******************************************
+*  Compiler-specific
+******************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define ERR_STATIC static inline
+#elif defined(_MSC_VER)
+#  define ERR_STATIC static __inline
+#elif defined(__GNUC__)
+#  define ERR_STATIC static __attribute__((unused))
+#else
+#  define ERR_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/******************************************
+*  Error Management
+******************************************/
+#define PREFIX(name) ZSTD_error_##name
+
+#define ERROR(name) (size_t)-PREFIX(name)
+
+#define ERROR_LIST(ITEM) \
+        ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \
+        ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(frameParameter_unsupported)) ITEM(PREFIX(frameParameter_unsupportedBy32bitsImplementation)) \
+        ITEM(PREFIX(init_missing)) ITEM(PREFIX(memory_allocation)) \
+        ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \
+        ITEM(PREFIX(corruption_detected)) \
+        ITEM(PREFIX(tableLog_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooSmall)) \
+        ITEM(PREFIX(maxCode))
+
+#define ERROR_GENERATE_ENUM(ENUM) ENUM,
+typedef enum { ERROR_LIST(ERROR_GENERATE_ENUM) } ERR_codes;  /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
+
+#define ERROR_CONVERTTOSTRING(STRING) #STRING,
+#define ERROR_GENERATE_STRING(EXPR) ERROR_CONVERTTOSTRING(EXPR)
+static const char* ERR_strings[] = { ERROR_LIST(ERROR_GENERATE_STRING) };
+
+ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
+
+ERR_STATIC const char* ERR_getErrorName(size_t code)
+{
+    static const char* codeError = "Unspecified error code";
+    if (ERR_isError(code)) return ERR_strings[-(int)(code)];
+    return codeError;
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ERROR_H_MODULE */
diff --git a/lib/fse.c b/lib/fse.c
new file mode 100644
index 0000000..38e40a5
--- /dev/null
+++ b/lib/fse.c
@@ -0,0 +1,1229 @@
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/****************************************************************
+*  Tuning parameters
+****************************************************************/
+/* MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#define FSE_MAX_MEMORY_USAGE 14
+#define FSE_DEFAULT_MEMORY_USAGE 13
+
+/* FSE_MAX_SYMBOL_VALUE :
+*  Maximum symbol value authorized.
+*  Required for proper stack allocation */
+#define FSE_MAX_SYMBOL_VALUE 255
+
+
+/****************************************************************
+*  template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+
+
+/****************************************************************
+*  Byte symbol type
+****************************************************************/
+#endif   /* !FSE_COMMONDEFS_ONLY */
+
+
+/****************************************************************
+*  Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
+#else
+#  ifdef __GNUC__
+#    define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#    define FORCE_INLINE static inline __attribute__((always_inline))
+#  else
+#    define FORCE_INLINE static inline
+#  endif
+#endif
+
+
+/****************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+#include "bitstream.h"
+#include "fse_static.h"
+
+
+/****************************************************************
+*  Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+
+/****************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/****************************************************************
+*  Complex types
+****************************************************************/
+typedef U32 CTable_max_t[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
+typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+
+
+/****************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+/* Function templates */
+size_t FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION)
+(unsigned* count, unsigned* maxSymbolValuePtr, const FSE_FUNCTION_TYPE* source, size_t sourceSize, unsigned safe)
+{
+    const FSE_FUNCTION_TYPE* ip = source;
+    const FSE_FUNCTION_TYPE* const iend = ip+sourceSize;
+    unsigned maxSymbolValue = *maxSymbolValuePtr;
+    unsigned max=0;
+    int s;
+
+    U32 Counting1[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
+    U32 Counting2[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
+    U32 Counting3[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
+    U32 Counting4[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
+
+    /* safety checks */
+    if (!sourceSize)
+    {
+        memset(count, 0, (maxSymbolValue + 1) * sizeof(FSE_FUNCTION_TYPE));
+        *maxSymbolValuePtr = 0;
+        return 0;
+    }
+    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(GENERIC);   /* maxSymbolValue too large : unsupported */
+    if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;            /* 0 == default */
+
+    if ((safe) || (sizeof(FSE_FUNCTION_TYPE)>1))
+    {
+        /* check input values, to avoid count table overflow */
+        while (ip < iend-3)
+        {
+            if (*ip>maxSymbolValue) return ERROR(GENERIC); Counting1[*ip++]++;
+            if (*ip>maxSymbolValue) return ERROR(GENERIC); Counting2[*ip++]++;
+            if (*ip>maxSymbolValue) return ERROR(GENERIC); Counting3[*ip++]++;
+            if (*ip>maxSymbolValue) return ERROR(GENERIC); Counting4[*ip++]++;
+        }
+    }
+    else
+    {
+        U32 cached = MEM_read32(ip); ip += 4;
+        while (ip < iend-15)
+        {
+            U32 c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+        }
+        ip-=4;
+    }
+
+    /* finish last symbols */
+    while (ip<iend) { if ((safe) && (*ip>maxSymbolValue)) return ERROR(GENERIC); Counting1[*ip++]++; }
+
+    for (s=0; s<=(int)maxSymbolValue; s++)
+    {
+        count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
+        if (count[s] > max) max = count[s];
+    }
+
+    while (!count[maxSymbolValue]) maxSymbolValue--;
+    *maxSymbolValuePtr = maxSymbolValue;
+    return (size_t)max;
+}
+
+/* hidden fast variant (unsafe) */
+size_t FSE_FUNCTION_NAME(FSE_countFast, FSE_FUNCTION_EXTENSION)
+(unsigned* count, unsigned* maxSymbolValuePtr, const FSE_FUNCTION_TYPE* source, size_t sourceSize)
+{
+    return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, maxSymbolValuePtr, source, sourceSize, 0);
+}
+
+size_t FSE_FUNCTION_NAME(FSE_count, FSE_FUNCTION_EXTENSION)
+(unsigned* count, unsigned* maxSymbolValuePtr, const FSE_FUNCTION_TYPE* source, size_t sourceSize)
+{
+    if ((sizeof(FSE_FUNCTION_TYPE)==1) && (*maxSymbolValuePtr >= 255))
+    {
+        *maxSymbolValuePtr = 255;
+        return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, maxSymbolValuePtr, source, sourceSize, 0);
+    }
+    return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, maxSymbolValuePtr, source, sourceSize, 1);
+}
+
+
+static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
+
+size_t FSE_FUNCTION_NAME(FSE_buildCTable, FSE_FUNCTION_EXTENSION)
+(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    const unsigned tableSize = 1 << tableLog;
+    const unsigned tableMask = tableSize - 1;
+    U16* tableU16 = ( (U16*) ct) + 2;
+    FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) (((U32*)ct) + 1 + (tableLog ? tableSize>>1 : 1) );
+    const unsigned step = FSE_tableStep(tableSize);
+    unsigned cumul[FSE_MAX_SYMBOL_VALUE+2];
+    U32 position = 0;
+    FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* init isn't necessary, even if static analyzer complain about it */
+    U32 highThreshold = tableSize-1;
+    unsigned symbol;
+    unsigned i;
+
+    /* header */
+    tableU16[-2] = (U16) tableLog;
+    tableU16[-1] = (U16) maxSymbolValue;
+
+    /* For explanations on how to distribute symbol values over the table :
+    *  http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
+
+    /* symbol start positions */
+    cumul[0] = 0;
+    for (i=1; i<=maxSymbolValue+1; i++)
+    {
+        if (normalizedCounter[i-1]==-1)   /* Low prob symbol */
+        {
+            cumul[i] = cumul[i-1] + 1;
+            tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(i-1);
+        }
+        else
+            cumul[i] = cumul[i-1] + normalizedCounter[i-1];
+    }
+    cumul[maxSymbolValue+1] = tableSize+1;
+
+    /* Spread symbols */
+    for (symbol=0; symbol<=maxSymbolValue; symbol++)
+    {
+        int nbOccurences;
+        for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++)
+        {
+            tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
+            position = (position + step) & tableMask;
+            while (position > highThreshold) position = (position + step) & tableMask;   /* Lowprob area */
+        }
+    }
+
+    if (position!=0) return ERROR(GENERIC);   /* Must have gone through all positions */
+
+    /* Build table */
+    for (i=0; i<tableSize; i++)
+    {
+        FSE_FUNCTION_TYPE s = tableSymbol[i];   /* static analyzer doesn't understand tableSymbol is properly initialized */
+        tableU16[cumul[s]++] = (U16) (tableSize+i);   /* TableU16 : sorted by symbol order; gives next state value */
+    }
+
+    /* Build Symbol Transformation Table */
+    {
+        unsigned s;
+        unsigned total = 0;
+        for (s=0; s<=maxSymbolValue; s++)
+        {
+            switch (normalizedCounter[s])
+            {
+            case  0:
+                break;
+            case -1:
+            case  1:
+                symbolTT[s].deltaNbBits = tableLog << 16;
+                symbolTT[s].deltaFindState = total - 1;
+                total ++;
+                break;
+            default :
+                {
+                    U32 maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
+                    U32 minStatePlus = normalizedCounter[s] << maxBitsOut;
+                    symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
+                    symbolTT[s].deltaFindState = total - normalizedCounter[s];
+                    total +=  normalizedCounter[s];
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+
+
+#define FSE_DECODE_TYPE FSE_TYPE_NAME(FSE_decode_t, FSE_FUNCTION_EXTENSION)
+
+FSE_DTable* FSE_FUNCTION_NAME(FSE_createDTable, FSE_FUNCTION_EXTENSION) (unsigned tableLog)
+{
+    if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
+    return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
+}
+
+void FSE_FUNCTION_NAME(FSE_freeDTable, FSE_FUNCTION_EXTENSION) (FSE_DTable* dt)
+{
+    free(dt);
+}
+
+size_t FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION)
+(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
+    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (dt+1);   /* because dt is unsigned, 32-bits aligned on 32-bits */
+    const U32 tableSize = 1 << tableLog;
+    const U32 tableMask = tableSize-1;
+    const U32 step = FSE_tableStep(tableSize);
+    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
+    U32 position = 0;
+    U32 highThreshold = tableSize-1;
+    const S16 largeLimit= (S16)(1 << (tableLog-1));
+    U32 noLarge = 1;
+    U32 s;
+
+    /* Sanity Checks */
+    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+
+    /* Init, lay down lowprob symbols */
+    DTableH[0].tableLog = (U16)tableLog;
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        if (normalizedCounter[s]==-1)
+        {
+            tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
+            symbolNext[s] = 1;
+        }
+        else
+        {
+            if (normalizedCounter[s] >= largeLimit) noLarge=0;
+            symbolNext[s] = normalizedCounter[s];
+        }
+    }
+
+    /* Spread symbols */
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        int i;
+        for (i=0; i<normalizedCounter[s]; i++)
+        {
+            tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+            position = (position + step) & tableMask;
+            while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }
+    }
+
+    if (position!=0) return ERROR(GENERIC);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+
+    /* Build Decoding table */
+    {
+        U32 i;
+        for (i=0; i<tableSize; i++)
+        {
+            FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
+            U16 nextState = symbolNext[symbol]++;
+            tableDecode[i].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );
+            tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
+        }
+    }
+
+    DTableH->fastMode = (U16)noLarge;
+    return 0;
+}
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+/******************************************
+*  FSE helper functions
+******************************************/
+unsigned FSE_isError(size_t code) { return ERR_isError(code); }
+
+const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+
+/****************************************************************
+*  FSE NCount encoding-decoding
+****************************************************************/
+size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
+{
+    size_t maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
+    return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND;  /* maxSymbolValue==0 ? use default */
+}
+
+static short FSE_abs(short a)
+{
+    return a<0 ? -a : a;
+}
+
+static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
+                                       const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
+                                       unsigned writeIsSafe)
+{
+    BYTE* const ostart = (BYTE*) header;
+    BYTE* out = ostart;
+    BYTE* const oend = ostart + headerBufferSize;
+    int nbBits;
+    const int tableSize = 1 << tableLog;
+    int remaining;
+    int threshold;
+    U32 bitStream;
+    int bitCount;
+    unsigned charnum = 0;
+    int previous0 = 0;
+
+    bitStream = 0;
+    bitCount  = 0;
+    /* Table Size */
+    bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
+    bitCount  += 4;
+
+    /* Init */
+    remaining = tableSize+1;   /* +1 for extra accuracy */
+    threshold = tableSize;
+    nbBits = tableLog+1;
+
+    while (remaining>1)   /* stops at 1 */
+    {
+        if (previous0)
+        {
+            unsigned start = charnum;
+            while (!normalizedCounter[charnum]) charnum++;
+            while (charnum >= start+24)
+            {
+                start+=24;
+                bitStream += 0xFFFFU << bitCount;
+                if ((!writeIsSafe) && (out > oend-2)) return ERROR(dstSize_tooSmall);   /* Buffer overflow */
+                out[0] = (BYTE) bitStream;
+                out[1] = (BYTE)(bitStream>>8);
+                out+=2;
+                bitStream>>=16;
+            }
+            while (charnum >= start+3)
+            {
+                start+=3;
+                bitStream += 3 << bitCount;
+                bitCount += 2;
+            }
+            bitStream += (charnum-start) << bitCount;
+            bitCount += 2;
+            if (bitCount>16)
+            {
+                if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall);   /* Buffer overflow */
+                out[0] = (BYTE)bitStream;
+                out[1] = (BYTE)(bitStream>>8);
+                out += 2;
+                bitStream >>= 16;
+                bitCount -= 16;
+            }
+        }
+        {
+            short count = normalizedCounter[charnum++];
+            const short max = (short)((2*threshold-1)-remaining);
+            remaining -= FSE_abs(count);
+            if (remaining<1) return ERROR(GENERIC);
+            count++;   /* +1 for extra accuracy */
+            if (count>=threshold) count += max;   /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
+            bitStream += count << bitCount;
+            bitCount  += nbBits;
+            bitCount  -= (count<max);
+            previous0 = (count==1);
+            while (remaining<threshold) nbBits--, threshold>>=1;
+        }
+        if (bitCount>16)
+        {
+            if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall);   /* Buffer overflow */
+            out[0] = (BYTE)bitStream;
+            out[1] = (BYTE)(bitStream>>8);
+            out += 2;
+            bitStream >>= 16;
+            bitCount -= 16;
+        }
+    }
+
+    /* flush remaining bitStream */
+    if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall);   /* Buffer overflow */
+    out[0] = (BYTE)bitStream;
+    out[1] = (BYTE)(bitStream>>8);
+    out+= (bitCount+7) /8;
+
+    if (charnum > maxSymbolValue + 1) return ERROR(GENERIC);
+
+    return (out-ostart);
+}
+
+
+size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(GENERIC);   /* Unsupported */
+    if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported */
+
+    if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
+        return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
+
+    return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
+}
+
+
+size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                 const void* headerBuffer, size_t hbSize)
+{
+    const BYTE* const istart = (const BYTE*) headerBuffer;
+    const BYTE* const iend = istart + hbSize;
+    const BYTE* ip = istart;
+    int nbBits;
+    int remaining;
+    int threshold;
+    U32 bitStream;
+    int bitCount;
+    unsigned charnum = 0;
+    int previous0 = 0;
+
+    if (hbSize < 4) return ERROR(srcSize_wrong);
+    bitStream = MEM_readLE32(ip);
+    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
+    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+    bitStream >>= 4;
+    bitCount = 4;
+    *tableLogPtr = nbBits;
+    remaining = (1<<nbBits)+1;
+    threshold = 1<<nbBits;
+    nbBits++;
+
+    while ((remaining>1) && (charnum<=*maxSVPtr))
+    {
+        if (previous0)
+        {
+            unsigned n0 = charnum;
+            while ((bitStream & 0xFFFF) == 0xFFFF)
+            {
+                n0+=24;
+                if (ip < iend-5)
+                {
+                    ip+=2;
+                    bitStream = MEM_readLE32(ip) >> bitCount;
+                }
+                else
+                {
+                    bitStream >>= 16;
+                    bitCount+=16;
+                }
+            }
+            while ((bitStream & 3) == 3)
+            {
+                n0+=3;
+                bitStream>>=2;
+                bitCount+=2;
+            }
+            n0 += bitStream & 3;
+            bitCount += 2;
+            if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
+            while (charnum < n0) normalizedCounter[charnum++] = 0;
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+            {
+                ip += bitCount>>3;
+                bitCount &= 7;
+                bitStream = MEM_readLE32(ip) >> bitCount;
+            }
+            else
+                bitStream >>= 2;
+        }
+        {
+            const short max = (short)((2*threshold-1)-remaining);
+            short count;
+
+            if ((bitStream & (threshold-1)) < (U32)max)
+            {
+                count = (short)(bitStream & (threshold-1));
+                bitCount   += nbBits-1;
+            }
+            else
+            {
+                count = (short)(bitStream & (2*threshold-1));
+                if (count >= threshold) count -= max;
+                bitCount   += nbBits;
+            }
+
+            count--;   /* extra accuracy */
+            remaining -= FSE_abs(count);
+            normalizedCounter[charnum++] = count;
+            previous0 = !count;
+            while (remaining < threshold)
+            {
+                nbBits--;
+                threshold >>= 1;
+            }
+
+            {
+                if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+                {
+                    ip += bitCount>>3;
+                    bitCount &= 7;
+                }
+                else
+                {
+                    bitCount -= (int)(8 * (iend - 4 - ip));
+					ip = iend - 4;
+				}
+                bitStream = MEM_readLE32(ip) >> (bitCount & 31);
+            }
+        }
+    }
+    if (remaining != 1) return ERROR(GENERIC);
+    *maxSVPtr = charnum-1;
+
+    ip += (bitCount+7)>>3;
+    if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong);
+    return ip-istart;
+}
+
+
+/****************************************************************
+*  FSE Compression Code
+****************************************************************/
+/*
+FSE_CTable is a variable size structure which contains :
+    U16 tableLog;
+    U16 maxSymbolValue;
+    U16 nextStateNumber[1 << tableLog];                         // This size is variable
+    FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];  // This size is variable
+Allocation is manual, since C standard does not support variable-size structures.
+*/
+
+size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)
+{
+    size_t size;
+    FSE_STATIC_ASSERT((size_t)FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)*4 >= sizeof(CTable_max_t));   /* A compilation error here means FSE_CTABLE_SIZE_U32 is not large enough */
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(GENERIC);
+    size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
+    return size;
+}
+
+FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
+{
+    size_t size;
+    if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
+    size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
+    return (FSE_CTable*)malloc(size);
+}
+
+void  FSE_freeCTable (FSE_CTable* ct)
+{
+    free(ct);
+}
+
+
+/* provides the minimum logSize to safely represent a distribution */
+static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
+{
+	U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
+	U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
+	U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
+	return minBits;
+}
+
+unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
+{
+	U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - 2;
+    U32 tableLog = maxTableLog;
+	U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
+    if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
+	if (maxBitsSrc < tableLog) tableLog = maxBitsSrc;   /* Accuracy can be reduced */
+	if (minBits > tableLog) tableLog = minBits;   /* Need a minimum to safely represent all symbol values */
+    if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
+    if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
+    return tableLog;
+}
+
+
+/* Secondary normalization method.
+   To be used when primary method fails. */
+
+static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
+{
+    U32 s;
+    U32 distributed = 0;
+    U32 ToDistribute;
+
+    /* Init */
+    U32 lowThreshold = (U32)(total >> tableLog);
+    U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
+
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        if (count[s] == 0)
+        {
+            norm[s]=0;
+            continue;
+        }
+        if (count[s] <= lowThreshold)
+        {
+            norm[s] = -1;
+            distributed++;
+            total -= count[s];
+            continue;
+        }
+        if (count[s] <= lowOne)
+        {
+            norm[s] = 1;
+            distributed++;
+            total -= count[s];
+            continue;
+        }
+        norm[s]=-2;
+    }
+    ToDistribute = (1 << tableLog) - distributed;
+
+    if ((total / ToDistribute) > lowOne)
+    {
+        /* risk of rounding to zero */
+        lowOne = (U32)((total * 3) / (ToDistribute * 2));
+        for (s=0; s<=maxSymbolValue; s++)
+        {
+            if ((norm[s] == -2) && (count[s] <= lowOne))
+            {
+                norm[s] = 1;
+                distributed++;
+                total -= count[s];
+                continue;
+            }
+        }
+        ToDistribute = (1 << tableLog) - distributed;
+    }
+
+    if (distributed == maxSymbolValue+1)
+    {
+        /* all values are pretty poor;
+           probably incompressible data (should have already been detected);
+           find max, then give all remaining points to max */
+        U32 maxV = 0, maxC =0;
+        for (s=0; s<=maxSymbolValue; s++)
+            if (count[s] > maxC) maxV=s, maxC=count[s];
+        norm[maxV] += (short)ToDistribute;
+        return 0;
+    }
+
+    {
+        U64 const vStepLog = 62 - tableLog;
+        U64 const mid = (1ULL << (vStepLog-1)) - 1;
+        U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total;   /* scale on remaining */
+        U64 tmpTotal = mid;
+        for (s=0; s<=maxSymbolValue; s++)
+        {
+            if (norm[s]==-2)
+            {
+                U64 end = tmpTotal + (count[s] * rStep);
+                U32 sStart = (U32)(tmpTotal >> vStepLog);
+                U32 sEnd = (U32)(end >> vStepLog);
+                U32 weight = sEnd - sStart;
+                if (weight < 1)
+                    return ERROR(GENERIC);
+                norm[s] = (short)weight;
+                tmpTotal = end;
+            }
+        }
+    }
+
+    return 0;
+}
+
+
+size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
+                           const unsigned* count, size_t total,
+                           unsigned maxSymbolValue)
+{
+    /* Sanity checks */
+    if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
+    if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported size */
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(GENERIC);   /* Unsupported size */
+    if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC);   /* Too small tableLog, compression potentially impossible */
+
+    {
+        U32 const rtbTable[] = {     0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
+        U64 const scale = 62 - tableLog;
+        U64 const step = ((U64)1<<62) / total;   /* <== here, one division ! */
+        U64 const vStep = 1ULL<<(scale-20);
+        int stillToDistribute = 1<<tableLog;
+        unsigned s;
+        unsigned largest=0;
+        short largestP=0;
+        U32 lowThreshold = (U32)(total >> tableLog);
+
+        for (s=0; s<=maxSymbolValue; s++)
+        {
+            if (count[s] == total) return 0;
+            if (count[s] == 0)
+            {
+                normalizedCounter[s]=0;
+                continue;
+            }
+            if (count[s] <= lowThreshold)
+            {
+                normalizedCounter[s] = -1;
+                stillToDistribute--;
+            }
+            else
+            {
+                short proba = (short)((count[s]*step) >> scale);
+                if (proba<8)
+                {
+                    U64 restToBeat = vStep * rtbTable[proba];
+                    proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
+                }
+                if (proba > largestP)
+                {
+                    largestP=proba;
+                    largest=s;
+                }
+                normalizedCounter[s] = proba;
+                stillToDistribute -= proba;
+            }
+        }
+        if (-stillToDistribute >= (normalizedCounter[largest] >> 1))
+        {
+            /* corner case, need another normalization method */
+            size_t errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
+            if (FSE_isError(errorCode)) return errorCode;
+        }
+        else normalizedCounter[largest] += (short)stillToDistribute;
+    }
+
+#if 0
+    {   /* Print Table (debug) */
+        U32 s;
+        U32 nTotal = 0;
+        for (s=0; s<=maxSymbolValue; s++)
+            printf("%3i: %4i \n", s, normalizedCounter[s]);
+        for (s=0; s<=maxSymbolValue; s++)
+            nTotal += abs(normalizedCounter[s]);
+        if (nTotal != (1U<<tableLog))
+            printf("Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
+        getchar();
+    }
+#endif
+
+    return tableLog;
+}
+
+
+/* fake FSE_CTable, for raw (uncompressed) input */
+size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
+{
+    const unsigned tableSize = 1 << nbBits;
+    const unsigned tableMask = tableSize - 1;
+    const unsigned maxSymbolValue = tableMask;
+    U16* tableU16 = ( (U16*) ct) + 2;
+    FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) ((((U32*)ct)+1) + (tableSize>>1));
+    unsigned s;
+
+    /* Sanity checks */
+    if (nbBits < 1) return ERROR(GENERIC);             /* min size */
+
+    /* header */
+    tableU16[-2] = (U16) nbBits;
+    tableU16[-1] = (U16) maxSymbolValue;
+
+    /* Build table */
+    for (s=0; s<tableSize; s++)
+        tableU16[s] = (U16)(tableSize + s);
+
+    /* Build Symbol Transformation Table */
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        symbolTT[s].deltaNbBits = nbBits << 16;
+        symbolTT[s].deltaFindState = s-1;
+    }
+
+    return 0;
+}
+
+/* fake FSE_CTable, for rle (100% always same symbol) input */
+size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
+{
+    U16* tableU16 = ( (U16*) ct) + 2;
+    FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) ((U32*)ct + 2);
+
+    /* header */
+    tableU16[-2] = (U16) 0;
+    tableU16[-1] = (U16) symbolValue;
+
+    /* Build table */
+    tableU16[0] = 0;
+    tableU16[1] = 0;   /* just in case */
+
+    /* Build Symbol Transformation Table */
+    {
+        symbolTT[symbolValue].deltaNbBits = 0;
+        symbolTT[symbolValue].deltaFindState = 0;
+    }
+
+    return 0;
+}
+
+
+static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
+                           const void* src, size_t srcSize,
+                           const FSE_CTable* ct, const unsigned fast)
+{
+    const BYTE* const istart = (const BYTE*) src;
+    const BYTE* ip;
+    const BYTE* const iend = istart + srcSize;
+
+    size_t errorCode;
+    BIT_CStream_t bitC;
+    FSE_CState_t CState1, CState2;
+
+
+    /* init */
+    errorCode = BIT_initCStream(&bitC, dst, dstSize);
+    if (FSE_isError(errorCode)) return 0;
+    FSE_initCState(&CState1, ct);
+    CState2 = CState1;
+
+    ip=iend;
+
+#define FSE_FLUSHBITS(s)  (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
+
+    /* join to even */
+    if (srcSize & 1)
+    {
+        FSE_encodeSymbol(&bitC, &CState1, *--ip);
+        FSE_FLUSHBITS(&bitC);
+    }
+
+    /* join to mod 4 */
+    if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2))   /* test bit 2 */
+    {
+        FSE_encodeSymbol(&bitC, &CState2, *--ip);
+        FSE_encodeSymbol(&bitC, &CState1, *--ip);
+        FSE_FLUSHBITS(&bitC);
+    }
+
+    /* 2 or 4 encoding per loop */
+    for ( ; ip>istart ; )
+    {
+        FSE_encodeSymbol(&bitC, &CState2, *--ip);
+
+        if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 )   /* this test must be static */
+            FSE_FLUSHBITS(&bitC);
+
+        FSE_encodeSymbol(&bitC, &CState1, *--ip);
+
+        if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 )   /* this test must be static */
+        {
+            FSE_encodeSymbol(&bitC, &CState2, *--ip);
+            FSE_encodeSymbol(&bitC, &CState1, *--ip);
+        }
+
+        FSE_FLUSHBITS(&bitC);
+    }
+
+    FSE_flushCState(&bitC, &CState2);
+    FSE_flushCState(&bitC, &CState1);
+    return BIT_closeCStream(&bitC);
+}
+
+size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
+                           const void* src, size_t srcSize,
+                           const FSE_CTable* ct)
+{
+    const unsigned fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
+
+    if (fast)
+        return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
+    else
+        return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
+}
+
+
+size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
+
+size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
+{
+    const BYTE* const istart = (const BYTE*) src;
+    const BYTE* ip = istart;
+
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + dstSize;
+
+    U32   count[FSE_MAX_SYMBOL_VALUE+1];
+    S16   norm[FSE_MAX_SYMBOL_VALUE+1];
+    CTable_max_t ct;
+    size_t errorCode;
+
+    /* init conditions */
+    if (srcSize <= 1) return 0;  /* Uncompressible */
+    if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+    if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
+
+    /* Scan input and build symbol stats */
+    errorCode = FSE_count (count, &maxSymbolValue, ip, srcSize);
+    if (FSE_isError(errorCode)) return errorCode;
+    if (errorCode == srcSize) return 1;
+    if (errorCode == 1) return 0;   /* each symbol only present once */
+    if (errorCode < (srcSize >> 7)) return 0;   /* Heuristic : not compressible enough */
+
+    tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
+    errorCode = FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue);
+    if (FSE_isError(errorCode)) return errorCode;
+
+    /* Write table description header */
+    errorCode = FSE_writeNCount (op, oend-op, norm, maxSymbolValue, tableLog);
+    if (FSE_isError(errorCode)) return errorCode;
+    op += errorCode;
+
+    /* Compress */
+    errorCode = FSE_buildCTable (ct, norm, maxSymbolValue, tableLog);
+    if (FSE_isError(errorCode)) return errorCode;
+    errorCode = FSE_compress_usingCTable(op, oend - op, ip, srcSize, ct);
+    if (errorCode == 0) return 0;   /* not enough space for compressed data */
+    op += errorCode;
+
+    /* check compressibility */
+    if ( (size_t)(op-ostart) >= srcSize-1 )
+        return 0;
+
+    return op-ostart;
+}
+
+size_t FSE_compress (void* dst, size_t dstSize, const void* src, size_t srcSize)
+{
+    return FSE_compress2(dst, dstSize, src, (U32)srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
+}
+
+
+/*********************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
+{
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
+    FSE_decode_t* const cell = (FSE_decode_t*)(dt + 1);   /* because dt is unsigned */
+
+    DTableH->tableLog = 0;
+    DTableH->fastMode = 0;
+
+    cell->newState = 0;
+    cell->symbol = symbolValue;
+    cell->nbBits = 0;
+
+    return 0;
+}
+
+
+size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
+{
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
+    FSE_decode_t* const dinfo = (FSE_decode_t*)(dt + 1);   /* because dt is unsigned */
+    const unsigned tableSize = 1 << nbBits;
+    const unsigned tableMask = tableSize - 1;
+    const unsigned maxSymbolValue = tableMask;
+    unsigned s;
+
+    /* Sanity checks */
+    if (nbBits < 1) return ERROR(GENERIC);         /* min size */
+
+    /* Build Decoding Table */
+    DTableH->tableLog = (U16)nbBits;
+    DTableH->fastMode = 1;
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        dinfo[s].newState = 0;
+        dinfo[s].symbol = (BYTE)s;
+        dinfo[s].nbBits = (BYTE)nbBits;
+    }
+
+    return 0;
+}
+
+FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const FSE_DTable* dt, const unsigned fast)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = omax-3;
+
+    BIT_DStream_t bitD;
+    FSE_DState_t state1;
+    FSE_DState_t state2;
+    size_t errorCode;
+
+    /* Init */
+    errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
+    if (FSE_isError(errorCode)) return errorCode;
+
+    FSE_initDState(&state1, &bitD, dt);
+    FSE_initDState(&state2, &bitD, dt);
+
+#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
+
+    /* 4 symbols per loop */
+    for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) && (op<olimit) ; op+=4)
+    {
+        op[0] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[1] = FSE_GETSYMBOL(&state2);
+
+        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
+
+        op[2] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[3] = FSE_GETSYMBOL(&state2);
+    }
+
+    /* tail */
+    /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
+    while (1)
+    {
+        if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state1);
+
+        if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state2);
+    }
+
+    /* end ? */
+    if (BIT_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
+        return op-ostart;
+
+    if (op==omax) return ERROR(dstSize_tooSmall);   /* dst buffer is full, but cSrc unfinished */
+
+    return ERROR(corruption_detected);
+}
+
+
+size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
+                            const void* cSrc, size_t cSrcSize,
+                            const FSE_DTable* dt)
+{
+    const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)dt;
+    const U32 fastMode = DTableH->fastMode;
+
+    /* select fast mode (static) */
+    if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+}
+
+
+size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{
+    const BYTE* const istart = (const BYTE*)cSrc;
+    const BYTE* ip = istart;
+    short counting[FSE_MAX_SYMBOL_VALUE+1];
+    DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
+    unsigned tableLog;
+    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+    size_t errorCode;
+
+    if (cSrcSize<2) return ERROR(srcSize_wrong);   /* too small input size */
+
+    /* normal FSE decoding mode */
+    errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
+    if (FSE_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);   /* too small input size */
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
+    if (FSE_isError(errorCode)) return errorCode;
+
+    /* always return, even if it is an error code */
+    return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
+}
+
+
+
+#endif   /* FSE_COMMONDEFS_ONLY */
diff --git a/lib/fse.h b/lib/fse.h
new file mode 100644
index 0000000..24d1d26
--- /dev/null
+++ b/lib/fse.h
@@ -0,0 +1,295 @@
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   header file
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef FSE_H
+#define FSE_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/******************************************
+*  Includes
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+
+
+/******************************************
+*  FSE simple functions
+******************************************/
+size_t FSE_compress(void* dst, size_t maxDstSize,
+              const void* src, size_t srcSize);
+size_t FSE_decompress(void* dst,  size_t maxDstSize,
+                const void* cSrc, size_t cSrcSize);
+/*
+FSE_compress():
+    Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
+    'dst' buffer must be already allocated. Compression runs faster is maxDstSize >= FSE_compressBound(srcSize)
+    return : size of compressed data (<= maxDstSize)
+    Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
+                     if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
+                     if FSE_isError(return), compression failed (more details using FSE_getErrorName())
+
+FSE_decompress():
+    Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
+    into already allocated destination buffer 'dst', of size 'maxDstSize'.
+    return : size of regenerated data (<= maxDstSize)
+             or an error code, which can be tested using FSE_isError()
+
+    ** Important ** : FSE_decompress() doesn't decompress non-compressible nor RLE data !!!
+    Why ? : making this distinction requires a header.
+    Header management is intentionally delegated to the user layer, which can better manage special cases.
+*/
+
+
+/******************************************
+*  Tool functions
+******************************************/
+size_t FSE_compressBound(size_t size);       /* maximum compressed size */
+
+/* Error Management */
+unsigned    FSE_isError(size_t code);        /* tells if a return value is an error code */
+const char* FSE_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
+
+
+/******************************************
+*  FSE advanced functions
+******************************************/
+/*
+FSE_compress2():
+    Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
+    Both parameters can be defined as '0' to mean : use default value
+    return : size of compressed data
+    Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
+                     if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
+                     if FSE_isError(return), it's an error code.
+*/
+size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+
+
+/******************************************
+*  FSE detailed API
+******************************************/
+/*
+FSE_compress() does the following:
+1. count symbol occurrence from source[] into table count[]
+2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
+3. save normalized counters to memory buffer using writeNCount()
+4. build encoding table 'CTable' from normalized counters
+5. encode the data stream using encoding table 'CTable'
+
+FSE_decompress() does the following:
+1. read normalized counters with readNCount()
+2. build decoding table 'DTable' from normalized counters
+3. decode the data stream using decoding table 'DTable'
+
+The following API allows targeting specific sub-functions for advanced tasks.
+For example, it's possible to compress several blocks using the same 'CTable',
+or to save and provide normalized distribution using external method.
+*/
+
+/* *** COMPRESSION *** */
+
+/*
+FSE_count():
+   Provides the precise count of each symbol within a table 'count'
+   'count' is a table of unsigned int, of minimum size (maxSymbolValuePtr[0]+1).
+   maxSymbolValuePtr[0] will be updated if detected smaller than initially expected
+   return : the count of the most frequent symbol (which is not identified)
+            if return == srcSize, there is only one symbol.
+            if FSE_isError(return), it's an error code. */
+size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const unsigned char* src, size_t srcSize);
+
+/*
+FSE_optimalTableLog():
+   dynamically downsize 'tableLog' when conditions are met.
+   It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
+   return : recommended tableLog (necessarily <= initial 'tableLog') */
+unsigned FSE_optimalTableLog(unsigned tableLog, size_t srcSize, unsigned maxSymbolValue);
+
+/*
+FSE_normalizeCount():
+   normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
+   'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
+   return : tableLog,
+            or an errorCode, which can be tested using FSE_isError() */
+size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
+
+/*
+FSE_NCountWriteBound():
+   Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'
+   Typically useful for allocation purpose. */
+size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
+
+/*
+FSE_writeNCount():
+   Compactly save 'normalizedCounter' into 'buffer'.
+   return : size of the compressed table
+            or an errorCode, which can be tested using FSE_isError() */
+size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+
+/*
+Constructor and Destructor of type FSE_CTable
+    Note that its size depends on 'tableLog' and 'maxSymbolValue' */
+typedef unsigned FSE_CTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+FSE_CTable* FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue);
+void        FSE_freeCTable (FSE_CTable* ct);
+
+/*
+FSE_buildCTable():
+   Builds 'ct', which must be already allocated, using FSE_createCTable()
+   return : 0
+            or an errorCode, which can be tested using FSE_isError() */
+size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+/*
+FSE_compress_usingCTable():
+   Compress 'src' using 'ct' into 'dst' which must be already allocated
+   return : size of compressed data (<= maxDstSize)
+            or 0 if compressed data could not fit into 'dst'
+            or an errorCode, which can be tested using FSE_isError() */
+size_t FSE_compress_usingCTable (void* dst, size_t maxDstSize, const void* src, size_t srcSize, const FSE_CTable* ct);
+
+/*
+Tutorial :
+----------
+The first step is to count all symbols. FSE_count() does this job very fast.
+Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells.
+'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0]
+maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value)
+FSE_count() will return the number of occurrence of the most frequent symbol.
+This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
+
+The next step is to normalize the frequencies.
+FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'.
+It also guarantees a minimum of 1 to any Symbol with frequency >= 1.
+You can use 'tableLog'==0 to mean "use default tableLog value".
+If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(),
+which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default").
+
+The result of FSE_normalizeCount() will be saved into a table,
+called 'normalizedCounter', which is a table of signed short.
+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells.
+The return value is tableLog if everything proceeded as expected.
+It is 0 if there is a single symbol within distribution.
+If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()).
+
+'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount().
+'buffer' must be already allocated.
+For guaranteed success, buffer size must be at least FSE_headerBound().
+The result of the function is the number of bytes written into 'buffer'.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small).
+
+'normalizedCounter' can then be used to create the compression table 'CTable'.
+The space required by 'CTable' must be already allocated, using FSE_createCTable().
+You can then use FSE_buildCTable() to fill 'CTable'.
+If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()).
+
+'CTable' can then be used to compress 'src', with FSE_compress_usingCTable().
+Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize'
+The function returns the size of compressed data (without header), necessarily <= maxDstSize.
+If it returns '0', compressed data could not fit into 'dst'.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
+*/
+
+
+/* *** DECOMPRESSION *** */
+
+/*
+FSE_readNCount():
+   Read compactly saved 'normalizedCounter' from 'rBuffer'.
+   return : size read from 'rBuffer'
+            or an errorCode, which can be tested using FSE_isError()
+            maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
+size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
+
+/*
+Constructor and Destructor of type FSE_DTable
+    Note that its size depends on 'tableLog' */
+typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+FSE_DTable* FSE_createDTable(unsigned tableLog);
+void        FSE_freeDTable(FSE_DTable* dt);
+
+/*
+FSE_buildDTable():
+   Builds 'dt', which must be already allocated, using FSE_createDTable()
+   return : 0,
+            or an errorCode, which can be tested using FSE_isError() */
+size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+/*
+FSE_decompress_usingDTable():
+   Decompress compressed source 'cSrc' of size 'cSrcSize' using 'dt'
+   into 'dst' which must be already allocated.
+   return : size of regenerated data (necessarily <= maxDstSize)
+            or an errorCode, which can be tested using FSE_isError() */
+size_t FSE_decompress_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
+
+/*
+Tutorial :
+----------
+(Note : these functions only decompress FSE-compressed blocks.
+ If block is uncompressed, use memcpy() instead
+ If block is a single repeated byte, use memset() instead )
+
+The first step is to obtain the normalized frequencies of symbols.
+This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount().
+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
+In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
+or size the table to handle worst case situations (typically 256).
+FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
+The result of FSE_readNCount() is the number of bytes read from 'rBuffer'.
+Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that.
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+
+The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'.
+This is performed by the function FSE_buildDTable().
+The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+
+'FSE_DTable' can then be used to decompress 'cSrc', with FSE_decompress_usingDTable().
+'cSrcSize' must be strictly correct, otherwise decompression will fail.
+FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=maxDstSize).
+If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
+*/
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* FSE_H */
diff --git a/lib/fse_static.h b/lib/fse_static.h
new file mode 100644
index 0000000..01e2566
--- /dev/null
+++ b/lib/fse_static.h
@@ -0,0 +1,310 @@
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   header file for static linking (only)
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef FSE_STATIC_H
+#define FSE_STATIC_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/******************************************
+*  FSE API compatible with DLL
+******************************************/
+#include "fse.h"
+#include "bitstream.h"
+
+
+/******************************************
+*  Static allocation
+******************************************/
+/* FSE buffer bounds */
+#define FSE_NCOUNTBOUND 512
+#define FSE_BLOCKBOUND(size) (size + (size>>7))
+#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
+#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
+#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
+
+
+/******************************************
+*  FSE advanced API
+******************************************/
+size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const unsigned char* src, size_t srcSize);
+/* same as FSE_count(), but blindly trust that all values within src are <= maxSymbolValuePtr[0] */
+
+size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
+/* build a fake FSE_CTable, designed to not compress an input, where each symbol uses nbBits */
+
+size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
+/* build a fake FSE_CTable, designed to compress always the same symbolValue */
+
+size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
+/* build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
+
+size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
+/* build a fake FSE_DTable, designed to always generate the same symbolValue */
+
+
+/******************************************
+*  FSE symbol compression API
+******************************************/
+/*
+   This API consists of small unitary functions, which highly benefit from being inlined.
+   You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
+   Visual seems to do it automatically.
+   For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
+   If none of these solutions is applicable, include "fse.c" directly.
+*/
+typedef struct
+{
+    ptrdiff_t   value;
+    const void* stateTable;
+    const void* symbolTT;
+    unsigned    stateLog;
+} FSE_CState_t;
+
+static void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct);
+
+static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol);
+
+static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr);
+
+/*
+These functions are inner components of FSE_compress_usingCTable().
+They allow the creation of custom streams, mixing multiple tables and bit sources.
+
+A key property to keep in mind is that encoding and decoding are done **in reverse direction**.
+So the first symbol you will encode is the last you will decode, like a LIFO stack.
+
+You will need a few variables to track your CStream. They are :
+
+FSE_CTable    ct;         // Provided by FSE_buildCTable()
+BIT_CStream_t bitStream;  // bitStream tracking structure
+FSE_CState_t  state;      // State tracking structure (can have several)
+
+
+The first thing to do is to init bitStream and state.
+    size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize);
+    FSE_initCState(&state, ct);
+
+Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError();
+You can then encode your input data, byte after byte.
+FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time.
+Remember decoding will be done in reverse direction.
+    FSE_encodeByte(&bitStream, &state, symbol);
+
+At any time, you can also add any bit sequence.
+Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders
+    BIT_addBits(&bitStream, bitField, nbBits);
+
+The above methods don't commit data to memory, they just store it into local register, for speed.
+Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+Writing data to memory is a manual operation, performed by the flushBits function.
+    BIT_flushBits(&bitStream);
+
+Your last FSE encoding operation shall be to flush your last state value(s).
+    FSE_flushState(&bitStream, &state);
+
+Finally, you must close the bitStream.
+The function returns the size of CStream in bytes.
+If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible)
+If there is an error, it returns an errorCode (which can be tested using FSE_isError()).
+    size_t size = BIT_closeCStream(&bitStream);
+*/
+
+
+/******************************************
+*  FSE symbol decompression API
+******************************************/
+typedef struct
+{
+    size_t      state;
+    const void* table;   /* precise table may vary, depending on U16 */
+} FSE_DState_t;
+
+
+static void     FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
+
+static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+
+static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
+
+/*
+Let's now decompose FSE_decompress_usingDTable() into its unitary components.
+You will decode FSE-encoded symbols from the bitStream,
+and also any other bitFields you put in, **in reverse order**.
+
+You will need a few variables to track your bitStream. They are :
+
+BIT_DStream_t DStream;    // Stream context
+FSE_DState_t  DState;     // State context. Multiple ones are possible
+FSE_DTable*   DTablePtr;  // Decoding table, provided by FSE_buildDTable()
+
+The first thing to do is to init the bitStream.
+    errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
+
+You should then retrieve your initial state(s)
+(in reverse flushing order if you have several ones) :
+    errorCode = FSE_initDState(&DState, &DStream, DTablePtr);
+
+You can then decode your data, symbol after symbol.
+For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
+Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
+    unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
+
+You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
+Note : maximum allowed nbBits is 25, for 32-bits compatibility
+    size_t bitField = BIT_readBits(&DStream, nbBits);
+
+All above operations only read from local register (which size depends on size_t).
+Refueling the register from memory is manually performed by the reload method.
+    endSignal = FSE_reloadDStream(&DStream);
+
+BIT_reloadDStream() result tells if there is still some more data to read from DStream.
+BIT_DStream_unfinished : there is still some data left into the DStream.
+BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
+BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
+BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
+
+When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+    BIT_reloadDStream(&DStream) >= BIT_DStream_completed
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+    BIT_endOfDStream(&DStream);
+Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
+    FSE_endOfDState(&DState);
+*/
+
+
+/******************************************
+*  FSE unsafe API
+******************************************/
+static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/******************************************
+*  Implementation of inline functions
+******************************************/
+typedef struct
+{
+    int deltaFindState;
+    U32 deltaNbBits;
+} FSE_symbolCompressionTransform; /* total 8 bytes */
+
+MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct)
+{
+    const U32 tableLog = ( (const U16*) ct) [0];
+    statePtr->value = (ptrdiff_t)1<<tableLog;
+    statePtr->stateTable = ((const U16*) ct) + 2;
+    statePtr->symbolTT = (const void*)((const U32*)ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1));
+    statePtr->stateLog = tableLog;
+}
+
+MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol)
+{
+    const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
+    const U16* const stateTable = (const U16*)(statePtr->stateTable);
+    U32 nbBitsOut  = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
+    BIT_addBits(bitC, statePtr->value, nbBitsOut);
+    statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
+}
+
+MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr)
+{
+    BIT_addBits(bitC, statePtr->value, statePtr->stateLog);
+    BIT_flushBits(bitC);
+}
+
+/* decompression */
+
+typedef struct {
+    U16 tableLog;
+    U16 fastMode;
+} FSE_DTableHeader;   /* sizeof U32 */
+
+typedef struct
+{
+    unsigned short newState;
+    unsigned char  symbol;
+    unsigned char  nbBits;
+} FSE_decode_t;   /* size == U32 */
+
+MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
+{
+    const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)dt;
+    DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
+    BIT_reloadDStream(bitD);
+    DStatePtr->table = dt + 1;
+}
+
+MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    const U32  nbBits = DInfo.nbBits;
+    BYTE symbol = DInfo.symbol;
+    size_t lowBits = BIT_readBits(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    const U32 nbBits = DInfo.nbBits;
+    BYTE symbol = DInfo.symbol;
+    size_t lowBits = BIT_readBitsFast(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
+{
+    return DStatePtr->state == 0;
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* FSE_STATIC_H */
diff --git a/lib/huff0.c b/lib/huff0.c
new file mode 100644
index 0000000..3ddb7e5
--- /dev/null
+++ b/lib/huff0.c
@@ -0,0 +1,1718 @@
+/* ******************************************************************
+   Huff0 : Huffman coder, part of New Generation Entropy library
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE+Huff0 source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+/****************************************************************
+*  Compiler specifics
+****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+/* inline is defined */
+#elif defined(_MSC_VER)
+#  define inline __inline
+#else
+#  define inline /* disable inline */
+#endif
+
+
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#else
+#  ifdef __GNUC__
+#    define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#    define FORCE_INLINE static inline __attribute__((always_inline))
+#  else
+#    define FORCE_INLINE static inline
+#  endif
+#endif
+
+
+/****************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+#include "huff0_static.h"
+#include "bitstream.h"
+#include "fse.h"        /* header compression */
+
+
+/****************************************************************
+*  Constants
+****************************************************************/
+#define HUF_ABSOLUTEMAX_TABLELOG  16   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
+#define HUF_MAX_TABLELOG  12           /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
+#define HUF_DEFAULT_TABLELOG  HUF_MAX_TABLELOG   /* tableLog by default, when not specified */
+#define HUF_MAX_SYMBOL_VALUE 255
+#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
+#  error "HUF_MAX_TABLELOG is too large !"
+#endif
+
+
+/****************************************************************
+*  Error Management
+****************************************************************/
+#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/******************************************
+*  Helper functions
+******************************************/
+unsigned HUF_isError(size_t code) { return ERR_isError(code); }
+
+const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+
+/*********************************************************
+*  Huff0 : Huffman block compression
+*********************************************************/
+typedef struct HUF_CElt_s {
+  U16  val;
+  BYTE nbBits;
+} HUF_CElt ;
+
+typedef struct nodeElt_s {
+    U32 count;
+    U16 parent;
+    BYTE byte;
+    BYTE nbBits;
+} nodeElt;
+
+/*! HUF_writeCTable() :
+    @dst : destination buffer
+    @CTable : huffman tree to save, using huff0 representation
+    @return : size of saved CTable */
+size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog)
+{
+    BYTE bitsToWeight[HUF_MAX_TABLELOG + 1];
+    BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 n;
+    BYTE* op = (BYTE*)dst;
+    size_t size;
+
+     /* check conditions */
+    if (maxSymbolValue > HUF_MAX_SYMBOL_VALUE + 1)
+        return ERROR(GENERIC);
+
+    /* convert to weight */
+    bitsToWeight[0] = 0;
+    for (n=1; n<=huffLog; n++)
+        bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
+    for (n=0; n<maxSymbolValue; n++)
+        huffWeight[n] = bitsToWeight[CTable[n].nbBits];
+
+    size = FSE_compress(op+1, maxDstSize-1, huffWeight, maxSymbolValue);   /* don't need last symbol stat : implied */
+    if (HUF_isError(size)) return size;
+    if (size >= 128) return ERROR(GENERIC);   /* should never happen, since maxSymbolValue <= 255 */
+    if ((size <= 1) || (size >= maxSymbolValue/2))
+    {
+        if (size==1)   /* RLE */
+        {
+            /* only possible case : serie of 1 (because there are at least 2) */
+            /* can only be 2^n or (2^n-1), otherwise not an huffman tree */
+            BYTE code;
+            switch(maxSymbolValue)
+            {
+            case 1: code = 0; break;
+            case 2: code = 1; break;
+            case 3: code = 2; break;
+            case 4: code = 3; break;
+            case 7: code = 4; break;
+            case 8: code = 5; break;
+            case 15: code = 6; break;
+            case 16: code = 7; break;
+            case 31: code = 8; break;
+            case 32: code = 9; break;
+            case 63: code = 10; break;
+            case 64: code = 11; break;
+            case 127: code = 12; break;
+            case 128: code = 13; break;
+            default : return ERROR(corruption_detected);
+            }
+            op[0] = (BYTE)(255-13 + code);
+            return 1;
+        }
+         /* Not compressible */
+        if (maxSymbolValue > (241-128)) return ERROR(GENERIC);   /* not implemented (not possible with current format) */
+        if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
+        op[0] = (BYTE)(128 /*special case*/ + 0 /* Not Compressible */ + (maxSymbolValue-1));
+        huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause issue in final combination */
+        for (n=0; n<maxSymbolValue; n+=2)
+            op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
+        return ((maxSymbolValue+1)/2) + 1;
+    }
+
+    /* normal header case */
+    op[0] = (BYTE)size;
+    return size+1;
+}
+
+
+static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
+{
+    int totalCost = 0;
+    const U32 largestBits = huffNode[lastNonNull].nbBits;
+
+    /* early exit : all is fine */
+    if (largestBits <= maxNbBits) return largestBits;
+
+    /* there are several too large elements (at least >= 2) */
+    {
+        const U32 baseCost = 1 << (largestBits - maxNbBits);
+        U32 n = lastNonNull;
+
+        while (huffNode[n].nbBits > maxNbBits)
+        {
+            totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
+            huffNode[n].nbBits = (BYTE)maxNbBits;
+            n --;
+        } /* n stops at huffNode[n].nbBits <= maxNbBits */
+        while (huffNode[n].nbBits == maxNbBits) n--;   /* n end at index of smallest symbol using (maxNbBits-1) */
+
+        /* renorm totalCost */
+        totalCost >>= (largestBits - maxNbBits);  /* note : totalCost is necessarily a multiple of baseCost */
+
+        /* repay normalized cost */
+        {
+            const U32 noSymbol = 0xF0F0F0F0;
+            U32 rankLast[HUF_MAX_TABLELOG+1];
+            U32 currentNbBits = maxNbBits;
+            int pos;
+
+            /* Get pos of last (smallest) symbol per rank */
+            memset(rankLast, 0xF0, sizeof(rankLast));
+            for (pos=n ; pos >= 0; pos--)
+            {
+                if (huffNode[pos].nbBits >= currentNbBits) continue;
+                currentNbBits = huffNode[pos].nbBits;   /* < maxNbBits */
+                rankLast[maxNbBits-currentNbBits] = pos;
+            }
+
+            while (totalCost > 0)
+            {
+                U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
+                for ( ; nBitsToDecrease > 1; nBitsToDecrease--)
+                {
+                    U32 highPos = rankLast[nBitsToDecrease];
+                    U32 lowPos = rankLast[nBitsToDecrease-1];
+                    if (highPos == noSymbol) continue;
+                    if (lowPos == noSymbol) break;
+                    {
+                        U32 highTotal = huffNode[highPos].count;
+                        U32 lowTotal = 2 * huffNode[lowPos].count;
+                        if (highTotal <= lowTotal) break;
+                    }
+                }
+                /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
+                while ((nBitsToDecrease<=HUF_MAX_TABLELOG) && (rankLast[nBitsToDecrease] == noSymbol))  /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
+                    nBitsToDecrease ++;
+                totalCost -= 1 << (nBitsToDecrease-1);
+                if (rankLast[nBitsToDecrease-1] == noSymbol)
+                    rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];   /* this rank is no longer empty */
+                huffNode[rankLast[nBitsToDecrease]].nbBits ++;
+                if (rankLast[nBitsToDecrease] == 0)    /* special case, reached largest symbol */
+                    rankLast[nBitsToDecrease] = noSymbol;
+                else
+                {
+                    rankLast[nBitsToDecrease]--;
+                    if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
+                        rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
+                }
+            }
+
+            while (totalCost < 0)   /* Sometimes, cost correction overshoot */
+            {
+                if (rankLast[1] == noSymbol)   /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
+                {
+                    while (huffNode[n].nbBits == maxNbBits) n--;
+                    huffNode[n+1].nbBits--;
+                    rankLast[1] = n+1;
+                    totalCost++;
+                    continue;
+                }
+                huffNode[ rankLast[1] + 1 ].nbBits--;
+                rankLast[1]++;
+                totalCost ++;
+            }
+        }
+    }
+
+    return maxNbBits;
+}
+
+
+typedef struct {
+    U32 base;
+    U32 current;
+} rankPos;
+
+static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
+{
+    rankPos rank[32];
+    U32 n;
+
+    memset(rank, 0, sizeof(rank));
+    for (n=0; n<=maxSymbolValue; n++)
+    {
+        U32 r = BIT_highbit32(count[n] + 1);
+        rank[r].base ++;
+    }
+    for (n=30; n>0; n--) rank[n-1].base += rank[n].base;
+    for (n=0; n<32; n++) rank[n].current = rank[n].base;
+    for (n=0; n<=maxSymbolValue; n++)
+    {
+        U32 c = count[n];
+        U32 r = BIT_highbit32(c+1) + 1;
+        U32 pos = rank[r].current++;
+        while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) huffNode[pos]=huffNode[pos-1], pos--;
+        huffNode[pos].count = c;
+        huffNode[pos].byte  = (BYTE)n;
+    }
+}
+
+
+#define STARTNODE (HUF_MAX_SYMBOL_VALUE+1)
+size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
+{
+    nodeElt huffNode0[2*HUF_MAX_SYMBOL_VALUE+1 +1];
+    nodeElt* huffNode = huffNode0 + 1;
+    U32 n, nonNullRank;
+    int lowS, lowN;
+    U16 nodeNb = STARTNODE;
+    U32 nodeRoot;
+
+    /* safety checks */
+    if (maxNbBits == 0) maxNbBits = HUF_DEFAULT_TABLELOG;
+    if (maxSymbolValue > HUF_MAX_SYMBOL_VALUE) return ERROR(GENERIC);
+    memset(huffNode0, 0, sizeof(huffNode0));
+
+    /* sort, decreasing order */
+    HUF_sort(huffNode, count, maxSymbolValue);
+
+    // init for parents
+    nonNullRank = maxSymbolValue;
+    while(huffNode[nonNullRank].count == 0) nonNullRank--;
+    lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
+    huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
+    huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
+    nodeNb++; lowS-=2;
+    for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
+    huffNode0[0].count = (U32)(1U<<31);
+
+    // create parents
+    while (nodeNb <= nodeRoot)
+    {
+        U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
+        U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
+        huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
+        huffNode[n1].parent = huffNode[n2].parent = nodeNb;
+        nodeNb++;
+    }
+
+    // distribute weights (unlimited tree height)
+    huffNode[nodeRoot].nbBits = 0;
+    for (n=nodeRoot-1; n>=STARTNODE; n--)
+        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
+    for (n=0; n<=nonNullRank; n++)
+        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
+
+    /* enforce maxTableLog */
+    maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
+
+    /* fill result into tree (val, nbBits) */
+    {
+        U16 nbPerRank[HUF_MAX_TABLELOG+1] = {0};
+        U16 valPerRank[HUF_MAX_TABLELOG+1] = {0};
+        if (maxNbBits > HUF_MAX_TABLELOG) return ERROR(GENERIC);   /* check fit into table */
+        for (n=0; n<=nonNullRank; n++)
+            nbPerRank[huffNode[n].nbBits]++;
+        {
+            /* determine stating value per rank */
+            U16 min = 0;
+            for (n=maxNbBits; n>0; n--)
+            {
+                valPerRank[n] = min;      /* get starting value within each rank */
+                min += nbPerRank[n];
+                min >>= 1;
+            }
+        }
+        for (n=0; n<=maxSymbolValue; n++)
+            tree[huffNode[n].byte].nbBits = huffNode[n].nbBits;   // push nbBits per symbol, symbol order
+        for (n=0; n<=maxSymbolValue; n++)
+            tree[n].val = valPerRank[tree[n].nbBits]++;   // assign value within rank, symbol order
+    }
+
+    return maxNbBits;
+}
+
+static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
+{
+    BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
+}
+
+size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
+
+#define HUF_FLUSHBITS(s)  (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
+
+#define HUF_FLUSHBITS_1(stream) \
+    if (sizeof((stream)->bitContainer)*8 < HUF_MAX_TABLELOG*2+7) HUF_FLUSHBITS(stream)
+
+#define HUF_FLUSHBITS_2(stream) \
+    if (sizeof((stream)->bitContainer)*8 < HUF_MAX_TABLELOG*4+7) HUF_FLUSHBITS(stream)
+
+size_t HUF_compress_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+{
+    const BYTE* ip = (const BYTE*) src;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + dstSize;
+    size_t n;
+    const unsigned fast = (dstSize >= HUF_BLOCKBOUND(srcSize));
+    size_t errorCode;
+    BIT_CStream_t bitC;
+
+    /* init */
+    if (dstSize < 8) return 0;   /* not enough space to compress */
+    errorCode = BIT_initCStream(&bitC, op, oend-op);
+    if (HUF_isError(errorCode)) return 0;
+
+    n = srcSize & ~3;  /* join to mod 4 */
+    switch (srcSize & 3)
+    {
+        case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
+                 HUF_FLUSHBITS_2(&bitC);
+        case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
+                 HUF_FLUSHBITS_1(&bitC);
+        case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
+                 HUF_FLUSHBITS(&bitC);
+        case 0 :
+        default: ;
+    }
+
+    for (; n>0; n-=4)   /* note : n&3==0 at this stage */
+    {
+        HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
+        HUF_FLUSHBITS_1(&bitC);
+        HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
+        HUF_FLUSHBITS_2(&bitC);
+        HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
+        HUF_FLUSHBITS_1(&bitC);
+        HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
+        HUF_FLUSHBITS(&bitC);
+    }
+
+    return BIT_closeCStream(&bitC);
+}
+
+
+static size_t HUF_compress_into4Segments(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+{
+    size_t segmentSize = (srcSize+3)/4;   /* first 3 segments */
+    size_t errorCode;
+    const BYTE* ip = (const BYTE*) src;
+    const BYTE* const iend = ip + srcSize;
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + dstSize;
+
+    if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
+    if (srcSize < 12) return 0;   /* no saving possible : too small input */
+    op += 6;   /* jumpTable */
+
+    errorCode = HUF_compress_usingCTable(op, oend-op, ip, segmentSize, CTable);
+    if (HUF_isError(errorCode)) return errorCode;
+    if (errorCode==0) return 0;
+    MEM_writeLE16(ostart, (U16)errorCode);
+
+    ip += segmentSize;
+    op += errorCode;
+    errorCode = HUF_compress_usingCTable(op, oend-op, ip, segmentSize, CTable);
+    if (HUF_isError(errorCode)) return errorCode;
+    if (errorCode==0) return 0;
+    MEM_writeLE16(ostart+2, (U16)errorCode);
+
+    ip += segmentSize;
+    op += errorCode;
+    errorCode = HUF_compress_usingCTable(op, oend-op, ip, segmentSize, CTable);
+    if (HUF_isError(errorCode)) return errorCode;
+    if (errorCode==0) return 0;
+    MEM_writeLE16(ostart+4, (U16)errorCode);
+
+    ip += segmentSize;
+    op += errorCode;
+    errorCode = HUF_compress_usingCTable(op, oend-op, ip, iend-ip, CTable);
+    if (HUF_isError(errorCode)) return errorCode;
+    if (errorCode==0) return 0;
+
+    op += errorCode;
+    return op-ostart;
+}
+
+
+size_t HUF_compress2 (void* dst, size_t dstSize,
+                const void* src, size_t srcSize,
+                unsigned maxSymbolValue, unsigned huffLog)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + dstSize;
+
+    U32 count[HUF_MAX_SYMBOL_VALUE+1];
+    HUF_CElt CTable[HUF_MAX_SYMBOL_VALUE+1];
+    size_t errorCode;
+
+    /* checks & inits */
+    if (srcSize < 1) return 0;  /* Uncompressed */
+    if (dstSize < 1) return 0;  /* not compressible within dst budget */
+    if (srcSize > 128 * 1024) return ERROR(srcSize_wrong);   /* current block size limit */
+    if (huffLog > HUF_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    if (!maxSymbolValue) maxSymbolValue = HUF_MAX_SYMBOL_VALUE;
+    if (!huffLog) huffLog = HUF_DEFAULT_TABLELOG;
+
+    /* Scan input and build symbol stats */
+    errorCode = FSE_count (count, &maxSymbolValue, (const BYTE*)src, srcSize);
+    if (HUF_isError(errorCode)) return errorCode;
+    if (errorCode == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }
+    if (errorCode <= (srcSize >> 7)+1) return 0;   /* Heuristic : not compressible enough */
+
+    /* Build Huffman Tree */
+    errorCode = HUF_buildCTable (CTable, count, maxSymbolValue, huffLog);
+    if (HUF_isError(errorCode)) return errorCode;
+    huffLog = (U32)errorCode;
+
+    /* Write table description header */
+    errorCode = HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog);
+    if (HUF_isError(errorCode)) return errorCode;
+    if (errorCode + 12 >= srcSize) return 0;   /* not useful to try compression */
+    op += errorCode;
+
+    /* Compress */
+    //errorCode = HUF_compress_usingCTable(op, oend - op, src, srcSize, CTable);   /* single segment */
+    errorCode = HUF_compress_into4Segments(op, oend - op, src, srcSize, CTable);
+    if (HUF_isError(errorCode)) return errorCode;
+    if (errorCode==0) return 0;
+    op += errorCode;
+
+    /* check compressibility */
+    if ((size_t)(op-ostart) >= srcSize-1)
+        return 0;
+
+    return op-ostart;
+}
+
+size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_DEFAULT_TABLELOG);
+}
+
+
+/*********************************************************
+*  Huff0 : Huffman block decompression
+*********************************************************/
+typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2;   /* single-symbol decoding */
+
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4;  /* double-symbols decoding */
+
+typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
+
+/*! HUF_readStats
+    Read compact Huffman tree, saved by HUF_writeCTable
+    @huffWeight : destination buffer
+    @return : size read from `src`
+*/
+static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                            U32* nbSymbolsPtr, U32* tableLogPtr,
+                            const void* src, size_t srcSize)
+{
+    U32 weightTotal;
+    U32 tableLog;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize = ip[0];
+    size_t oSize;
+    U32 n;
+
+    //memset(huffWeight, 0, hwSize);   /* is not necessary, even though some analyzer complain ... */
+
+    if (iSize >= 128)  /* special header */
+    {
+        if (iSize >= (242))   /* RLE */
+        {
+            static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
+            oSize = l[iSize-242];
+            memset(huffWeight, 1, hwSize);
+            iSize = 0;
+        }
+        else   /* Incompressible */
+        {
+            oSize = iSize - 127;
+            iSize = ((oSize+1)/2);
+            if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+            if (oSize >= hwSize) return ERROR(corruption_detected);
+            ip += 1;
+            for (n=0; n<oSize; n+=2)
+            {
+                huffWeight[n]   = ip[n/2] >> 4;
+                huffWeight[n+1] = ip[n/2] & 15;
+            }
+        }
+    }
+    else  /* header compressed with FSE (normal case) */
+    {
+        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+        oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize);   /* max (hwSize-1) values decoded, as last one is implied */
+        if (FSE_isError(oSize)) return oSize;
+    }
+
+    /* collect weight stats */
+    memset(rankStats, 0, (HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32));
+    weightTotal = 0;
+    for (n=0; n<oSize; n++)
+    {
+        if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+        rankStats[huffWeight[n]]++;
+        weightTotal += (1 << huffWeight[n]) >> 1;
+    }
+
+    /* get last non-null symbol weight (implied, total must be 2^n) */
+    tableLog = BIT_highbit32(weightTotal) + 1;
+    if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+    {
+        U32 total = 1 << tableLog;
+        U32 rest = total - weightTotal;
+        U32 verif = 1 << BIT_highbit32(rest);
+        U32 lastWeight = BIT_highbit32(rest) + 1;
+        if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
+        huffWeight[oSize] = (BYTE)lastWeight;
+        rankStats[lastWeight]++;
+    }
+
+    /* check tree construction validity */
+    if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected);   /* by construction : at least 2 elts of rank 1, must be even */
+
+    /* results */
+    *nbSymbolsPtr = (U32)(oSize+1);
+    *tableLogPtr = tableLog;
+    return iSize+1;
+}
+
+
+/**************************/
+/* single-symbol decoding */
+/**************************/
+
+size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
+{
+    BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];   /* large enough for values from 0 to 16 */
+    U32 tableLog = 0;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize = ip[0];
+    U32 nbSymbols = 0;
+    U32 n;
+    U32 nextRankStart;
+    HUF_DEltX2* const dt = (HUF_DEltX2*)(DTable + 1);
+
+    HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U16));   /* if compilation fails here, assertion is false */
+    //memset(huffWeight, 0, sizeof(huffWeight));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > DTable[0]) return ERROR(tableLog_tooLarge);   /* DTable is too small */
+    DTable[0] = (U16)tableLog;   /* maybe should separate sizeof DTable, as allocated, from used size of DTable, in case of DTable re-use */
+
+    /* Prepare ranks */
+    nextRankStart = 0;
+    for (n=1; n<=tableLog; n++)
+    {
+        U32 current = nextRankStart;
+        nextRankStart += (rankVal[n] << (n-1));
+        rankVal[n] = current;
+    }
+
+    /* fill DTable */
+    for (n=0; n<nbSymbols; n++)
+    {
+        const U32 w = huffWeight[n];
+        const U32 length = (1 << w) >> 1;
+        U32 i;
+        HUF_DEltX2 D;
+        D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
+        for (i = rankVal[w]; i < rankVal[w] + length; i++)
+            dt[i] = D;
+        rankVal[w] += length;
+    }
+
+    return iSize;
+}
+
+static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+        const size_t val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+        const BYTE c = dt[val].byte;
+        BIT_skipBits(Dstream, dt[val].nbBits);
+        return c;
+}
+
+#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
+    *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 4 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4))
+    {
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+    }
+
+    /* closer to the end */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    /* no more data to retrieve from bitstream, hence no need to reload */
+    while (p < pEnd)
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    return pEnd-pStart;
+}
+
+size_t HUF_decompress1X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U16* DTable)
+{
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + dstSize;
+    size_t errorCode;
+    const U32 dtLog = DTable[0];
+    const HUF_DEltX2* const dt = ((const HUF_DEltX2*)DTable) +1;
+    BIT_DStream_t bitD;
+    errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
+    if (HUF_isError(errorCode)) return errorCode;
+
+    HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog);
+
+    /* check */
+    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+    return dstSize;
+}
+
+size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+    size_t errorCode;
+
+    errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize);
+    if (HUF_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    return HUF_decompress1X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+size_t HUF_decompress4X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U16* DTable)
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+
+        const HUF_DEltX2* const dt = ((const HUF_DEltX2*)DTable) +1;
+        const U32 dtLog = DTable[0];
+        size_t errorCode;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BIT_initDStream(&bitD1, istart1, length1);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD2, istart2, length2);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD3, istart3, length3);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD4, istart4, length4);
+        if (HUF_isError(errorCode)) return errorCode;
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
+        {
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+    size_t errorCode;
+
+    errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize);
+    if (HUF_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    return HUF_decompress4X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/***************************/
+/* double-symbols decoding */
+/***************************/
+
+static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
+                           const U32* rankValOrigin, const int minWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+                           U32 nbBitsBaseline, U16 baseSeq)
+{
+    HUF_DEltX4 DElt;
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+    U32 s;
+
+    /* get pre-calculated rankVal */
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill skipped values */
+    if (minWeight>1)
+    {
+        U32 i, skipSize = rankVal[minWeight];
+        MEM_writeLE16(&(DElt.sequence), baseSeq);
+        DElt.nbBits   = (BYTE)(consumed);
+        DElt.length   = 1;
+        for (i = 0; i < skipSize; i++)
+            DTable[i] = DElt;
+    }
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++)   /* note : sortedSymbols already skipped */
+    {
+        const U32 symbol = sortedSymbols[s].symbol;
+        const U32 weight = sortedSymbols[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 length = 1 << (sizeLog-nbBits);
+        const U32 start = rankVal[weight];
+        U32 i = start;
+        const U32 end = start + length;
+
+        MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
+        DElt.nbBits = (BYTE)(nbBits + consumed);
+        DElt.length = 2;
+        do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
+
+        rankVal[weight] += length;
+    }
+}
+
+typedef U32 rankVal_t[HUF_ABSOLUTEMAX_TABLELOG][HUF_ABSOLUTEMAX_TABLELOG + 1];
+
+static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
+                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
+                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+                           const U32 nbBitsBaseline)
+{
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+    const U32 minBits  = nbBitsBaseline - maxWeight;
+    U32 s;
+
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++)
+    {
+        const U16 symbol = sortedList[s].symbol;
+        const U32 weight = sortedList[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 start = rankVal[weight];
+        const U32 length = 1 << (targetLog-nbBits);
+
+        if (targetLog-nbBits >= minBits)   /* enough room for a second symbol */
+        {
+            U32 sortedRank;
+            int minWeight = nbBits + scaleLog;
+            if (minWeight < 1) minWeight = 1;
+            sortedRank = rankStart[minWeight];
+            HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
+                           rankValOrigin[nbBits], minWeight,
+                           sortedList+sortedRank, sortedListSize-sortedRank,
+                           nbBitsBaseline, symbol);
+        }
+        else
+        {
+            U32 i;
+            const U32 end = start + length;
+            HUF_DEltX4 DElt;
+
+            MEM_writeLE16(&(DElt.sequence), symbol);
+            DElt.nbBits   = (BYTE)(nbBits);
+            DElt.length   = 1;
+            for (i = start; i < end; i++)
+                DTable[i] = DElt;
+        }
+        rankVal[weight] += length;
+    }
+}
+
+size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
+{
+    BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1];
+    sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
+    U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
+    U32* const rankStart = rankStart0+1;
+    rankVal_t rankVal;
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    const U32 memLog = DTable[0];
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize = ip[0];
+    HUF_DEltX4* const dt = ((HUF_DEltX4*)DTable) + 1;
+
+    HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32));   /* if compilation fails here, assertion is false */
+    if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > memLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
+
+    /* find maxWeight */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+
+    /* Get start index of each weight */
+    {
+        U32 w, nextRankStart = 0;
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankStart;
+            nextRankStart += rankStats[w];
+            rankStart[w] = current;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
+        sizeOfSort = nextRankStart;
+    }
+
+    /* sort symbols by weight */
+    {
+        U32 s;
+        for (s=0; s<nbSymbols; s++)
+        {
+            U32 w = weightList[s];
+            U32 r = rankStart[w]++;
+            sortedSymbol[r].symbol = (BYTE)s;
+            sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+    /* Build rankVal */
+    {
+        const U32 minBits = tableLog+1 - maxW;
+        U32 nextRankVal = 0;
+        U32 w, consumed;
+        const int rescale = (memLog-tableLog) - 1;   /* tableLog <= memLog */
+        U32* rankVal0 = rankVal[0];
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankVal;
+            nextRankVal += rankStats[w] << (w+rescale);
+            rankVal0[w] = current;
+        }
+        for (consumed = minBits; consumed <= memLog - minBits; consumed++)
+        {
+            U32* rankValPtr = rankVal[consumed];
+            for (w = 1; w <= maxW; w++)
+            {
+                rankValPtr[w] = rankVal0[w] >> consumed;
+            }
+        }
+    }
+
+    HUF_fillDTableX4(dt, memLog,
+                   sortedSymbol, sizeOfSort,
+                   rankStart0, rankVal, maxW,
+                   tableLog+1);
+
+    return iSize;
+}
+
+
+static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+    const size_t val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, dt+val, 2);
+    BIT_skipBits(DStream, dt[val].nbBits);
+    return dt[val].length;
+}
+
+static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+    const size_t val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, dt+val, 1);
+    if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
+    else
+    {
+        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8))
+        {
+            BIT_skipBits(DStream, dt[val].nbBits);
+            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);   /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+        }
+    }
+    return 1;
+}
+
+
+#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
+    ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 8 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd-7))
+    {
+        HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+    }
+
+    /* closer to the end */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-2))
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+
+    while (p <= pEnd-2)
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
+
+    if (p < pEnd)
+        p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
+
+    return p-pStart;
+}
+
+
+size_t HUF_decompress1X4_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U32* DTable)
+{
+    const BYTE* const istart = (const BYTE*) cSrc;
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* const oend = ostart + dstSize;
+
+    const U32 dtLog = DTable[0];
+    const HUF_DEltX4* const dt = ((const HUF_DEltX4*)DTable) +1;
+    size_t errorCode;
+
+    /* Init */
+    BIT_DStream_t bitD;
+    errorCode = BIT_initDStream(&bitD, istart, cSrcSize);
+    if (HUF_isError(errorCode)) return errorCode;
+
+    /* finish bitStreams one by one */
+    HUF_decodeStreamX4(ostart, &bitD, oend,     dt, dtLog);
+
+    /* check */
+    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+    /* decoded size */
+    return dstSize;
+}
+
+size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize;
+    cSrcSize -= hSize;
+
+    return HUF_decompress1X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+size_t HUF_decompress4X4_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U32* DTable)
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+
+        const HUF_DEltX4* const dt = ((const HUF_DEltX4*)DTable) +1;
+        const U32 dtLog = DTable[0];
+        size_t errorCode;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BIT_initDStream(&bitD1, istart1, length1);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD2, istart2, length2);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD3, istart3, length3);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD4, istart4, length4);
+        if (HUF_isError(errorCode)) return errorCode;
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
+        {
+            HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
+
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX4(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize;
+    cSrcSize -= hSize;
+
+    return HUF_decompress4X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/**********************************/
+/* quad-symbol decoding           */
+/**********************************/
+typedef struct { BYTE nbBits; BYTE nbBytes; } HUF_DDescX6;
+typedef union { BYTE byte[4]; U32 sequence; } HUF_DSeqX6;
+
+/* recursive, up to level 3; may benefit from <template>-like strategy to nest each level inline */
+static void HUF_fillDTableX6LevelN(HUF_DDescX6* DDescription, HUF_DSeqX6* DSequence, int sizeLog,
+                           const rankVal_t rankValOrigin, const U32 consumed, const int minWeight, const U32 maxWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, const U32* rankStart,
+                           const U32 nbBitsBaseline, HUF_DSeqX6 baseSeq, HUF_DDescX6 DDesc)
+{
+    const int scaleLog = nbBitsBaseline - sizeLog;   /* note : targetLog >= (nbBitsBaseline-1), hence scaleLog <= 1 */
+    const int minBits  = nbBitsBaseline - maxWeight;
+    const U32 level = DDesc.nbBytes;
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+    U32 symbolStartPos, s;
+
+    /* local rankVal, will be modified */
+    memcpy(rankVal, rankValOrigin[consumed], sizeof(rankVal));
+
+    /* fill skipped values */
+    if (minWeight>1)
+    {
+        U32 i;
+        const U32 skipSize = rankVal[minWeight];
+        for (i = 0; i < skipSize; i++)
+        {
+            DSequence[i] = baseSeq;
+            DDescription[i] = DDesc;
+        }
+    }
+
+    /* fill DTable */
+    DDesc.nbBytes++;
+    symbolStartPos = rankStart[minWeight];
+    for (s=symbolStartPos; s<sortedListSize; s++)
+    {
+        const BYTE symbol = sortedSymbols[s].symbol;
+        const U32  weight = sortedSymbols[s].weight;   /* >= 1 (sorted) */
+        const int  nbBits = nbBitsBaseline - weight;   /* >= 1 (by construction) */
+        const int  totalBits = consumed+nbBits;
+        const U32  start  = rankVal[weight];
+        const U32  length = 1 << (sizeLog-nbBits);
+        baseSeq.byte[level] = symbol;
+        DDesc.nbBits = (BYTE)totalBits;
+
+        if ((level<3) && (sizeLog-totalBits >= minBits))   /* enough room for another symbol */
+        {
+            int nextMinWeight = totalBits + scaleLog;
+            if (nextMinWeight < 1) nextMinWeight = 1;
+            HUF_fillDTableX6LevelN(DDescription+start, DSequence+start, sizeLog-nbBits,
+                           rankValOrigin, totalBits, nextMinWeight, maxWeight,
+                           sortedSymbols, sortedListSize, rankStart,
+                           nbBitsBaseline, baseSeq, DDesc);   /* recursive (max : level 3) */
+        }
+        else
+        {
+            U32 i;
+            const U32 end = start + length;
+            for (i = start; i < end; i++)
+            {
+                DDescription[i] = DDesc;
+                DSequence[i] = baseSeq;
+            }
+        }
+        rankVal[weight] += length;
+    }
+}
+
+
+/* note : same preparation as X4 */
+size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
+{
+    BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1];
+    sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
+    U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
+    U32* const rankStart = rankStart0+1;
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    rankVal_t rankVal;
+    const U32 memLog = DTable[0];
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize = ip[0];
+
+    if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > memLog) return ERROR(tableLog_tooLarge);   /* DTable is too small */
+
+    /* find maxWeight */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+
+    /* Get start index of each weight */
+    {
+        U32 w, nextRankStart = 0;
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankStart;
+            nextRankStart += rankStats[w];
+            rankStart[w] = current;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
+        sizeOfSort = nextRankStart;
+    }
+
+    /* sort symbols by weight */
+    {
+        U32 s;
+        for (s=0; s<nbSymbols; s++)
+        {
+            U32 w = weightList[s];
+            U32 r = rankStart[w]++;
+            sortedSymbol[r].symbol = (BYTE)s;
+            sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+    /* Build rankVal */
+    {
+        const U32 minBits = tableLog+1 - maxW;
+        U32 nextRankVal = 0;
+        U32 w, consumed;
+        const int rescale = (memLog-tableLog) - 1;   /* tableLog <= memLog */
+        U32* rankVal0 = rankVal[0];
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankVal;
+            nextRankVal += rankStats[w] << (w+rescale);
+            rankVal0[w] = current;
+        }
+        for (consumed = minBits; consumed <= memLog - minBits; consumed++)
+        {
+            U32* rankValPtr = rankVal[consumed];
+            for (w = 1; w <= maxW; w++)
+            {
+                rankValPtr[w] = rankVal0[w] >> consumed;
+            }
+        }
+    }
+
+    /* fill tables */
+    {
+        HUF_DDescX6* DDescription = (HUF_DDescX6*)(DTable+1);
+        HUF_DSeqX6* DSequence = (HUF_DSeqX6*)(DTable + 1 + ((size_t)1<<(memLog-1)));
+        HUF_DSeqX6 DSeq;
+        HUF_DDescX6 DDesc;
+        DSeq.sequence = 0;
+        DDesc.nbBits = 0;
+        DDesc.nbBytes = 0;
+        HUF_fillDTableX6LevelN(DDescription, DSequence, memLog,
+                       (const U32 (*)[HUF_ABSOLUTEMAX_TABLELOG + 1])rankVal, 0, 1, maxW,
+                       sortedSymbol, sizeOfSort, rankStart0,
+                       tableLog+1, DSeq, DDesc);
+    }
+
+    return iSize;
+}
+
+
+static U32 HUF_decodeSymbolX6(void* op, BIT_DStream_t* DStream, const HUF_DDescX6* dd, const HUF_DSeqX6* ds, const U32 dtLog)
+{
+    const size_t val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, ds+val, sizeof(HUF_DSeqX6));
+    BIT_skipBits(DStream, dd[val].nbBits);
+    return dd[val].nbBytes;
+}
+
+static U32 HUF_decodeLastSymbolsX6(void* op, const U32 maxL, BIT_DStream_t* DStream,
+                                  const HUF_DDescX6* dd, const HUF_DSeqX6* ds, const U32 dtLog)
+{
+    const size_t val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    U32 length = dd[val].nbBytes;
+    if (length <= maxL)
+    {
+        memcpy(op, ds+val, length);
+        BIT_skipBits(DStream, dd[val].nbBits);
+        return length;
+    }
+    memcpy(op, ds+val, maxL);
+    if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8))
+    {
+        BIT_skipBits(DStream, dd[val].nbBits);
+        if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+            DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);   /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+    }
+    return maxL;
+}
+
+
+#define HUF_DECODE_SYMBOLX6_0(ptr, DStreamPtr) \
+    ptr += HUF_decodeSymbolX6(ptr, DStreamPtr, dd, ds, dtLog)
+
+#define HUF_DECODE_SYMBOLX6_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+        HUF_DECODE_SYMBOLX6_0(ptr, DStreamPtr)
+
+#define HUF_DECODE_SYMBOLX6_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        HUF_DECODE_SYMBOLX6_0(ptr, DStreamPtr)
+
+static inline size_t HUF_decodeStreamX6(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const U32* DTable, const U32 dtLog)
+{
+    const HUF_DDescX6* dd = (const HUF_DDescX6*)(DTable+1);
+    const HUF_DSeqX6* ds = (const HUF_DSeqX6*)(DTable + 1 + ((size_t)1<<(dtLog-1)));
+    BYTE* const pStart = p;
+
+    /* up to 16 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-16))
+    {
+        HUF_DECODE_SYMBOLX6_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX6_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX6_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX6_0(p, bitDPtr);
+    }
+
+    /* closer to the end, up to 4 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4))
+        HUF_DECODE_SYMBOLX6_0(p, bitDPtr);
+
+    while ((BIT_reloadDStream(bitDPtr) <= BIT_DStream_endOfBuffer) && (p < pEnd))
+        p += HUF_decodeLastSymbolsX6(p, (U32)(pEnd-p), bitDPtr, dd, ds, dtLog);
+
+    return p-pStart;
+}
+
+
+size_t HUF_decompress1X6_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U32* DTable)
+{
+    const BYTE* const istart = (const BYTE*) cSrc;
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* const oend = ostart + dstSize;
+
+    const U32 dtLog = DTable[0];
+    size_t errorCode;
+
+    /* Init */
+    BIT_DStream_t bitD;
+    errorCode = BIT_initDStream(&bitD, istart, cSrcSize);
+    if (HUF_isError(errorCode)) return errorCode;
+
+    /* finish bitStreams one by one */
+    HUF_decodeStreamX6(ostart, &bitD, oend, DTable, dtLog);
+
+    /* check */
+    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+    /* decoded size */
+    return dstSize;
+}
+
+size_t HUF_decompress1X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX6(DTable, HUF_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t hSize = HUF_readDTableX6 (DTable, cSrc, cSrcSize);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize;
+    cSrcSize -= hSize;
+
+    return HUF_decompress1X6_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+size_t HUF_decompress4X6_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U32* DTable)
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+
+        const U32 dtLog = DTable[0];
+        const HUF_DDescX6* dd = (const HUF_DDescX6*)(DTable+1);
+        const HUF_DSeqX6* ds = (const HUF_DSeqX6*)(DTable + 1 + ((size_t)1<<(dtLog-1)));
+        size_t errorCode;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BIT_initDStream(&bitD1, istart1, length1);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD2, istart2, length2);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD3, istart3, length3);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD4, istart4, length4);
+        if (HUF_isError(errorCode)) return errorCode;
+
+        /* 16-64 symbols per loop (4-16 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (op3 <= opStart4) && (endSignal==BIT_DStream_unfinished) && (op4<=(oend-16)) ; )
+        {
+            HUF_DECODE_SYMBOLX6_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX6_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX6_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX6_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX6_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX6_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX6_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX6_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX6_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX6_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX6_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX6_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX6_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX6_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX6_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX6_0(op4, &bitD4);
+
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX6(op1, &bitD1, opStart2, DTable, dtLog);
+        HUF_decodeStreamX6(op2, &bitD2, opStart3, DTable, dtLog);
+        HUF_decodeStreamX6(op3, &bitD3, opStart4, DTable, dtLog);
+        HUF_decodeStreamX6(op4, &bitD4, oend,     DTable, dtLog);
+
+        /* check */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX6(DTable, HUF_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t hSize = HUF_readDTableX6 (DTable, cSrc, cSrcSize);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize;
+    cSrcSize -= hSize;
+
+    return HUF_decompress4X6_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/**********************************/
+/* Generic decompression selector */
+/**********************************/
+
+typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
+static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
+{
+    /* single, double, quad */
+    {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
+    {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
+    {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
+    {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
+    {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
+    {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
+    {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
+    {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
+    {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
+    {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
+    {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
+    {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
+    {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
+    {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
+    {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
+    {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
+};
+
+typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+
+size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    static const decompressionAlgo decompress[3] = { HUF_decompress4X2, HUF_decompress4X4, HUF_decompress4X6 };
+    /* estimate decompression time */
+    U32 Q;
+    const U32 D256 = (U32)(dstSize >> 8);
+    U32 Dtime[3];
+    U32 algoNb = 0;
+    int n;
+
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
+    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
+    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
+
+    /* decoder timing evaluation */
+    Q = (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 since dstSize > cSrcSize */
+    for (n=0; n<3; n++)
+        Dtime[n] = algoTime[Q][n].tableTime + (algoTime[Q][n].decode256Time * D256);
+
+    Dtime[1] += Dtime[1] >> 4; Dtime[2] += Dtime[2] >> 3; /* advantage to algorithms using less memory, for cache eviction */
+
+    if (Dtime[1] < Dtime[0]) algoNb = 1;
+    if (Dtime[2] < Dtime[algoNb]) algoNb = 2;
+
+    return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
+
+    //return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);   /* multi-streams single-symbol decoding */
+    //return HUF_decompress4X4(dst, dstSize, cSrc, cSrcSize);   /* multi-streams double-symbols decoding */
+    //return HUF_decompress4X6(dst, dstSize, cSrc, cSrcSize);   /* multi-streams quad-symbols decoding */
+}
diff --git a/lib/huff0.h b/lib/huff0.h
new file mode 100644
index 0000000..613d9b9
--- /dev/null
+++ b/lib/huff0.h
@@ -0,0 +1,96 @@
+/* ******************************************************************
+   Huff0 : Huffman coder, part of New Generation Entropy library
+   header file
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef HUFF0_H
+#define HUFF0_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/******************************************
+*  Dependency
+******************************************/
+#include <stddef.h>    /* size_t */
+
+
+/******************************************
+*  Huff0 simple functions
+******************************************/
+size_t HUF_compress(void* dst, size_t maxDstSize,
+              const void* src, size_t srcSize);
+size_t HUF_decompress(void* dst,  size_t dstSize,
+                const void* cSrc, size_t cSrcSize);
+/*
+HUF_compress():
+    Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
+    'dst' buffer must be already allocated. Compression runs faster if maxDstSize >= HUF_compressBound(srcSize).
+    Note : srcSize must be <= 128 KB
+    return : size of compressed data (<= maxDstSize)
+    Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
+                     if return == 1, srcData is a single repeated byte symbol (RLE compression)
+                     if HUF_isError(return), compression failed (more details using HUF_getErrorName())
+
+HUF_decompress():
+    Decompress Huff0 data from buffer 'cSrc', of size 'cSrcSize',
+    into already allocated destination buffer 'dst', of size 'dstSize'.
+    'dstSize' must be the exact size of original (uncompressed) data.
+    Note : in contrast with FSE, HUF_decompress can regenerate RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, because it knows size to regenerate.
+    return : size of regenerated data (== dstSize)
+             or an error code, which can be tested using HUF_isError()
+*/
+
+
+/******************************************
+*  Tool functions
+******************************************/
+size_t HUF_compressBound(size_t size);       /* maximum compressed size */
+
+/* Error Management */
+unsigned    HUF_isError(size_t code);        /* tells if a return value is an error code */
+const char* HUF_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
+
+
+/******************************************
+*  Advanced functions
+******************************************/
+size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif   /* HUFF0_H */
diff --git a/lib/huff0_static.h b/lib/huff0_static.h
new file mode 100644
index 0000000..e8af19e
--- /dev/null
+++ b/lib/huff0_static.h
@@ -0,0 +1,79 @@
+/* ******************************************************************
+   Huff0 : Huffman coder, part of New Generation Entropy library
+   header file for static linking (only)
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef HUFF0_STATIC_H
+#define HUFF0_STATIC_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/******************************************
+*  Dependency
+******************************************/
+#include "huff0.h"
+
+
+/******************************************
+*  Static allocation macros
+******************************************/
+/* Huff0 buffer bounds */
+#define HUF_CTABLEBOUND 129
+#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8)   /* only true if incompressible pre-filtered with fast heuristic */
+#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* static allocation of Huff0's DTable */
+#define HUF_DTABLE_SIZE(maxTableLog)   (1 + (1<<maxTableLog))  /* nb Cells; use unsigned short for X2, unsigned int for X4 */
+#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
+        unsigned short DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
+        unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUF_CREATE_STATIC_DTABLEX6(DTable, maxTableLog) \
+        unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog) * 3 / 2] = { maxTableLog }
+
+
+/******************************************
+*  Advanced functions
+******************************************/
+size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
+size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbols decoder */
+size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* quad-symbols decoder */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* HUFF0_STATIC_H */
diff --git a/lib/legacy/zstd_legacy.h b/lib/legacy/zstd_legacy.h
new file mode 100644
index 0000000..308d134
--- /dev/null
+++ b/lib/legacy/zstd_legacy.h
@@ -0,0 +1,85 @@
+/*
+    zstd_legacy - decoder for legacy format
+    Header File
+    Copyright (C) 2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#ifndef ZSTD_LEGACY_H
+#define ZSTD_LEGACY_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include "mem.h"        /* MEM_STATIC */
+#include "error.h"      /* ERROR */
+#include "zstd_v01.h"
+#include "zstd_v02.h"
+#include "zstd_v03.h"
+
+MEM_STATIC unsigned ZSTD_isLegacy (U32 magicNumberLE)
+{
+	switch(magicNumberLE)
+	{
+		case ZSTDv01_magicNumberLE :
+		case ZSTDv02_magicNumber : 
+		case ZSTDv03_magicNumber : return 1;
+		default : return 0;
+	}
+}
+
+
+MEM_STATIC size_t ZSTD_decompressLegacy(
+                     void* dst, size_t maxOriginalSize,
+               const void* src, size_t compressedSize,
+                     U32 magicNumberLE)
+{
+	switch(magicNumberLE)
+	{
+		case ZSTDv01_magicNumberLE :
+			return ZSTDv01_decompress(dst, maxOriginalSize, src, compressedSize);
+		case ZSTDv02_magicNumber :
+			return ZSTDv02_decompress(dst, maxOriginalSize, src, compressedSize);
+		case ZSTDv03_magicNumber :
+			return ZSTDv03_decompress(dst, maxOriginalSize, src, compressedSize);
+		default :
+		    return ERROR(prefix_unknown);
+	}
+}
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif   /* ZSTD_LEGACY_H */
diff --git a/lib/legacy/zstd_v01.c b/lib/legacy/zstd_v01.c
new file mode 100644
index 0000000..ccd57d5
--- /dev/null
+++ b/lib/legacy/zstd_v01.c
@@ -0,0 +1,2171 @@
+/* ******************************************************************
+   ZSTD_v01
+   Zstandard decoder, compatible with v0.1.x format
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+/******************************************
+*  Includes
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include "zstd_v01.h"
+
+
+/******************************************
+*  Static allocation
+******************************************/
+/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
+#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
+
+/* You can statically allocate Huff0 DTable as a table of unsigned short using below macro */
+#define HUF_DTABLE_SIZE_U16(maxTableLog)   (1 + (1<<maxTableLog))
+#define HUF_CREATE_STATIC_DTABLE(DTable, maxTableLog) \
+        unsigned short DTable[HUF_DTABLE_SIZE_U16(maxTableLog)] = { maxTableLog }
+
+
+/******************************************
+*  Error Management
+******************************************/
+#define FSE_LIST_ERRORS(ITEM) \
+        ITEM(FSE_OK_NoError) ITEM(FSE_ERROR_GENERIC) \
+        ITEM(FSE_ERROR_tableLog_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooSmall) \
+        ITEM(FSE_ERROR_dstSize_tooSmall) ITEM(FSE_ERROR_srcSize_wrong)\
+        ITEM(FSE_ERROR_corruptionDetected) \
+        ITEM(FSE_ERROR_maxCode)
+
+#define FSE_GENERATE_ENUM(ENUM) ENUM,
+typedef enum { FSE_LIST_ERRORS(FSE_GENERATE_ENUM) } FSE_errorCodes;  /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
+
+
+/******************************************
+*  FSE symbol compression API
+******************************************/
+/*
+   This API consists of small unitary functions, which highly benefit from being inlined.
+   You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
+   Visual seems to do it automatically.
+   For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
+   If none of these solutions is applicable, include "fse.c" directly.
+*/
+
+typedef unsigned FSE_CTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+
+typedef struct
+{
+    size_t bitContainer;
+    int    bitPos;
+    char*  startPtr;
+    char*  ptr;
+    char*  endPtr;
+} FSE_CStream_t;
+
+typedef struct
+{
+    ptrdiff_t   value;
+    const void* stateTable;
+    const void* symbolTT;
+    unsigned    stateLog;
+} FSE_CState_t;
+
+typedef struct
+{
+    size_t   bitContainer;
+    unsigned bitsConsumed;
+    const char* ptr;
+    const char* start;
+} FSE_DStream_t;
+
+typedef struct
+{
+    size_t      state;
+    const void* table;   /* precise table may vary, depending on U16 */
+} FSE_DState_t;
+
+typedef enum { FSE_DStream_unfinished = 0,
+               FSE_DStream_endOfBuffer = 1,
+               FSE_DStream_completed = 2,
+               FSE_DStream_tooFar = 3 } FSE_DStream_status;  /* result of FSE_reloadDStream() */
+               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... ?! */
+
+
+/****************************************************************
+*  Tuning parameters
+****************************************************************/
+/* MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#define FSE_MAX_MEMORY_USAGE 14
+#define FSE_DEFAULT_MEMORY_USAGE 13
+
+/* FSE_MAX_SYMBOL_VALUE :
+*  Maximum symbol value authorized.
+*  Required for proper stack allocation */
+#define FSE_MAX_SYMBOL_VALUE 255
+
+
+/****************************************************************
+*  template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+
+
+/****************************************************************
+*  Byte symbol type
+****************************************************************/
+typedef struct
+{
+    unsigned short newState;
+    unsigned char  symbol;
+    unsigned char  nbBits;
+} FSE_decode_t;   /* size == U32 */
+
+
+
+/****************************************************************
+*  Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
+#else
+#  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#  ifdef __GNUC__
+#    define FORCE_INLINE static inline __attribute__((always_inline))
+#  else
+#    define FORCE_INLINE static inline
+#  endif
+#endif
+
+
+/****************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+
+
+#ifndef MEM_ACCESS_MODULE
+#define MEM_ACCESS_MODULE
+/****************************************************************
+*  Basic Types
+*****************************************************************/
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+# include <stdint.h>
+typedef  uint8_t BYTE;
+typedef uint16_t U16;
+typedef  int16_t S16;
+typedef uint32_t U32;
+typedef  int32_t S32;
+typedef uint64_t U64;
+typedef  int64_t S64;
+#else
+typedef unsigned char       BYTE;
+typedef unsigned short      U16;
+typedef   signed short      S16;
+typedef unsigned int        U32;
+typedef   signed int        S32;
+typedef unsigned long long  U64;
+typedef   signed long long  S64;
+#endif
+
+#endif   /* MEM_ACCESS_MODULE */
+
+/****************************************************************
+*  Memory I/O
+*****************************************************************/
+/* FSE_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The below switch allow to select different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violate C standard.
+ *            It can generate buggy code on targets generating assembly depending on alignment.
+ *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef FSE_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define FSE_FORCE_MEMORY_ACCESS 2
+#  elif defined(__INTEL_COMPILER) || \
+  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+#    define FSE_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+
+static unsigned FSE_32bits(void)
+{
+    return sizeof(void*)==4;
+}
+
+static unsigned FSE_isLittleEndian(void)
+{
+    const union { U32 i; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
+    return one.c[0];
+}
+
+#if defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==2)
+
+static U16 FSE_read16(const void* memPtr) { return *(const U16*) memPtr; }
+static U32 FSE_read32(const void* memPtr) { return *(const U32*) memPtr; }
+static U64 FSE_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+#elif defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==1)
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
+
+static U16 FSE_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
+static U32 FSE_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+static U64 FSE_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+
+#else
+
+static U16 FSE_read16(const void* memPtr)
+{
+    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static U32 FSE_read32(const void* memPtr)
+{
+    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static U64 FSE_read64(const void* memPtr)
+{
+    U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+#endif // FSE_FORCE_MEMORY_ACCESS
+
+static U16 FSE_readLE16(const void* memPtr)
+{
+    if (FSE_isLittleEndian())
+        return FSE_read16(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U16)(p[0] + (p[1]<<8));
+    }
+}
+
+static U32 FSE_readLE32(const void* memPtr)
+{
+    if (FSE_isLittleEndian())
+        return FSE_read32(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
+    }
+}
+
+
+static U64 FSE_readLE64(const void* memPtr)
+{
+    if (FSE_isLittleEndian())
+        return FSE_read64(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
+                     + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
+    }
+}
+
+static size_t FSE_readLEST(const void* memPtr)
+{
+    if (FSE_32bits())
+        return (size_t)FSE_readLE32(memPtr);
+    else
+        return (size_t)FSE_readLE64(memPtr);
+}
+
+
+
+/****************************************************************
+*  Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+
+/****************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/****************************************************************
+*  Complex types
+****************************************************************/
+typedef struct
+{
+    int deltaFindState;
+    U32 deltaNbBits;
+} FSE_symbolCompressionTransform; /* total 8 bytes */
+
+typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+
+/****************************************************************
+*  Internal functions
+****************************************************************/
+FORCE_INLINE unsigned FSE_highbit32 (register U32 val)
+{
+#   if defined(_MSC_VER)   /* Visual */
+    unsigned long r;
+    _BitScanReverse ( &r, val );
+    return (unsigned) r;
+#   elif defined(__GNUC__) && (GCC_VERSION >= 304)   /* GCC Intrinsic */
+    return 31 - __builtin_clz (val);
+#   else   /* Software version */
+    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+    U32 v = val;
+    unsigned r;
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
+    return r;
+#   endif
+}
+
+
+/****************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+
+static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
+
+#define FSE_DECODE_TYPE FSE_TYPE_NAME(FSE_decode_t, FSE_FUNCTION_EXTENSION)
+
+
+typedef struct {
+    U16 tableLog;
+    U16 fastMode;
+} FSE_DTableHeader;   /* sizeof U32 */
+
+static size_t FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION)
+(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
+    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (dt+1);   /* because dt is unsigned, 32-bits aligned on 32-bits */
+    const U32 tableSize = 1 << tableLog;
+    const U32 tableMask = tableSize-1;
+    const U32 step = FSE_tableStep(tableSize);
+    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
+    U32 position = 0;
+    U32 highThreshold = tableSize-1;
+    const S16 largeLimit= (S16)(1 << (tableLog-1));
+    U32 noLarge = 1;
+    U32 s;
+
+    /* Sanity Checks */
+    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
+    if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;
+
+    /* Init, lay down lowprob symbols */
+    DTableH[0].tableLog = (U16)tableLog;
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        if (normalizedCounter[s]==-1)
+        {
+            tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
+            symbolNext[s] = 1;
+        }
+        else
+        {
+            if (normalizedCounter[s] >= largeLimit) noLarge=0;
+            symbolNext[s] = normalizedCounter[s];
+        }
+    }
+
+    /* Spread symbols */
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        int i;
+        for (i=0; i<normalizedCounter[s]; i++)
+        {
+            tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+            position = (position + step) & tableMask;
+            while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }
+    }
+
+    if (position!=0) return (size_t)-FSE_ERROR_GENERIC;   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+
+    /* Build Decoding table */
+    {
+        U32 i;
+        for (i=0; i<tableSize; i++)
+        {
+            FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
+            U16 nextState = symbolNext[symbol]++;
+            tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
+            tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
+        }
+    }
+
+    DTableH->fastMode = (U16)noLarge;
+    return 0;
+}
+
+
+/******************************************
+*  FSE byte symbol
+******************************************/
+#ifndef FSE_COMMONDEFS_ONLY
+
+static unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); }
+
+static short FSE_abs(short a)
+{
+    return a<0? -a : a;
+}
+
+
+/****************************************************************
+*  Header bitstream management
+****************************************************************/
+static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                 const void* headerBuffer, size_t hbSize)
+{
+    const BYTE* const istart = (const BYTE*) headerBuffer;
+    const BYTE* const iend = istart + hbSize;
+    const BYTE* ip = istart;
+    int nbBits;
+    int remaining;
+    int threshold;
+    U32 bitStream;
+    int bitCount;
+    unsigned charnum = 0;
+    int previous0 = 0;
+
+    if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong;
+    bitStream = FSE_readLE32(ip);
+    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
+    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge;
+    bitStream >>= 4;
+    bitCount = 4;
+    *tableLogPtr = nbBits;
+    remaining = (1<<nbBits)+1;
+    threshold = 1<<nbBits;
+    nbBits++;
+
+    while ((remaining>1) && (charnum<=*maxSVPtr))
+    {
+        if (previous0)
+        {
+            unsigned n0 = charnum;
+            while ((bitStream & 0xFFFF) == 0xFFFF)
+            {
+                n0+=24;
+                if (ip < iend-5)
+                {
+                    ip+=2;
+                    bitStream = FSE_readLE32(ip) >> bitCount;
+                }
+                else
+                {
+                    bitStream >>= 16;
+                    bitCount+=16;
+                }
+            }
+            while ((bitStream & 3) == 3)
+            {
+                n0+=3;
+                bitStream>>=2;
+                bitCount+=2;
+            }
+            n0 += bitStream & 3;
+            bitCount += 2;
+            if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall;
+            while (charnum < n0) normalizedCounter[charnum++] = 0;
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+            {
+                ip += bitCount>>3;
+                bitCount &= 7;
+                bitStream = FSE_readLE32(ip) >> bitCount;
+            }
+            else
+                bitStream >>= 2;
+        }
+        {
+            const short max = (short)((2*threshold-1)-remaining);
+            short count;
+
+            if ((bitStream & (threshold-1)) < (U32)max)
+            {
+                count = (short)(bitStream & (threshold-1));
+                bitCount   += nbBits-1;
+            }
+            else
+            {
+                count = (short)(bitStream & (2*threshold-1));
+                if (count >= threshold) count -= max;
+                bitCount   += nbBits;
+            }
+
+            count--;   /* extra accuracy */
+            remaining -= FSE_abs(count);
+            normalizedCounter[charnum++] = count;
+            previous0 = !count;
+            while (remaining < threshold)
+            {
+                nbBits--;
+                threshold >>= 1;
+            }
+
+            {
+                if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+                {
+                    ip += bitCount>>3;
+                    bitCount &= 7;
+                }
+                else
+                {
+                    bitCount -= (int)(8 * (iend - 4 - ip));
+                    ip = iend - 4;
+                }
+                bitStream = FSE_readLE32(ip) >> (bitCount & 31);
+            }
+        }
+    }
+    if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC;
+    *maxSVPtr = charnum-1;
+
+    ip += (bitCount+7)>>3;
+    if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong;
+    return ip-istart;
+}
+
+
+/*********************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
+{
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
+    FSE_decode_t* const cell = (FSE_decode_t*)(dt + 1);   /* because dt is unsigned */
+
+    DTableH->tableLog = 0;
+    DTableH->fastMode = 0;
+
+    cell->newState = 0;
+    cell->symbol = symbolValue;
+    cell->nbBits = 0;
+
+    return 0;
+}
+
+
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
+{
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
+    FSE_decode_t* const dinfo = (FSE_decode_t*)(dt + 1);   /* because dt is unsigned */
+    const unsigned tableSize = 1 << nbBits;
+    const unsigned tableMask = tableSize - 1;
+    const unsigned maxSymbolValue = tableMask;
+    unsigned s;
+
+    /* Sanity checks */
+    if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC;             /* min size */
+
+    /* Build Decoding Table */
+    DTableH->tableLog = (U16)nbBits;
+    DTableH->fastMode = 1;
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        dinfo[s].newState = 0;
+        dinfo[s].symbol = (BYTE)s;
+        dinfo[s].nbBits = (BYTE)nbBits;
+    }
+
+    return 0;
+}
+
+
+/* FSE_initDStream
+ * Initialize a FSE_DStream_t.
+ * srcBuffer must point at the beginning of an FSE block.
+ * The function result is the size of the FSE_block (== srcSize).
+ * If srcSize is too small, the function will return an errorCode;
+ */
+static size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+    if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong;
+
+    if (srcSize >=  sizeof(size_t))
+    {
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(size_t);
+        bitD->bitContainer = FSE_readLEST(bitD->ptr);
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+        if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC;   /* stop bit not present */
+        bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
+    }
+    else
+    {
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = bitD->start;
+        bitD->bitContainer = *(const BYTE*)(bitD->start);
+        switch(srcSize)
+        {
+            case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
+            case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
+            case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
+            case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
+            case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
+            case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) <<  8;
+            default:;
+        }
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+        if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC;   /* stop bit not present */
+        bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
+        bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
+    }
+
+    return srcSize;
+}
+
+
+/* FSE_lookBits
+ * Provides next n bits from the bitContainer.
+ * bitContainer is not modified (bits are still present for next read/look)
+ * On 32-bits, maxNbBits==25
+ * On 64-bits, maxNbBits==57
+ * return : value extracted.
+ */
+static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits)
+{
+    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+    return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
+}
+
+static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits)   /* only if nbBits >= 1 !! */
+{
+    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+    return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
+}
+
+static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits)
+{
+    bitD->bitsConsumed += nbBits;
+}
+
+
+/* FSE_readBits
+ * Read next n bits from the bitContainer.
+ * On 32-bits, don't read more than maxNbBits==25
+ * On 64-bits, don't read more than maxNbBits==57
+ * Use the fast variant *only* if n >= 1.
+ * return : value extracted.
+ */
+static size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits)
+{
+    size_t value = FSE_lookBits(bitD, nbBits);
+    FSE_skipBits(bitD, nbBits);
+    return value;
+}
+
+static size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits)   /* only if nbBits >= 1 !! */
+{
+    size_t value = FSE_lookBitsFast(bitD, nbBits);
+    FSE_skipBits(bitD, nbBits);
+    return value;
+}
+
+static unsigned FSE_reloadDStream(FSE_DStream_t* bitD)
+{
+    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should never happen */
+        return FSE_DStream_tooFar;
+
+    if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
+    {
+        bitD->ptr -= bitD->bitsConsumed >> 3;
+        bitD->bitsConsumed &= 7;
+        bitD->bitContainer = FSE_readLEST(bitD->ptr);
+        return FSE_DStream_unfinished;
+    }
+    if (bitD->ptr == bitD->start)
+    {
+        if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_endOfBuffer;
+        return FSE_DStream_completed;
+    }
+    {
+        U32 nbBytes = bitD->bitsConsumed >> 3;
+        U32 result = FSE_DStream_unfinished;
+        if (bitD->ptr - nbBytes < bitD->start)
+        {
+            nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
+            result = FSE_DStream_endOfBuffer;
+        }
+        bitD->ptr -= nbBytes;
+        bitD->bitsConsumed -= nbBytes*8;
+        bitD->bitContainer = FSE_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD) */
+        return result;
+    }
+}
+
+
+static void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt)
+{
+    const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)dt;
+    DStatePtr->state = FSE_readBits(bitD, DTableH->tableLog);
+    FSE_reloadDStream(bitD);
+    DStatePtr->table = dt + 1;
+}
+
+static BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
+{
+    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    const U32  nbBits = DInfo.nbBits;
+    BYTE symbol = DInfo.symbol;
+    size_t lowBits = FSE_readBits(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+static BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
+{
+    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    const U32 nbBits = DInfo.nbBits;
+    BYTE symbol = DInfo.symbol;
+    size_t lowBits = FSE_readBitsFast(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+/* FSE_endOfDStream
+   Tells if bitD has reached end of bitStream or not */
+
+static unsigned FSE_endOfDStream(const FSE_DStream_t* bitD)
+{
+    return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8));
+}
+
+static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
+{
+    return DStatePtr->state == 0;
+}
+
+
+FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const FSE_DTable* dt, const unsigned fast)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = omax-3;
+
+    FSE_DStream_t bitD;
+    FSE_DState_t state1;
+    FSE_DState_t state2;
+    size_t errorCode;
+
+    /* Init */
+    errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
+    if (FSE_isError(errorCode)) return errorCode;
+
+    FSE_initDState(&state1, &bitD, dt);
+    FSE_initDState(&state2, &bitD, dt);
+
+#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
+
+    /* 4 symbols per loop */
+    for ( ; (FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op<olimit) ; op+=4)
+    {
+        op[0] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            FSE_reloadDStream(&bitD);
+
+        op[1] = FSE_GETSYMBOL(&state2);
+
+        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            { if (FSE_reloadDStream(&bitD) > FSE_DStream_unfinished) { op+=2; break; } }
+
+        op[2] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            FSE_reloadDStream(&bitD);
+
+        op[3] = FSE_GETSYMBOL(&state2);
+    }
+
+    /* tail */
+    /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly FSE_DStream_completed */
+    while (1)
+    {
+        if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state1);
+
+        if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state2);
+    }
+
+    /* end ? */
+    if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
+        return op-ostart;
+
+    if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
+
+    return (size_t)-FSE_ERROR_corruptionDetected;
+}
+
+
+static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
+                            const void* cSrc, size_t cSrcSize,
+                            const FSE_DTable* dt)
+{
+    const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)dt;
+    const U32 fastMode = DTableH->fastMode;
+
+    /* select fast mode (static) */
+    if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+}
+
+
+static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{
+    const BYTE* const istart = (const BYTE*)cSrc;
+    const BYTE* ip = istart;
+    short counting[FSE_MAX_SYMBOL_VALUE+1];
+    DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
+    unsigned tableLog;
+    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+    size_t errorCode;
+
+    if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong;   /* too small input size */
+
+    /* normal FSE decoding mode */
+    errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
+    if (FSE_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;   /* too small input size */
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
+    if (FSE_isError(errorCode)) return errorCode;
+
+    /* always return, even if it is an error code */
+    return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
+}
+
+
+
+/*********************************************************
+*  Huff0 : Huffman block compression
+*********************************************************/
+#define HUF_MAX_SYMBOL_VALUE 255
+#define HUF_DEFAULT_TABLELOG  12       /* used by default, when not specified */
+#define HUF_MAX_TABLELOG  12           /* max possible tableLog; for allocation purpose; can be modified */
+#define HUF_ABSOLUTEMAX_TABLELOG  16   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
+#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
+#  error "HUF_MAX_TABLELOG is too large !"
+#endif
+
+typedef struct HUF_CElt_s {
+  U16  val;
+  BYTE nbBits;
+} HUF_CElt ;
+
+typedef struct nodeElt_s {
+    U32 count;
+    U16 parent;
+    BYTE byte;
+    BYTE nbBits;
+} nodeElt;
+
+
+/*********************************************************
+*  Huff0 : Huffman block decompression
+*********************************************************/
+typedef struct {
+    BYTE byte;
+    BYTE nbBits;
+} HUF_DElt;
+
+static size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize)
+{
+    BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];  /* large enough for values from 0 to 16 */
+    U32 weightTotal;
+    U32 maxBits;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize = ip[0];
+    size_t oSize;
+    U32 n;
+    U32 nextRankStart;
+    HUF_DElt* const dt = (HUF_DElt*)(DTable + 1);
+
+    FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16));   /* if compilation fails here, assertion is false */
+    //memset(huffWeight, 0, sizeof(huffWeight));   /* should not be necessary, but some analyzer complain ... */
+    if (iSize >= 128)  /* special header */
+    {
+        if (iSize >= (242))   /* RLE */
+        {
+            static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
+            oSize = l[iSize-242];
+            memset(huffWeight, 1, sizeof(huffWeight));
+            iSize = 0;
+        }
+        else   /* Incompressible */
+        {
+            oSize = iSize - 127;
+            iSize = ((oSize+1)/2);
+            if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
+            ip += 1;
+            for (n=0; n<oSize; n+=2)
+            {
+                huffWeight[n]   = ip[n/2] >> 4;
+                huffWeight[n+1] = ip[n/2] & 15;
+            }
+        }
+    }
+    else  /* header compressed with FSE (normal case) */
+    {
+        if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
+        oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize);   /* max 255 values decoded, last one is implied */
+        if (FSE_isError(oSize)) return oSize;
+    }
+
+    /* collect weight stats */
+    memset(rankVal, 0, sizeof(rankVal));
+    weightTotal = 0;
+    for (n=0; n<oSize; n++)
+    {
+        if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_corruptionDetected;
+        rankVal[huffWeight[n]]++;
+        weightTotal += (1 << huffWeight[n]) >> 1;
+    }
+
+    /* get last non-null symbol weight (implied, total must be 2^n) */
+    maxBits = FSE_highbit32(weightTotal) + 1;
+    if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_tableLog_tooLarge;   /* DTable is too small */
+    DTable[0] = (U16)maxBits;
+    {
+        U32 total = 1 << maxBits;
+        U32 rest = total - weightTotal;
+        U32 verif = 1 << FSE_highbit32(rest);
+        U32 lastWeight = FSE_highbit32(rest) + 1;
+        if (verif != rest) return (size_t)-FSE_ERROR_corruptionDetected;    /* last value must be a clean power of 2 */
+        huffWeight[oSize] = (BYTE)lastWeight;
+        rankVal[lastWeight]++;
+    }
+
+    /* check tree construction validity */
+    if ((rankVal[1] < 2) || (rankVal[1] & 1)) return (size_t)-FSE_ERROR_corruptionDetected;   /* by construction : at least 2 elts of rank 1, must be even */
+
+    /* Prepare ranks */
+    nextRankStart = 0;
+    for (n=1; n<=maxBits; n++)
+    {
+        U32 current = nextRankStart;
+        nextRankStart += (rankVal[n] << (n-1));
+        rankVal[n] = current;
+    }
+
+    /* fill DTable */
+    for (n=0; n<=oSize; n++)
+    {
+        const U32 w = huffWeight[n];
+        const U32 length = (1 << w) >> 1;
+        U32 i;
+        HUF_DElt D;
+        D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w);
+        for (i = rankVal[w]; i < rankVal[w] + length; i++)
+            dt[i] = D;
+        rankVal[w] += length;
+    }
+
+    return iSize+1;
+}
+
+
+static BYTE HUF_decodeSymbol(FSE_DStream_t* Dstream, const HUF_DElt* dt, const U32 dtLog)
+{
+        const size_t val = FSE_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+        const BYTE c = dt[val].byte;
+        FSE_skipBits(Dstream, dt[val].nbBits);
+        return c;
+}
+
+static size_t HUF_decompress_usingDTable(   /* -3% slower when non static */
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U16* DTable)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = omax-15;
+
+    const HUF_DElt* const dt = (const HUF_DElt*)(DTable+1);
+    const U32 dtLog = DTable[0];
+    size_t errorCode;
+    U32 reloadStatus;
+
+    /* Init */
+
+    const U16* jumpTable = (const U16*)cSrc;
+    const size_t length1 = FSE_readLE16(jumpTable);
+    const size_t length2 = FSE_readLE16(jumpTable+1);
+    const size_t length3 = FSE_readLE16(jumpTable+2);
+    const size_t length4 = cSrcSize - 6 - length1 - length2 - length3;   // check coherency !!
+    const char* const start1 = (const char*)(cSrc) + 6;
+    const char* const start2 = start1 + length1;
+    const char* const start3 = start2 + length2;
+    const char* const start4 = start3 + length3;
+    FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
+
+    if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
+
+    errorCode = FSE_initDStream(&bitD1, start1, length1);
+    if (FSE_isError(errorCode)) return errorCode;
+    errorCode = FSE_initDStream(&bitD2, start2, length2);
+    if (FSE_isError(errorCode)) return errorCode;
+    errorCode = FSE_initDStream(&bitD3, start3, length3);
+    if (FSE_isError(errorCode)) return errorCode;
+    errorCode = FSE_initDStream(&bitD4, start4, length4);
+    if (FSE_isError(errorCode)) return errorCode;
+
+    reloadStatus=FSE_reloadDStream(&bitD2);
+
+    /* 16 symbols per loop */
+    for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit);  /* D2-3-4 are supposed to be synchronized and finish together */
+        op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
+    {
+#define HUF_DECODE_SYMBOL_0(n, Dstream) \
+        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
+
+#define HUF_DECODE_SYMBOL_1(n, Dstream) \
+        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
+        if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
+
+#define HUF_DECODE_SYMBOL_2(n, Dstream) \
+        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
+        if (FSE_32bits()) FSE_reloadDStream(&Dstream)
+
+        HUF_DECODE_SYMBOL_1( 0, bitD1);
+        HUF_DECODE_SYMBOL_1( 1, bitD2);
+        HUF_DECODE_SYMBOL_1( 2, bitD3);
+        HUF_DECODE_SYMBOL_1( 3, bitD4);
+        HUF_DECODE_SYMBOL_2( 4, bitD1);
+        HUF_DECODE_SYMBOL_2( 5, bitD2);
+        HUF_DECODE_SYMBOL_2( 6, bitD3);
+        HUF_DECODE_SYMBOL_2( 7, bitD4);
+        HUF_DECODE_SYMBOL_1( 8, bitD1);
+        HUF_DECODE_SYMBOL_1( 9, bitD2);
+        HUF_DECODE_SYMBOL_1(10, bitD3);
+        HUF_DECODE_SYMBOL_1(11, bitD4);
+        HUF_DECODE_SYMBOL_0(12, bitD1);
+        HUF_DECODE_SYMBOL_0(13, bitD2);
+        HUF_DECODE_SYMBOL_0(14, bitD3);
+        HUF_DECODE_SYMBOL_0(15, bitD4);
+    }
+
+    if (reloadStatus!=FSE_DStream_completed)   /* not complete : some bitStream might be FSE_DStream_unfinished */
+        return (size_t)-FSE_ERROR_corruptionDetected;
+
+    /* tail */
+    {
+        // bitTail = bitD1;   // *much* slower : -20% !??!
+        FSE_DStream_t bitTail;
+        bitTail.ptr = bitD1.ptr;
+        bitTail.bitsConsumed = bitD1.bitsConsumed;
+        bitTail.bitContainer = bitD1.bitContainer;   // required in case of FSE_DStream_endOfBuffer
+        bitTail.start = start1;
+        for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
+        {
+            HUF_DECODE_SYMBOL_0(0, bitTail);
+        }
+
+        if (FSE_endOfDStream(&bitTail))
+            return op-ostart;
+    }
+
+    if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
+
+    return (size_t)-FSE_ERROR_corruptionDetected;
+}
+
+
+static size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLE(DTable, HUF_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+    size_t errorCode;
+
+    errorCode = HUF_readDTable (DTable, cSrc, cSrcSize);
+    if (FSE_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable);
+}
+
+
+#endif   /* FSE_COMMONDEFS_ONLY */
+
+/*
+    zstd - standard compression library
+    Header File for static linking only
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* The objects defined into this file should be considered experimental.
+ * They are not labelled stable, as their prototype may change in the future.
+ * You can use them for tests, provide feedback, or if you can endure risk of future changes.
+ */
+
+/**************************************
+*  Error management
+**************************************/
+#define ZSTD_LIST_ERRORS(ITEM) \
+        ITEM(ZSTD_OK_NoError) ITEM(ZSTD_ERROR_GENERIC) \
+        ITEM(ZSTD_ERROR_MagicNumber) \
+        ITEM(ZSTD_ERROR_SrcSize) ITEM(ZSTD_ERROR_maxDstSize_tooSmall) \
+        ITEM(ZSTD_ERROR_corruption) \
+        ITEM(ZSTD_ERROR_maxCode)
+
+#define ZSTD_GENERATE_ENUM(ENUM) ENUM,
+typedef enum { ZSTD_LIST_ERRORS(ZSTD_GENERATE_ENUM) } ZSTD_errorCodes;   /* exposed list of errors; static linking only */
+
+/*
+    zstd - standard compression library
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/****************************************************************
+*  Tuning parameters
+*****************************************************************/
+/* MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect */
+#define ZSTD_MEMORY_USAGE 17
+
+
+/**************************************
+   CPU Feature Detection
+**************************************/
+/*
+ * Automated efficient unaligned memory access detection
+ * Based on known hardware architectures
+ * This list will be updated thanks to feedbacks
+ */
+#if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \
+    || defined(__ARM_FEATURE_UNALIGNED) \
+    || defined(__i386__) || defined(__x86_64__) \
+    || defined(_M_IX86) || defined(_M_X64) \
+    || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \
+    || (defined(_M_ARM) && (_M_ARM >= 7))
+#  define ZSTD_UNALIGNED_ACCESS 1
+#else
+#  define ZSTD_UNALIGNED_ACCESS 0
+#endif
+
+
+/********************************************************
+*  Includes
+*********************************************************/
+#include <stdlib.h>      /* calloc */
+#include <string.h>      /* memcpy, memmove */
+#include <stdio.h>       /* debug : printf */
+
+
+/********************************************************
+*  Compiler specifics
+*********************************************************/
+#ifdef __AVX2__
+#  include <immintrin.h>   /* AVX2 intrinsics */
+#endif
+
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
+#else
+#  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#  ifdef __GNUC__
+#    define FORCE_INLINE static inline __attribute__((always_inline))
+#  else
+#    define FORCE_INLINE static inline
+#  endif
+#endif
+
+
+#ifndef MEM_ACCESS_MODULE
+#define MEM_ACCESS_MODULE
+/********************************************************
+*  Basic Types
+*********************************************************/
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+# include <stdint.h>
+typedef  uint8_t BYTE;
+typedef uint16_t U16;
+typedef  int16_t S16;
+typedef uint32_t U32;
+typedef  int32_t S32;
+typedef uint64_t U64;
+#else
+typedef unsigned char       BYTE;
+typedef unsigned short      U16;
+typedef   signed short      S16;
+typedef unsigned int        U32;
+typedef   signed int        S32;
+typedef unsigned long long  U64;
+#endif
+
+#endif   /* MEM_ACCESS_MODULE */
+
+
+/********************************************************
+*  Constants
+*********************************************************/
+static const U32 ZSTD_magicNumber = 0xFD2FB51E;   /* 3rd version : seqNb header */
+
+#define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
+#define HASH_TABLESIZE (1 << HASH_LOG)
+#define HASH_MASK (HASH_TABLESIZE - 1)
+
+#define KNUTH 2654435761
+
+#define BIT7 128
+#define BIT6  64
+#define BIT5  32
+#define BIT4  16
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define BLOCKSIZE (128 KB)                 /* define, for static allocation */
+
+#define WORKPLACESIZE (BLOCKSIZE*3)
+#define MINMATCH 4
+#define MLbits   7
+#define LLbits   6
+#define Offbits  5
+#define MaxML  ((1<<MLbits )-1)
+#define MaxLL  ((1<<LLbits )-1)
+#define MaxOff ((1<<Offbits)-1)
+#define LitFSELog  11
+#define MLFSELog   10
+#define LLFSELog   10
+#define OffFSELog   9
+#define MAX(a,b) ((a)<(b)?(b):(a))
+#define MaxSeq MAX(MaxLL, MaxML)
+
+#define LITERAL_NOENTROPY 63
+#define COMMAND_NOENTROPY 7   /* to remove */
+
+static const size_t ZSTD_blockHeaderSize = 3;
+static const size_t ZSTD_frameHeaderSize = 4;
+
+
+/********************************************************
+*  Memory operations
+*********************************************************/
+static unsigned ZSTD_32bits(void) { return sizeof(void*)==4; }
+
+static unsigned ZSTD_isLittleEndian(void)
+{
+    const union { U32 i; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
+    return one.c[0];
+}
+
+static U16    ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; }
+
+static U32    ZSTD_read32(const void* p) { U32 r; memcpy(&r, p, sizeof(r)); return r; }
+
+static void   ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
+
+static void   ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
+
+#define COPY8(d,s)    { ZSTD_copy8(d,s); d+=8; s+=8; }
+
+static void ZSTD_wildcopy(void* dst, const void* src, size_t length)
+{
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + length;
+    while (op < oend) COPY8(op, ip);
+}
+
+static U16 ZSTD_readLE16(const void* memPtr)
+{
+    if (ZSTD_isLittleEndian()) return ZSTD_read16(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U16)((U16)p[0] + ((U16)p[1]<<8));
+    }
+}
+
+
+static U32 ZSTD_readLE32(const void* memPtr)
+{
+    if (ZSTD_isLittleEndian())
+        return ZSTD_read32(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
+    }
+}
+
+static U32 ZSTD_readBE32(const void* memPtr)
+{
+    const BYTE* p = (const BYTE*)memPtr;
+    return (U32)(((U32)p[0]<<24) + ((U32)p[1]<<16) + ((U32)p[2]<<8) + ((U32)p[3]<<0));
+}
+
+
+/**************************************
+*  Local structures
+***************************************/
+typedef struct ZSTD_Cctx_s ZSTD_Cctx;
+
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
+
+typedef struct
+{
+    blockType_t blockType;
+    U32 origSize;
+} blockProperties_t;
+
+typedef struct {
+    void* buffer;
+    U32*  offsetStart;
+    U32*  offset;
+    BYTE* offCodeStart;
+    BYTE* offCode;
+    BYTE* litStart;
+    BYTE* lit;
+    BYTE* litLengthStart;
+    BYTE* litLength;
+    BYTE* matchLengthStart;
+    BYTE* matchLength;
+    BYTE* dumpsStart;
+    BYTE* dumps;
+} seqStore_t;
+
+
+typedef struct ZSTD_Cctx_s
+{
+    const BYTE* base;
+    U32 current;
+    U32 nextUpdate;
+    seqStore_t seqStore;
+#ifdef __AVX2__
+    __m256i hashTable[HASH_TABLESIZE>>3];
+#else
+    U32 hashTable[HASH_TABLESIZE];
+#endif
+	BYTE buffer[WORKPLACESIZE];
+} cctxi_t;
+
+
+
+
+/**************************************
+*  Error Management
+**************************************/
+/* tells if a return value is an error code */
+static unsigned ZSTD_isError(size_t code) { return (code > (size_t)(-ZSTD_ERROR_maxCode)); }
+
+/* published entry point */
+unsigned ZSTDv01_isError(size_t code) { return ZSTD_isError(code); }
+
+
+/**************************************
+*  Tool functions
+**************************************/
+#define ZSTD_VERSION_MAJOR    0    /* for breaking interface changes  */
+#define ZSTD_VERSION_MINOR    1    /* for new (non-breaking) interface capabilities */
+#define ZSTD_VERSION_RELEASE  3    /* for tweaks, bug-fixes, or development */
+#define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
+
+/**************************************************************
+*   Decompression code
+**************************************************************/
+
+static size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
+{
+    const BYTE* const in = (const BYTE* const)src;
+    BYTE headerFlags;
+    U32 cSize;
+
+    if (srcSize < 3) return (size_t)-ZSTD_ERROR_SrcSize;
+
+    headerFlags = *in;
+    cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
+
+    bpPtr->blockType = (blockType_t)(headerFlags >> 6);
+    bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
+
+    if (bpPtr->blockType == bt_end) return 0;
+    if (bpPtr->blockType == bt_rle) return 1;
+    return cSize;
+}
+
+
+static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    if (srcSize > maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
+    memcpy(dst, src, srcSize);
+    return srcSize;
+}
+
+
+static size_t ZSTD_decompressLiterals(void* ctx,
+                                      void* dst, size_t maxDstSize,
+                                const void* src, size_t srcSize)
+{
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + maxDstSize;
+    const BYTE* ip = (const BYTE*)src;
+    size_t errorCode;
+    size_t litSize;
+
+    /* check : minimum 2, for litSize, +1, for content */
+    if (srcSize <= 3) return (size_t)-ZSTD_ERROR_corruption;
+
+    litSize = ip[1] + (ip[0]<<8);
+    litSize += ((ip[-3] >> 3) & 7) << 16;   // mmmmh....
+    op = oend - litSize;
+
+    (void)ctx;
+    if (litSize > maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
+    errorCode = HUF_decompress(op, litSize, ip+2, srcSize-2);
+    if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
+    return litSize;
+}
+
+
+static size_t ZSTD_decodeLiteralsBlock(void* ctx,
+                                void* dst, size_t maxDstSize,
+                          const BYTE** litStart, size_t* litSize,
+                          const void* src, size_t srcSize)
+{
+    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* ip = istart;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const oend = ostart + maxDstSize;
+    blockProperties_t litbp;
+
+    size_t litcSize = ZSTD_getcBlockSize(src, srcSize, &litbp);
+    if (ZSTD_isError(litcSize)) return litcSize;
+    if (litcSize > srcSize - ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_SrcSize;
+    ip += ZSTD_blockHeaderSize;
+
+    switch(litbp.blockType)
+    {
+    case bt_raw:
+        *litStart = ip;
+        ip += litcSize;
+        *litSize = litcSize;
+        break;
+    case bt_rle:
+        {
+            size_t rleSize = litbp.origSize;
+            if (rleSize>maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
+            memset(oend - rleSize, *ip, rleSize);
+            *litStart = oend - rleSize;
+            *litSize = rleSize;
+            ip++;
+            break;
+        }
+    case bt_compressed:
+        {
+            size_t decodedLitSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize);
+            if (ZSTD_isError(decodedLitSize)) return decodedLitSize;
+            *litStart = oend - decodedLitSize;
+            *litSize = decodedLitSize;
+            ip += litcSize;
+            break;
+        }
+    default:
+        return (size_t)-ZSTD_ERROR_GENERIC;
+    }
+
+    return ip-istart;
+}
+
+
+static size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
+                         FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
+                         const void* src, size_t srcSize)
+{
+    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* ip = istart;
+    const BYTE* const iend = istart + srcSize;
+    U32 LLtype, Offtype, MLtype;
+    U32 LLlog, Offlog, MLlog;
+    size_t dumpsLength;
+
+    /* check */
+    if (srcSize < 5) return (size_t)-ZSTD_ERROR_SrcSize;
+
+    /* SeqHead */
+    *nbSeq = ZSTD_readLE16(ip); ip+=2;
+    LLtype  = *ip >> 6;
+    Offtype = (*ip >> 4) & 3;
+    MLtype  = (*ip >> 2) & 3;
+    if (*ip & 2)
+    {
+        dumpsLength  = ip[2];
+        dumpsLength += ip[1] << 8;
+        ip += 3;
+    }
+    else
+    {
+        dumpsLength  = ip[1];
+        dumpsLength += (ip[0] & 1) << 8;
+        ip += 2;
+    }
+    *dumpsPtr = ip;
+    ip += dumpsLength;
+    *dumpsLengthPtr = dumpsLength;
+
+    /* check */
+    if (ip > iend-3) return (size_t)-ZSTD_ERROR_SrcSize; /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
+
+    /* sequences */
+    {
+        S16 norm[MaxML+1];    /* assumption : MaxML >= MaxLL and MaxOff */
+        size_t headerSize;
+
+        /* Build DTables */
+        switch(LLtype)
+        {
+        U32 max;
+        case bt_rle :
+            LLlog = 0;
+            FSE_buildDTable_rle(DTableLL, *ip++); break;
+        case bt_raw :
+            LLlog = LLbits;
+            FSE_buildDTable_raw(DTableLL, LLbits); break;
+        default :
+            max = MaxLL;
+            headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
+            if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC;
+            if (LLlog > LLFSELog) return (size_t)-ZSTD_ERROR_corruption;
+            ip += headerSize;
+            FSE_buildDTable(DTableLL, norm, max, LLlog);
+        }
+
+        switch(Offtype)
+        {
+        U32 max;
+        case bt_rle :
+            Offlog = 0;
+            if (ip > iend-2) return (size_t)-ZSTD_ERROR_SrcSize; /* min : "raw", hence no header, but at least xxLog bits */
+            FSE_buildDTable_rle(DTableOffb, *ip++); break;
+        case bt_raw :
+            Offlog = Offbits;
+            FSE_buildDTable_raw(DTableOffb, Offbits); break;
+        default :
+            max = MaxOff;
+            headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
+            if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC;
+            if (Offlog > OffFSELog) return (size_t)-ZSTD_ERROR_corruption;
+            ip += headerSize;
+            FSE_buildDTable(DTableOffb, norm, max, Offlog);
+        }
+
+        switch(MLtype)
+        {
+        U32 max;
+        case bt_rle :
+            MLlog = 0;
+            if (ip > iend-2) return (size_t)-ZSTD_ERROR_SrcSize; /* min : "raw", hence no header, but at least xxLog bits */
+            FSE_buildDTable_rle(DTableML, *ip++); break;
+        case bt_raw :
+            MLlog = MLbits;
+            FSE_buildDTable_raw(DTableML, MLbits); break;
+        default :
+            max = MaxML;
+            headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
+            if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC;
+            if (MLlog > MLFSELog) return (size_t)-ZSTD_ERROR_corruption;
+            ip += headerSize;
+            FSE_buildDTable(DTableML, norm, max, MLlog);
+        }
+    }
+
+    return ip-istart;
+}
+
+
+typedef struct {
+    size_t litLength;
+    size_t offset;
+    size_t matchLength;
+} seq_t;
+
+typedef struct {
+    FSE_DStream_t DStream;
+    FSE_DState_t stateLL;
+    FSE_DState_t stateOffb;
+    FSE_DState_t stateML;
+    size_t prevOffset;
+    const BYTE* dumps;
+    const BYTE* dumpsEnd;
+} seqState_t;
+
+
+static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
+{
+    size_t litLength;
+    size_t prevOffset;
+    size_t offset;
+    size_t matchLength;
+    const BYTE* dumps = seqState->dumps;
+    const BYTE* const de = seqState->dumpsEnd;
+
+    /* Literal length */
+    litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
+    prevOffset = litLength ? seq->offset : seqState->prevOffset;
+    seqState->prevOffset = seq->offset;
+    if (litLength == MaxLL)
+    {
+        U32 add = dumps<de ? *dumps++ : 0;
+        if (add < 255) litLength += add;
+        else
+        {
+            if (dumps<=(de-3))
+            {
+                litLength = ZSTD_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+                dumps += 3;
+            }
+        }
+    }
+
+    /* Offset */
+    {
+        U32 offsetCode, nbBits;
+        offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));
+        if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
+        nbBits = offsetCode - 1;
+        if (offsetCode==0) nbBits = 0;   /* cmove */
+        offset = ((size_t)1 << (nbBits & ((sizeof(offset)*8)-1))) + FSE_readBits(&(seqState->DStream), nbBits);
+        if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
+        if (offsetCode==0) offset = prevOffset;
+    }
+
+    /* MatchLength */
+    matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
+    if (matchLength == MaxML)
+    {
+        U32 add = dumps<de ? *dumps++ : 0;
+        if (add < 255) matchLength += add;
+        else
+        {
+            if (dumps<=(de-3))
+            {
+                matchLength = ZSTD_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+                dumps += 3;
+            }
+        }
+    }
+    matchLength += MINMATCH;
+
+    /* save result */
+    seq->litLength = litLength;
+    seq->offset = offset;
+    seq->matchLength = matchLength;
+    seqState->dumps = dumps;
+}
+
+
+static size_t ZSTD_execSequence(BYTE* op,
+                                seq_t sequence,
+                                const BYTE** litPtr, const BYTE* const litLimit,
+                                BYTE* const base, BYTE* const oend)
+{
+    static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
+    static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* substracted */
+    const BYTE* const ostart = op;
+    const size_t litLength = sequence.litLength;
+    BYTE* const endMatch = op + litLength + sequence.matchLength;    /* risk : address space overflow (32-bits) */
+    const BYTE* const litEnd = *litPtr + litLength;
+
+    /* check */
+    if (endMatch > oend) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;   /* overwrite beyond dst buffer */
+    if (litEnd > litLimit) return (size_t)-ZSTD_ERROR_corruption;
+    if (sequence.matchLength > (size_t)(*litPtr-op))  return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;    /* overwrite literal segment */
+
+    /* copy Literals */
+    if (((size_t)(*litPtr - op) < 8) || ((size_t)(oend-litEnd) < 8) || (op+litLength > oend-8))
+        memmove(op, *litPtr, litLength);   /* overwrite risk */
+    else
+        ZSTD_wildcopy(op, *litPtr, litLength);
+    op += litLength;
+    *litPtr = litEnd;   /* update for next sequence */
+
+    /* check : last match must be at a minimum distance of 8 from end of dest buffer */
+    if (oend-op < 8) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
+
+    /* copy Match */
+    {
+        const U32 overlapRisk = (((size_t)(litEnd - endMatch)) < 12);
+        const BYTE* match = op - sequence.offset;            /* possible underflow at op - offset ? */
+        size_t qutt = 12;
+        U64 saved[2];
+
+        /* check */
+        if (match < base) return (size_t)-ZSTD_ERROR_corruption;
+        if (sequence.offset > (size_t)base) return (size_t)-ZSTD_ERROR_corruption;
+
+        /* save beginning of literal sequence, in case of write overlap */
+        if (overlapRisk)
+        {
+            if ((endMatch + qutt) > oend) qutt = oend-endMatch;
+            memcpy(saved, endMatch, qutt);
+        }
+
+        if (sequence.offset < 8)
+        {
+            const int dec64 = dec64table[sequence.offset];
+            op[0] = match[0];
+            op[1] = match[1];
+            op[2] = match[2];
+            op[3] = match[3];
+            match += dec32table[sequence.offset];
+            ZSTD_copy4(op+4, match);
+            match -= dec64;
+        } else { ZSTD_copy8(op, match); }
+        op += 8; match += 8;
+
+        if (endMatch > oend-12)
+        {
+            if (op < oend-8)
+            {
+                ZSTD_wildcopy(op, match, (oend-8) - op);
+                match += (oend-8) - op;
+                op = oend-8;
+            }
+            while (op<endMatch) *op++ = *match++;
+        }
+        else
+            ZSTD_wildcopy(op, match, sequence.matchLength-8);   /* works even if matchLength < 8 */
+
+        /* restore, in case of overlap */
+        if (overlapRisk) memcpy(endMatch, saved, qutt);
+    }
+
+    return endMatch-ostart;
+}
+
+typedef struct ZSTDv01_Dctx_s
+{
+    U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
+    U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
+    U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
+    void* previousDstEnd;
+    void* base;
+    size_t expected;
+    blockType_t bType;
+    U32 phase;
+} dctx_t;
+
+
+static size_t ZSTD_decompressSequences(
+                               void* ctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize,
+                         const BYTE* litStart, size_t litSize)
+{
+    dctx_t* dctx = (dctx_t*)ctx;
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t errorCode, dumpsLength;
+    const BYTE* litPtr = litStart;
+    const BYTE* const litEnd = litStart + litSize;
+    int nbSeq;
+    const BYTE* dumps;
+    U32* DTableLL = dctx->LLTable;
+    U32* DTableML = dctx->MLTable;
+    U32* DTableOffb = dctx->OffTable;
+    BYTE* const base = (BYTE*) (dctx->base);
+
+    /* Build Decoding Tables */
+    errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
+                                      DTableLL, DTableML, DTableOffb,
+                                      ip, iend-ip);
+    if (ZSTD_isError(errorCode)) return errorCode;
+    ip += errorCode;
+
+    /* Regen sequences */
+    {
+        seq_t sequence;
+        seqState_t seqState;
+
+        memset(&sequence, 0, sizeof(sequence));
+        seqState.dumps = dumps;
+        seqState.dumpsEnd = dumps + dumpsLength;
+        seqState.prevOffset = 1;
+        errorCode = FSE_initDStream(&(seqState.DStream), ip, iend-ip);
+        if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_corruption;
+        FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
+        FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
+        FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
+
+        for ( ; (FSE_reloadDStream(&(seqState.DStream)) <= FSE_DStream_completed) && (nbSeq>0) ; )
+        {
+            size_t oneSeqSize;
+            nbSeq--;
+            ZSTD_decodeSequence(&sequence, &seqState);
+            oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend);
+            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+            op += oneSeqSize;
+        }
+
+        /* check if reached exact end */
+        if ( !FSE_endOfDStream(&(seqState.DStream)) ) return (size_t)-ZSTD_ERROR_corruption;   /* requested too much : data is corrupted */
+        if (nbSeq<0) return (size_t)-ZSTD_ERROR_corruption;   /* requested too many sequences : data is corrupted */
+
+        /* last literal segment */
+        {
+            size_t lastLLSize = litEnd - litPtr;
+            if (op+lastLLSize > oend) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
+            if (op != litPtr) memmove(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+    }
+
+    return op-ostart;
+}
+
+
+static size_t ZSTD_decompressBlock(
+                            void* ctx,
+                            void* dst, size_t maxDstSize,
+                      const void* src, size_t srcSize)
+{
+    /* blockType == blockCompressed, srcSize is trusted */
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* litPtr;
+    size_t litSize;
+    size_t errorCode;
+
+    /* Decode literals sub-block */
+    errorCode = ZSTD_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, &litSize, src, srcSize);
+    if (ZSTD_isError(errorCode)) return errorCode;
+    ip += errorCode;
+    srcSize -= errorCode;
+
+    return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize, litPtr, litSize);
+}
+
+
+size_t ZSTDv01_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* iend = ip + srcSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t remainingSize = srcSize;
+    U32 magicNumber;
+    size_t errorCode=0;
+    blockProperties_t blockProperties;
+
+    /* Frame Header */
+    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_SrcSize;
+    magicNumber = ZSTD_readBE32(src);
+    if (magicNumber != ZSTD_magicNumber) return (size_t)-ZSTD_ERROR_MagicNumber;
+    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
+
+    /* Loop on each block */
+    while (1)
+    {
+        size_t blockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
+        if (ZSTD_isError(blockSize)) return blockSize;
+
+        ip += ZSTD_blockHeaderSize;
+        remainingSize -= ZSTD_blockHeaderSize;
+        if (blockSize > remainingSize) return (size_t)-ZSTD_ERROR_SrcSize;
+
+        switch(blockProperties.blockType)
+        {
+        case bt_compressed:
+            errorCode = ZSTD_decompressBlock(ctx, op, oend-op, ip, blockSize);
+            break;
+        case bt_raw :
+            errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize);
+            break;
+        case bt_rle :
+            return (size_t)-ZSTD_ERROR_GENERIC;   /* not yet supported */
+            break;
+        case bt_end :
+            /* end of frame */
+            if (remainingSize) return (size_t)-ZSTD_ERROR_SrcSize;
+            break;
+        default:
+            return (size_t)-ZSTD_ERROR_GENERIC;
+        }
+        if (blockSize == 0) break;   /* bt_end */
+
+        if (ZSTD_isError(errorCode)) return errorCode;
+        op += errorCode;
+        ip += blockSize;
+        remainingSize -= blockSize;
+    }
+
+    return op-ostart;
+}
+
+size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    dctx_t ctx;
+    ctx.base = dst;
+    return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
+}
+
+
+/*******************************
+*  Streaming Decompression API
+*******************************/
+
+size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx)
+{
+    dctx->expected = ZSTD_frameHeaderSize;
+    dctx->phase = 0;
+    dctx->previousDstEnd = NULL;
+    dctx->base = NULL;
+    return 0;
+}
+
+ZSTDv01_Dctx* ZSTDv01_createDCtx(void)
+{
+    ZSTDv01_Dctx* dctx = (ZSTDv01_Dctx*)malloc(sizeof(ZSTDv01_Dctx));
+    if (dctx==NULL) return NULL;
+    ZSTDv01_resetDCtx(dctx);
+    return dctx;
+}
+
+size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx)
+{
+    free(dctx);
+    return 0;
+}
+
+size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx)
+{
+    return ((dctx_t*)dctx)->expected;
+}
+
+size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    dctx_t* ctx = (dctx_t*)dctx;
+
+    /* Sanity check */
+    if (srcSize != ctx->expected) return (size_t)-ZSTD_ERROR_SrcSize;
+    if (dst != ctx->previousDstEnd)  /* not contiguous */
+        ctx->base = dst;
+
+    /* Decompress : frame header */
+    if (ctx->phase == 0)
+    {
+        /* Check frame magic header */
+        U32 magicNumber = ZSTD_readBE32(src);
+        if (magicNumber != ZSTD_magicNumber) return (size_t)-ZSTD_ERROR_MagicNumber;
+        ctx->phase = 1;
+        ctx->expected = ZSTD_blockHeaderSize;
+        return 0;
+    }
+
+    /* Decompress : block header */
+    if (ctx->phase == 1)
+    {
+        blockProperties_t bp;
+        size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
+        if (ZSTD_isError(blockSize)) return blockSize;
+        if (bp.blockType == bt_end)
+        {
+            ctx->expected = 0;
+            ctx->phase = 0;
+        }
+        else
+        {
+            ctx->expected = blockSize;
+            ctx->bType = bp.blockType;
+            ctx->phase = 2;
+        }
+
+        return 0;
+    }
+
+    /* Decompress : block content */
+    {
+        size_t rSize;
+        switch(ctx->bType)
+        {
+        case bt_compressed:
+            rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
+            break;
+        case bt_raw :
+            rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
+            break;
+        case bt_rle :
+            return (size_t)-ZSTD_ERROR_GENERIC;   /* not yet handled */
+            break;
+        case bt_end :   /* should never happen (filtered at phase 1) */
+            rSize = 0;
+            break;
+        default:
+            return (size_t)-ZSTD_ERROR_GENERIC;
+        }
+        ctx->phase = 1;
+        ctx->expected = ZSTD_blockHeaderSize;
+        ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
+        return rSize;
+    }
+
+}
+
+
diff --git a/lib/legacy/zstd_v01.h b/lib/legacy/zstd_v01.h
new file mode 100644
index 0000000..314cd34
--- /dev/null
+++ b/lib/legacy/zstd_v01.h
@@ -0,0 +1,100 @@
+/*
+    zstd - standard compression library
+    Header File
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+*  Simple one-step function
+***************************************/
+/**
+ZSTDv01_decompress() : decompress ZSTD frames compliant with v0.1.x format
+    compressedSize : is the exact source size
+    maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
+                      It must be equal or larger than originalSize, otherwise decompression will fail.
+    return : the number of bytes decompressed into destination buffer (originalSize)
+             or an errorCode if it fails (which can be tested using ZSTDv01_isError())
+*/
+size_t ZSTDv01_decompress( void* dst, size_t maxOriginalSize,
+                     const void* src, size_t compressedSize);
+
+/**
+ZSTDv01_isError() : tells if the result of ZSTDv01_decompress() is an error
+*/
+unsigned ZSTDv01_isError(size_t code);
+
+
+/* *************************************
+*  Advanced functions
+***************************************/
+typedef struct ZSTDv01_Dctx_s ZSTDv01_Dctx;
+ZSTDv01_Dctx* ZSTDv01_createDCtx(void);
+size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx);
+
+size_t ZSTDv01_decompressDCtx(void* ctx,
+                              void* dst, size_t maxOriginalSize,
+                        const void* src, size_t compressedSize);
+
+/* *************************************
+*  Streaming functions
+***************************************/
+size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx);
+
+size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx);
+size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+/**
+  Use above functions alternatively.
+  ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+  ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
+  Result is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+*/
+
+/* *************************************
+*  Prefix - version detection
+***************************************/
+#define ZSTDv01_magicNumber   0xFD2FB51E   /* Big Endian version */
+#define ZSTDv01_magicNumberLE 0x1EB52FFD   /* Little Endian version */
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/lib/legacy/zstd_v02.c b/lib/legacy/zstd_v02.c
new file mode 100644
index 0000000..67d726f
--- /dev/null
+++ b/lib/legacy/zstd_v02.c
@@ -0,0 +1,3730 @@
+/* ******************************************************************
+   Error codes and messages
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef ERROR_H_MODULE
+#define ERROR_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include "zstd_v02.h"
+
+/******************************************
+*  Compiler-specific
+******************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define ERR_STATIC static inline
+#elif defined(_MSC_VER)
+#  define ERR_STATIC static __inline
+#elif defined(__GNUC__)
+#  define ERR_STATIC static __attribute__((unused))
+#else
+#  define ERR_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/******************************************
+*  Error Management
+******************************************/
+#define PREFIX(name) ZSTD_error_##name
+
+#define ERROR(name) (size_t)-PREFIX(name)
+
+#define ERROR_LIST(ITEM) \
+        ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \
+        ITEM(PREFIX(memory_allocation)) \
+        ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \
+        ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(corruption_detected)) \
+        ITEM(PREFIX(tableLog_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooSmall)) \
+        ITEM(PREFIX(maxCode))
+
+#define ERROR_GENERATE_ENUM(ENUM) ENUM,
+typedef enum { ERROR_LIST(ERROR_GENERATE_ENUM) } ERR_codes;  /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
+
+#define ERROR_CONVERTTOSTRING(STRING) #STRING,
+#define ERROR_GENERATE_STRING(EXPR) ERROR_CONVERTTOSTRING(EXPR)
+
+ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ERROR_H_MODULE */
+
+
+/* ******************************************************************
+   mem.h
+   low-level memory access routines
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/******************************************
+*  Includes
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include <string.h>    /* memcpy */
+
+
+/******************************************
+*  Compiler-specific
+******************************************/
+#if defined(__GNUC__)
+#  define MEM_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+#  define MEM_STATIC static __inline
+#else
+#  define MEM_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/****************************************************************
+*  Basic Types
+*****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef  int16_t S16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+  typedef  int64_t S64;
+#else
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef   signed short      S16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+  typedef   signed long long  S64;
+#endif
+
+
+/****************************************************************
+*  Memory I/O
+*****************************************************************/
+/* MEM_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The below switch allow to select different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violate C standard.
+ *            It can generate buggy code on targets generating assembly depending on alignment.
+ *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef MEM_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define MEM_FORCE_MEMORY_ACCESS 2
+#  elif defined(__INTEL_COMPILER) || \
+  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+#    define MEM_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; }
+MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; }
+
+MEM_STATIC unsigned MEM_isLittleEndian(void)
+{
+    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
+    return one.c[0];
+}
+
+#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
+
+/* violates C standard on structure alignment.
+Only use if no other choice to achieve best performance on target platform */
+MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
+MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
+MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
+
+#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
+
+MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
+MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; }
+
+#else
+
+/* default method, safe and standard.
+   can sometimes prove slower */
+
+MEM_STATIC U16 MEM_read16(const void* memPtr)
+{
+    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U32 MEM_read32(const void* memPtr)
+{
+    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U64 MEM_read64(const void* memPtr)
+{
+    U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value)
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+MEM_STATIC void MEM_write32(void* memPtr, U32 value)
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+MEM_STATIC void MEM_write64(void* memPtr, U64 value)
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+#endif // MEM_FORCE_MEMORY_ACCESS
+
+
+MEM_STATIC U16 MEM_readLE16(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read16(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U16)(p[0] + (p[1]<<8));
+    }
+}
+
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
+{
+    if (MEM_isLittleEndian())
+    {
+        MEM_write16(memPtr, val);
+    }
+    else
+    {
+        BYTE* p = (BYTE*)memPtr;
+        p[0] = (BYTE)val;
+        p[1] = (BYTE)(val>>8);
+    }
+}
+
+MEM_STATIC U32 MEM_readLE32(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read32(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
+    }
+}
+
+MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32)
+{
+    if (MEM_isLittleEndian())
+    {
+        MEM_write32(memPtr, val32);
+    }
+    else
+    {
+        BYTE* p = (BYTE*)memPtr;
+        p[0] = (BYTE)val32;
+        p[1] = (BYTE)(val32>>8);
+        p[2] = (BYTE)(val32>>16);
+        p[3] = (BYTE)(val32>>24);
+    }
+}
+
+MEM_STATIC U64 MEM_readLE64(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read64(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
+                     + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
+    }
+}
+
+MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64)
+{
+    if (MEM_isLittleEndian())
+    {
+        MEM_write64(memPtr, val64);
+    }
+    else
+    {
+        BYTE* p = (BYTE*)memPtr;
+        p[0] = (BYTE)val64;
+        p[1] = (BYTE)(val64>>8);
+        p[2] = (BYTE)(val64>>16);
+        p[3] = (BYTE)(val64>>24);
+        p[4] = (BYTE)(val64>>32);
+        p[5] = (BYTE)(val64>>40);
+        p[6] = (BYTE)(val64>>48);
+        p[7] = (BYTE)(val64>>56);
+    }
+}
+
+MEM_STATIC size_t MEM_readLEST(const void* memPtr)
+{
+    if (MEM_32bits())
+        return (size_t)MEM_readLE32(memPtr);
+    else
+        return (size_t)MEM_readLE64(memPtr);
+}
+
+MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val)
+{
+    if (MEM_32bits())
+        MEM_writeLE32(memPtr, (U32)val);
+    else
+        MEM_writeLE64(memPtr, (U64)val);
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* MEM_H_MODULE */
+
+
+/* ******************************************************************
+   bitstream
+   Part of NewGen Entropy library
+   header file (to include)
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef BITSTREAM_H_MODULE
+#define BITSTREAM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*
+*  This API consists of small unitary functions, which highly benefit from being inlined.
+*  Since link-time-optimization is not available for all compilers,
+*  these functions are defined into a .h to be included.
+*/
+
+
+/**********************************************
+*  bitStream decompression API (read backward)
+**********************************************/
+typedef struct
+{
+    size_t   bitContainer;
+    unsigned bitsConsumed;
+    const char* ptr;
+    const char* start;
+} BIT_DStream_t;
+
+typedef enum { BIT_DStream_unfinished = 0,
+               BIT_DStream_endOfBuffer = 1,
+               BIT_DStream_completed = 2,
+               BIT_DStream_overflow = 3 } BIT_DStream_status;  /* result of BIT_reloadDStream() */
+               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+
+MEM_STATIC size_t   BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
+MEM_STATIC size_t   BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
+
+
+/*
+* Start by invoking BIT_initDStream().
+* A chunk of the bitStream is then stored into a local register.
+* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+* You can then retrieve bitFields stored into the local register, **in reverse order**.
+* Local register is manually filled from memory by the BIT_reloadDStream() method.
+* A reload guarantee a minimum of ((8*sizeof(size_t))-7) bits when its result is BIT_DStream_unfinished.
+* Otherwise, it can be less than that, so proceed accordingly.
+* Checking if DStream has reached its end can be performed with BIT_endOfDStream()
+*/
+
+
+/******************************************
+*  unsafe API
+******************************************/
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
+/* faster, but works only if nbBits >= 1 */
+
+
+
+/****************************************************************
+*  Helper functions
+****************************************************************/
+MEM_STATIC unsigned BIT_highbit32 (register U32 val)
+{
+#   if defined(_MSC_VER)   /* Visual */
+    unsigned long r=0;
+    _BitScanReverse ( &r, val );
+    return (unsigned) r;
+#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
+    return 31 - __builtin_clz (val);
+#   else   /* Software version */
+    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+    U32 v = val;
+    unsigned r;
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
+    return r;
+#   endif
+}
+
+
+
+/**********************************************************
+* bitStream decoding
+**********************************************************/
+
+/*!BIT_initDStream
+*  Initialize a BIT_DStream_t.
+*  @bitD : a pointer to an already allocated BIT_DStream_t structure
+*  @srcBuffer must point at the beginning of a bitStream
+*  @srcSize must be the exact size of the bitStream
+*  @result : size of stream (== srcSize) or an errorCode if a problem is detected
+*/
+MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+    if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
+
+    if (srcSize >=  sizeof(size_t))   /* normal case */
+    {
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(size_t);
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+        if (contain32 == 0) return ERROR(GENERIC);   /* endMark not present */
+        bitD->bitsConsumed = 8 - BIT_highbit32(contain32);
+    }
+    else
+    {
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = bitD->start;
+        bitD->bitContainer = *(const BYTE*)(bitD->start);
+        switch(srcSize)
+        {
+            case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
+            case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
+            case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
+            case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
+            case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
+            case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) <<  8;
+            default:;
+        }
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+        if (contain32 == 0) return ERROR(GENERIC);   /* endMark not present */
+        bitD->bitsConsumed = 8 - BIT_highbit32(contain32);
+        bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
+    }
+
+    return srcSize;
+}
+
+/*!BIT_lookBits
+ * Provides next n bits from local register
+ * local register is not modified (bits are still present for next read/look)
+ * On 32-bits, maxNbBits==25
+ * On 64-bits, maxNbBits==57
+ * @return : value extracted
+ */
+MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+    return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
+}
+
+/*! BIT_lookBitsFast :
+*   unsafe version; only works only if nbBits >= 1 */
+MEM_STATIC size_t BIT_lookBitsFast(BIT_DStream_t* bitD, U32 nbBits)
+{
+    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+    return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
+}
+
+MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    bitD->bitsConsumed += nbBits;
+}
+
+/*!BIT_readBits
+ * Read next n bits from local register.
+ * pay attention to not read more than nbBits contained into local register.
+ * @return : extracted value.
+ */
+MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    size_t value = BIT_lookBits(bitD, nbBits);
+    BIT_skipBits(bitD, nbBits);
+    return value;
+}
+
+/*!BIT_readBitsFast :
+*  unsafe version; only works only if nbBits >= 1 */
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
+{
+    size_t value = BIT_lookBitsFast(bitD, nbBits);
+    BIT_skipBits(bitD, nbBits);
+    return value;
+}
+
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
+{
+	if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should never happen */
+		return BIT_DStream_overflow;
+
+    if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
+    {
+        bitD->ptr -= bitD->bitsConsumed >> 3;
+        bitD->bitsConsumed &= 7;
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        return BIT_DStream_unfinished;
+    }
+    if (bitD->ptr == bitD->start)
+    {
+        if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
+        return BIT_DStream_completed;
+    }
+    {
+        U32 nbBytes = bitD->bitsConsumed >> 3;
+        BIT_DStream_status result = BIT_DStream_unfinished;
+        if (bitD->ptr - nbBytes < bitD->start)
+        {
+            nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
+            result = BIT_DStream_endOfBuffer;
+        }
+        bitD->ptr -= nbBytes;
+        bitD->bitsConsumed -= nbBytes*8;
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD) */
+        return result;
+    }
+}
+
+/*! BIT_endOfDStream
+*   @return Tells if DStream has reached its exact end
+*/
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
+{
+    return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* BITSTREAM_H_MODULE */
+/* ******************************************************************
+   Error codes and messages
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef ERROR_H_MODULE
+#define ERROR_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/******************************************
+*  Compiler-specific
+******************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define ERR_STATIC static inline
+#elif defined(_MSC_VER)
+#  define ERR_STATIC static __inline
+#elif defined(__GNUC__)
+#  define ERR_STATIC static __attribute__((unused))
+#else
+#  define ERR_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/******************************************
+*  Error Management
+******************************************/
+#define PREFIX(name) ZSTD_error_##name
+
+#define ERROR(name) (size_t)-PREFIX(name)
+
+#define ERROR_LIST(ITEM) \
+        ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \
+        ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \
+        ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(corruption_detected)) \
+        ITEM(PREFIX(tableLog_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooSmall)) \
+        ITEM(PREFIX(maxCode))
+
+#define ERROR_GENERATE_ENUM(ENUM) ENUM,
+typedef enum { ERROR_LIST(ERROR_GENERATE_ENUM) } ERR_codes;  /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
+
+#define ERROR_CONVERTTOSTRING(STRING) #STRING,
+#define ERROR_GENERATE_STRING(EXPR) ERROR_CONVERTTOSTRING(EXPR)
+static const char* ERR_strings[] = { ERROR_LIST(ERROR_GENERATE_STRING) };
+
+ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
+
+ERR_STATIC const char* ERR_getErrorName(size_t code)
+{
+    static const char* codeError = "Unspecified error code";
+    if (ERR_isError(code)) return ERR_strings[-(int)(code)];
+    return codeError;
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ERROR_H_MODULE */
+/*
+Constructor and Destructor of type FSE_CTable
+    Note that its size depends on 'tableLog' and 'maxSymbolValue' */
+typedef unsigned FSE_CTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+
+
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   header file for static linking (only)
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/******************************************
+*  Static allocation
+******************************************/
+/* FSE buffer bounds */
+#define FSE_NCOUNTBOUND 512
+#define FSE_BLOCKBOUND(size) (size + (size>>7))
+#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
+#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
+#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
+
+
+/******************************************
+*  FSE advanced API
+******************************************/
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
+/* build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
+
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
+/* build a fake FSE_DTable, designed to always generate the same symbolValue */
+
+
+/******************************************
+*  FSE symbol decompression API
+******************************************/
+typedef struct
+{
+    size_t      state;
+    const void* table;   /* precise table may vary, depending on U16 */
+} FSE_DState_t;
+
+
+static void     FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
+
+static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+
+static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
+
+/*
+Let's now decompose FSE_decompress_usingDTable() into its unitary components.
+You will decode FSE-encoded symbols from the bitStream,
+and also any other bitFields you put in, **in reverse order**.
+
+You will need a few variables to track your bitStream. They are :
+
+BIT_DStream_t DStream;    // Stream context
+FSE_DState_t  DState;     // State context. Multiple ones are possible
+FSE_DTable*   DTablePtr;  // Decoding table, provided by FSE_buildDTable()
+
+The first thing to do is to init the bitStream.
+    errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
+
+You should then retrieve your initial state(s)
+(in reverse flushing order if you have several ones) :
+    errorCode = FSE_initDState(&DState, &DStream, DTablePtr);
+
+You can then decode your data, symbol after symbol.
+For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
+Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
+    unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
+
+You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
+Note : maximum allowed nbBits is 25, for 32-bits compatibility
+    size_t bitField = BIT_readBits(&DStream, nbBits);
+
+All above operations only read from local register (which size depends on size_t).
+Refueling the register from memory is manually performed by the reload method.
+    endSignal = FSE_reloadDStream(&DStream);
+
+BIT_reloadDStream() result tells if there is still some more data to read from DStream.
+BIT_DStream_unfinished : there is still some data left into the DStream.
+BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
+BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
+BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
+
+When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+    BIT_reloadDStream(&DStream) >= BIT_DStream_completed
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+    BIT_endOfDStream(&DStream);
+Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
+    FSE_endOfDState(&DState);
+*/
+
+
+/******************************************
+*  FSE unsafe API
+******************************************/
+static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/******************************************
+*  Implementation of inline functions
+******************************************/
+
+/* decompression */
+
+typedef struct {
+    U16 tableLog;
+    U16 fastMode;
+} FSE_DTableHeader;   /* sizeof U32 */
+
+typedef struct
+{
+    unsigned short newState;
+    unsigned char  symbol;
+    unsigned char  nbBits;
+} FSE_decode_t;   /* size == U32 */
+
+MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
+{
+    const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)dt;
+    DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
+    BIT_reloadDStream(bitD);
+    DStatePtr->table = dt + 1;
+}
+
+MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    const U32  nbBits = DInfo.nbBits;
+    BYTE symbol = DInfo.symbol;
+    size_t lowBits = BIT_readBits(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    const U32 nbBits = DInfo.nbBits;
+    BYTE symbol = DInfo.symbol;
+    size_t lowBits = BIT_readBitsFast(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
+{
+    return DStatePtr->state == 0;
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+/* ******************************************************************
+   Huff0 : Huffman coder, part of New Generation Entropy library
+   header file for static linking (only)
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/******************************************
+*  Static allocation macros
+******************************************/
+/* Huff0 buffer bounds */
+#define HUF_CTABLEBOUND 129
+#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8)   /* only true if incompressible pre-filtered with fast heuristic */
+#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* static allocation of Huff0's DTable */
+#define HUF_DTABLE_SIZE(maxTableLog)   (1 + (1<<maxTableLog))  /* nb Cells; use unsigned short for X2, unsigned int for X4 */
+#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
+        unsigned short DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
+        unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUF_CREATE_STATIC_DTABLEX6(DTable, maxTableLog) \
+        unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog) * 3 / 2] = { maxTableLog }
+
+
+/******************************************
+*  Advanced functions
+******************************************/
+static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
+static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbols decoder */
+static size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* quad-symbols decoder */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+/*
+    zstd - standard compression library
+    Header File
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+*  Version
+***************************************/
+#define ZSTD_VERSION_MAJOR    0    /* for breaking interface changes  */
+#define ZSTD_VERSION_MINOR    2    /* for new (non-breaking) interface capabilities */
+#define ZSTD_VERSION_RELEASE  2    /* for tweaks, bug-fixes, or development */
+#define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
+
+
+/* *************************************
+*  Advanced functions
+***************************************/
+typedef struct ZSTD_CCtx_s ZSTD_CCtx;   /* incomplete type */
+
+#if defined (__cplusplus)
+}
+#endif
+/*
+    zstd - standard compression library
+    Header File for static linking only
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* The objects defined into this file should be considered experimental.
+ * They are not labelled stable, as their prototype may change in the future.
+ * You can use them for tests, provide feedback, or if you can endure risk of future changes.
+ */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Streaming functions
+***************************************/
+
+typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+
+/*
+  Use above functions alternatively.
+  ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+  ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
+  Result is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+*/
+
+/* *************************************
+*  Prefix - version detection
+***************************************/
+#define ZSTD_magicNumber 0xFD2FB522   /* v0.2 (current)*/
+
+
+#if defined (__cplusplus)
+}
+#endif
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/****************************************************************
+*  Tuning parameters
+****************************************************************/
+/* MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#define FSE_MAX_MEMORY_USAGE 14
+#define FSE_DEFAULT_MEMORY_USAGE 13
+
+/* FSE_MAX_SYMBOL_VALUE :
+*  Maximum symbol value authorized.
+*  Required for proper stack allocation */
+#define FSE_MAX_SYMBOL_VALUE 255
+
+
+/****************************************************************
+*  template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+
+
+/****************************************************************
+*  Byte symbol type
+****************************************************************/
+#endif   /* !FSE_COMMONDEFS_ONLY */
+
+
+/****************************************************************
+*  Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
+#else
+#  ifdef __GNUC__
+#    define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#    define FORCE_INLINE static inline __attribute__((always_inline))
+#  else
+#    define FORCE_INLINE static inline
+#  endif
+#endif
+
+
+/****************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+
+/****************************************************************
+*  Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+
+/****************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/****************************************************************
+*  Complex types
+****************************************************************/
+typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+
+
+/****************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+/* Function templates */
+
+#define FSE_DECODE_TYPE FSE_TYPE_NAME(FSE_decode_t, FSE_FUNCTION_EXTENSION)
+
+static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
+
+static size_t FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION)
+(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
+    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (dt+1);   /* because dt is unsigned, 32-bits aligned on 32-bits */
+    const U32 tableSize = 1 << tableLog;
+    const U32 tableMask = tableSize-1;
+    const U32 step = FSE_tableStep(tableSize);
+    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
+    U32 position = 0;
+    U32 highThreshold = tableSize-1;
+    const S16 largeLimit= (S16)(1 << (tableLog-1));
+    U32 noLarge = 1;
+    U32 s;
+
+    /* Sanity Checks */
+    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+
+    /* Init, lay down lowprob symbols */
+    DTableH[0].tableLog = (U16)tableLog;
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        if (normalizedCounter[s]==-1)
+        {
+            tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
+            symbolNext[s] = 1;
+        }
+        else
+        {
+            if (normalizedCounter[s] >= largeLimit) noLarge=0;
+            symbolNext[s] = normalizedCounter[s];
+        }
+    }
+
+    /* Spread symbols */
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        int i;
+        for (i=0; i<normalizedCounter[s]; i++)
+        {
+            tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+            position = (position + step) & tableMask;
+            while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }
+    }
+
+    if (position!=0) return ERROR(GENERIC);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+
+    /* Build Decoding table */
+    {
+        U32 i;
+        for (i=0; i<tableSize; i++)
+        {
+            FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
+            U16 nextState = symbolNext[symbol]++;
+            tableDecode[i].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );
+            tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
+        }
+    }
+
+    DTableH->fastMode = (U16)noLarge;
+    return 0;
+}
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+/******************************************
+*  FSE helper functions
+******************************************/
+static unsigned FSE_isError(size_t code) { return ERR_isError(code); }
+
+
+/****************************************************************
+*  FSE NCount encoding-decoding
+****************************************************************/
+static short FSE_abs(short a)
+{
+    return a<0 ? -a : a;
+}
+
+static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                 const void* headerBuffer, size_t hbSize)
+{
+    const BYTE* const istart = (const BYTE*) headerBuffer;
+    const BYTE* const iend = istart + hbSize;
+    const BYTE* ip = istart;
+    int nbBits;
+    int remaining;
+    int threshold;
+    U32 bitStream;
+    int bitCount;
+    unsigned charnum = 0;
+    int previous0 = 0;
+
+    if (hbSize < 4) return ERROR(srcSize_wrong);
+    bitStream = MEM_readLE32(ip);
+    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
+    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+    bitStream >>= 4;
+    bitCount = 4;
+    *tableLogPtr = nbBits;
+    remaining = (1<<nbBits)+1;
+    threshold = 1<<nbBits;
+    nbBits++;
+
+    while ((remaining>1) && (charnum<=*maxSVPtr))
+    {
+        if (previous0)
+        {
+            unsigned n0 = charnum;
+            while ((bitStream & 0xFFFF) == 0xFFFF)
+            {
+                n0+=24;
+                if (ip < iend-5)
+                {
+                    ip+=2;
+                    bitStream = MEM_readLE32(ip) >> bitCount;
+                }
+                else
+                {
+                    bitStream >>= 16;
+                    bitCount+=16;
+                }
+            }
+            while ((bitStream & 3) == 3)
+            {
+                n0+=3;
+                bitStream>>=2;
+                bitCount+=2;
+            }
+            n0 += bitStream & 3;
+            bitCount += 2;
+            if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
+            while (charnum < n0) normalizedCounter[charnum++] = 0;
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+            {
+                ip += bitCount>>3;
+                bitCount &= 7;
+                bitStream = MEM_readLE32(ip) >> bitCount;
+            }
+            else
+                bitStream >>= 2;
+        }
+        {
+            const short max = (short)((2*threshold-1)-remaining);
+            short count;
+
+            if ((bitStream & (threshold-1)) < (U32)max)
+            {
+                count = (short)(bitStream & (threshold-1));
+                bitCount   += nbBits-1;
+            }
+            else
+            {
+                count = (short)(bitStream & (2*threshold-1));
+                if (count >= threshold) count -= max;
+                bitCount   += nbBits;
+            }
+
+            count--;   /* extra accuracy */
+            remaining -= FSE_abs(count);
+            normalizedCounter[charnum++] = count;
+            previous0 = !count;
+            while (remaining < threshold)
+            {
+                nbBits--;
+                threshold >>= 1;
+            }
+
+            {
+                if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+                {
+                    ip += bitCount>>3;
+                    bitCount &= 7;
+                }
+                else
+                {
+                    bitCount -= (int)(8 * (iend - 4 - ip));
+					ip = iend - 4;
+				}
+                bitStream = MEM_readLE32(ip) >> (bitCount & 31);
+            }
+        }
+    }
+    if (remaining != 1) return ERROR(GENERIC);
+    *maxSVPtr = charnum-1;
+
+    ip += (bitCount+7)>>3;
+    if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong);
+    return ip-istart;
+}
+
+
+/*********************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
+{
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
+    FSE_decode_t* const cell = (FSE_decode_t*)(dt + 1);   /* because dt is unsigned */
+
+    DTableH->tableLog = 0;
+    DTableH->fastMode = 0;
+
+    cell->newState = 0;
+    cell->symbol = symbolValue;
+    cell->nbBits = 0;
+
+    return 0;
+}
+
+
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
+{
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
+    FSE_decode_t* const dinfo = (FSE_decode_t*)(dt + 1);   /* because dt is unsigned */
+    const unsigned tableSize = 1 << nbBits;
+    const unsigned tableMask = tableSize - 1;
+    const unsigned maxSymbolValue = tableMask;
+    unsigned s;
+
+    /* Sanity checks */
+    if (nbBits < 1) return ERROR(GENERIC);         /* min size */
+
+    /* Build Decoding Table */
+    DTableH->tableLog = (U16)nbBits;
+    DTableH->fastMode = 1;
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        dinfo[s].newState = 0;
+        dinfo[s].symbol = (BYTE)s;
+        dinfo[s].nbBits = (BYTE)nbBits;
+    }
+
+    return 0;
+}
+
+FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const FSE_DTable* dt, const unsigned fast)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = omax-3;
+
+    BIT_DStream_t bitD;
+    FSE_DState_t state1;
+    FSE_DState_t state2;
+    size_t errorCode;
+
+    /* Init */
+    errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
+    if (FSE_isError(errorCode)) return errorCode;
+
+    FSE_initDState(&state1, &bitD, dt);
+    FSE_initDState(&state2, &bitD, dt);
+
+#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
+
+    /* 4 symbols per loop */
+    for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) && (op<olimit) ; op+=4)
+    {
+        op[0] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[1] = FSE_GETSYMBOL(&state2);
+
+        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
+
+        op[2] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[3] = FSE_GETSYMBOL(&state2);
+    }
+
+    /* tail */
+    /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
+    while (1)
+    {
+        if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state1);
+
+        if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state2);
+    }
+
+    /* end ? */
+    if (BIT_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
+        return op-ostart;
+
+    if (op==omax) return ERROR(dstSize_tooSmall);   /* dst buffer is full, but cSrc unfinished */
+
+    return ERROR(corruption_detected);
+}
+
+
+static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
+                            const void* cSrc, size_t cSrcSize,
+                            const FSE_DTable* dt)
+{
+    const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)dt;
+    const U32 fastMode = DTableH->fastMode;
+
+    /* select fast mode (static) */
+    if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+}
+
+
+static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{
+    const BYTE* const istart = (const BYTE*)cSrc;
+    const BYTE* ip = istart;
+    short counting[FSE_MAX_SYMBOL_VALUE+1];
+    DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
+    unsigned tableLog;
+    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+    size_t errorCode;
+
+    if (cSrcSize<2) return ERROR(srcSize_wrong);   /* too small input size */
+
+    /* normal FSE decoding mode */
+    errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
+    if (FSE_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);   /* too small input size */
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
+    if (FSE_isError(errorCode)) return errorCode;
+
+    /* always return, even if it is an error code */
+    return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
+}
+
+
+
+#endif   /* FSE_COMMONDEFS_ONLY */
+/* ******************************************************************
+   Huff0 : Huffman coder, part of New Generation Entropy library
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE+Huff0 source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+/****************************************************************
+*  Compiler specifics
+****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+/* inline is defined */
+#elif defined(_MSC_VER)
+#  define inline __inline
+#else
+#  define inline /* disable inline */
+#endif
+
+
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#else
+#  ifdef __GNUC__
+#    define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#    define FORCE_INLINE static inline __attribute__((always_inline))
+#  else
+#    define FORCE_INLINE static inline
+#  endif
+#endif
+
+
+/****************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+
+/****************************************************************
+*  Error Management
+****************************************************************/
+#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/******************************************
+*  Helper functions
+******************************************/
+static unsigned HUF_isError(size_t code) { return ERR_isError(code); }
+
+#define HUF_ABSOLUTEMAX_TABLELOG  16   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
+#define HUF_MAX_TABLELOG  12           /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
+#define HUF_DEFAULT_TABLELOG  HUF_MAX_TABLELOG   /* tableLog by default, when not specified */
+#define HUF_MAX_SYMBOL_VALUE 255
+#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
+#  error "HUF_MAX_TABLELOG is too large !"
+#endif
+
+
+
+/*********************************************************
+*  Huff0 : Huffman block decompression
+*********************************************************/
+typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2;   /* single-symbol decoding */
+
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4;  /* double-symbols decoding */
+
+typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
+
+/*! HUF_readStats
+    Read compact Huffman tree, saved by HUF_writeCTable
+    @huffWeight : destination buffer
+    @return : size read from `src`
+*/
+static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                            U32* nbSymbolsPtr, U32* tableLogPtr,
+                            const void* src, size_t srcSize)
+{
+    U32 weightTotal;
+    U32 tableLog;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize = ip[0];
+    size_t oSize;
+    U32 n;
+
+    //memset(huffWeight, 0, hwSize);   /* is not necessary, even though some analyzer complain ... */
+
+    if (iSize >= 128)  /* special header */
+    {
+        if (iSize >= (242))   /* RLE */
+        {
+            static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
+            oSize = l[iSize-242];
+            memset(huffWeight, 1, hwSize);
+            iSize = 0;
+        }
+        else   /* Incompressible */
+        {
+            oSize = iSize - 127;
+            iSize = ((oSize+1)/2);
+            if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+            if (oSize >= hwSize) return ERROR(corruption_detected);
+            ip += 1;
+            for (n=0; n<oSize; n+=2)
+            {
+                huffWeight[n]   = ip[n/2] >> 4;
+                huffWeight[n+1] = ip[n/2] & 15;
+            }
+        }
+    }
+    else  /* header compressed with FSE (normal case) */
+    {
+        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+        oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize);   /* max (hwSize-1) values decoded, as last one is implied */
+        if (FSE_isError(oSize)) return oSize;
+    }
+
+    /* collect weight stats */
+    memset(rankStats, 0, (HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32));
+    weightTotal = 0;
+    for (n=0; n<oSize; n++)
+    {
+        if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+        rankStats[huffWeight[n]]++;
+        weightTotal += (1 << huffWeight[n]) >> 1;
+    }
+
+    /* get last non-null symbol weight (implied, total must be 2^n) */
+    tableLog = BIT_highbit32(weightTotal) + 1;
+    if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+    {
+        U32 total = 1 << tableLog;
+        U32 rest = total - weightTotal;
+        U32 verif = 1 << BIT_highbit32(rest);
+        U32 lastWeight = BIT_highbit32(rest) + 1;
+        if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
+        huffWeight[oSize] = (BYTE)lastWeight;
+        rankStats[lastWeight]++;
+    }
+
+    /* check tree construction validity */
+    if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected);   /* by construction : at least 2 elts of rank 1, must be even */
+
+    /* results */
+    *nbSymbolsPtr = (U32)(oSize+1);
+    *tableLogPtr = tableLog;
+    return iSize+1;
+}
+
+
+/**************************/
+/* single-symbol decoding */
+/**************************/
+
+static size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
+{
+    BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];   /* large enough for values from 0 to 16 */
+    U32 tableLog = 0;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize = ip[0];
+    U32 nbSymbols = 0;
+    U32 n;
+    U32 nextRankStart;
+    HUF_DEltX2* const dt = (HUF_DEltX2*)(DTable + 1);
+
+    HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U16));   /* if compilation fails here, assertion is false */
+    //memset(huffWeight, 0, sizeof(huffWeight));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > DTable[0]) return ERROR(tableLog_tooLarge);   /* DTable is too small */
+    DTable[0] = (U16)tableLog;   /* maybe should separate sizeof DTable, as allocated, from used size of DTable, in case of DTable re-use */
+
+    /* Prepare ranks */
+    nextRankStart = 0;
+    for (n=1; n<=tableLog; n++)
+    {
+        U32 current = nextRankStart;
+        nextRankStart += (rankVal[n] << (n-1));
+        rankVal[n] = current;
+    }
+
+    /* fill DTable */
+    for (n=0; n<nbSymbols; n++)
+    {
+        const U32 w = huffWeight[n];
+        const U32 length = (1 << w) >> 1;
+        U32 i;
+        HUF_DEltX2 D;
+        D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
+        for (i = rankVal[w]; i < rankVal[w] + length; i++)
+            dt[i] = D;
+        rankVal[w] += length;
+    }
+
+    return iSize;
+}
+
+static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+        const size_t val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+        const BYTE c = dt[val].byte;
+        BIT_skipBits(Dstream, dt[val].nbBits);
+        return c;
+}
+
+#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
+    *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 4 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4))
+    {
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+    }
+
+    /* closer to the end */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    /* no more data to retrieve from bitstream, hence no need to reload */
+    while (p < pEnd)
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    return pEnd-pStart;
+}
+
+
+static size_t HUF_decompress4X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U16* DTable)
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+
+        const HUF_DEltX2* const dt = ((const HUF_DEltX2*)DTable) +1;
+        const U32 dtLog = DTable[0];
+        size_t errorCode;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BIT_initDStream(&bitD1, istart1, length1);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD2, istart2, length2);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD3, istart3, length3);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD4, istart4, length4);
+        if (HUF_isError(errorCode)) return errorCode;
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
+        {
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+    size_t errorCode;
+
+    errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize);
+    if (HUF_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    return HUF_decompress4X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/***************************/
+/* double-symbols decoding */
+/***************************/
+
+static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
+                           const U32* rankValOrigin, const int minWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+                           U32 nbBitsBaseline, U16 baseSeq)
+{
+    HUF_DEltX4 DElt;
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+    U32 s;
+
+    /* get pre-calculated rankVal */
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill skipped values */
+    if (minWeight>1)
+    {
+        U32 i, skipSize = rankVal[minWeight];
+        MEM_writeLE16(&(DElt.sequence), baseSeq);
+        DElt.nbBits   = (BYTE)(consumed);
+        DElt.length   = 1;
+        for (i = 0; i < skipSize; i++)
+            DTable[i] = DElt;
+    }
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++)   /* note : sortedSymbols already skipped */
+    {
+        const U32 symbol = sortedSymbols[s].symbol;
+        const U32 weight = sortedSymbols[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 length = 1 << (sizeLog-nbBits);
+        const U32 start = rankVal[weight];
+        U32 i = start;
+        const U32 end = start + length;
+
+        MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
+        DElt.nbBits = (BYTE)(nbBits + consumed);
+        DElt.length = 2;
+        do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
+
+        rankVal[weight] += length;
+    }
+}
+
+typedef U32 rankVal_t[HUF_ABSOLUTEMAX_TABLELOG][HUF_ABSOLUTEMAX_TABLELOG + 1];
+
+static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
+                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
+                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+                           const U32 nbBitsBaseline)
+{
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+    const U32 minBits  = nbBitsBaseline - maxWeight;
+    U32 s;
+
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++)
+    {
+        const U16 symbol = sortedList[s].symbol;
+        const U32 weight = sortedList[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 start = rankVal[weight];
+        const U32 length = 1 << (targetLog-nbBits);
+
+        if (targetLog-nbBits >= minBits)   /* enough room for a second symbol */
+        {
+            U32 sortedRank;
+            int minWeight = nbBits + scaleLog;
+            if (minWeight < 1) minWeight = 1;
+            sortedRank = rankStart[minWeight];
+            HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
+                           rankValOrigin[nbBits], minWeight,
+                           sortedList+sortedRank, sortedListSize-sortedRank,
+                           nbBitsBaseline, symbol);
+        }
+        else
+        {
+            U32 i;
+            const U32 end = start + length;
+            HUF_DEltX4 DElt;
+
+            MEM_writeLE16(&(DElt.sequence), symbol);
+            DElt.nbBits   = (BYTE)(nbBits);
+            DElt.length   = 1;
+            for (i = start; i < end; i++)
+                DTable[i] = DElt;
+        }
+        rankVal[weight] += length;
+    }
+}
+
+static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
+{
+    BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1];
+    sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
+    U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
+    U32* const rankStart = rankStart0+1;
+    rankVal_t rankVal;
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    const U32 memLog = DTable[0];
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize = ip[0];
+    HUF_DEltX4* const dt = ((HUF_DEltX4*)DTable) + 1;
+
+    HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32));   /* if compilation fails here, assertion is false */
+    if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > memLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
+
+    /* find maxWeight */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+
+    /* Get start index of each weight */
+    {
+        U32 w, nextRankStart = 0;
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankStart;
+            nextRankStart += rankStats[w];
+            rankStart[w] = current;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
+        sizeOfSort = nextRankStart;
+    }
+
+    /* sort symbols by weight */
+    {
+        U32 s;
+        for (s=0; s<nbSymbols; s++)
+        {
+            U32 w = weightList[s];
+            U32 r = rankStart[w]++;
+            sortedSymbol[r].symbol = (BYTE)s;
+            sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+	/* Build rankVal */
+    {
+        const U32 minBits = tableLog+1 - maxW;
+        U32 nextRankVal = 0;
+        U32 w, consumed;
+        const int rescale = (memLog-tableLog) - 1;   /* tableLog <= memLog */
+        U32* rankVal0 = rankVal[0];
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankVal;
+            nextRankVal += rankStats[w] << (w+rescale);
+            rankVal0[w] = current;
+        }
+        for (consumed = minBits; consumed <= memLog - minBits; consumed++)
+        {
+            U32* rankValPtr = rankVal[consumed];
+            for (w = 1; w <= maxW; w++)
+            {
+                rankValPtr[w] = rankVal0[w] >> consumed;
+            }
+        }
+    }
+
+    HUF_fillDTableX4(dt, memLog,
+                   sortedSymbol, sizeOfSort,
+                   rankStart0, rankVal, maxW,
+                   tableLog+1);
+
+    return iSize;
+}
+
+
+static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+    const size_t val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, dt+val, 2);
+    BIT_skipBits(DStream, dt[val].nbBits);
+    return dt[val].length;
+}
+
+static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+    const size_t val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, dt+val, 1);
+    if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
+    else
+    {
+        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8))
+        {
+            BIT_skipBits(DStream, dt[val].nbBits);
+            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);   /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+        }
+    }
+    return 1;
+}
+
+
+#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
+    ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 8 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd-7))
+    {
+        HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+    }
+
+    /* closer to the end */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-2))
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+
+    while (p <= pEnd-2)
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
+
+    if (p < pEnd)
+        p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
+
+    return p-pStart;
+}
+
+
+
+static size_t HUF_decompress4X4_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U32* DTable)
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+
+        const HUF_DEltX4* const dt = ((const HUF_DEltX4*)DTable) +1;
+        const U32 dtLog = DTable[0];
+        size_t errorCode;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BIT_initDStream(&bitD1, istart1, length1);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD2, istart2, length2);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD3, istart3, length3);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD4, istart4, length4);
+        if (HUF_isError(errorCode)) return errorCode;
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
+        {
+            HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
+
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX4(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize;
+    cSrcSize -= hSize;
+
+    return HUF_decompress4X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/**********************************/
+/* quad-symbol decoding           */
+/**********************************/
+typedef struct { BYTE nbBits; BYTE nbBytes; } HUF_DDescX6;
+typedef union { BYTE byte[4]; U32 sequence; } HUF_DSeqX6;
+
+/* recursive, up to level 3; may benefit from <template>-like strategy to nest each level inline */
+static void HUF_fillDTableX6LevelN(HUF_DDescX6* DDescription, HUF_DSeqX6* DSequence, int sizeLog,
+                           const rankVal_t rankValOrigin, const U32 consumed, const int minWeight, const U32 maxWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, const U32* rankStart,
+                           const U32 nbBitsBaseline, HUF_DSeqX6 baseSeq, HUF_DDescX6 DDesc)
+{
+    const int scaleLog = nbBitsBaseline - sizeLog;   /* note : targetLog >= (nbBitsBaseline-1), hence scaleLog <= 1 */
+    const int minBits  = nbBitsBaseline - maxWeight;
+    const U32 level = DDesc.nbBytes;
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+    U32 symbolStartPos, s;
+
+    /* local rankVal, will be modified */
+    memcpy(rankVal, rankValOrigin[consumed], sizeof(rankVal));
+
+    /* fill skipped values */
+    if (minWeight>1)
+    {
+        U32 i;
+        const U32 skipSize = rankVal[minWeight];
+        for (i = 0; i < skipSize; i++)
+        {
+            DSequence[i] = baseSeq;
+            DDescription[i] = DDesc;
+        }
+    }
+
+    /* fill DTable */
+    DDesc.nbBytes++;
+    symbolStartPos = rankStart[minWeight];
+    for (s=symbolStartPos; s<sortedListSize; s++)
+    {
+        const BYTE symbol = sortedSymbols[s].symbol;
+        const U32  weight = sortedSymbols[s].weight;   /* >= 1 (sorted) */
+        const int  nbBits = nbBitsBaseline - weight;   /* >= 1 (by construction) */
+        const int  totalBits = consumed+nbBits;
+        const U32  start  = rankVal[weight];
+        const U32  length = 1 << (sizeLog-nbBits);
+        baseSeq.byte[level] = symbol;
+        DDesc.nbBits = (BYTE)totalBits;
+
+        if ((level<3) && (sizeLog-totalBits >= minBits))   /* enough room for another symbol */
+        {
+            int nextMinWeight = totalBits + scaleLog;
+            if (nextMinWeight < 1) nextMinWeight = 1;
+            HUF_fillDTableX6LevelN(DDescription+start, DSequence+start, sizeLog-nbBits,
+                           rankValOrigin, totalBits, nextMinWeight, maxWeight,
+                           sortedSymbols, sortedListSize, rankStart,
+                           nbBitsBaseline, baseSeq, DDesc);   /* recursive (max : level 3) */
+        }
+        else
+        {
+            U32 i;
+            const U32 end = start + length;
+            for (i = start; i < end; i++)
+            {
+                DDescription[i] = DDesc;
+                DSequence[i] = baseSeq;
+            }
+        }
+        rankVal[weight] += length;
+    }
+}
+
+
+/* note : same preparation as X4 */
+static size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
+{
+    BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1];
+    sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
+    U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
+    U32* const rankStart = rankStart0+1;
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    rankVal_t rankVal;
+    const U32 memLog = DTable[0];
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize = ip[0];
+
+    if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > memLog) return ERROR(tableLog_tooLarge);   /* DTable is too small */
+
+    /* find maxWeight */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+
+    /* Get start index of each weight */
+    {
+        U32 w, nextRankStart = 0;
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankStart;
+            nextRankStart += rankStats[w];
+            rankStart[w] = current;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
+        sizeOfSort = nextRankStart;
+    }
+
+    /* sort symbols by weight */
+    {
+        U32 s;
+        for (s=0; s<nbSymbols; s++)
+        {
+            U32 w = weightList[s];
+            U32 r = rankStart[w]++;
+            sortedSymbol[r].symbol = (BYTE)s;
+            sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+	/* Build rankVal */
+    {
+        const U32 minBits = tableLog+1 - maxW;
+        U32 nextRankVal = 0;
+        U32 w, consumed;
+        const int rescale = (memLog-tableLog) - 1;   /* tableLog <= memLog */
+        U32* rankVal0 = rankVal[0];
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankVal;
+            nextRankVal += rankStats[w] << (w+rescale);
+            rankVal0[w] = current;
+        }
+        for (consumed = minBits; consumed <= memLog - minBits; consumed++)
+        {
+            U32* rankValPtr = rankVal[consumed];
+            for (w = 1; w <= maxW; w++)
+            {
+                rankValPtr[w] = rankVal0[w] >> consumed;
+            }
+        }
+    }
+
+
+    /* fill tables */
+    {
+        HUF_DDescX6* DDescription = (HUF_DDescX6*)(DTable+1);
+        HUF_DSeqX6* DSequence = (HUF_DSeqX6*)(DTable + 1 + ((size_t)1<<(memLog-1)));
+        HUF_DSeqX6 DSeq;
+        HUF_DDescX6 DDesc;
+        DSeq.sequence = 0;
+        DDesc.nbBits = 0;
+        DDesc.nbBytes = 0;
+        HUF_fillDTableX6LevelN(DDescription, DSequence, memLog,
+                       (const U32 (*)[HUF_ABSOLUTEMAX_TABLELOG + 1])rankVal, 0, 1, maxW,
+                       sortedSymbol, sizeOfSort, rankStart0,
+                       tableLog+1, DSeq, DDesc);
+    }
+
+    return iSize;
+}
+
+
+static U32 HUF_decodeSymbolX6(void* op, BIT_DStream_t* DStream, const HUF_DDescX6* dd, const HUF_DSeqX6* ds, const U32 dtLog)
+{
+    const size_t val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, ds+val, sizeof(HUF_DSeqX6));
+    BIT_skipBits(DStream, dd[val].nbBits);
+    return dd[val].nbBytes;
+}
+
+static U32 HUF_decodeLastSymbolsX6(void* op, const U32 maxL, BIT_DStream_t* DStream,
+                                  const HUF_DDescX6* dd, const HUF_DSeqX6* ds, const U32 dtLog)
+{
+    const size_t val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    U32 length = dd[val].nbBytes;
+    if (length <= maxL)
+    {
+        memcpy(op, ds+val, length);
+        BIT_skipBits(DStream, dd[val].nbBits);
+        return length;
+    }
+    memcpy(op, ds+val, maxL);
+    if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8))
+    {
+        BIT_skipBits(DStream, dd[val].nbBits);
+        if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+            DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);   /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+    }
+    return maxL;
+}
+
+
+#define HUF_DECODE_SYMBOLX6_0(ptr, DStreamPtr) \
+    ptr += HUF_decodeSymbolX6(ptr, DStreamPtr, dd, ds, dtLog)
+
+#define HUF_DECODE_SYMBOLX6_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+        HUF_DECODE_SYMBOLX6_0(ptr, DStreamPtr)
+
+#define HUF_DECODE_SYMBOLX6_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        HUF_DECODE_SYMBOLX6_0(ptr, DStreamPtr)
+
+static inline size_t HUF_decodeStreamX6(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const U32* DTable, const U32 dtLog)
+{
+    const HUF_DDescX6* dd = (const HUF_DDescX6*)(DTable+1);
+    const HUF_DSeqX6* ds = (const HUF_DSeqX6*)(DTable + 1 + ((size_t)1<<(dtLog-1)));
+    BYTE* const pStart = p;
+
+    /* up to 16 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-16))
+    {
+        HUF_DECODE_SYMBOLX6_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX6_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX6_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX6_0(p, bitDPtr);
+    }
+
+    /* closer to the end, up to 4 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4))
+        HUF_DECODE_SYMBOLX6_0(p, bitDPtr);
+
+    while (p <= pEnd-4)
+        HUF_DECODE_SYMBOLX6_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
+
+    while (p < pEnd)
+        p += HUF_decodeLastSymbolsX6(p, (U32)(pEnd-p), bitDPtr, dd, ds, dtLog);
+
+    return p-pStart;
+}
+
+
+
+static size_t HUF_decompress4X6_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U32* DTable)
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+
+        const U32 dtLog = DTable[0];
+        const HUF_DDescX6* dd = (const HUF_DDescX6*)(DTable+1);
+        const HUF_DSeqX6* ds = (const HUF_DSeqX6*)(DTable + 1 + ((size_t)1<<(dtLog-1)));
+        size_t errorCode;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BIT_initDStream(&bitD1, istart1, length1);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD2, istart2, length2);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD3, istart3, length3);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD4, istart4, length4);
+        if (HUF_isError(errorCode)) return errorCode;
+
+        /* 16-64 symbols per loop (4-16 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (op3 <= opStart4) && (endSignal==BIT_DStream_unfinished) && (op4<=(oend-16)) ; )
+        {
+            HUF_DECODE_SYMBOLX6_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX6_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX6_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX6_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX6_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX6_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX6_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX6_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX6_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX6_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX6_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX6_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX6_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX6_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX6_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX6_0(op4, &bitD4);
+
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX6(op1, &bitD1, opStart2, DTable, dtLog);
+        HUF_decodeStreamX6(op2, &bitD2, opStart3, DTable, dtLog);
+        HUF_decodeStreamX6(op3, &bitD3, opStart4, DTable, dtLog);
+        HUF_decodeStreamX6(op4, &bitD4, oend,     DTable, dtLog);
+
+        /* check */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+static size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX6(DTable, HUF_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t hSize = HUF_readDTableX6 (DTable, cSrc, cSrcSize);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize;
+    cSrcSize -= hSize;
+
+    return HUF_decompress4X6_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/**********************************/
+/* Generic decompression selector */
+/**********************************/
+
+typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
+static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
+{
+    /* single, double, quad */
+    {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
+    {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
+    {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
+    {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
+    {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
+    {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
+    {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
+    {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
+    {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
+    {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
+    {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
+    {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
+    {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
+    {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
+    {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
+    {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
+};
+
+typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+
+static size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    static const decompressionAlgo decompress[3] = { HUF_decompress4X2, HUF_decompress4X4, HUF_decompress4X6 };
+    /* estimate decompression time */
+    U32 Q;
+    const U32 D256 = (U32)(dstSize >> 8);
+    U32 Dtime[3];
+    U32 algoNb = 0;
+    int n;
+
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
+    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
+    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
+
+    /* decoder timing evaluation */
+    Q = (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 since dstSize > cSrcSize */
+    for (n=0; n<3; n++)
+        Dtime[n] = algoTime[Q][n].tableTime + (algoTime[Q][n].decode256Time * D256);
+
+    Dtime[1] += Dtime[1] >> 4; Dtime[2] += Dtime[2] >> 3; /* advantage to algorithms using less memory, for cache eviction */
+
+    if (Dtime[1] < Dtime[0]) algoNb = 1;
+    if (Dtime[2] < Dtime[algoNb]) algoNb = 2;
+
+    return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
+
+    //return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);   /* multi-streams single-symbol decoding */
+    //return HUF_decompress4X4(dst, dstSize, cSrc, cSrcSize);   /* multi-streams double-symbols decoding */
+    //return HUF_decompress4X6(dst, dstSize, cSrc, cSrcSize);   /* multi-streams quad-symbols decoding */
+}
+/*
+    zstd - standard compression library
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* ***************************************************************
+*  Tuning parameters
+*****************************************************************/
+/*!
+*  MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*/
+#define ZSTD_MEMORY_USAGE 17
+
+/*!
+ * HEAPMODE :
+ * Select how default compression functions will allocate memory for their hash table,
+ * in memory stack (0, fastest), or in memory heap (1, requires malloc())
+ * Note that compression context is fairly large, as a consequence heap memory is recommended.
+ */
+#ifndef ZSTD_HEAPMODE
+#  define ZSTD_HEAPMODE 1
+#endif /* ZSTD_HEAPMODE */
+
+/*!
+*  LEGACY_SUPPORT :
+*  decompressor can decode older formats (starting from Zstd 0.1+)
+*/
+#ifndef ZSTD_LEGACY_SUPPORT
+#  define ZSTD_LEGACY_SUPPORT 1
+#endif
+
+
+/* *******************************************************
+*  Includes
+*********************************************************/
+#include <stdlib.h>      /* calloc */
+#include <string.h>      /* memcpy, memmove */
+#include <stdio.h>       /* debug : printf */
+
+
+/* *******************************************************
+*  Compiler specifics
+*********************************************************/
+#ifdef __AVX2__
+#  include <immintrin.h>   /* AVX2 intrinsics */
+#endif
+
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
+#else
+#  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#  ifdef __GNUC__
+#    define FORCE_INLINE static inline __attribute__((always_inline))
+#  else
+#    define FORCE_INLINE static inline
+#  endif
+#endif
+
+
+/* *******************************************************
+*  Constants
+*********************************************************/
+#define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
+#define HASH_TABLESIZE (1 << HASH_LOG)
+#define HASH_MASK (HASH_TABLESIZE - 1)
+
+#define KNUTH 2654435761
+
+#define BIT7 128
+#define BIT6  64
+#define BIT5  32
+#define BIT4  16
+#define BIT1   2
+#define BIT0   1
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define BLOCKSIZE (128 KB)                 /* define, for static allocation */
+#define MIN_SEQUENCES_SIZE (2 /*seqNb*/ + 2 /*dumps*/ + 3 /*seqTables*/ + 1 /*bitStream*/)
+#define MIN_CBLOCK_SIZE (3 /*litCSize*/ + MIN_SEQUENCES_SIZE)
+#define IS_RAW BIT0
+#define IS_RLE BIT1
+
+#define WORKPLACESIZE (BLOCKSIZE*3)
+#define MINMATCH 4
+#define MLbits   7
+#define LLbits   6
+#define Offbits  5
+#define MaxML  ((1<<MLbits )-1)
+#define MaxLL  ((1<<LLbits )-1)
+#define MaxOff   31
+#define LitFSELog  11
+#define MLFSELog   10
+#define LLFSELog   10
+#define OffFSELog   9
+#define MAX(a,b) ((a)<(b)?(b):(a))
+#define MaxSeq MAX(MaxLL, MaxML)
+
+#define LITERAL_NOENTROPY 63
+#define COMMAND_NOENTROPY 7   /* to remove */
+
+static const size_t ZSTD_blockHeaderSize = 3;
+static const size_t ZSTD_frameHeaderSize = 4;
+
+
+/* *******************************************************
+*  Memory operations
+**********************************************************/
+static void   ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
+
+static void   ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
+
+#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
+
+/*! ZSTD_wildcopy : custom version of memcpy(), can copy up to 7-8 bytes too many */
+static void ZSTD_wildcopy(void* dst, const void* src, size_t length)
+{
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + length;
+    do COPY8(op, ip) while (op < oend);
+}
+
+
+/* **************************************
+*  Local structures
+****************************************/
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
+
+typedef struct
+{
+    blockType_t blockType;
+    U32 origSize;
+} blockProperties_t;
+
+typedef struct {
+    void* buffer;
+    U32*  offsetStart;
+    U32*  offset;
+    BYTE* offCodeStart;
+    BYTE* offCode;
+    BYTE* litStart;
+    BYTE* lit;
+    BYTE* litLengthStart;
+    BYTE* litLength;
+    BYTE* matchLengthStart;
+    BYTE* matchLength;
+    BYTE* dumpsStart;
+    BYTE* dumps;
+} seqStore_t;
+
+
+/* *************************************
+*  Error Management
+***************************************/
+/*! ZSTD_isError
+*   tells if a return value is an error code */
+static unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
+
+
+/* *************************************
+*  Function body to include
+***************************************/
+static size_t ZSTD_read_ARCH(const void* p) { size_t r; memcpy(&r, p, sizeof(r)); return r; }
+
+MEM_STATIC unsigned ZSTD_NbCommonBytes (register size_t val)
+{
+    if (MEM_isLittleEndian())
+    {
+        if (MEM_64bits())
+        {
+#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r = 0;
+            _BitScanForward64( &r, (U64)val );
+            return (int)(r>>3);
+#       elif defined(__GNUC__) && (__GNUC__ >= 3) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_ctzll((U64)val) >> 3);
+#       else
+            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
+            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
+#       endif
+        }
+        else /* 32 bits */
+        {
+#       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r;
+            _BitScanForward( &r, (U32)val );
+            return (int)(r>>3);
+#       elif defined(__GNUC__) && (__GNUC__ >= 3) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_ctz((U32)val) >> 3);
+#       else
+            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
+            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+#       endif
+        }
+    }
+    else   /* Big Endian CPU */
+    {
+        if (MEM_32bits())
+        {
+#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r = 0;
+            _BitScanReverse64( &r, val );
+            return (unsigned)(r>>3);
+#       elif defined(__GNUC__) && (__GNUC__ >= 3) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_clzll(val) >> 3);
+#       else
+            unsigned r;
+            const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bits mode */
+            if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
+            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+            r += (!val);
+            return r;
+#       endif
+        }
+        else /* 32 bits */
+        {
+#       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r = 0;
+            _BitScanReverse( &r, (unsigned long)val );
+            return (unsigned)(r>>3);
+#       elif defined(__GNUC__) && (__GNUC__ >= 3) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_clz((U32)val) >> 3);
+#       else
+            unsigned r;
+            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
+            r += (!val);
+            return r;
+#       endif
+        }
+    }
+}
+
+
+MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
+{
+    const BYTE* const pStart = pIn;
+
+    while ((pIn<pInLimit-(sizeof(size_t)-1)))
+    {
+        size_t diff = ZSTD_read_ARCH(pMatch) ^ ZSTD_read_ARCH(pIn);
+        if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
+        pIn += ZSTD_NbCommonBytes(diff);
+        return (size_t)(pIn - pStart);
+    }
+
+    if (MEM_32bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
+    if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
+    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
+    return (size_t)(pIn - pStart);
+}
+
+
+/* *************************************************************
+*   Decompression section
+***************************************************************/
+struct ZSTD_DCtx_s
+{
+    U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
+    U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
+    U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
+    void* previousDstEnd;
+    void* base;
+    size_t expected;
+    blockType_t bType;
+    U32 phase;
+    const BYTE* litPtr;
+    size_t litBufSize;
+    size_t litSize;
+    BYTE litBuffer[BLOCKSIZE + 8 /* margin for wildcopy */];
+};   /* typedef'd to ZSTD_Dctx within "zstd_static.h" */
+
+
+static size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
+{
+    const BYTE* const in = (const BYTE* const)src;
+    BYTE headerFlags;
+    U32 cSize;
+
+    if (srcSize < 3) return ERROR(srcSize_wrong);
+
+    headerFlags = *in;
+    cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
+
+    bpPtr->blockType = (blockType_t)(headerFlags >> 6);
+    bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
+
+    if (bpPtr->blockType == bt_end) return 0;
+    if (bpPtr->blockType == bt_rle) return 1;
+    return cSize;
+}
+
+static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
+    memcpy(dst, src, srcSize);
+    return srcSize;
+}
+
+
+/** ZSTD_decompressLiterals
+    @return : nb of bytes read from src, or an error code*/
+static size_t ZSTD_decompressLiterals(void* dst, size_t* maxDstSizePtr,
+                                const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+
+    const size_t litSize = (MEM_readLE32(src) & 0x1FFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+    const size_t litCSize = (MEM_readLE32(ip+2) & 0xFFFFFF) >> 5;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+
+    if (litSize > *maxDstSizePtr) return ERROR(corruption_detected);
+    if (litCSize + 5 > srcSize) return ERROR(corruption_detected);
+
+    if (HUF_isError(HUF_decompress(dst, litSize, ip+5, litCSize))) return ERROR(corruption_detected);
+
+    *maxDstSizePtr = litSize;
+    return litCSize + 5;
+}
+
+
+/** ZSTD_decodeLiteralsBlock
+    @return : nb of bytes read from src (< srcSize )*/
+static size_t ZSTD_decodeLiteralsBlock(void* ctx,
+                          const void* src, size_t srcSize)
+{
+    ZSTD_DCtx* dctx = (ZSTD_DCtx*)ctx;
+    const BYTE* const istart = (const BYTE* const)src;
+
+    /* any compressed block with literals segment must be at least this size */
+    if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
+
+    switch(*istart & 3)
+    {
+    default:
+    case 0:
+        {
+            size_t litSize = BLOCKSIZE;
+            const size_t readSize = ZSTD_decompressLiterals(dctx->litBuffer, &litSize, src, srcSize);
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litBufSize = BLOCKSIZE;
+            dctx->litSize = litSize;
+            return readSize;   /* works if it's an error too */
+        }
+    case IS_RAW:
+        {
+            const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+            if (litSize > srcSize-11)   /* risk of reading too far with wildcopy */
+            {
+				if (litSize > srcSize-3) return ERROR(corruption_detected);
+				memcpy(dctx->litBuffer, istart, litSize);
+				dctx->litPtr = dctx->litBuffer;
+				dctx->litBufSize = BLOCKSIZE;
+				dctx->litSize = litSize;
+				return litSize+3;
+			}
+			/* direct reference into compressed stream */
+            dctx->litPtr = istart+3;
+            dctx->litBufSize = srcSize-3;
+            dctx->litSize = litSize;
+            return litSize+3;
+        }
+    case IS_RLE:
+        {
+            const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+            if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
+            memset(dctx->litBuffer, istart[3], litSize);
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litBufSize = BLOCKSIZE;
+            dctx->litSize = litSize;
+            return 4;
+        }
+    }
+}
+
+
+static size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
+                         FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
+                         const void* src, size_t srcSize)
+{
+    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* ip = istart;
+    const BYTE* const iend = istart + srcSize;
+    U32 LLtype, Offtype, MLtype;
+    U32 LLlog, Offlog, MLlog;
+    size_t dumpsLength;
+
+    /* check */
+    if (srcSize < 5) return ERROR(srcSize_wrong);
+
+    /* SeqHead */
+    *nbSeq = MEM_readLE16(ip); ip+=2;
+    LLtype  = *ip >> 6;
+    Offtype = (*ip >> 4) & 3;
+    MLtype  = (*ip >> 2) & 3;
+    if (*ip & 2)
+    {
+        dumpsLength  = ip[2];
+        dumpsLength += ip[1] << 8;
+        ip += 3;
+    }
+    else
+    {
+        dumpsLength  = ip[1];
+        dumpsLength += (ip[0] & 1) << 8;
+        ip += 2;
+    }
+    *dumpsPtr = ip;
+    ip += dumpsLength;
+    *dumpsLengthPtr = dumpsLength;
+
+    /* check */
+    if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
+
+    /* sequences */
+    {
+        S16 norm[MaxML+1];    /* assumption : MaxML >= MaxLL and MaxOff */
+        size_t headerSize;
+
+        /* Build DTables */
+        switch(LLtype)
+        {
+        U32 max;
+        case bt_rle :
+            LLlog = 0;
+            FSE_buildDTable_rle(DTableLL, *ip++); break;
+        case bt_raw :
+            LLlog = LLbits;
+            FSE_buildDTable_raw(DTableLL, LLbits); break;
+        default :
+            max = MaxLL;
+            headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
+            if (FSE_isError(headerSize)) return ERROR(GENERIC);
+            if (LLlog > LLFSELog) return ERROR(corruption_detected);
+            ip += headerSize;
+            FSE_buildDTable(DTableLL, norm, max, LLlog);
+        }
+
+        switch(Offtype)
+        {
+        U32 max;
+        case bt_rle :
+            Offlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong);   /* min : "raw", hence no header, but at least xxLog bits */
+            FSE_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */
+            break;
+        case bt_raw :
+            Offlog = Offbits;
+            FSE_buildDTable_raw(DTableOffb, Offbits); break;
+        default :
+            max = MaxOff;
+            headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
+            if (FSE_isError(headerSize)) return ERROR(GENERIC);
+            if (Offlog > OffFSELog) return ERROR(corruption_detected);
+            ip += headerSize;
+            FSE_buildDTable(DTableOffb, norm, max, Offlog);
+        }
+
+        switch(MLtype)
+        {
+        U32 max;
+        case bt_rle :
+            MLlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
+            FSE_buildDTable_rle(DTableML, *ip++); break;
+        case bt_raw :
+            MLlog = MLbits;
+            FSE_buildDTable_raw(DTableML, MLbits); break;
+        default :
+            max = MaxML;
+            headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
+            if (FSE_isError(headerSize)) return ERROR(GENERIC);
+            if (MLlog > MLFSELog) return ERROR(corruption_detected);
+            ip += headerSize;
+            FSE_buildDTable(DTableML, norm, max, MLlog);
+        }
+    }
+
+    return ip-istart;
+}
+
+
+typedef struct {
+    size_t litLength;
+    size_t offset;
+    size_t matchLength;
+} seq_t;
+
+typedef struct {
+    BIT_DStream_t DStream;
+    FSE_DState_t stateLL;
+    FSE_DState_t stateOffb;
+    FSE_DState_t stateML;
+    size_t prevOffset;
+    const BYTE* dumps;
+    const BYTE* dumpsEnd;
+} seqState_t;
+
+
+static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
+{
+    size_t litLength;
+    size_t prevOffset;
+    size_t offset;
+    size_t matchLength;
+    const BYTE* dumps = seqState->dumps;
+    const BYTE* const de = seqState->dumpsEnd;
+
+    /* Literal length */
+    litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
+    prevOffset = litLength ? seq->offset : seqState->prevOffset;
+    seqState->prevOffset = seq->offset;
+    if (litLength == MaxLL)
+    {
+        U32 add = *dumps++;
+        if (add < 255) litLength += add;
+        else
+        {
+            litLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            dumps += 3;
+        }
+        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
+    }
+
+    /* Offset */
+    {
+        static const size_t offsetPrefix[MaxOff+1] = {  /* note : size_t faster than U32 */
+                1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
+                512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
+                524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
+        U32 offsetCode, nbBits;
+        offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));   /* <= maxOff, by table construction */
+        if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
+        nbBits = offsetCode - 1;
+        if (offsetCode==0) nbBits = 0;   /* cmove */
+        offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits);
+        if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
+        if (offsetCode==0) offset = prevOffset;   /* cmove */
+    }
+
+    /* MatchLength */
+    matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
+    if (matchLength == MaxML)
+    {
+        U32 add = *dumps++;
+        if (add < 255) matchLength += add;
+        else
+        {
+            matchLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            dumps += 3;
+        }
+        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
+    }
+    matchLength += MINMATCH;
+
+    /* save result */
+    seq->litLength = litLength;
+    seq->offset = offset;
+    seq->matchLength = matchLength;
+    seqState->dumps = dumps;
+}
+
+
+static size_t ZSTD_execSequence(BYTE* op,
+                                seq_t sequence,
+                                const BYTE** litPtr, const BYTE* const litLimit,
+                                BYTE* const base, BYTE* const oend)
+{
+    static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
+    static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* substracted */
+    const BYTE* const ostart = op;
+    BYTE* const oLitEnd = op + sequence.litLength;
+    BYTE* const oMatchEnd = op + sequence.litLength + sequence.matchLength;   /* risk : address space overflow (32-bits) */
+    BYTE* const oend_8 = oend-8;
+    const BYTE* const litEnd = *litPtr + sequence.litLength;
+
+    /* checks */
+    if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall);   /* last match must start at a minimum distance of 8 from oend */
+    if (oMatchEnd > oend) return ERROR(dstSize_tooSmall);   /* overwrite beyond dst buffer */
+    if (litEnd > litLimit-8) return ERROR(corruption_detected);   /* overRead beyond lit buffer */
+
+    /* copy Literals */
+    ZSTD_wildcopy(op, *litPtr, sequence.litLength);   /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
+    op = oLitEnd;
+    *litPtr = litEnd;   /* update for next sequence */
+
+    /* copy Match */
+    {
+        const BYTE* match = op - sequence.offset;
+
+        /* check */
+        if (sequence.offset > (size_t)op) return ERROR(corruption_detected);   /* address space overflow test (this test seems kept by clang optimizer) */
+        //if (match > op) return ERROR(corruption_detected);   /* address space overflow test (is clang optimizer removing this test ?) */
+        if (match < base) return ERROR(corruption_detected);
+
+        /* close range match, overlap */
+        if (sequence.offset < 8)
+        {
+            const int dec64 = dec64table[sequence.offset];
+            op[0] = match[0];
+            op[1] = match[1];
+            op[2] = match[2];
+            op[3] = match[3];
+            match += dec32table[sequence.offset];
+            ZSTD_copy4(op+4, match);
+            match -= dec64;
+        }
+        else
+        {
+            ZSTD_copy8(op, match);
+        }
+        op += 8; match += 8;
+
+        if (oMatchEnd > oend-12)
+        {
+            if (op < oend_8)
+            {
+                ZSTD_wildcopy(op, match, oend_8 - op);
+                match += oend_8 - op;
+                op = oend_8;
+            }
+            while (op < oMatchEnd) *op++ = *match++;
+        }
+        else
+        {
+            ZSTD_wildcopy(op, match, sequence.matchLength-8);   /* works even if matchLength < 8 */
+        }
+    }
+
+    return oMatchEnd - ostart;
+}
+
+static size_t ZSTD_decompressSequences(
+                               void* ctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize)
+{
+    ZSTD_DCtx* dctx = (ZSTD_DCtx*)ctx;
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t errorCode, dumpsLength;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litMax = litPtr + dctx->litBufSize;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    int nbSeq;
+    const BYTE* dumps;
+    U32* DTableLL = dctx->LLTable;
+    U32* DTableML = dctx->MLTable;
+    U32* DTableOffb = dctx->OffTable;
+    BYTE* const base = (BYTE*) (dctx->base);
+
+    /* Build Decoding Tables */
+    errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
+                                      DTableLL, DTableML, DTableOffb,
+                                      ip, iend-ip);
+    if (ZSTD_isError(errorCode)) return errorCode;
+    ip += errorCode;
+
+    /* Regen sequences */
+    {
+        seq_t sequence;
+        seqState_t seqState;
+
+        memset(&sequence, 0, sizeof(sequence));
+        seqState.dumps = dumps;
+        seqState.dumpsEnd = dumps + dumpsLength;
+        seqState.prevOffset = 1;
+        errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip);
+        if (ERR_isError(errorCode)) return ERROR(corruption_detected);
+        FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
+        FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
+        FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
+
+        for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (nbSeq>0) ; )
+        {
+            size_t oneSeqSize;
+            nbSeq--;
+            ZSTD_decodeSequence(&sequence, &seqState);
+            oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litMax, base, oend);
+            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+            op += oneSeqSize;
+        }
+
+        /* check if reached exact end */
+        if ( !BIT_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected);   /* requested too much : data is corrupted */
+        if (nbSeq<0) return ERROR(corruption_detected);   /* requested too many sequences : data is corrupted */
+
+        /* last literal segment */
+        {
+            size_t lastLLSize = litEnd - litPtr;
+            if (litPtr > litEnd) return ERROR(corruption_detected);
+            if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
+            if (op != litPtr) memmove(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+    }
+
+    return op-ostart;
+}
+
+
+static size_t ZSTD_decompressBlock(
+                            void* ctx,
+                            void* dst, size_t maxDstSize,
+                      const void* src, size_t srcSize)
+{
+    /* blockType == blockCompressed */
+    const BYTE* ip = (const BYTE*)src;
+
+    /* Decode literals sub-block */
+    size_t litCSize = ZSTD_decodeLiteralsBlock(ctx, src, srcSize);
+    if (ZSTD_isError(litCSize)) return litCSize;
+    ip += litCSize;
+    srcSize -= litCSize;
+
+    return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize);
+}
+
+
+static size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* iend = ip + srcSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t remainingSize = srcSize;
+    U32 magicNumber;
+    blockProperties_t blockProperties;
+
+    /* Frame Header */
+    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+    magicNumber = MEM_readLE32(src);
+    if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
+
+    /* Loop on each block */
+    while (1)
+    {
+        size_t decodedSize=0;
+        size_t cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
+        if (ZSTD_isError(cBlockSize)) return cBlockSize;
+
+        ip += ZSTD_blockHeaderSize;
+        remainingSize -= ZSTD_blockHeaderSize;
+        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+
+        switch(blockProperties.blockType)
+        {
+        case bt_compressed:
+            decodedSize = ZSTD_decompressBlock(ctx, op, oend-op, ip, cBlockSize);
+            break;
+        case bt_raw :
+            decodedSize = ZSTD_copyUncompressedBlock(op, oend-op, ip, cBlockSize);
+            break;
+        case bt_rle :
+            return ERROR(GENERIC);   /* not yet supported */
+            break;
+        case bt_end :
+            /* end of frame */
+            if (remainingSize) return ERROR(srcSize_wrong);
+            break;
+        default:
+            return ERROR(GENERIC);   /* impossible */
+        }
+        if (cBlockSize == 0) break;   /* bt_end */
+
+        if (ZSTD_isError(decodedSize)) return decodedSize;
+        op += decodedSize;
+        ip += cBlockSize;
+        remainingSize -= cBlockSize;
+    }
+
+    return op-ostart;
+}
+
+static size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    ZSTD_DCtx ctx;
+    ctx.base = dst;
+    return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
+}
+
+
+/*******************************
+*  Streaming Decompression API
+*******************************/
+
+static size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx)
+{
+    dctx->expected = ZSTD_frameHeaderSize;
+    dctx->phase = 0;
+    dctx->previousDstEnd = NULL;
+    dctx->base = NULL;
+    return 0;
+}
+
+static ZSTD_DCtx* ZSTD_createDCtx(void)
+{
+    ZSTD_DCtx* dctx = (ZSTD_DCtx*)malloc(sizeof(ZSTD_DCtx));
+    if (dctx==NULL) return NULL;
+    ZSTD_resetDCtx(dctx);
+    return dctx;
+}
+
+static size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
+{
+    free(dctx);
+    return 0;
+}
+
+static size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx)
+{
+    return dctx->expected;
+}
+
+static size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    /* Sanity check */
+    if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
+    if (dst != ctx->previousDstEnd)  /* not contiguous */
+        ctx->base = dst;
+
+    /* Decompress : frame header */
+    if (ctx->phase == 0)
+    {
+        /* Check frame magic header */
+        U32 magicNumber = MEM_readLE32(src);
+        if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+        ctx->phase = 1;
+        ctx->expected = ZSTD_blockHeaderSize;
+        return 0;
+    }
+
+    /* Decompress : block header */
+    if (ctx->phase == 1)
+    {
+        blockProperties_t bp;
+        size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
+        if (ZSTD_isError(blockSize)) return blockSize;
+        if (bp.blockType == bt_end)
+        {
+            ctx->expected = 0;
+            ctx->phase = 0;
+        }
+        else
+        {
+            ctx->expected = blockSize;
+            ctx->bType = bp.blockType;
+            ctx->phase = 2;
+        }
+
+        return 0;
+    }
+
+    /* Decompress : block content */
+    {
+        size_t rSize;
+        switch(ctx->bType)
+        {
+        case bt_compressed:
+            rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
+            break;
+        case bt_raw :
+            rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
+            break;
+        case bt_rle :
+            return ERROR(GENERIC);   /* not yet handled */
+            break;
+        case bt_end :   /* should never happen (filtered at phase 1) */
+            rSize = 0;
+            break;
+        default:
+            return ERROR(GENERIC);
+        }
+        ctx->phase = 1;
+        ctx->expected = ZSTD_blockHeaderSize;
+        ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
+        return rSize;
+    }
+
+}
+
+
+/* wrapper layer */
+
+unsigned ZSTDv02_isError(size_t code)
+{
+	return ZSTD_isError(code);
+}
+
+size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize,
+                     const void* src, size_t compressedSize)
+{
+	return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
+}
+
+ZSTDv02_Dctx* ZSTDv02_createDCtx(void)
+{
+	return (ZSTDv02_Dctx*)ZSTD_createDCtx();
+}
+
+size_t ZSTDv02_freeDCtx(ZSTDv02_Dctx* dctx)
+{
+	return ZSTD_freeDCtx((ZSTD_DCtx*)dctx);
+}
+
+size_t ZSTDv02_resetDCtx(ZSTDv02_Dctx* dctx)
+{
+	return ZSTD_resetDCtx((ZSTD_DCtx*)dctx);
+}
+
+size_t ZSTDv02_nextSrcSizeToDecompress(ZSTDv02_Dctx* dctx)
+{
+	return ZSTD_nextSrcSizeToDecompress((ZSTD_DCtx*)dctx);
+}
+
+size_t ZSTDv02_decompressContinue(ZSTDv02_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+	return ZSTD_decompressContinue((ZSTD_DCtx*)dctx, dst, maxDstSize, src, srcSize);
+}
diff --git a/lib/legacy/zstd_v02.h b/lib/legacy/zstd_v02.h
new file mode 100644
index 0000000..1087b77
--- /dev/null
+++ b/lib/legacy/zstd_v02.h
@@ -0,0 +1,99 @@
+/*
+    zstd_v02 - decoder for 0.2 format
+    Header File
+    Copyright (C) 2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+*  Simple one-step function
+***************************************/
+/**
+ZSTDv02_decompress() : decompress ZSTD frames compliant with v0.2.x format
+    compressedSize : is the exact source size
+    maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
+                      It must be equal or larger than originalSize, otherwise decompression will fail.
+    return : the number of bytes decompressed into destination buffer (originalSize)
+             or an errorCode if it fails (which can be tested using ZSTDv01_isError())
+*/
+size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize,
+                     const void* src, size_t compressedSize);
+
+/**
+ZSTDv02_isError() : tells if the result of ZSTDv02_decompress() is an error
+*/
+unsigned ZSTDv02_isError(size_t code);
+
+
+/* *************************************
+*  Advanced functions
+***************************************/
+typedef struct ZSTDv02_Dctx_s ZSTDv02_Dctx;
+ZSTDv02_Dctx* ZSTDv02_createDCtx(void);
+size_t ZSTDv02_freeDCtx(ZSTDv02_Dctx* dctx);
+
+size_t ZSTDv02_decompressDCtx(void* ctx,
+                              void* dst, size_t maxOriginalSize,
+                        const void* src, size_t compressedSize);
+
+/* *************************************
+*  Streaming functions
+***************************************/
+size_t ZSTDv02_resetDCtx(ZSTDv02_Dctx* dctx);
+
+size_t ZSTDv02_nextSrcSizeToDecompress(ZSTDv02_Dctx* dctx);
+size_t ZSTDv02_decompressContinue(ZSTDv02_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+/**
+  Use above functions alternatively.
+  ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+  ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
+  Result is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+*/
+
+/* *************************************
+*  Prefix - version detection
+***************************************/
+#define ZSTDv02_magicNumber 0xFD2FB522   /* v0.2 */
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/lib/legacy/zstd_v03.c b/lib/legacy/zstd_v03.c
new file mode 100644
index 0000000..ae67941
--- /dev/null
+++ b/lib/legacy/zstd_v03.c
@@ -0,0 +1,3730 @@
+/* ******************************************************************
+   Error codes and messages
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef ERROR_H_MODULE
+#define ERROR_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include "zstd_v03.h"
+
+/******************************************
+*  Compiler-specific
+******************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define ERR_STATIC static inline
+#elif defined(_MSC_VER)
+#  define ERR_STATIC static __inline
+#elif defined(__GNUC__)
+#  define ERR_STATIC static __attribute__((unused))
+#else
+#  define ERR_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/******************************************
+*  Error Management
+******************************************/
+#define PREFIX(name) ZSTD_error_##name
+
+#define ERROR(name) (size_t)-PREFIX(name)
+
+#define ERROR_LIST(ITEM) \
+        ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \
+        ITEM(PREFIX(memory_allocation)) \
+        ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \
+        ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(corruption_detected)) \
+        ITEM(PREFIX(tableLog_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooSmall)) \
+        ITEM(PREFIX(maxCode))
+
+#define ERROR_GENERATE_ENUM(ENUM) ENUM,
+typedef enum { ERROR_LIST(ERROR_GENERATE_ENUM) } ERR_codes;  /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
+
+#define ERROR_CONVERTTOSTRING(STRING) #STRING,
+#define ERROR_GENERATE_STRING(EXPR) ERROR_CONVERTTOSTRING(EXPR)
+
+ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ERROR_H_MODULE */
+
+
+/* ******************************************************************
+   mem.h
+   low-level memory access routines
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/******************************************
+*  Includes
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include <string.h>    /* memcpy */
+
+
+/******************************************
+*  Compiler-specific
+******************************************/
+#if defined(__GNUC__)
+#  define MEM_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+#  define MEM_STATIC static __inline
+#else
+#  define MEM_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/****************************************************************
+*  Basic Types
+*****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef  int16_t S16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+  typedef  int64_t S64;
+#else
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef   signed short      S16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+  typedef   signed long long  S64;
+#endif
+
+
+/****************************************************************
+*  Memory I/O
+*****************************************************************/
+/* MEM_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The below switch allow to select different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violate C standard.
+ *            It can generate buggy code on targets generating assembly depending on alignment.
+ *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef MEM_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define MEM_FORCE_MEMORY_ACCESS 2
+#  elif defined(__INTEL_COMPILER) || \
+  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+#    define MEM_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; }
+MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; }
+
+MEM_STATIC unsigned MEM_isLittleEndian(void)
+{
+    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
+    return one.c[0];
+}
+
+#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
+
+/* violates C standard on structure alignment.
+Only use if no other choice to achieve best performance on target platform */
+MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
+MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
+MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
+
+#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
+
+MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
+MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; }
+
+#else
+
+/* default method, safe and standard.
+   can sometimes prove slower */
+
+MEM_STATIC U16 MEM_read16(const void* memPtr)
+{
+    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U32 MEM_read32(const void* memPtr)
+{
+    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U64 MEM_read64(const void* memPtr)
+{
+    U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value)
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+MEM_STATIC void MEM_write32(void* memPtr, U32 value)
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+MEM_STATIC void MEM_write64(void* memPtr, U64 value)
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+#endif // MEM_FORCE_MEMORY_ACCESS
+
+
+MEM_STATIC U16 MEM_readLE16(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read16(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U16)(p[0] + (p[1]<<8));
+    }
+}
+
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
+{
+    if (MEM_isLittleEndian())
+    {
+        MEM_write16(memPtr, val);
+    }
+    else
+    {
+        BYTE* p = (BYTE*)memPtr;
+        p[0] = (BYTE)val;
+        p[1] = (BYTE)(val>>8);
+    }
+}
+
+MEM_STATIC U32 MEM_readLE32(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read32(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
+    }
+}
+
+MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32)
+{
+    if (MEM_isLittleEndian())
+    {
+        MEM_write32(memPtr, val32);
+    }
+    else
+    {
+        BYTE* p = (BYTE*)memPtr;
+        p[0] = (BYTE)val32;
+        p[1] = (BYTE)(val32>>8);
+        p[2] = (BYTE)(val32>>16);
+        p[3] = (BYTE)(val32>>24);
+    }
+}
+
+MEM_STATIC U64 MEM_readLE64(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read64(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
+                     + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
+    }
+}
+
+MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64)
+{
+    if (MEM_isLittleEndian())
+    {
+        MEM_write64(memPtr, val64);
+    }
+    else
+    {
+        BYTE* p = (BYTE*)memPtr;
+        p[0] = (BYTE)val64;
+        p[1] = (BYTE)(val64>>8);
+        p[2] = (BYTE)(val64>>16);
+        p[3] = (BYTE)(val64>>24);
+        p[4] = (BYTE)(val64>>32);
+        p[5] = (BYTE)(val64>>40);
+        p[6] = (BYTE)(val64>>48);
+        p[7] = (BYTE)(val64>>56);
+    }
+}
+
+MEM_STATIC size_t MEM_readLEST(const void* memPtr)
+{
+    if (MEM_32bits())
+        return (size_t)MEM_readLE32(memPtr);
+    else
+        return (size_t)MEM_readLE64(memPtr);
+}
+
+MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val)
+{
+    if (MEM_32bits())
+        MEM_writeLE32(memPtr, (U32)val);
+    else
+        MEM_writeLE64(memPtr, (U64)val);
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* MEM_H_MODULE */
+
+
+/* ******************************************************************
+   bitstream
+   Part of NewGen Entropy library
+   header file (to include)
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef BITSTREAM_H_MODULE
+#define BITSTREAM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*
+*  This API consists of small unitary functions, which highly benefit from being inlined.
+*  Since link-time-optimization is not available for all compilers,
+*  these functions are defined into a .h to be included.
+*/
+
+
+/**********************************************
+*  bitStream decompression API (read backward)
+**********************************************/
+typedef struct
+{
+    size_t   bitContainer;
+    unsigned bitsConsumed;
+    const char* ptr;
+    const char* start;
+} BIT_DStream_t;
+
+typedef enum { BIT_DStream_unfinished = 0,
+               BIT_DStream_endOfBuffer = 1,
+               BIT_DStream_completed = 2,
+               BIT_DStream_overflow = 3 } BIT_DStream_status;  /* result of BIT_reloadDStream() */
+               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+
+MEM_STATIC size_t   BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
+MEM_STATIC size_t   BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
+
+
+/*
+* Start by invoking BIT_initDStream().
+* A chunk of the bitStream is then stored into a local register.
+* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+* You can then retrieve bitFields stored into the local register, **in reverse order**.
+* Local register is manually filled from memory by the BIT_reloadDStream() method.
+* A reload guarantee a minimum of ((8*sizeof(size_t))-7) bits when its result is BIT_DStream_unfinished.
+* Otherwise, it can be less than that, so proceed accordingly.
+* Checking if DStream has reached its end can be performed with BIT_endOfDStream()
+*/
+
+
+/******************************************
+*  unsafe API
+******************************************/
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
+/* faster, but works only if nbBits >= 1 */
+
+
+
+/****************************************************************
+*  Helper functions
+****************************************************************/
+MEM_STATIC unsigned BIT_highbit32 (register U32 val)
+{
+#   if defined(_MSC_VER)   /* Visual */
+    unsigned long r=0;
+    _BitScanReverse ( &r, val );
+    return (unsigned) r;
+#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
+    return 31 - __builtin_clz (val);
+#   else   /* Software version */
+    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+    U32 v = val;
+    unsigned r;
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
+    return r;
+#   endif
+}
+
+
+
+/**********************************************************
+* bitStream decoding
+**********************************************************/
+
+/*!BIT_initDStream
+*  Initialize a BIT_DStream_t.
+*  @bitD : a pointer to an already allocated BIT_DStream_t structure
+*  @srcBuffer must point at the beginning of a bitStream
+*  @srcSize must be the exact size of the bitStream
+*  @result : size of stream (== srcSize) or an errorCode if a problem is detected
+*/
+MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+    if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
+
+    if (srcSize >=  sizeof(size_t))   /* normal case */
+    {
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(size_t);
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+        if (contain32 == 0) return ERROR(GENERIC);   /* endMark not present */
+        bitD->bitsConsumed = 8 - BIT_highbit32(contain32);
+    }
+    else
+    {
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = bitD->start;
+        bitD->bitContainer = *(const BYTE*)(bitD->start);
+        switch(srcSize)
+        {
+            case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
+            case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
+            case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
+            case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
+            case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
+            case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) <<  8;
+            default:;
+        }
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+        if (contain32 == 0) return ERROR(GENERIC);   /* endMark not present */
+        bitD->bitsConsumed = 8 - BIT_highbit32(contain32);
+        bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
+    }
+
+    return srcSize;
+}
+
+/*!BIT_lookBits
+ * Provides next n bits from local register
+ * local register is not modified (bits are still present for next read/look)
+ * On 32-bits, maxNbBits==25
+ * On 64-bits, maxNbBits==57
+ * @return : value extracted
+ */
+MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+    return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
+}
+
+/*! BIT_lookBitsFast :
+*   unsafe version; only works only if nbBits >= 1 */
+MEM_STATIC size_t BIT_lookBitsFast(BIT_DStream_t* bitD, U32 nbBits)
+{
+    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+    return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
+}
+
+MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    bitD->bitsConsumed += nbBits;
+}
+
+/*!BIT_readBits
+ * Read next n bits from local register.
+ * pay attention to not read more than nbBits contained into local register.
+ * @return : extracted value.
+ */
+MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    size_t value = BIT_lookBits(bitD, nbBits);
+    BIT_skipBits(bitD, nbBits);
+    return value;
+}
+
+/*!BIT_readBitsFast :
+*  unsafe version; only works only if nbBits >= 1 */
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
+{
+    size_t value = BIT_lookBitsFast(bitD, nbBits);
+    BIT_skipBits(bitD, nbBits);
+    return value;
+}
+
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
+{
+	if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should never happen */
+		return BIT_DStream_overflow;
+
+    if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
+    {
+        bitD->ptr -= bitD->bitsConsumed >> 3;
+        bitD->bitsConsumed &= 7;
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        return BIT_DStream_unfinished;
+    }
+    if (bitD->ptr == bitD->start)
+    {
+        if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
+        return BIT_DStream_completed;
+    }
+    {
+        U32 nbBytes = bitD->bitsConsumed >> 3;
+        BIT_DStream_status result = BIT_DStream_unfinished;
+        if (bitD->ptr - nbBytes < bitD->start)
+        {
+            nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
+            result = BIT_DStream_endOfBuffer;
+        }
+        bitD->ptr -= nbBytes;
+        bitD->bitsConsumed -= nbBytes*8;
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD) */
+        return result;
+    }
+}
+
+/*! BIT_endOfDStream
+*   @return Tells if DStream has reached its exact end
+*/
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
+{
+    return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* BITSTREAM_H_MODULE */
+/* ******************************************************************
+   Error codes and messages
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef ERROR_H_MODULE
+#define ERROR_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/******************************************
+*  Compiler-specific
+******************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define ERR_STATIC static inline
+#elif defined(_MSC_VER)
+#  define ERR_STATIC static __inline
+#elif defined(__GNUC__)
+#  define ERR_STATIC static __attribute__((unused))
+#else
+#  define ERR_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/******************************************
+*  Error Management
+******************************************/
+#define PREFIX(name) ZSTD_error_##name
+
+#define ERROR(name) (size_t)-PREFIX(name)
+
+#define ERROR_LIST(ITEM) \
+        ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \
+        ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \
+        ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(corruption_detected)) \
+        ITEM(PREFIX(tableLog_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooSmall)) \
+        ITEM(PREFIX(maxCode))
+
+#define ERROR_GENERATE_ENUM(ENUM) ENUM,
+typedef enum { ERROR_LIST(ERROR_GENERATE_ENUM) } ERR_codes;  /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
+
+#define ERROR_CONVERTTOSTRING(STRING) #STRING,
+#define ERROR_GENERATE_STRING(EXPR) ERROR_CONVERTTOSTRING(EXPR)
+static const char* ERR_strings[] = { ERROR_LIST(ERROR_GENERATE_STRING) };
+
+ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
+
+ERR_STATIC const char* ERR_getErrorName(size_t code)
+{
+    static const char* codeError = "Unspecified error code";
+    if (ERR_isError(code)) return ERR_strings[-(int)(code)];
+    return codeError;
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ERROR_H_MODULE */
+/*
+Constructor and Destructor of type FSE_CTable
+    Note that its size depends on 'tableLog' and 'maxSymbolValue' */
+typedef unsigned FSE_CTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+
+
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   header file for static linking (only)
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/******************************************
+*  Static allocation
+******************************************/
+/* FSE buffer bounds */
+#define FSE_NCOUNTBOUND 512
+#define FSE_BLOCKBOUND(size) (size + (size>>7))
+#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
+#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
+#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
+
+
+/******************************************
+*  FSE advanced API
+******************************************/
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
+/* build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
+
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
+/* build a fake FSE_DTable, designed to always generate the same symbolValue */
+
+
+/******************************************
+*  FSE symbol decompression API
+******************************************/
+typedef struct
+{
+    size_t      state;
+    const void* table;   /* precise table may vary, depending on U16 */
+} FSE_DState_t;
+
+
+static void     FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
+
+static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+
+static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
+
+/*
+Let's now decompose FSE_decompress_usingDTable() into its unitary components.
+You will decode FSE-encoded symbols from the bitStream,
+and also any other bitFields you put in, **in reverse order**.
+
+You will need a few variables to track your bitStream. They are :
+
+BIT_DStream_t DStream;    // Stream context
+FSE_DState_t  DState;     // State context. Multiple ones are possible
+FSE_DTable*   DTablePtr;  // Decoding table, provided by FSE_buildDTable()
+
+The first thing to do is to init the bitStream.
+    errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
+
+You should then retrieve your initial state(s)
+(in reverse flushing order if you have several ones) :
+    errorCode = FSE_initDState(&DState, &DStream, DTablePtr);
+
+You can then decode your data, symbol after symbol.
+For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
+Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
+    unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
+
+You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
+Note : maximum allowed nbBits is 25, for 32-bits compatibility
+    size_t bitField = BIT_readBits(&DStream, nbBits);
+
+All above operations only read from local register (which size depends on size_t).
+Refueling the register from memory is manually performed by the reload method.
+    endSignal = FSE_reloadDStream(&DStream);
+
+BIT_reloadDStream() result tells if there is still some more data to read from DStream.
+BIT_DStream_unfinished : there is still some data left into the DStream.
+BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
+BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
+BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
+
+When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+    BIT_reloadDStream(&DStream) >= BIT_DStream_completed
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+    BIT_endOfDStream(&DStream);
+Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
+    FSE_endOfDState(&DState);
+*/
+
+
+/******************************************
+*  FSE unsafe API
+******************************************/
+static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/******************************************
+*  Implementation of inline functions
+******************************************/
+
+/* decompression */
+
+typedef struct {
+    U16 tableLog;
+    U16 fastMode;
+} FSE_DTableHeader;   /* sizeof U32 */
+
+typedef struct
+{
+    unsigned short newState;
+    unsigned char  symbol;
+    unsigned char  nbBits;
+} FSE_decode_t;   /* size == U32 */
+
+MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
+{
+    const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)dt;
+    DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
+    BIT_reloadDStream(bitD);
+    DStatePtr->table = dt + 1;
+}
+
+MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    const U32  nbBits = DInfo.nbBits;
+    BYTE symbol = DInfo.symbol;
+    size_t lowBits = BIT_readBits(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    const U32 nbBits = DInfo.nbBits;
+    BYTE symbol = DInfo.symbol;
+    size_t lowBits = BIT_readBitsFast(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
+{
+    return DStatePtr->state == 0;
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+/* ******************************************************************
+   Huff0 : Huffman coder, part of New Generation Entropy library
+   header file for static linking (only)
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/******************************************
+*  Static allocation macros
+******************************************/
+/* Huff0 buffer bounds */
+#define HUF_CTABLEBOUND 129
+#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8)   /* only true if incompressible pre-filtered with fast heuristic */
+#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* static allocation of Huff0's DTable */
+#define HUF_DTABLE_SIZE(maxTableLog)   (1 + (1<<maxTableLog))  /* nb Cells; use unsigned short for X2, unsigned int for X4 */
+#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
+        unsigned short DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
+        unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUF_CREATE_STATIC_DTABLEX6(DTable, maxTableLog) \
+        unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog) * 3 / 2] = { maxTableLog }
+
+
+/******************************************
+*  Advanced functions
+******************************************/
+static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
+static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbols decoder */
+static size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* quad-symbols decoder */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+/*
+    zstd - standard compression library
+    Header File
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+*  Version
+***************************************/
+#define ZSTD_VERSION_MAJOR    0    /* for breaking interface changes  */
+#define ZSTD_VERSION_MINOR    2    /* for new (non-breaking) interface capabilities */
+#define ZSTD_VERSION_RELEASE  2    /* for tweaks, bug-fixes, or development */
+#define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
+
+
+/* *************************************
+*  Advanced functions
+***************************************/
+typedef struct ZSTD_CCtx_s ZSTD_CCtx;   /* incomplete type */
+
+#if defined (__cplusplus)
+}
+#endif
+/*
+    zstd - standard compression library
+    Header File for static linking only
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* The objects defined into this file should be considered experimental.
+ * They are not labelled stable, as their prototype may change in the future.
+ * You can use them for tests, provide feedback, or if you can endure risk of future changes.
+ */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Streaming functions
+***************************************/
+
+typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+
+/*
+  Use above functions alternatively.
+  ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+  ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
+  Result is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+*/
+
+/* *************************************
+*  Prefix - version detection
+***************************************/
+#define ZSTD_magicNumber 0xFD2FB523   /* v0.3 */
+
+
+#if defined (__cplusplus)
+}
+#endif
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/****************************************************************
+*  Tuning parameters
+****************************************************************/
+/* MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#define FSE_MAX_MEMORY_USAGE 14
+#define FSE_DEFAULT_MEMORY_USAGE 13
+
+/* FSE_MAX_SYMBOL_VALUE :
+*  Maximum symbol value authorized.
+*  Required for proper stack allocation */
+#define FSE_MAX_SYMBOL_VALUE 255
+
+
+/****************************************************************
+*  template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+
+
+/****************************************************************
+*  Byte symbol type
+****************************************************************/
+#endif   /* !FSE_COMMONDEFS_ONLY */
+
+
+/****************************************************************
+*  Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
+#else
+#  ifdef __GNUC__
+#    define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#    define FORCE_INLINE static inline __attribute__((always_inline))
+#  else
+#    define FORCE_INLINE static inline
+#  endif
+#endif
+
+
+/****************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+
+/****************************************************************
+*  Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+
+/****************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/****************************************************************
+*  Complex types
+****************************************************************/
+typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+
+
+/****************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+/* Function templates */
+
+#define FSE_DECODE_TYPE FSE_TYPE_NAME(FSE_decode_t, FSE_FUNCTION_EXTENSION)
+
+static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
+
+static size_t FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION)
+(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
+    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (dt+1);   /* because dt is unsigned, 32-bits aligned on 32-bits */
+    const U32 tableSize = 1 << tableLog;
+    const U32 tableMask = tableSize-1;
+    const U32 step = FSE_tableStep(tableSize);
+    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
+    U32 position = 0;
+    U32 highThreshold = tableSize-1;
+    const S16 largeLimit= (S16)(1 << (tableLog-1));
+    U32 noLarge = 1;
+    U32 s;
+
+    /* Sanity Checks */
+    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+
+    /* Init, lay down lowprob symbols */
+    DTableH[0].tableLog = (U16)tableLog;
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        if (normalizedCounter[s]==-1)
+        {
+            tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
+            symbolNext[s] = 1;
+        }
+        else
+        {
+            if (normalizedCounter[s] >= largeLimit) noLarge=0;
+            symbolNext[s] = normalizedCounter[s];
+        }
+    }
+
+    /* Spread symbols */
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        int i;
+        for (i=0; i<normalizedCounter[s]; i++)
+        {
+            tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+            position = (position + step) & tableMask;
+            while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }
+    }
+
+    if (position!=0) return ERROR(GENERIC);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+
+    /* Build Decoding table */
+    {
+        U32 i;
+        for (i=0; i<tableSize; i++)
+        {
+            FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
+            U16 nextState = symbolNext[symbol]++;
+            tableDecode[i].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );
+            tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
+        }
+    }
+
+    DTableH->fastMode = (U16)noLarge;
+    return 0;
+}
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+/******************************************
+*  FSE helper functions
+******************************************/
+static unsigned FSE_isError(size_t code) { return ERR_isError(code); }
+
+
+/****************************************************************
+*  FSE NCount encoding-decoding
+****************************************************************/
+static short FSE_abs(short a)
+{
+    return a<0 ? -a : a;
+}
+
+static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                 const void* headerBuffer, size_t hbSize)
+{
+    const BYTE* const istart = (const BYTE*) headerBuffer;
+    const BYTE* const iend = istart + hbSize;
+    const BYTE* ip = istart;
+    int nbBits;
+    int remaining;
+    int threshold;
+    U32 bitStream;
+    int bitCount;
+    unsigned charnum = 0;
+    int previous0 = 0;
+
+    if (hbSize < 4) return ERROR(srcSize_wrong);
+    bitStream = MEM_readLE32(ip);
+    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
+    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+    bitStream >>= 4;
+    bitCount = 4;
+    *tableLogPtr = nbBits;
+    remaining = (1<<nbBits)+1;
+    threshold = 1<<nbBits;
+    nbBits++;
+
+    while ((remaining>1) && (charnum<=*maxSVPtr))
+    {
+        if (previous0)
+        {
+            unsigned n0 = charnum;
+            while ((bitStream & 0xFFFF) == 0xFFFF)
+            {
+                n0+=24;
+                if (ip < iend-5)
+                {
+                    ip+=2;
+                    bitStream = MEM_readLE32(ip) >> bitCount;
+                }
+                else
+                {
+                    bitStream >>= 16;
+                    bitCount+=16;
+                }
+            }
+            while ((bitStream & 3) == 3)
+            {
+                n0+=3;
+                bitStream>>=2;
+                bitCount+=2;
+            }
+            n0 += bitStream & 3;
+            bitCount += 2;
+            if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
+            while (charnum < n0) normalizedCounter[charnum++] = 0;
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+            {
+                ip += bitCount>>3;
+                bitCount &= 7;
+                bitStream = MEM_readLE32(ip) >> bitCount;
+            }
+            else
+                bitStream >>= 2;
+        }
+        {
+            const short max = (short)((2*threshold-1)-remaining);
+            short count;
+
+            if ((bitStream & (threshold-1)) < (U32)max)
+            {
+                count = (short)(bitStream & (threshold-1));
+                bitCount   += nbBits-1;
+            }
+            else
+            {
+                count = (short)(bitStream & (2*threshold-1));
+                if (count >= threshold) count -= max;
+                bitCount   += nbBits;
+            }
+
+            count--;   /* extra accuracy */
+            remaining -= FSE_abs(count);
+            normalizedCounter[charnum++] = count;
+            previous0 = !count;
+            while (remaining < threshold)
+            {
+                nbBits--;
+                threshold >>= 1;
+            }
+
+            {
+                if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+                {
+                    ip += bitCount>>3;
+                    bitCount &= 7;
+                }
+                else
+                {
+                    bitCount -= (int)(8 * (iend - 4 - ip));
+					ip = iend - 4;
+				}
+                bitStream = MEM_readLE32(ip) >> (bitCount & 31);
+            }
+        }
+    }
+    if (remaining != 1) return ERROR(GENERIC);
+    *maxSVPtr = charnum-1;
+
+    ip += (bitCount+7)>>3;
+    if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong);
+    return ip-istart;
+}
+
+
+/*********************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
+{
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
+    FSE_decode_t* const cell = (FSE_decode_t*)(dt + 1);   /* because dt is unsigned */
+
+    DTableH->tableLog = 0;
+    DTableH->fastMode = 0;
+
+    cell->newState = 0;
+    cell->symbol = symbolValue;
+    cell->nbBits = 0;
+
+    return 0;
+}
+
+
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
+{
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
+    FSE_decode_t* const dinfo = (FSE_decode_t*)(dt + 1);   /* because dt is unsigned */
+    const unsigned tableSize = 1 << nbBits;
+    const unsigned tableMask = tableSize - 1;
+    const unsigned maxSymbolValue = tableMask;
+    unsigned s;
+
+    /* Sanity checks */
+    if (nbBits < 1) return ERROR(GENERIC);         /* min size */
+
+    /* Build Decoding Table */
+    DTableH->tableLog = (U16)nbBits;
+    DTableH->fastMode = 1;
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        dinfo[s].newState = 0;
+        dinfo[s].symbol = (BYTE)s;
+        dinfo[s].nbBits = (BYTE)nbBits;
+    }
+
+    return 0;
+}
+
+FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const FSE_DTable* dt, const unsigned fast)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = omax-3;
+
+    BIT_DStream_t bitD;
+    FSE_DState_t state1;
+    FSE_DState_t state2;
+    size_t errorCode;
+
+    /* Init */
+    errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
+    if (FSE_isError(errorCode)) return errorCode;
+
+    FSE_initDState(&state1, &bitD, dt);
+    FSE_initDState(&state2, &bitD, dt);
+
+#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
+
+    /* 4 symbols per loop */
+    for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) && (op<olimit) ; op+=4)
+    {
+        op[0] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[1] = FSE_GETSYMBOL(&state2);
+
+        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
+
+        op[2] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[3] = FSE_GETSYMBOL(&state2);
+    }
+
+    /* tail */
+    /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
+    while (1)
+    {
+        if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state1);
+
+        if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state2);
+    }
+
+    /* end ? */
+    if (BIT_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
+        return op-ostart;
+
+    if (op==omax) return ERROR(dstSize_tooSmall);   /* dst buffer is full, but cSrc unfinished */
+
+    return ERROR(corruption_detected);
+}
+
+
+static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
+                            const void* cSrc, size_t cSrcSize,
+                            const FSE_DTable* dt)
+{
+    const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)dt;
+    const U32 fastMode = DTableH->fastMode;
+
+    /* select fast mode (static) */
+    if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+}
+
+
+static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{
+    const BYTE* const istart = (const BYTE*)cSrc;
+    const BYTE* ip = istart;
+    short counting[FSE_MAX_SYMBOL_VALUE+1];
+    DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
+    unsigned tableLog;
+    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+    size_t errorCode;
+
+    if (cSrcSize<2) return ERROR(srcSize_wrong);   /* too small input size */
+
+    /* normal FSE decoding mode */
+    errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
+    if (FSE_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);   /* too small input size */
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
+    if (FSE_isError(errorCode)) return errorCode;
+
+    /* always return, even if it is an error code */
+    return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
+}
+
+
+
+#endif   /* FSE_COMMONDEFS_ONLY */
+/* ******************************************************************
+   Huff0 : Huffman coder, part of New Generation Entropy library
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE+Huff0 source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+/****************************************************************
+*  Compiler specifics
+****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+/* inline is defined */
+#elif defined(_MSC_VER)
+#  define inline __inline
+#else
+#  define inline /* disable inline */
+#endif
+
+
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#else
+#  ifdef __GNUC__
+#    define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#    define FORCE_INLINE static inline __attribute__((always_inline))
+#  else
+#    define FORCE_INLINE static inline
+#  endif
+#endif
+
+
+/****************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+
+/****************************************************************
+*  Error Management
+****************************************************************/
+#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/******************************************
+*  Helper functions
+******************************************/
+static unsigned HUF_isError(size_t code) { return ERR_isError(code); }
+
+#define HUF_ABSOLUTEMAX_TABLELOG  16   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
+#define HUF_MAX_TABLELOG  12           /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
+#define HUF_DEFAULT_TABLELOG  HUF_MAX_TABLELOG   /* tableLog by default, when not specified */
+#define HUF_MAX_SYMBOL_VALUE 255
+#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
+#  error "HUF_MAX_TABLELOG is too large !"
+#endif
+
+
+
+/*********************************************************
+*  Huff0 : Huffman block decompression
+*********************************************************/
+typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2;   /* single-symbol decoding */
+
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4;  /* double-symbols decoding */
+
+typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
+
+/*! HUF_readStats
+    Read compact Huffman tree, saved by HUF_writeCTable
+    @huffWeight : destination buffer
+    @return : size read from `src`
+*/
+static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                            U32* nbSymbolsPtr, U32* tableLogPtr,
+                            const void* src, size_t srcSize)
+{
+    U32 weightTotal;
+    U32 tableLog;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize = ip[0];
+    size_t oSize;
+    U32 n;
+
+    //memset(huffWeight, 0, hwSize);   /* is not necessary, even though some analyzer complain ... */
+
+    if (iSize >= 128)  /* special header */
+    {
+        if (iSize >= (242))   /* RLE */
+        {
+            static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
+            oSize = l[iSize-242];
+            memset(huffWeight, 1, hwSize);
+            iSize = 0;
+        }
+        else   /* Incompressible */
+        {
+            oSize = iSize - 127;
+            iSize = ((oSize+1)/2);
+            if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+            if (oSize >= hwSize) return ERROR(corruption_detected);
+            ip += 1;
+            for (n=0; n<oSize; n+=2)
+            {
+                huffWeight[n]   = ip[n/2] >> 4;
+                huffWeight[n+1] = ip[n/2] & 15;
+            }
+        }
+    }
+    else  /* header compressed with FSE (normal case) */
+    {
+        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+        oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize);   /* max (hwSize-1) values decoded, as last one is implied */
+        if (FSE_isError(oSize)) return oSize;
+    }
+
+    /* collect weight stats */
+    memset(rankStats, 0, (HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32));
+    weightTotal = 0;
+    for (n=0; n<oSize; n++)
+    {
+        if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+        rankStats[huffWeight[n]]++;
+        weightTotal += (1 << huffWeight[n]) >> 1;
+    }
+
+    /* get last non-null symbol weight (implied, total must be 2^n) */
+    tableLog = BIT_highbit32(weightTotal) + 1;
+    if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+    {
+        U32 total = 1 << tableLog;
+        U32 rest = total - weightTotal;
+        U32 verif = 1 << BIT_highbit32(rest);
+        U32 lastWeight = BIT_highbit32(rest) + 1;
+        if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
+        huffWeight[oSize] = (BYTE)lastWeight;
+        rankStats[lastWeight]++;
+    }
+
+    /* check tree construction validity */
+    if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected);   /* by construction : at least 2 elts of rank 1, must be even */
+
+    /* results */
+    *nbSymbolsPtr = (U32)(oSize+1);
+    *tableLogPtr = tableLog;
+    return iSize+1;
+}
+
+
+/**************************/
+/* single-symbol decoding */
+/**************************/
+
+static size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
+{
+    BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];   /* large enough for values from 0 to 16 */
+    U32 tableLog = 0;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize = ip[0];
+    U32 nbSymbols = 0;
+    U32 n;
+    U32 nextRankStart;
+    HUF_DEltX2* const dt = (HUF_DEltX2*)(DTable + 1);
+
+    HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U16));   /* if compilation fails here, assertion is false */
+    //memset(huffWeight, 0, sizeof(huffWeight));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > DTable[0]) return ERROR(tableLog_tooLarge);   /* DTable is too small */
+    DTable[0] = (U16)tableLog;   /* maybe should separate sizeof DTable, as allocated, from used size of DTable, in case of DTable re-use */
+
+    /* Prepare ranks */
+    nextRankStart = 0;
+    for (n=1; n<=tableLog; n++)
+    {
+        U32 current = nextRankStart;
+        nextRankStart += (rankVal[n] << (n-1));
+        rankVal[n] = current;
+    }
+
+    /* fill DTable */
+    for (n=0; n<nbSymbols; n++)
+    {
+        const U32 w = huffWeight[n];
+        const U32 length = (1 << w) >> 1;
+        U32 i;
+        HUF_DEltX2 D;
+        D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
+        for (i = rankVal[w]; i < rankVal[w] + length; i++)
+            dt[i] = D;
+        rankVal[w] += length;
+    }
+
+    return iSize;
+}
+
+static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+        const size_t val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+        const BYTE c = dt[val].byte;
+        BIT_skipBits(Dstream, dt[val].nbBits);
+        return c;
+}
+
+#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
+    *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 4 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4))
+    {
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+    }
+
+    /* closer to the end */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    /* no more data to retrieve from bitstream, hence no need to reload */
+    while (p < pEnd)
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    return pEnd-pStart;
+}
+
+
+static size_t HUF_decompress4X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U16* DTable)
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+
+        const HUF_DEltX2* const dt = ((const HUF_DEltX2*)DTable) +1;
+        const U32 dtLog = DTable[0];
+        size_t errorCode;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BIT_initDStream(&bitD1, istart1, length1);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD2, istart2, length2);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD3, istart3, length3);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD4, istart4, length4);
+        if (HUF_isError(errorCode)) return errorCode;
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
+        {
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+    size_t errorCode;
+
+    errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize);
+    if (HUF_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    return HUF_decompress4X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/***************************/
+/* double-symbols decoding */
+/***************************/
+
+static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
+                           const U32* rankValOrigin, const int minWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+                           U32 nbBitsBaseline, U16 baseSeq)
+{
+    HUF_DEltX4 DElt;
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+    U32 s;
+
+    /* get pre-calculated rankVal */
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill skipped values */
+    if (minWeight>1)
+    {
+        U32 i, skipSize = rankVal[minWeight];
+        MEM_writeLE16(&(DElt.sequence), baseSeq);
+        DElt.nbBits   = (BYTE)(consumed);
+        DElt.length   = 1;
+        for (i = 0; i < skipSize; i++)
+            DTable[i] = DElt;
+    }
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++)   /* note : sortedSymbols already skipped */
+    {
+        const U32 symbol = sortedSymbols[s].symbol;
+        const U32 weight = sortedSymbols[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 length = 1 << (sizeLog-nbBits);
+        const U32 start = rankVal[weight];
+        U32 i = start;
+        const U32 end = start + length;
+
+        MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
+        DElt.nbBits = (BYTE)(nbBits + consumed);
+        DElt.length = 2;
+        do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
+
+        rankVal[weight] += length;
+    }
+}
+
+typedef U32 rankVal_t[HUF_ABSOLUTEMAX_TABLELOG][HUF_ABSOLUTEMAX_TABLELOG + 1];
+
+static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
+                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
+                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+                           const U32 nbBitsBaseline)
+{
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+    const U32 minBits  = nbBitsBaseline - maxWeight;
+    U32 s;
+
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++)
+    {
+        const U16 symbol = sortedList[s].symbol;
+        const U32 weight = sortedList[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 start = rankVal[weight];
+        const U32 length = 1 << (targetLog-nbBits);
+
+        if (targetLog-nbBits >= minBits)   /* enough room for a second symbol */
+        {
+            U32 sortedRank;
+            int minWeight = nbBits + scaleLog;
+            if (minWeight < 1) minWeight = 1;
+            sortedRank = rankStart[minWeight];
+            HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
+                           rankValOrigin[nbBits], minWeight,
+                           sortedList+sortedRank, sortedListSize-sortedRank,
+                           nbBitsBaseline, symbol);
+        }
+        else
+        {
+            U32 i;
+            const U32 end = start + length;
+            HUF_DEltX4 DElt;
+
+            MEM_writeLE16(&(DElt.sequence), symbol);
+            DElt.nbBits   = (BYTE)(nbBits);
+            DElt.length   = 1;
+            for (i = start; i < end; i++)
+                DTable[i] = DElt;
+        }
+        rankVal[weight] += length;
+    }
+}
+
+static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
+{
+    BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1];
+    sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
+    U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
+    U32* const rankStart = rankStart0+1;
+    rankVal_t rankVal;
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    const U32 memLog = DTable[0];
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize = ip[0];
+    HUF_DEltX4* const dt = ((HUF_DEltX4*)DTable) + 1;
+
+    HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32));   /* if compilation fails here, assertion is false */
+    if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > memLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
+
+    /* find maxWeight */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+
+    /* Get start index of each weight */
+    {
+        U32 w, nextRankStart = 0;
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankStart;
+            nextRankStart += rankStats[w];
+            rankStart[w] = current;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
+        sizeOfSort = nextRankStart;
+    }
+
+    /* sort symbols by weight */
+    {
+        U32 s;
+        for (s=0; s<nbSymbols; s++)
+        {
+            U32 w = weightList[s];
+            U32 r = rankStart[w]++;
+            sortedSymbol[r].symbol = (BYTE)s;
+            sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+	/* Build rankVal */
+    {
+        const U32 minBits = tableLog+1 - maxW;
+        U32 nextRankVal = 0;
+        U32 w, consumed;
+        const int rescale = (memLog-tableLog) - 1;   /* tableLog <= memLog */
+        U32* rankVal0 = rankVal[0];
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankVal;
+            nextRankVal += rankStats[w] << (w+rescale);
+            rankVal0[w] = current;
+        }
+        for (consumed = minBits; consumed <= memLog - minBits; consumed++)
+        {
+            U32* rankValPtr = rankVal[consumed];
+            for (w = 1; w <= maxW; w++)
+            {
+                rankValPtr[w] = rankVal0[w] >> consumed;
+            }
+        }
+    }
+
+    HUF_fillDTableX4(dt, memLog,
+                   sortedSymbol, sizeOfSort,
+                   rankStart0, rankVal, maxW,
+                   tableLog+1);
+
+    return iSize;
+}
+
+
+static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+    const size_t val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, dt+val, 2);
+    BIT_skipBits(DStream, dt[val].nbBits);
+    return dt[val].length;
+}
+
+static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+    const size_t val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, dt+val, 1);
+    if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
+    else
+    {
+        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8))
+        {
+            BIT_skipBits(DStream, dt[val].nbBits);
+            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);   /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+        }
+    }
+    return 1;
+}
+
+
+#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
+    ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 8 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd-7))
+    {
+        HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+    }
+
+    /* closer to the end */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-2))
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+
+    while (p <= pEnd-2)
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
+
+    if (p < pEnd)
+        p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
+
+    return p-pStart;
+}
+
+
+
+static size_t HUF_decompress4X4_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U32* DTable)
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+
+        const HUF_DEltX4* const dt = ((const HUF_DEltX4*)DTable) +1;
+        const U32 dtLog = DTable[0];
+        size_t errorCode;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BIT_initDStream(&bitD1, istart1, length1);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD2, istart2, length2);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD3, istart3, length3);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD4, istart4, length4);
+        if (HUF_isError(errorCode)) return errorCode;
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
+        {
+            HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
+
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX4(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize;
+    cSrcSize -= hSize;
+
+    return HUF_decompress4X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/**********************************/
+/* quad-symbol decoding           */
+/**********************************/
+typedef struct { BYTE nbBits; BYTE nbBytes; } HUF_DDescX6;
+typedef union { BYTE byte[4]; U32 sequence; } HUF_DSeqX6;
+
+/* recursive, up to level 3; may benefit from <template>-like strategy to nest each level inline */
+static void HUF_fillDTableX6LevelN(HUF_DDescX6* DDescription, HUF_DSeqX6* DSequence, int sizeLog,
+                           const rankVal_t rankValOrigin, const U32 consumed, const int minWeight, const U32 maxWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, const U32* rankStart,
+                           const U32 nbBitsBaseline, HUF_DSeqX6 baseSeq, HUF_DDescX6 DDesc)
+{
+    const int scaleLog = nbBitsBaseline - sizeLog;   /* note : targetLog >= (nbBitsBaseline-1), hence scaleLog <= 1 */
+    const int minBits  = nbBitsBaseline - maxWeight;
+    const U32 level = DDesc.nbBytes;
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+    U32 symbolStartPos, s;
+
+    /* local rankVal, will be modified */
+    memcpy(rankVal, rankValOrigin[consumed], sizeof(rankVal));
+
+    /* fill skipped values */
+    if (minWeight>1)
+    {
+        U32 i;
+        const U32 skipSize = rankVal[minWeight];
+        for (i = 0; i < skipSize; i++)
+        {
+            DSequence[i] = baseSeq;
+            DDescription[i] = DDesc;
+        }
+    }
+
+    /* fill DTable */
+    DDesc.nbBytes++;
+    symbolStartPos = rankStart[minWeight];
+    for (s=symbolStartPos; s<sortedListSize; s++)
+    {
+        const BYTE symbol = sortedSymbols[s].symbol;
+        const U32  weight = sortedSymbols[s].weight;   /* >= 1 (sorted) */
+        const int  nbBits = nbBitsBaseline - weight;   /* >= 1 (by construction) */
+        const int  totalBits = consumed+nbBits;
+        const U32  start  = rankVal[weight];
+        const U32  length = 1 << (sizeLog-nbBits);
+        baseSeq.byte[level] = symbol;
+        DDesc.nbBits = (BYTE)totalBits;
+
+        if ((level<3) && (sizeLog-totalBits >= minBits))   /* enough room for another symbol */
+        {
+            int nextMinWeight = totalBits + scaleLog;
+            if (nextMinWeight < 1) nextMinWeight = 1;
+            HUF_fillDTableX6LevelN(DDescription+start, DSequence+start, sizeLog-nbBits,
+                           rankValOrigin, totalBits, nextMinWeight, maxWeight,
+                           sortedSymbols, sortedListSize, rankStart,
+                           nbBitsBaseline, baseSeq, DDesc);   /* recursive (max : level 3) */
+        }
+        else
+        {
+            U32 i;
+            const U32 end = start + length;
+            for (i = start; i < end; i++)
+            {
+                DDescription[i] = DDesc;
+                DSequence[i] = baseSeq;
+            }
+        }
+        rankVal[weight] += length;
+    }
+}
+
+
+/* note : same preparation as X4 */
+static size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
+{
+    BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1];
+    sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
+    U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
+    U32* const rankStart = rankStart0+1;
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    rankVal_t rankVal;
+    const U32 memLog = DTable[0];
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize = ip[0];
+
+    if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > memLog) return ERROR(tableLog_tooLarge);   /* DTable is too small */
+
+    /* find maxWeight */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+
+    /* Get start index of each weight */
+    {
+        U32 w, nextRankStart = 0;
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankStart;
+            nextRankStart += rankStats[w];
+            rankStart[w] = current;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
+        sizeOfSort = nextRankStart;
+    }
+
+    /* sort symbols by weight */
+    {
+        U32 s;
+        for (s=0; s<nbSymbols; s++)
+        {
+            U32 w = weightList[s];
+            U32 r = rankStart[w]++;
+            sortedSymbol[r].symbol = (BYTE)s;
+            sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+	/* Build rankVal */
+    {
+        const U32 minBits = tableLog+1 - maxW;
+        U32 nextRankVal = 0;
+        U32 w, consumed;
+        const int rescale = (memLog-tableLog) - 1;   /* tableLog <= memLog */
+        U32* rankVal0 = rankVal[0];
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankVal;
+            nextRankVal += rankStats[w] << (w+rescale);
+            rankVal0[w] = current;
+        }
+        for (consumed = minBits; consumed <= memLog - minBits; consumed++)
+        {
+            U32* rankValPtr = rankVal[consumed];
+            for (w = 1; w <= maxW; w++)
+            {
+                rankValPtr[w] = rankVal0[w] >> consumed;
+            }
+        }
+    }
+
+
+    /* fill tables */
+    {
+        HUF_DDescX6* DDescription = (HUF_DDescX6*)(DTable+1);
+        HUF_DSeqX6* DSequence = (HUF_DSeqX6*)(DTable + 1 + ((size_t)1<<(memLog-1)));
+        HUF_DSeqX6 DSeq;
+        HUF_DDescX6 DDesc;
+        DSeq.sequence = 0;
+        DDesc.nbBits = 0;
+        DDesc.nbBytes = 0;
+        HUF_fillDTableX6LevelN(DDescription, DSequence, memLog,
+                       (const U32 (*)[HUF_ABSOLUTEMAX_TABLELOG + 1])rankVal, 0, 1, maxW,
+                       sortedSymbol, sizeOfSort, rankStart0,
+                       tableLog+1, DSeq, DDesc);
+    }
+
+    return iSize;
+}
+
+
+static U32 HUF_decodeSymbolX6(void* op, BIT_DStream_t* DStream, const HUF_DDescX6* dd, const HUF_DSeqX6* ds, const U32 dtLog)
+{
+    const size_t val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, ds+val, sizeof(HUF_DSeqX6));
+    BIT_skipBits(DStream, dd[val].nbBits);
+    return dd[val].nbBytes;
+}
+
+static U32 HUF_decodeLastSymbolsX6(void* op, const U32 maxL, BIT_DStream_t* DStream,
+                                  const HUF_DDescX6* dd, const HUF_DSeqX6* ds, const U32 dtLog)
+{
+    const size_t val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    U32 length = dd[val].nbBytes;
+    if (length <= maxL)
+    {
+        memcpy(op, ds+val, length);
+        BIT_skipBits(DStream, dd[val].nbBits);
+        return length;
+    }
+    memcpy(op, ds+val, maxL);
+    if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8))
+    {
+        BIT_skipBits(DStream, dd[val].nbBits);
+        if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+            DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);   /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+    }
+    return maxL;
+}
+
+
+#define HUF_DECODE_SYMBOLX6_0(ptr, DStreamPtr) \
+    ptr += HUF_decodeSymbolX6(ptr, DStreamPtr, dd, ds, dtLog)
+
+#define HUF_DECODE_SYMBOLX6_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+        HUF_DECODE_SYMBOLX6_0(ptr, DStreamPtr)
+
+#define HUF_DECODE_SYMBOLX6_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        HUF_DECODE_SYMBOLX6_0(ptr, DStreamPtr)
+
+static inline size_t HUF_decodeStreamX6(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const U32* DTable, const U32 dtLog)
+{
+    const HUF_DDescX6* dd = (const HUF_DDescX6*)(DTable+1);
+    const HUF_DSeqX6* ds = (const HUF_DSeqX6*)(DTable + 1 + ((size_t)1<<(dtLog-1)));
+    BYTE* const pStart = p;
+
+    /* up to 16 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-16))
+    {
+        HUF_DECODE_SYMBOLX6_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX6_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX6_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX6_0(p, bitDPtr);
+    }
+
+    /* closer to the end, up to 4 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4))
+        HUF_DECODE_SYMBOLX6_0(p, bitDPtr);
+
+    while (p <= pEnd-4)
+        HUF_DECODE_SYMBOLX6_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
+
+    while (p < pEnd)
+        p += HUF_decodeLastSymbolsX6(p, (U32)(pEnd-p), bitDPtr, dd, ds, dtLog);
+
+    return p-pStart;
+}
+
+
+
+static size_t HUF_decompress4X6_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U32* DTable)
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+
+        const U32 dtLog = DTable[0];
+        const HUF_DDescX6* dd = (const HUF_DDescX6*)(DTable+1);
+        const HUF_DSeqX6* ds = (const HUF_DSeqX6*)(DTable + 1 + ((size_t)1<<(dtLog-1)));
+        size_t errorCode;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BIT_initDStream(&bitD1, istart1, length1);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD2, istart2, length2);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD3, istart3, length3);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD4, istart4, length4);
+        if (HUF_isError(errorCode)) return errorCode;
+
+        /* 16-64 symbols per loop (4-16 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (op3 <= opStart4) && (endSignal==BIT_DStream_unfinished) && (op4<=(oend-16)) ; )
+        {
+            HUF_DECODE_SYMBOLX6_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX6_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX6_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX6_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX6_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX6_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX6_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX6_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX6_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX6_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX6_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX6_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX6_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX6_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX6_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX6_0(op4, &bitD4);
+
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX6(op1, &bitD1, opStart2, DTable, dtLog);
+        HUF_decodeStreamX6(op2, &bitD2, opStart3, DTable, dtLog);
+        HUF_decodeStreamX6(op3, &bitD3, opStart4, DTable, dtLog);
+        HUF_decodeStreamX6(op4, &bitD4, oend,     DTable, dtLog);
+
+        /* check */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+static size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX6(DTable, HUF_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t hSize = HUF_readDTableX6 (DTable, cSrc, cSrcSize);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize;
+    cSrcSize -= hSize;
+
+    return HUF_decompress4X6_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/**********************************/
+/* Generic decompression selector */
+/**********************************/
+
+typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
+static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
+{
+    /* single, double, quad */
+    {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
+    {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
+    {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
+    {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
+    {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
+    {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
+    {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
+    {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
+    {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
+    {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
+    {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
+    {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
+    {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
+    {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
+    {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
+    {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
+};
+
+typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+
+static size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    static const decompressionAlgo decompress[3] = { HUF_decompress4X2, HUF_decompress4X4, HUF_decompress4X6 };
+    /* estimate decompression time */
+    U32 Q;
+    const U32 D256 = (U32)(dstSize >> 8);
+    U32 Dtime[3];
+    U32 algoNb = 0;
+    int n;
+
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
+    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
+    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
+
+    /* decoder timing evaluation */
+    Q = (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 since dstSize > cSrcSize */
+    for (n=0; n<3; n++)
+        Dtime[n] = algoTime[Q][n].tableTime + (algoTime[Q][n].decode256Time * D256);
+
+    Dtime[1] += Dtime[1] >> 4; Dtime[2] += Dtime[2] >> 3; /* advantage to algorithms using less memory, for cache eviction */
+
+    if (Dtime[1] < Dtime[0]) algoNb = 1;
+    if (Dtime[2] < Dtime[algoNb]) algoNb = 2;
+
+    return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
+
+    //return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);   /* multi-streams single-symbol decoding */
+    //return HUF_decompress4X4(dst, dstSize, cSrc, cSrcSize);   /* multi-streams double-symbols decoding */
+    //return HUF_decompress4X6(dst, dstSize, cSrc, cSrcSize);   /* multi-streams quad-symbols decoding */
+}
+/*
+    zstd - standard compression library
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* ***************************************************************
+*  Tuning parameters
+*****************************************************************/
+/*!
+*  MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*/
+#define ZSTD_MEMORY_USAGE 17
+
+/*!
+ * HEAPMODE :
+ * Select how default compression functions will allocate memory for their hash table,
+ * in memory stack (0, fastest), or in memory heap (1, requires malloc())
+ * Note that compression context is fairly large, as a consequence heap memory is recommended.
+ */
+#ifndef ZSTD_HEAPMODE
+#  define ZSTD_HEAPMODE 1
+#endif /* ZSTD_HEAPMODE */
+
+/*!
+*  LEGACY_SUPPORT :
+*  decompressor can decode older formats (starting from Zstd 0.1+)
+*/
+#ifndef ZSTD_LEGACY_SUPPORT
+#  define ZSTD_LEGACY_SUPPORT 1
+#endif
+
+
+/* *******************************************************
+*  Includes
+*********************************************************/
+#include <stdlib.h>      /* calloc */
+#include <string.h>      /* memcpy, memmove */
+#include <stdio.h>       /* debug : printf */
+
+
+/* *******************************************************
+*  Compiler specifics
+*********************************************************/
+#ifdef __AVX2__
+#  include <immintrin.h>   /* AVX2 intrinsics */
+#endif
+
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
+#else
+#  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#  ifdef __GNUC__
+#    define FORCE_INLINE static inline __attribute__((always_inline))
+#  else
+#    define FORCE_INLINE static inline
+#  endif
+#endif
+
+
+/* *******************************************************
+*  Constants
+*********************************************************/
+#define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
+#define HASH_TABLESIZE (1 << HASH_LOG)
+#define HASH_MASK (HASH_TABLESIZE - 1)
+
+#define KNUTH 2654435761
+
+#define BIT7 128
+#define BIT6  64
+#define BIT5  32
+#define BIT4  16
+#define BIT1   2
+#define BIT0   1
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define BLOCKSIZE (128 KB)                 /* define, for static allocation */
+#define MIN_SEQUENCES_SIZE (2 /*seqNb*/ + 2 /*dumps*/ + 3 /*seqTables*/ + 1 /*bitStream*/)
+#define MIN_CBLOCK_SIZE (3 /*litCSize*/ + MIN_SEQUENCES_SIZE)
+#define IS_RAW BIT0
+#define IS_RLE BIT1
+
+#define WORKPLACESIZE (BLOCKSIZE*3)
+#define MINMATCH 4
+#define MLbits   7
+#define LLbits   6
+#define Offbits  5
+#define MaxML  ((1<<MLbits )-1)
+#define MaxLL  ((1<<LLbits )-1)
+#define MaxOff   31
+#define LitFSELog  11
+#define MLFSELog   10
+#define LLFSELog   10
+#define OffFSELog   9
+#define MAX(a,b) ((a)<(b)?(b):(a))
+#define MaxSeq MAX(MaxLL, MaxML)
+
+#define LITERAL_NOENTROPY 63
+#define COMMAND_NOENTROPY 7   /* to remove */
+
+static const size_t ZSTD_blockHeaderSize = 3;
+static const size_t ZSTD_frameHeaderSize = 4;
+
+
+/* *******************************************************
+*  Memory operations
+**********************************************************/
+static void   ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
+
+static void   ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
+
+#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
+
+/*! ZSTD_wildcopy : custom version of memcpy(), can copy up to 7-8 bytes too many */
+static void ZSTD_wildcopy(void* dst, const void* src, size_t length)
+{
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + length;
+    do COPY8(op, ip) while (op < oend);
+}
+
+
+/* **************************************
+*  Local structures
+****************************************/
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
+
+typedef struct
+{
+    blockType_t blockType;
+    U32 origSize;
+} blockProperties_t;
+
+typedef struct {
+    void* buffer;
+    U32*  offsetStart;
+    U32*  offset;
+    BYTE* offCodeStart;
+    BYTE* offCode;
+    BYTE* litStart;
+    BYTE* lit;
+    BYTE* litLengthStart;
+    BYTE* litLength;
+    BYTE* matchLengthStart;
+    BYTE* matchLength;
+    BYTE* dumpsStart;
+    BYTE* dumps;
+} seqStore_t;
+
+
+/* *************************************
+*  Error Management
+***************************************/
+/*! ZSTD_isError
+*   tells if a return value is an error code */
+static unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
+
+
+/* *************************************
+*  Function body to include
+***************************************/
+static size_t ZSTD_read_ARCH(const void* p) { size_t r; memcpy(&r, p, sizeof(r)); return r; }
+
+MEM_STATIC unsigned ZSTD_NbCommonBytes (register size_t val)
+{
+    if (MEM_isLittleEndian())
+    {
+        if (MEM_64bits())
+        {
+#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r = 0;
+            _BitScanForward64( &r, (U64)val );
+            return (int)(r>>3);
+#       elif defined(__GNUC__) && (__GNUC__ >= 3) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_ctzll((U64)val) >> 3);
+#       else
+            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
+            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
+#       endif
+        }
+        else /* 32 bits */
+        {
+#       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r;
+            _BitScanForward( &r, (U32)val );
+            return (int)(r>>3);
+#       elif defined(__GNUC__) && (__GNUC__ >= 3) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_ctz((U32)val) >> 3);
+#       else
+            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
+            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+#       endif
+        }
+    }
+    else   /* Big Endian CPU */
+    {
+        if (MEM_32bits())
+        {
+#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r = 0;
+            _BitScanReverse64( &r, val );
+            return (unsigned)(r>>3);
+#       elif defined(__GNUC__) && (__GNUC__ >= 3) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_clzll(val) >> 3);
+#       else
+            unsigned r;
+            const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bits mode */
+            if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
+            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+            r += (!val);
+            return r;
+#       endif
+        }
+        else /* 32 bits */
+        {
+#       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r = 0;
+            _BitScanReverse( &r, (unsigned long)val );
+            return (unsigned)(r>>3);
+#       elif defined(__GNUC__) && (__GNUC__ >= 3) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_clz((U32)val) >> 3);
+#       else
+            unsigned r;
+            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
+            r += (!val);
+            return r;
+#       endif
+        }
+    }
+}
+
+
+MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
+{
+    const BYTE* const pStart = pIn;
+
+    while ((pIn<pInLimit-(sizeof(size_t)-1)))
+    {
+        size_t diff = ZSTD_read_ARCH(pMatch) ^ ZSTD_read_ARCH(pIn);
+        if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
+        pIn += ZSTD_NbCommonBytes(diff);
+        return (size_t)(pIn - pStart);
+    }
+
+    if (MEM_32bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
+    if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
+    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
+    return (size_t)(pIn - pStart);
+}
+
+
+/* *************************************************************
+*   Decompression section
+***************************************************************/
+struct ZSTD_DCtx_s
+{
+    U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
+    U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
+    U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
+    void* previousDstEnd;
+    void* base;
+    size_t expected;
+    blockType_t bType;
+    U32 phase;
+    const BYTE* litPtr;
+    size_t litBufSize;
+    size_t litSize;
+    BYTE litBuffer[BLOCKSIZE + 8 /* margin for wildcopy */];
+};   /* typedef'd to ZSTD_Dctx within "zstd_static.h" */
+
+
+static size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
+{
+    const BYTE* const in = (const BYTE* const)src;
+    BYTE headerFlags;
+    U32 cSize;
+
+    if (srcSize < 3) return ERROR(srcSize_wrong);
+
+    headerFlags = *in;
+    cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
+
+    bpPtr->blockType = (blockType_t)(headerFlags >> 6);
+    bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
+
+    if (bpPtr->blockType == bt_end) return 0;
+    if (bpPtr->blockType == bt_rle) return 1;
+    return cSize;
+}
+
+static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
+    memcpy(dst, src, srcSize);
+    return srcSize;
+}
+
+
+/** ZSTD_decompressLiterals
+    @return : nb of bytes read from src, or an error code*/
+static size_t ZSTD_decompressLiterals(void* dst, size_t* maxDstSizePtr,
+                                const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+
+    const size_t litSize = (MEM_readLE32(src) & 0x1FFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+    const size_t litCSize = (MEM_readLE32(ip+2) & 0xFFFFFF) >> 5;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+
+    if (litSize > *maxDstSizePtr) return ERROR(corruption_detected);
+    if (litCSize + 5 > srcSize) return ERROR(corruption_detected);
+
+    if (HUF_isError(HUF_decompress(dst, litSize, ip+5, litCSize))) return ERROR(corruption_detected);
+
+    *maxDstSizePtr = litSize;
+    return litCSize + 5;
+}
+
+
+/** ZSTD_decodeLiteralsBlock
+    @return : nb of bytes read from src (< srcSize )*/
+static size_t ZSTD_decodeLiteralsBlock(void* ctx,
+                          const void* src, size_t srcSize)
+{
+    ZSTD_DCtx* dctx = (ZSTD_DCtx*)ctx;
+    const BYTE* const istart = (const BYTE* const)src;
+
+    /* any compressed block with literals segment must be at least this size */
+    if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
+
+    switch(*istart & 3)
+    {
+    default:
+    case 0:
+        {
+            size_t litSize = BLOCKSIZE;
+            const size_t readSize = ZSTD_decompressLiterals(dctx->litBuffer, &litSize, src, srcSize);
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litBufSize = BLOCKSIZE;
+            dctx->litSize = litSize;
+            return readSize;   /* works if it's an error too */
+        }
+    case IS_RAW:
+        {
+            const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+            if (litSize > srcSize-11)   /* risk of reading too far with wildcopy */
+            {
+				if (litSize > srcSize-3) return ERROR(corruption_detected);
+				memcpy(dctx->litBuffer, istart, litSize);
+				dctx->litPtr = dctx->litBuffer;
+				dctx->litBufSize = BLOCKSIZE;
+				dctx->litSize = litSize;
+				return litSize+3;
+			}
+			/* direct reference into compressed stream */
+            dctx->litPtr = istart+3;
+            dctx->litBufSize = srcSize-3;
+            dctx->litSize = litSize;
+            return litSize+3;
+        }
+    case IS_RLE:
+        {
+            const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+            if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
+            memset(dctx->litBuffer, istart[3], litSize);
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litBufSize = BLOCKSIZE;
+            dctx->litSize = litSize;
+            return 4;
+        }
+    }
+}
+
+
+static size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
+                         FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
+                         const void* src, size_t srcSize)
+{
+    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* ip = istart;
+    const BYTE* const iend = istart + srcSize;
+    U32 LLtype, Offtype, MLtype;
+    U32 LLlog, Offlog, MLlog;
+    size_t dumpsLength;
+
+    /* check */
+    if (srcSize < 5) return ERROR(srcSize_wrong);
+
+    /* SeqHead */
+    *nbSeq = MEM_readLE16(ip); ip+=2;
+    LLtype  = *ip >> 6;
+    Offtype = (*ip >> 4) & 3;
+    MLtype  = (*ip >> 2) & 3;
+    if (*ip & 2)
+    {
+        dumpsLength  = ip[2];
+        dumpsLength += ip[1] << 8;
+        ip += 3;
+    }
+    else
+    {
+        dumpsLength  = ip[1];
+        dumpsLength += (ip[0] & 1) << 8;
+        ip += 2;
+    }
+    *dumpsPtr = ip;
+    ip += dumpsLength;
+    *dumpsLengthPtr = dumpsLength;
+
+    /* check */
+    if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
+
+    /* sequences */
+    {
+        S16 norm[MaxML+1];    /* assumption : MaxML >= MaxLL and MaxOff */
+        size_t headerSize;
+
+        /* Build DTables */
+        switch(LLtype)
+        {
+        U32 max;
+        case bt_rle :
+            LLlog = 0;
+            FSE_buildDTable_rle(DTableLL, *ip++); break;
+        case bt_raw :
+            LLlog = LLbits;
+            FSE_buildDTable_raw(DTableLL, LLbits); break;
+        default :
+            max = MaxLL;
+            headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
+            if (FSE_isError(headerSize)) return ERROR(GENERIC);
+            if (LLlog > LLFSELog) return ERROR(corruption_detected);
+            ip += headerSize;
+            FSE_buildDTable(DTableLL, norm, max, LLlog);
+        }
+
+        switch(Offtype)
+        {
+        U32 max;
+        case bt_rle :
+            Offlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong);   /* min : "raw", hence no header, but at least xxLog bits */
+            FSE_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */
+            break;
+        case bt_raw :
+            Offlog = Offbits;
+            FSE_buildDTable_raw(DTableOffb, Offbits); break;
+        default :
+            max = MaxOff;
+            headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
+            if (FSE_isError(headerSize)) return ERROR(GENERIC);
+            if (Offlog > OffFSELog) return ERROR(corruption_detected);
+            ip += headerSize;
+            FSE_buildDTable(DTableOffb, norm, max, Offlog);
+        }
+
+        switch(MLtype)
+        {
+        U32 max;
+        case bt_rle :
+            MLlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
+            FSE_buildDTable_rle(DTableML, *ip++); break;
+        case bt_raw :
+            MLlog = MLbits;
+            FSE_buildDTable_raw(DTableML, MLbits); break;
+        default :
+            max = MaxML;
+            headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
+            if (FSE_isError(headerSize)) return ERROR(GENERIC);
+            if (MLlog > MLFSELog) return ERROR(corruption_detected);
+            ip += headerSize;
+            FSE_buildDTable(DTableML, norm, max, MLlog);
+        }
+    }
+
+    return ip-istart;
+}
+
+
+typedef struct {
+    size_t litLength;
+    size_t offset;
+    size_t matchLength;
+} seq_t;
+
+typedef struct {
+    BIT_DStream_t DStream;
+    FSE_DState_t stateLL;
+    FSE_DState_t stateOffb;
+    FSE_DState_t stateML;
+    size_t prevOffset;
+    const BYTE* dumps;
+    const BYTE* dumpsEnd;
+} seqState_t;
+
+
+static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
+{
+    size_t litLength;
+    size_t prevOffset;
+    size_t offset;
+    size_t matchLength;
+    const BYTE* dumps = seqState->dumps;
+    const BYTE* const de = seqState->dumpsEnd;
+
+    /* Literal length */
+    litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
+    prevOffset = litLength ? seq->offset : seqState->prevOffset;
+    seqState->prevOffset = seq->offset;
+    if (litLength == MaxLL)
+    {
+        U32 add = *dumps++;
+        if (add < 255) litLength += add;
+        else
+        {
+            litLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            dumps += 3;
+        }
+        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
+    }
+
+    /* Offset */
+    {
+        static const size_t offsetPrefix[MaxOff+1] = {  /* note : size_t faster than U32 */
+                1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
+                512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
+                524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
+        U32 offsetCode, nbBits;
+        offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));   /* <= maxOff, by table construction */
+        if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
+        nbBits = offsetCode - 1;
+        if (offsetCode==0) nbBits = 0;   /* cmove */
+        offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits);
+        if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
+        if (offsetCode==0) offset = prevOffset;   /* cmove */
+    }
+
+    /* MatchLength */
+    matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
+    if (matchLength == MaxML)
+    {
+        U32 add = *dumps++;
+        if (add < 255) matchLength += add;
+        else
+        {
+            matchLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            dumps += 3;
+        }
+        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
+    }
+    matchLength += MINMATCH;
+
+    /* save result */
+    seq->litLength = litLength;
+    seq->offset = offset;
+    seq->matchLength = matchLength;
+    seqState->dumps = dumps;
+}
+
+
+static size_t ZSTD_execSequence(BYTE* op,
+                                seq_t sequence,
+                                const BYTE** litPtr, const BYTE* const litLimit,
+                                BYTE* const base, BYTE* const oend)
+{
+    static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
+    static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* substracted */
+    const BYTE* const ostart = op;
+    BYTE* const oLitEnd = op + sequence.litLength;
+    BYTE* const oMatchEnd = op + sequence.litLength + sequence.matchLength;   /* risk : address space overflow (32-bits) */
+    BYTE* const oend_8 = oend-8;
+    const BYTE* const litEnd = *litPtr + sequence.litLength;
+
+    /* checks */
+    if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall);   /* last match must start at a minimum distance of 8 from oend */
+    if (oMatchEnd > oend) return ERROR(dstSize_tooSmall);   /* overwrite beyond dst buffer */
+    if (litEnd > litLimit-8) return ERROR(corruption_detected);   /* overRead beyond lit buffer */
+
+    /* copy Literals */
+    ZSTD_wildcopy(op, *litPtr, sequence.litLength);   /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
+    op = oLitEnd;
+    *litPtr = litEnd;   /* update for next sequence */
+
+    /* copy Match */
+    {
+        const BYTE* match = op - sequence.offset;
+
+        /* check */
+        if (sequence.offset > (size_t)op) return ERROR(corruption_detected);   /* address space overflow test (this test seems kept by clang optimizer) */
+        //if (match > op) return ERROR(corruption_detected);   /* address space overflow test (is clang optimizer removing this test ?) */
+        if (match < base) return ERROR(corruption_detected);
+
+        /* close range match, overlap */
+        if (sequence.offset < 8)
+        {
+            const int dec64 = dec64table[sequence.offset];
+            op[0] = match[0];
+            op[1] = match[1];
+            op[2] = match[2];
+            op[3] = match[3];
+            match += dec32table[sequence.offset];
+            ZSTD_copy4(op+4, match);
+            match -= dec64;
+        }
+        else
+        {
+            ZSTD_copy8(op, match);
+        }
+        op += 8; match += 8;
+
+        if (oMatchEnd > oend-12)
+        {
+            if (op < oend_8)
+            {
+                ZSTD_wildcopy(op, match, oend_8 - op);
+                match += oend_8 - op;
+                op = oend_8;
+            }
+            while (op < oMatchEnd) *op++ = *match++;
+        }
+        else
+        {
+            ZSTD_wildcopy(op, match, sequence.matchLength-8);   /* works even if matchLength < 8 */
+        }
+    }
+
+    return oMatchEnd - ostart;
+}
+
+static size_t ZSTD_decompressSequences(
+                               void* ctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize)
+{
+    ZSTD_DCtx* dctx = (ZSTD_DCtx*)ctx;
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t errorCode, dumpsLength;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litMax = litPtr + dctx->litBufSize;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    int nbSeq;
+    const BYTE* dumps;
+    U32* DTableLL = dctx->LLTable;
+    U32* DTableML = dctx->MLTable;
+    U32* DTableOffb = dctx->OffTable;
+    BYTE* const base = (BYTE*) (dctx->base);
+
+    /* Build Decoding Tables */
+    errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
+                                      DTableLL, DTableML, DTableOffb,
+                                      ip, iend-ip);
+    if (ZSTD_isError(errorCode)) return errorCode;
+    ip += errorCode;
+
+    /* Regen sequences */
+    {
+        seq_t sequence;
+        seqState_t seqState;
+
+        memset(&sequence, 0, sizeof(sequence));
+        seqState.dumps = dumps;
+        seqState.dumpsEnd = dumps + dumpsLength;
+        seqState.prevOffset = sequence.offset = 4;
+        errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip);
+        if (ERR_isError(errorCode)) return ERROR(corruption_detected);
+        FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
+        FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
+        FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
+
+        for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (nbSeq>0) ; )
+        {
+            size_t oneSeqSize;
+            nbSeq--;
+            ZSTD_decodeSequence(&sequence, &seqState);
+            oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litMax, base, oend);
+            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+            op += oneSeqSize;
+        }
+
+        /* check if reached exact end */
+        if ( !BIT_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected);   /* requested too much : data is corrupted */
+        if (nbSeq<0) return ERROR(corruption_detected);   /* requested too many sequences : data is corrupted */
+
+        /* last literal segment */
+        {
+            size_t lastLLSize = litEnd - litPtr;
+            if (litPtr > litEnd) return ERROR(corruption_detected);
+            if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
+            if (op != litPtr) memmove(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+    }
+
+    return op-ostart;
+}
+
+
+static size_t ZSTD_decompressBlock(
+                            void* ctx,
+                            void* dst, size_t maxDstSize,
+                      const void* src, size_t srcSize)
+{
+    /* blockType == blockCompressed */
+    const BYTE* ip = (const BYTE*)src;
+
+    /* Decode literals sub-block */
+    size_t litCSize = ZSTD_decodeLiteralsBlock(ctx, src, srcSize);
+    if (ZSTD_isError(litCSize)) return litCSize;
+    ip += litCSize;
+    srcSize -= litCSize;
+
+    return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize);
+}
+
+
+static size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* iend = ip + srcSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t remainingSize = srcSize;
+    U32 magicNumber;
+    blockProperties_t blockProperties;
+
+    /* Frame Header */
+    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+    magicNumber = MEM_readLE32(src);
+    if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
+
+    /* Loop on each block */
+    while (1)
+    {
+        size_t decodedSize=0;
+        size_t cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
+        if (ZSTD_isError(cBlockSize)) return cBlockSize;
+
+        ip += ZSTD_blockHeaderSize;
+        remainingSize -= ZSTD_blockHeaderSize;
+        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+
+        switch(blockProperties.blockType)
+        {
+        case bt_compressed:
+            decodedSize = ZSTD_decompressBlock(ctx, op, oend-op, ip, cBlockSize);
+            break;
+        case bt_raw :
+            decodedSize = ZSTD_copyUncompressedBlock(op, oend-op, ip, cBlockSize);
+            break;
+        case bt_rle :
+            return ERROR(GENERIC);   /* not yet supported */
+            break;
+        case bt_end :
+            /* end of frame */
+            if (remainingSize) return ERROR(srcSize_wrong);
+            break;
+        default:
+            return ERROR(GENERIC);   /* impossible */
+        }
+        if (cBlockSize == 0) break;   /* bt_end */
+
+        if (ZSTD_isError(decodedSize)) return decodedSize;
+        op += decodedSize;
+        ip += cBlockSize;
+        remainingSize -= cBlockSize;
+    }
+
+    return op-ostart;
+}
+
+static size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    ZSTD_DCtx ctx;
+    ctx.base = dst;
+    return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
+}
+
+
+/*******************************
+*  Streaming Decompression API
+*******************************/
+
+static size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx)
+{
+    dctx->expected = ZSTD_frameHeaderSize;
+    dctx->phase = 0;
+    dctx->previousDstEnd = NULL;
+    dctx->base = NULL;
+    return 0;
+}
+
+static ZSTD_DCtx* ZSTD_createDCtx(void)
+{
+    ZSTD_DCtx* dctx = (ZSTD_DCtx*)malloc(sizeof(ZSTD_DCtx));
+    if (dctx==NULL) return NULL;
+    ZSTD_resetDCtx(dctx);
+    return dctx;
+}
+
+static size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
+{
+    free(dctx);
+    return 0;
+}
+
+static size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx)
+{
+    return dctx->expected;
+}
+
+static size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    /* Sanity check */
+    if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
+    if (dst != ctx->previousDstEnd)  /* not contiguous */
+        ctx->base = dst;
+
+    /* Decompress : frame header */
+    if (ctx->phase == 0)
+    {
+        /* Check frame magic header */
+        U32 magicNumber = MEM_readLE32(src);
+        if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+        ctx->phase = 1;
+        ctx->expected = ZSTD_blockHeaderSize;
+        return 0;
+    }
+
+    /* Decompress : block header */
+    if (ctx->phase == 1)
+    {
+        blockProperties_t bp;
+        size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
+        if (ZSTD_isError(blockSize)) return blockSize;
+        if (bp.blockType == bt_end)
+        {
+            ctx->expected = 0;
+            ctx->phase = 0;
+        }
+        else
+        {
+            ctx->expected = blockSize;
+            ctx->bType = bp.blockType;
+            ctx->phase = 2;
+        }
+
+        return 0;
+    }
+
+    /* Decompress : block content */
+    {
+        size_t rSize;
+        switch(ctx->bType)
+        {
+        case bt_compressed:
+            rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
+            break;
+        case bt_raw :
+            rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
+            break;
+        case bt_rle :
+            return ERROR(GENERIC);   /* not yet handled */
+            break;
+        case bt_end :   /* should never happen (filtered at phase 1) */
+            rSize = 0;
+            break;
+        default:
+            return ERROR(GENERIC);
+        }
+        ctx->phase = 1;
+        ctx->expected = ZSTD_blockHeaderSize;
+        ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
+        return rSize;
+    }
+
+}
+
+
+/* wrapper layer */
+
+unsigned ZSTDv03_isError(size_t code)
+{
+	return ZSTD_isError(code);
+}
+
+size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize,
+                     const void* src, size_t compressedSize)
+{
+	return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
+}
+
+ZSTDv03_Dctx* ZSTDv03_createDCtx(void)
+{
+	return (ZSTDv03_Dctx*)ZSTD_createDCtx();
+}
+
+size_t ZSTDv03_freeDCtx(ZSTDv03_Dctx* dctx)
+{
+	return ZSTD_freeDCtx((ZSTD_DCtx*)dctx);
+}
+
+size_t ZSTDv03_resetDCtx(ZSTDv03_Dctx* dctx)
+{
+	return ZSTD_resetDCtx((ZSTD_DCtx*)dctx);
+}
+
+size_t ZSTDv03_nextSrcSizeToDecompress(ZSTDv03_Dctx* dctx)
+{
+	return ZSTD_nextSrcSizeToDecompress((ZSTD_DCtx*)dctx);
+}
+
+size_t ZSTDv03_decompressContinue(ZSTDv03_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+	return ZSTD_decompressContinue((ZSTD_DCtx*)dctx, dst, maxDstSize, src, srcSize);
+}
diff --git a/lib/legacy/zstd_v03.h b/lib/legacy/zstd_v03.h
new file mode 100644
index 0000000..4546770
--- /dev/null
+++ b/lib/legacy/zstd_v03.h
@@ -0,0 +1,99 @@
+/*
+    zstd_v03 - decoder for 0.3 format
+    Header File
+    Copyright (C) 2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+*  Simple one-step function
+***************************************/
+/**
+ZSTDv03_decompress() : decompress ZSTD frames compliant with v0.3.x format
+    compressedSize : is the exact source size
+    maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
+                      It must be equal or larger than originalSize, otherwise decompression will fail.
+    return : the number of bytes decompressed into destination buffer (originalSize)
+             or an errorCode if it fails (which can be tested using ZSTDv01_isError())
+*/
+size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize,
+                     const void* src, size_t compressedSize);
+
+/**
+ZSTDv03_isError() : tells if the result of ZSTDv03_decompress() is an error
+*/
+unsigned ZSTDv03_isError(size_t code);
+
+
+/* *************************************
+*  Advanced functions
+***************************************/
+typedef struct ZSTDv03_Dctx_s ZSTDv03_Dctx;
+ZSTDv03_Dctx* ZSTDv03_createDCtx(void);
+size_t ZSTDv03_freeDCtx(ZSTDv03_Dctx* dctx);
+
+size_t ZSTDv03_decompressDCtx(void* ctx,
+                              void* dst, size_t maxOriginalSize,
+                        const void* src, size_t compressedSize);
+
+/* *************************************
+*  Streaming functions
+***************************************/
+size_t ZSTDv03_resetDCtx(ZSTDv03_Dctx* dctx);
+
+size_t ZSTDv03_nextSrcSizeToDecompress(ZSTDv03_Dctx* dctx);
+size_t ZSTDv03_decompressContinue(ZSTDv03_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+/**
+  Use above functions alternatively.
+  ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+  ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
+  Result is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+*/
+
+/* *************************************
+*  Prefix - version detection
+***************************************/
+#define ZSTDv03_magicNumber 0xFD2FB523   /* v0.3 */
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/lib/libzstd.pc.in b/lib/libzstd.pc.in
new file mode 100644
index 0000000..28afc3a
--- /dev/null
+++ b/lib/libzstd.pc.in
@@ -0,0 +1,14 @@
+#   ZSTD - standard compression algorithm
+#   Copyright (C) 2014-2015, Yann Collet.
+#   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+prefix=@PREFIX@
+libdir=@LIBDIR@
+includedir=@INCLUDEDIR@
+
+Name: zstd
+Description: lossless compression algorithm library
+URL: https://github.com/Cyan4973/zstd
+Version: @VERSION@
+Libs: -L at LIBDIR@ -lzstd
+Cflags: -I at INCLUDEDIR@
diff --git a/lib/mem.h b/lib/mem.h
new file mode 100644
index 0000000..8ac56ed
--- /dev/null
+++ b/lib/mem.h
@@ -0,0 +1,289 @@
+/* ******************************************************************
+   mem.h
+   low-level memory access routines
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/******************************************
+*  Includes
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include <string.h>    /* memcpy */
+
+
+/******************************************
+*  Compiler-specific
+******************************************/
+#if defined(__GNUC__)
+#  define MEM_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+#  define MEM_STATIC static __inline
+#else
+#  define MEM_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/****************************************************************
+*  Basic Types
+*****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef  int16_t S16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+  typedef  int64_t S64;
+#else
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef   signed short      S16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+  typedef   signed long long  S64;
+#endif
+
+
+/****************************************************************
+*  Memory I/O
+*****************************************************************/
+/* MEM_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The below switch allow to select different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violate C standard.
+ *            It can generate buggy code on targets generating assembly depending on alignment.
+ *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef MEM_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define MEM_FORCE_MEMORY_ACCESS 2
+#  elif defined(__INTEL_COMPILER) || \
+  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+#    define MEM_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; }
+MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; }
+
+MEM_STATIC unsigned MEM_isLittleEndian(void)
+{
+    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
+    return one.c[0];
+}
+
+#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
+
+/* violates C standard on structure alignment.
+Only use if no other choice to achieve best performance on target platform */
+MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
+MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
+MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
+
+#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
+
+MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
+MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; }
+
+#else
+
+/* default method, safe and standard.
+   can sometimes prove slower */
+
+MEM_STATIC U16 MEM_read16(const void* memPtr)
+{
+    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U32 MEM_read32(const void* memPtr)
+{
+    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U64 MEM_read64(const void* memPtr)
+{
+    U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value)
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+MEM_STATIC void MEM_write32(void* memPtr, U32 value)
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+MEM_STATIC void MEM_write64(void* memPtr, U64 value)
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+#endif // MEM_FORCE_MEMORY_ACCESS
+
+
+MEM_STATIC U16 MEM_readLE16(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read16(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U16)(p[0] + (p[1]<<8));
+    }
+}
+
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
+{
+    if (MEM_isLittleEndian())
+    {
+        MEM_write16(memPtr, val);
+    }
+    else
+    {
+        BYTE* p = (BYTE*)memPtr;
+        p[0] = (BYTE)val;
+        p[1] = (BYTE)(val>>8);
+    }
+}
+
+MEM_STATIC U32 MEM_readLE32(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read32(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
+    }
+}
+
+MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32)
+{
+    if (MEM_isLittleEndian())
+    {
+        MEM_write32(memPtr, val32);
+    }
+    else
+    {
+        BYTE* p = (BYTE*)memPtr;
+        p[0] = (BYTE)val32;
+        p[1] = (BYTE)(val32>>8);
+        p[2] = (BYTE)(val32>>16);
+        p[3] = (BYTE)(val32>>24);
+    }
+}
+
+MEM_STATIC U64 MEM_readLE64(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read64(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
+                     + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
+    }
+}
+
+MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64)
+{
+    if (MEM_isLittleEndian())
+    {
+        MEM_write64(memPtr, val64);
+    }
+    else
+    {
+        BYTE* p = (BYTE*)memPtr;
+        p[0] = (BYTE)val64;
+        p[1] = (BYTE)(val64>>8);
+        p[2] = (BYTE)(val64>>16);
+        p[3] = (BYTE)(val64>>24);
+        p[4] = (BYTE)(val64>>32);
+        p[5] = (BYTE)(val64>>40);
+        p[6] = (BYTE)(val64>>48);
+        p[7] = (BYTE)(val64>>56);
+    }
+}
+
+MEM_STATIC size_t MEM_readLEST(const void* memPtr)
+{
+    if (MEM_32bits())
+        return (size_t)MEM_readLE32(memPtr);
+    else
+        return (size_t)MEM_readLE64(memPtr);
+}
+
+MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val)
+{
+    if (MEM_32bits())
+        MEM_writeLE32(memPtr, (U32)val);
+    else
+        MEM_writeLE64(memPtr, (U64)val);
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* MEM_H_MODULE */
+
diff --git a/lib/zstd.c b/lib/zstd.c
new file mode 100644
index 0000000..a76d111
--- /dev/null
+++ b/lib/zstd.c
@@ -0,0 +1,1462 @@
+/*
+    zstd - standard compression library
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* ***************************************************************
+*  Tuning parameters
+*****************************************************************/
+/*!
+*  MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*/
+#define ZSTD_MEMORY_USAGE 16
+
+/*!
+ * HEAPMODE :
+ * Select how default compression functions will allocate memory for their hash table,
+ * in memory stack (0, fastest), or in memory heap (1, requires malloc())
+ * Note that compression context is fairly large, as a consequence heap memory is recommended.
+ */
+#ifndef ZSTD_HEAPMODE
+#  define ZSTD_HEAPMODE 1
+#endif /* ZSTD_HEAPMODE */
+
+/*!
+*  LEGACY_SUPPORT :
+*  decompressor can decode older formats (starting from Zstd 0.1+)
+*/
+#ifndef ZSTD_LEGACY_SUPPORT
+#  define ZSTD_LEGACY_SUPPORT 1
+#endif
+
+
+/* *******************************************************
+*  Includes
+*********************************************************/
+#include <stdlib.h>      /* calloc */
+#include <string.h>      /* memcpy, memmove */
+#include <stdio.h>       /* debug : printf */
+#include "mem.h"         /* low level memory routines */
+#include "zstd_static.h"
+#include "zstd_internal.h"
+#include "fse_static.h"
+#include "huff0.h"
+
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
+#  include "zstd_legacy.h"
+#endif
+
+
+/* *******************************************************
+*  Compiler specifics
+*********************************************************/
+#ifdef __AVX2__
+#  include <immintrin.h>   /* AVX2 intrinsics */
+#endif
+
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
+#else
+#  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#  ifdef __GNUC__
+#    define FORCE_INLINE static inline __attribute__((always_inline))
+#  else
+#    define FORCE_INLINE static inline
+#  endif
+#endif
+
+
+/* *******************************************************
+*  Constants
+*********************************************************/
+#define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
+#define HASH_TABLESIZE (1 << HASH_LOG)
+#define HASH_MASK (HASH_TABLESIZE - 1)
+
+#define KNUTH 2654435761
+
+#define BIT7 128
+#define BIT6  64
+#define BIT5  32
+#define BIT4  16
+#define BIT1   2
+#define BIT0   1
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define BLOCKSIZE (128 KB)                 /* define, for static allocation */
+#define IS_RAW BIT0
+#define IS_RLE BIT1
+
+static const U32 g_maxDistance = 4 * BLOCKSIZE;
+static const U32 g_maxLimit = 1 GB;
+
+#define WORKPLACESIZE (BLOCKSIZE*3)
+#define MINMATCH 4
+#define LitFSELog  11
+#define MLFSELog   10
+#define LLFSELog   10
+#define OffFSELog   9
+#define MAX(a,b) ((a)<(b)?(b):(a))
+#define MaxSeq MAX(MaxLL, MaxML)
+
+#define LITERAL_NOENTROPY 63
+#define COMMAND_NOENTROPY 7   /* to remove */
+
+static const size_t ZSTD_blockHeaderSize = 3;
+static const size_t ZSTD_frameHeaderSize = 4;
+
+
+/* *******************************************************
+*  Memory operations
+**********************************************************/
+static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
+
+
+/* **************************************
+*  Local structures
+****************************************/
+void ZSTD_resetSeqStore(seqStore_t* ssPtr)
+{
+    ssPtr->offset = ssPtr->offsetStart;
+    ssPtr->lit = ssPtr->litStart;
+    ssPtr->litLength = ssPtr->litLengthStart;
+    ssPtr->matchLength = ssPtr->matchLengthStart;
+    ssPtr->dumps = ssPtr->dumpsStart;
+}
+
+struct ZSTD_CCtx_s
+{
+    const BYTE* base;
+    U32 current;
+    U32 nextUpdate;
+    seqStore_t seqStore;
+#ifdef __AVX2__
+    __m256i hashTable[HASH_TABLESIZE>>3];
+#else
+    U32 hashTable[HASH_TABLESIZE];
+#endif
+    BYTE buffer[WORKPLACESIZE];
+};
+
+
+void ZSTD_resetCCtx(ZSTD_CCtx* ctx)
+{
+    ctx->base = NULL;
+    ctx->seqStore.buffer = ctx->buffer;
+    ctx->seqStore.offsetStart = (U32*) (ctx->seqStore.buffer);
+    ctx->seqStore.offCodeStart = (BYTE*) (ctx->seqStore.offsetStart + (BLOCKSIZE>>2));
+    ctx->seqStore.litStart = ctx->seqStore.offCodeStart + (BLOCKSIZE>>2);
+    ctx->seqStore.litLengthStart =  ctx->seqStore.litStart + BLOCKSIZE;
+    ctx->seqStore.matchLengthStart = ctx->seqStore.litLengthStart + (BLOCKSIZE>>2);
+    ctx->seqStore.dumpsStart = ctx->seqStore.matchLengthStart + (BLOCKSIZE>>2);
+    memset(ctx->hashTable, 0, sizeof(ctx->hashTable));
+}
+
+ZSTD_CCtx* ZSTD_createCCtx(void)
+{
+    ZSTD_CCtx* ctx = (ZSTD_CCtx*) malloc( sizeof(ZSTD_CCtx) );
+    if (ctx==NULL) return NULL;
+    ZSTD_resetCCtx(ctx);
+    return ctx;
+}
+
+size_t ZSTD_freeCCtx(ZSTD_CCtx* ctx)
+{
+    free(ctx);
+    return 0;
+}
+
+
+/* *************************************
+*  Error Management
+***************************************/
+/*! ZSTD_isError
+*   tells if a return value is an error code */
+unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
+
+/*! ZSTD_getErrorName
+*   provides error code string (useful for debugging) */
+const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+
+/* *************************************
+*  Tool functions
+***************************************/
+unsigned ZSTD_versionNumber (void) { return ZSTD_VERSION_NUMBER; }
+
+
+/* *******************************************************
+*  Compression
+*********************************************************/
+size_t ZSTD_compressBound(size_t srcSize)   /* maximum compressed size */
+{
+    return FSE_compressBound(srcSize) + 12;
+}
+
+
+size_t ZSTD_noCompressBlock (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    BYTE* const ostart = (BYTE* const)dst;
+
+    if (srcSize + ZSTD_blockHeaderSize > maxDstSize) return ERROR(dstSize_tooSmall);
+    memcpy(ostart + ZSTD_blockHeaderSize, src, srcSize);
+
+    /* Build header */
+    ostart[0]  = (BYTE)(srcSize>>16);
+    ostart[1]  = (BYTE)(srcSize>>8);
+    ostart[2]  = (BYTE) srcSize;
+    ostart[0] += (BYTE)(bt_raw<<6);   /* is a raw (uncompressed) block */
+
+    return ZSTD_blockHeaderSize+srcSize;
+}
+
+
+static size_t ZSTD_compressRawLiteralsBlock (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    BYTE* const ostart = (BYTE* const)dst;
+
+    if (srcSize + 3 > maxDstSize) return ERROR(dstSize_tooSmall);
+
+    MEM_writeLE32(dst, ((U32)srcSize << 2) | IS_RAW);
+    memcpy(ostart + 3, src, srcSize);
+    return srcSize + 3;
+}
+
+static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    BYTE* const ostart = (BYTE* const)dst;
+
+    (void)maxDstSize;
+    MEM_writeLE32(dst, ((U32)srcSize << 2) | IS_RLE);  /* note : maxDstSize > litHeaderSize > 4 */
+    ostart[3] = *(const BYTE*)src;
+    return 4;
+}
+
+size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 1; }
+
+static size_t ZSTD_compressLiterals (void* dst, size_t maxDstSize,
+                               const void* src, size_t srcSize)
+{
+    const size_t minGain = ZSTD_minGain(srcSize);
+    BYTE* const ostart = (BYTE*)dst;
+    size_t hsize;
+    static const size_t litHeaderSize = 5;
+
+    if (maxDstSize < litHeaderSize+1) return ERROR(dstSize_tooSmall);   /* not enough space for compression */
+
+    hsize = HUF_compress(ostart+litHeaderSize, maxDstSize-litHeaderSize, src, srcSize);
+
+    if ((hsize==0) || (hsize >= srcSize - minGain)) return ZSTD_compressRawLiteralsBlock(dst, maxDstSize, src, srcSize);
+    if (hsize==1) return ZSTD_compressRleLiteralsBlock(dst, maxDstSize, src, srcSize);
+
+    /* Build header */
+    {
+        ostart[0]  = (BYTE)(srcSize << 2); /* is a block, is compressed */
+        ostart[1]  = (BYTE)(srcSize >> 6);
+        ostart[2]  = (BYTE)(srcSize >>14);
+        ostart[2] += (BYTE)(hsize << 5);
+        ostart[3]  = (BYTE)(hsize >> 3);
+        ostart[4]  = (BYTE)(hsize >>11);
+    }
+
+    return hsize+litHeaderSize;
+}
+
+
+size_t ZSTD_compressSequences(BYTE* dst, size_t maxDstSize,
+                        const seqStore_t* seqStorePtr,
+                              size_t srcSize)
+{
+    U32 count[MaxSeq+1];
+    S16 norm[MaxSeq+1];
+    size_t mostFrequent;
+    U32 max = 255;
+    U32 tableLog = 11;
+    U32 CTable_LitLength  [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL )];
+    U32 CTable_OffsetBits [FSE_CTABLE_SIZE_U32(OffFSELog,MaxOff)];
+    U32 CTable_MatchLength[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML )];
+    U32 LLtype, Offtype, MLtype;   /* compressed, raw or rle */
+    const BYTE* const op_lit_start = seqStorePtr->litStart;
+    const BYTE* const llTable = seqStorePtr->litLengthStart;
+    const BYTE* const llPtr = seqStorePtr->litLength;
+    const BYTE* const mlTable = seqStorePtr->matchLengthStart;
+    const U32*  const offsetTable = seqStorePtr->offsetStart;
+    BYTE* const offCodeTable = seqStorePtr->offCodeStart;
+    BYTE* op = dst;
+    BYTE* const oend = dst + maxDstSize;
+    const size_t nbSeq = llPtr - llTable;
+    const size_t minGain = ZSTD_minGain(srcSize);
+    const size_t maxCSize = srcSize - minGain;
+    BYTE* seqHead;
+
+
+    /* Compress literals */
+    {
+        size_t cSize;
+        size_t litSize = seqStorePtr->lit - op_lit_start;
+
+        if (litSize <= LITERAL_NOENTROPY)
+            cSize = ZSTD_compressRawLiteralsBlock(op, maxDstSize, op_lit_start, litSize);
+        else
+            cSize = ZSTD_compressLiterals(op, maxDstSize, op_lit_start, litSize);
+        if (ZSTD_isError(cSize)) return cSize;
+        op += cSize;
+    }
+
+    /* Sequences Header */
+    if ((oend-op) < MIN_SEQUENCES_SIZE)
+        return ERROR(dstSize_tooSmall);
+    MEM_writeLE16(op, (U16)nbSeq); op+=2;
+    seqHead = op;
+
+    /* dumps : contains too large lengths */
+    {
+        size_t dumpsLength = seqStorePtr->dumps - seqStorePtr->dumpsStart;
+        if (dumpsLength < 512)
+        {
+            op[0] = (BYTE)(dumpsLength >> 8);
+            op[1] = (BYTE)(dumpsLength);
+            op += 2;
+        }
+        else
+        {
+            op[0] = 2;
+            op[1] = (BYTE)(dumpsLength>>8);
+            op[2] = (BYTE)(dumpsLength);
+            op += 3;
+        }
+        if ((size_t)(oend-op) < dumpsLength+6) return ERROR(dstSize_tooSmall);
+        memcpy(op, seqStorePtr->dumpsStart, dumpsLength);
+        op += dumpsLength;
+    }
+
+    /* CTable for Literal Lengths */
+    max = MaxLL;
+    mostFrequent = FSE_countFast(count, &max, seqStorePtr->litLengthStart, nbSeq);
+    if ((mostFrequent == nbSeq) && (nbSeq > 2))
+    {
+        *op++ = *(seqStorePtr->litLengthStart);
+        FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
+        LLtype = bt_rle;
+    }
+    else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (LLbits-1))))
+    {
+        FSE_buildCTable_raw(CTable_LitLength, LLbits);
+        LLtype = bt_raw;
+    }
+    else
+    {
+        size_t NCountSize;
+        tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
+        FSE_normalizeCount(norm, tableLog, count, nbSeq, max);
+        NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
+        if (FSE_isError(NCountSize)) return ERROR(GENERIC);
+        op += NCountSize;
+        FSE_buildCTable(CTable_LitLength, norm, max, tableLog);
+        LLtype = bt_compressed;
+    }
+
+    /* CTable for Offsets codes */
+    {
+        /* create Offset codes */
+        size_t i;
+        max = MaxOff;
+        for (i=0; i<nbSeq; i++)
+        {
+            offCodeTable[i] = (BYTE)ZSTD_highbit(offsetTable[i]) + 1;
+            if (offsetTable[i]==0) offCodeTable[i]=0;
+        }
+        mostFrequent = FSE_countFast(count, &max, offCodeTable, nbSeq);
+    }
+    if ((mostFrequent == nbSeq) && (nbSeq > 2))
+    {
+        *op++ = *offCodeTable;
+        FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
+        Offtype = bt_rle;
+    }
+    else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (Offbits-1))))
+    {
+        FSE_buildCTable_raw(CTable_OffsetBits, Offbits);
+        Offtype = bt_raw;
+    }
+    else
+    {
+        size_t NCountSize;
+        tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
+        FSE_normalizeCount(norm, tableLog, count, nbSeq, max);
+        NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
+        if (FSE_isError(NCountSize)) return ERROR(GENERIC);
+        op += NCountSize;
+        FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog);
+        Offtype = bt_compressed;
+    }
+
+    /* CTable for MatchLengths */
+    max = MaxML;
+    mostFrequent = FSE_countFast(count, &max, seqStorePtr->matchLengthStart, nbSeq);
+    if ((mostFrequent == nbSeq) && (nbSeq > 2))
+    {
+        *op++ = *seqStorePtr->matchLengthStart;
+        FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
+        MLtype = bt_rle;
+    }
+    else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (MLbits-1))))
+    {
+        FSE_buildCTable_raw(CTable_MatchLength, MLbits);
+        MLtype = bt_raw;
+    }
+    else
+    {
+        size_t NCountSize;
+        tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
+        FSE_normalizeCount(norm, tableLog, count, nbSeq, max);
+        NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
+        if (FSE_isError(NCountSize)) return ERROR(GENERIC);
+        op += NCountSize;
+        FSE_buildCTable(CTable_MatchLength, norm, max, tableLog);
+        MLtype = bt_compressed;
+    }
+
+    seqHead[0] += (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
+
+    /* Encoding Sequences */
+    {
+        size_t streamSize, errorCode;
+        BIT_CStream_t blockStream;
+        FSE_CState_t stateMatchLength;
+        FSE_CState_t stateOffsetBits;
+        FSE_CState_t stateLitLength;
+        int i;
+
+        errorCode = BIT_initCStream(&blockStream, op, oend-op);
+        if (ERR_isError(errorCode)) return ERROR(dstSize_tooSmall);   /* not enough space remaining */
+        FSE_initCState(&stateMatchLength, CTable_MatchLength);
+        FSE_initCState(&stateOffsetBits, CTable_OffsetBits);
+        FSE_initCState(&stateLitLength, CTable_LitLength);
+
+        for (i=(int)nbSeq-1; i>=0; i--)
+        {
+            BYTE matchLength = mlTable[i];
+            U32  offset = offsetTable[i];
+            BYTE offCode = offCodeTable[i];                                 /* 32b*/  /* 64b*/
+            U32 nbBits = (offCode-1) * (!!offCode);
+            BYTE litLength = llTable[i];                                    /* (7)*/  /* (7)*/
+            FSE_encodeSymbol(&blockStream, &stateMatchLength, matchLength); /* 17 */  /* 17 */
+            if (MEM_32bits()) BIT_flushBits(&blockStream);                 /*  7 */
+            BIT_addBits(&blockStream, offset, nbBits);                      /* 32 */  /* 42 */
+            if (MEM_32bits()) BIT_flushBits(&blockStream);                 /*  7 */
+            FSE_encodeSymbol(&blockStream, &stateOffsetBits, offCode);      /* 16 */  /* 51 */
+            FSE_encodeSymbol(&blockStream, &stateLitLength, litLength);     /* 26 */  /* 61 */
+            BIT_flushBits(&blockStream);                                    /*  7 */  /*  7 */
+        }
+
+        FSE_flushCState(&blockStream, &stateMatchLength);
+        FSE_flushCState(&blockStream, &stateOffsetBits);
+        FSE_flushCState(&blockStream, &stateLitLength);
+
+        streamSize = BIT_closeCStream(&blockStream);
+        if (streamSize==0) return ERROR(dstSize_tooSmall);   /* not enough space */
+        op += streamSize;
+    }
+
+    /* check compressibility */
+    if ((size_t)(op-dst) >= maxCSize) return 0;
+
+    return op - dst;
+}
+
+
+//static const U32 hashMask = (1<<HASH_LOG)-1;
+
+//static U32   ZSTD_hashPtr(const void* p) { return (U32) _bextr_u64(*(U64*)p * prime7bytes, (56-HASH_LOG), HASH_LOG); }
+//static U32   ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime7bytes) << 8 >> (64-HASH_LOG)); }
+//static U32   ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime7bytes) >> (56-HASH_LOG)) & ((1<<HASH_LOG)-1); }
+//static U32   ZSTD_hashPtr(const void* p) { return ( ((*(U64*)p & 0xFFFFFFFFFFFFFF) * prime7bytes) >> (64-HASH_LOG)); }
+
+//static const U64 prime8bytes = 14923729446516375013ULL;
+//static U32   ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime8bytes) >> (64-HASH_LOG)); }
+
+static const U64 prime7bytes =    58295818150454627ULL;
+static U32   ZSTD_hashPtr(const void* p) { return ( (MEM_read64(p) * prime7bytes) >> (56-HASH_LOG)) & HASH_MASK; }
+
+//static const U64 prime6bytes =      227718039650203ULL;
+//static U32   ZSTD_hashPtr(const void* p) { return ( (MEM_read64(p) * prime6bytes) >> (48-HASH_LOG)) & HASH_MASK; }
+
+//static const U64 prime5bytes =         889523592379ULL;
+//static U32   ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime5bytes) >> (40-HASH_LOG)) & HASH_MASK; }
+
+//static U32   ZSTD_hashPtr(const void* p) { return ( (*(U32*)p * KNUTH) >> (32-HASH_LOG)); }
+
+static const BYTE* ZSTD_updateMatch(U32* table, const BYTE* p, const BYTE* start)
+{
+    U32 h = ZSTD_hashPtr(p);
+    const BYTE* r;
+    r = table[h] + start;
+    table[h] = (U32)(p-start);
+    return r;
+}
+
+static int ZSTD_checkMatch(const BYTE* match, const BYTE* ip)
+{
+    return MEM_read32(match) == MEM_read32(ip);
+}
+
+static void  ZSTD_addPtr(U32* table, const BYTE* p, const BYTE* start) { table[ZSTD_hashPtr(p)] = (U32)(p-start); }
+
+
+static size_t ZSTD_compressBlock(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    U32*  HashTable = (U32*)(ctx->hashTable);
+    seqStore_t* seqStorePtr = &(ctx->seqStore);
+    const BYTE* const base = ctx->base;
+
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart + 1;
+    const BYTE* anchor = istart;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+
+    size_t offset_2=4, offset_1=4;
+
+
+    /* init */
+    if (ip-base < 4)
+    {
+        ZSTD_addPtr(HashTable, ip+0, base);
+        ZSTD_addPtr(HashTable, ip+1, base);
+        ZSTD_addPtr(HashTable, ip+2, base);
+        ZSTD_addPtr(HashTable, ip+3, base);
+        ip += 4;
+    }
+    ZSTD_resetSeqStore(seqStorePtr);
+
+    /* Main Search Loop */
+    while (ip < ilimit)  /* < instead of <=, because unconditionnal ZSTD_addPtr(ip+1) */
+    {
+        const BYTE* match = ZSTD_updateMatch(HashTable, ip, base);
+
+        if (ZSTD_checkMatch(ip-offset_2,ip)) match = ip-offset_2;
+        if (!ZSTD_checkMatch(match,ip)) { ip += ((ip-anchor) >> g_searchStrength) + 1; offset_2 = offset_1; continue; }
+        while ((ip>anchor) && (match>base) && (ip[-1] == match[-1])) { ip--; match--; }  /* catch up */
+
+        {
+            size_t litLength = ip-anchor;
+            size_t matchLength = ZSTD_count(ip+MINMATCH, match+MINMATCH, iend);
+            size_t offsetCode = ip-match;
+            if (offsetCode == offset_2) offsetCode = 0;
+            offset_2 = offset_1;
+            offset_1 = ip-match;
+            ZSTD_storeSeq(seqStorePtr, litLength, anchor, offsetCode, matchLength);
+
+            /* Fill Table */
+            ZSTD_addPtr(HashTable, ip+1, base);
+            ip += matchLength + MINMATCH;
+            anchor = ip;
+            if (ip < ilimit) /* same test as loop, for speed */
+                ZSTD_addPtr(HashTable, ip-2, base);
+        }
+    }
+
+    /* Last Literals */
+    {
+        size_t lastLLSize = iend - anchor;
+        memcpy(seqStorePtr->lit, anchor, lastLLSize);
+        seqStorePtr->lit += lastLLSize;
+    }
+
+    /* Finale compression stage */
+    return ZSTD_compressSequences((BYTE*)dst, maxDstSize,
+                                  seqStorePtr, srcSize);
+}
+
+
+size_t ZSTD_compressBegin(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize)
+{
+    /* Sanity check */
+    if (maxDstSize < ZSTD_frameHeaderSize) return ERROR(dstSize_tooSmall);
+
+    /* Init */
+    ZSTD_resetCCtx(ctx);
+
+    /* Write Header */
+    MEM_writeLE32(dst, ZSTD_magicNumber);
+
+    return ZSTD_frameHeaderSize;
+}
+
+
+static void ZSTD_scaleDownCtx(ZSTD_CCtx* ctx, const U32 limit)
+{
+    int i;
+
+#if defined(__AVX2__)
+    /* AVX2 version */
+    __m256i* h = ctx->hashTable;
+    const __m256i limit8 = _mm256_set1_epi32(limit);
+    for (i=0; i<(HASH_TABLESIZE>>3); i++)
+    {
+        __m256i src =_mm256_loadu_si256((const __m256i*)(h+i));
+  const __m256i dec = _mm256_min_epu32(src, limit8);
+                src = _mm256_sub_epi32(src, dec);
+        _mm256_storeu_si256((__m256i*)(h+i), src);
+    }
+#else
+    /* this should be auto-vectorized by compiler */
+    U32* h = ctx->hashTable;
+    for (i=0; i<HASH_TABLESIZE; ++i)
+    {
+        U32 dec;
+        if (h[i] > limit) dec = limit; else dec = h[i];
+        h[i] -= dec;
+    }
+#endif
+}
+
+
+static void ZSTD_limitCtx(ZSTD_CCtx* ctx, const U32 limit)
+{
+    int i;
+
+    if (limit > g_maxLimit)
+    {
+        ZSTD_scaleDownCtx(ctx, limit);
+        ctx->base += limit;
+        ctx->current -= limit;
+        ctx->nextUpdate -= limit;
+        return;
+    }
+
+#if defined(__AVX2__)
+    /* AVX2 version */
+    {
+        __m256i* h = ctx->hashTable;
+        const __m256i limit8 = _mm256_set1_epi32(limit);
+        //printf("Address h : %0X\n", (U32)h);    // address test
+        for (i=0; i<(HASH_TABLESIZE>>3); i++)
+        {
+            __m256i src =_mm256_loadu_si256((const __m256i*)(h+i));   // Unfortunately, clang doesn't guarantee 32-bytes alignment
+                    src = _mm256_max_epu32(src, limit8);
+            _mm256_storeu_si256((__m256i*)(h+i), src);
+        }
+    }
+#else
+    /* this should be auto-vectorized by compiler */
+    {
+        U32* h = (U32*)(ctx->hashTable);
+        for (i=0; i<HASH_TABLESIZE; ++i)
+        {
+            if (h[i] < limit) h[i] = limit;
+        }
+    }
+#endif
+}
+
+
+size_t ZSTD_compressContinue(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* ip = istart;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    const U32 updateRate = 2 * BLOCKSIZE;
+
+    /*  Init */
+    if (ctx->base==NULL)
+        ctx->base = (const BYTE*)src, ctx->current=0, ctx->nextUpdate = g_maxDistance;
+    if (src != ctx->base + ctx->current)   /* not contiguous */
+    {
+        ZSTD_resetCCtx(ctx);
+        ctx->base = (const BYTE*)src;
+        ctx->current = 0;
+    }
+    ctx->current += (U32)srcSize;
+
+    while (srcSize)
+    {
+        size_t cSize;
+        size_t blockSize = BLOCKSIZE;
+        if (blockSize > srcSize) blockSize = srcSize;
+
+        if (maxDstSize < 2*ZSTD_blockHeaderSize+1)  /* one RLE block + endMark */
+            return ERROR(dstSize_tooSmall);
+
+        /* update hash table */
+        if (g_maxDistance <= BLOCKSIZE)   /* static test ; yes == blocks are independent */
+        {
+            ZSTD_resetCCtx(ctx);
+            ctx->base = ip;
+            ctx->current=0;
+        }
+        else if (ip >= ctx->base + ctx->nextUpdate)
+        {
+            ctx->nextUpdate += updateRate;
+            ZSTD_limitCtx(ctx, ctx->nextUpdate - g_maxDistance);
+        }
+
+        /* compress */
+        cSize = ZSTD_compressBlock(ctx, op+ZSTD_blockHeaderSize, maxDstSize-ZSTD_blockHeaderSize, ip, blockSize);
+        if (cSize == 0)
+        {
+            cSize = ZSTD_noCompressBlock(op, maxDstSize, ip, blockSize);   /* block is not compressible */
+            if (ZSTD_isError(cSize)) return cSize;
+        }
+        else
+        {
+            if (ZSTD_isError(cSize)) return cSize;
+            op[0] = (BYTE)(cSize>>16);
+            op[1] = (BYTE)(cSize>>8);
+            op[2] = (BYTE)cSize;
+            op[0] += (BYTE)(bt_compressed << 6); /* is a compressed block */
+            cSize += 3;
+        }
+        op += cSize;
+        maxDstSize -= cSize;
+        ip += blockSize;
+        srcSize -= blockSize;
+    }
+
+    return op-ostart;
+}
+
+
+size_t ZSTD_compressEnd(ZSTD_CCtx*  ctx, void* dst, size_t maxDstSize)
+{
+    BYTE* op = (BYTE*)dst;
+
+    /* Sanity check */
+    (void)ctx;
+    if (maxDstSize < ZSTD_blockHeaderSize) return ERROR(dstSize_tooSmall);
+
+    /* End of frame */
+    op[0] = (BYTE)(bt_end << 6);
+    op[1] = 0;
+    op[2] = 0;
+
+    return 3;
+}
+
+
+size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    size_t oSize;
+
+    /* Header */
+    oSize = ZSTD_compressBegin(ctx, dst, maxDstSize);
+    if(ZSTD_isError(oSize)) return oSize;
+    op += oSize;
+    maxDstSize -= oSize;
+
+    /* Compression */
+    oSize = ZSTD_compressContinue(ctx, op, maxDstSize, src, srcSize);
+    if (ZSTD_isError(oSize)) return oSize;
+    op += oSize;
+    maxDstSize -= oSize;
+
+    /* Close frame */
+    oSize = ZSTD_compressEnd(ctx, op, maxDstSize);
+    if(ZSTD_isError(oSize)) return oSize;
+    op += oSize;
+
+    return (op - ostart);
+}
+
+
+size_t ZSTD_compress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    size_t r;
+#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE==1)
+    ZSTD_CCtx* ctx;
+    ctx = ZSTD_createCCtx();
+    if (ctx==NULL) return ERROR(GENERIC);
+# else
+    ZSTD_CCtx ctxBody;
+    ZSTD_CCtx* const ctx = &ctxBody;
+# endif
+
+    r = ZSTD_compressCCtx(ctx, dst, maxDstSize, src, srcSize);
+
+#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE==1)
+    ZSTD_freeCCtx(ctx);
+#endif
+
+    return r;
+}
+
+
+/* *************************************************************
+*   Decompression section
+***************************************************************/
+struct ZSTD_DCtx_s
+{
+    U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
+    U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
+    U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
+    void* previousDstEnd;
+    void* base;
+    size_t expected;
+    blockType_t bType;
+    U32 phase;
+    const BYTE* litPtr;
+    size_t litBufSize;
+    size_t litSize;
+    BYTE litBuffer[BLOCKSIZE + 8 /* margin for wildcopy */];
+};   /* typedef'd to ZSTD_Dctx within "zstd_static.h" */
+
+
+size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
+{
+    const BYTE* const in = (const BYTE* const)src;
+    BYTE headerFlags;
+    U32 cSize;
+
+    if (srcSize < 3) return ERROR(srcSize_wrong);
+
+    headerFlags = *in;
+    cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
+
+    bpPtr->blockType = (blockType_t)(headerFlags >> 6);
+    bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
+
+    if (bpPtr->blockType == bt_end) return 0;
+    if (bpPtr->blockType == bt_rle) return 1;
+    return cSize;
+}
+
+static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
+    memcpy(dst, src, srcSize);
+    return srcSize;
+}
+
+
+/** ZSTD_decompressLiterals
+    @return : nb of bytes read from src, or an error code*/
+static size_t ZSTD_decompressLiterals(void* dst, size_t* maxDstSizePtr,
+                                const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+
+    const size_t litSize = (MEM_readLE32(src) & 0x1FFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+    const size_t litCSize = (MEM_readLE32(ip+2) & 0xFFFFFF) >> 5;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+
+    if (litSize > *maxDstSizePtr) return ERROR(corruption_detected);
+    if (litCSize + 5 > srcSize) return ERROR(corruption_detected);
+
+    if (HUF_isError(HUF_decompress(dst, litSize, ip+5, litCSize))) return ERROR(corruption_detected);
+
+    *maxDstSizePtr = litSize;
+    return litCSize + 5;
+}
+
+
+/** ZSTD_decodeLiteralsBlock
+    @return : nb of bytes read from src (< srcSize )*/
+size_t ZSTD_decodeLiteralsBlock(void* ctx,
+                          const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
+{
+    ZSTD_DCtx* dctx = (ZSTD_DCtx*)ctx;
+    const BYTE* const istart = (const BYTE*) src;
+
+    /* any compressed block with literals segment must be at least this size */
+    if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
+
+    switch(*istart & 3)
+    {
+    /* compressed */
+    case 0:
+        {
+            size_t litSize = BLOCKSIZE;
+            const size_t readSize = ZSTD_decompressLiterals(dctx->litBuffer, &litSize, src, srcSize);
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litBufSize = BLOCKSIZE+8;
+            dctx->litSize = litSize;
+            return readSize;   /* works if it's an error too */
+        }
+    case IS_RAW:
+        {
+            const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+            if (litSize > srcSize-11)   /* risk of reading too far with wildcopy */
+            {
+                if (litSize > srcSize-3) return ERROR(corruption_detected);
+                memcpy(dctx->litBuffer, istart, litSize);
+                dctx->litPtr = dctx->litBuffer;
+                dctx->litBufSize = BLOCKSIZE+8;
+                dctx->litSize = litSize;
+                return litSize+3;
+            }
+            /* direct reference into compressed stream */
+            dctx->litPtr = istart+3;
+            dctx->litBufSize = srcSize-3;
+            dctx->litSize = litSize;
+            return litSize+3;        }
+    case IS_RLE:
+        {
+            const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+            if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
+            memset(dctx->litBuffer, istart[3], litSize);
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litBufSize = BLOCKSIZE+8;
+            dctx->litSize = litSize;
+            return 4;
+        }
+    default:
+        return ERROR(corruption_detected);   /* forbidden nominal case */
+    }
+}
+
+
+size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
+                         FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
+                         const void* src, size_t srcSize)
+{
+    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* ip = istart;
+    const BYTE* const iend = istart + srcSize;
+    U32 LLtype, Offtype, MLtype;
+    U32 LLlog, Offlog, MLlog;
+    size_t dumpsLength;
+
+    /* check */
+    if (srcSize < 5) return ERROR(srcSize_wrong);
+
+    /* SeqHead */
+    *nbSeq = MEM_readLE16(ip); ip+=2;
+    LLtype  = *ip >> 6;
+    Offtype = (*ip >> 4) & 3;
+    MLtype  = (*ip >> 2) & 3;
+    if (*ip & 2)
+    {
+        dumpsLength  = ip[2];
+        dumpsLength += ip[1] << 8;
+        ip += 3;
+    }
+    else
+    {
+        dumpsLength  = ip[1];
+        dumpsLength += (ip[0] & 1) << 8;
+        ip += 2;
+    }
+    *dumpsPtr = ip;
+    ip += dumpsLength;
+    *dumpsLengthPtr = dumpsLength;
+
+    /* check */
+    if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
+
+    /* sequences */
+    {
+        S16 norm[MaxML+1];    /* assumption : MaxML >= MaxLL and MaxOff */
+        size_t headerSize;
+
+        /* Build DTables */
+        switch(LLtype)
+        {
+        U32 max;
+        case bt_rle :
+            LLlog = 0;
+            FSE_buildDTable_rle(DTableLL, *ip++); break;
+        case bt_raw :
+            LLlog = LLbits;
+            FSE_buildDTable_raw(DTableLL, LLbits); break;
+        default :
+            max = MaxLL;
+            headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
+            if (FSE_isError(headerSize)) return ERROR(GENERIC);
+            if (LLlog > LLFSELog) return ERROR(corruption_detected);
+            ip += headerSize;
+            FSE_buildDTable(DTableLL, norm, max, LLlog);
+        }
+
+        switch(Offtype)
+        {
+        U32 max;
+        case bt_rle :
+            Offlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong);   /* min : "raw", hence no header, but at least xxLog bits */
+            FSE_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */
+            break;
+        case bt_raw :
+            Offlog = Offbits;
+            FSE_buildDTable_raw(DTableOffb, Offbits); break;
+        default :
+            max = MaxOff;
+            headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
+            if (FSE_isError(headerSize)) return ERROR(GENERIC);
+            if (Offlog > OffFSELog) return ERROR(corruption_detected);
+            ip += headerSize;
+            FSE_buildDTable(DTableOffb, norm, max, Offlog);
+        }
+
+        switch(MLtype)
+        {
+        U32 max;
+        case bt_rle :
+            MLlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
+            FSE_buildDTable_rle(DTableML, *ip++); break;
+        case bt_raw :
+            MLlog = MLbits;
+            FSE_buildDTable_raw(DTableML, MLbits); break;
+        default :
+            max = MaxML;
+            headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
+            if (FSE_isError(headerSize)) return ERROR(GENERIC);
+            if (MLlog > MLFSELog) return ERROR(corruption_detected);
+            ip += headerSize;
+            FSE_buildDTable(DTableML, norm, max, MLlog);
+        }
+    }
+
+    return ip-istart;
+}
+
+
+typedef struct {
+    size_t litLength;
+    size_t offset;
+    size_t matchLength;
+} seq_t;
+
+typedef struct {
+    BIT_DStream_t DStream;
+    FSE_DState_t stateLL;
+    FSE_DState_t stateOffb;
+    FSE_DState_t stateML;
+    size_t prevOffset;
+    const BYTE* dumps;
+    const BYTE* dumpsEnd;
+} seqState_t;
+
+
+static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
+{
+    size_t litLength;
+    size_t prevOffset;
+    size_t offset;
+    size_t matchLength;
+    const BYTE* dumps = seqState->dumps;
+    const BYTE* const de = seqState->dumpsEnd;
+
+    /* Literal length */
+    litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
+    prevOffset = litLength ? seq->offset : seqState->prevOffset;
+    seqState->prevOffset = seq->offset;
+    if (litLength == MaxLL)
+    {
+        U32 add = *dumps++;
+        if (add < 255) litLength += add;
+        else
+        {
+            litLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            dumps += 3;
+        }
+        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
+    }
+
+    /* Offset */
+    {
+        static const U32 offsetPrefix[MaxOff+1] = {
+                1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
+                512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
+                524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
+        U32 offsetCode, nbBits;
+        offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));   /* <= maxOff, by table construction */
+        if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
+        nbBits = offsetCode - 1;
+        if (offsetCode==0) nbBits = 0;   /* cmove */
+        offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits);
+        if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
+        if (offsetCode==0) offset = prevOffset;   /* cmove */
+    }
+
+    /* MatchLength */
+    matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
+    if (matchLength == MaxML)
+    {
+        U32 add = *dumps++;
+        if (add < 255) matchLength += add;
+        else
+        {
+            matchLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            dumps += 3;
+        }
+        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
+    }
+    matchLength += MINMATCH;
+
+    /* save result */
+    seq->litLength = litLength;
+    seq->offset = offset;
+    seq->matchLength = matchLength;
+    seqState->dumps = dumps;
+}
+
+
+static size_t ZSTD_execSequence(BYTE* op,
+                                seq_t sequence,
+                                const BYTE** litPtr, const BYTE* const litLimit_8,
+                                BYTE* const base, BYTE* const oend)
+{
+    static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
+    static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* substracted */
+    const BYTE* const ostart = op;
+    BYTE* const oLitEnd = op + sequence.litLength;
+    BYTE* const oMatchEnd = op + sequence.litLength + sequence.matchLength;   /* risk : address space overflow (32-bits) */
+    BYTE* const oend_8 = oend-8;
+    const BYTE* const litEnd = *litPtr + sequence.litLength;
+
+    /* check */
+    if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall);   /* last match must start at a minimum distance of 8 from oend */
+    if (oMatchEnd > oend) return ERROR(dstSize_tooSmall);   /* overwrite beyond dst buffer */
+    if (litEnd > litLimit_8) return ERROR(corruption_detected);   /* risk read beyond lit buffer */
+
+    /* copy Literals */
+    ZSTD_wildcopy(op, *litPtr, sequence.litLength);   /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
+    op = oLitEnd;
+    *litPtr = litEnd;   /* update for next sequence */
+
+    /* copy Match */
+    {
+        const BYTE* match = op - sequence.offset;
+
+        /* check */
+        //if (match > op) return ERROR(corruption_detected);   /* address space overflow test (is clang optimizer removing this test ?) */
+        if (sequence.offset > (size_t)op) return ERROR(corruption_detected);   /* address space overflow test (this test seems kept by clang optimizer) */
+        if (match < base) return ERROR(corruption_detected);
+
+        /* close range match, overlap */
+        if (sequence.offset < 8)
+        {
+            const int dec64 = dec64table[sequence.offset];
+            op[0] = match[0];
+            op[1] = match[1];
+            op[2] = match[2];
+            op[3] = match[3];
+            match += dec32table[sequence.offset];
+            ZSTD_copy4(op+4, match);
+            match -= dec64;
+        }
+        else
+        {
+            ZSTD_copy8(op, match);
+        }
+        op += 8; match += 8;
+
+        if (oMatchEnd > oend-12)
+        {
+            if (op < oend_8)
+            {
+                ZSTD_wildcopy(op, match, oend_8 - op);
+                match += oend_8 - op;
+                op = oend_8;
+            }
+            while (op < oMatchEnd) *op++ = *match++;
+        }
+        else
+        {
+            ZSTD_wildcopy(op, match, sequence.matchLength-8);   /* works even if matchLength < 8 */
+        }
+    }
+
+    return oMatchEnd - ostart;
+}
+
+static size_t ZSTD_decompressSequences(
+                               void* ctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize)
+{
+    ZSTD_DCtx* dctx = (ZSTD_DCtx*)ctx;
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t errorCode, dumpsLength;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    int nbSeq;
+    const BYTE* dumps;
+    U32* DTableLL = dctx->LLTable;
+    U32* DTableML = dctx->MLTable;
+    U32* DTableOffb = dctx->OffTable;
+    BYTE* const base = (BYTE*) (dctx->base);
+
+    /* Build Decoding Tables */
+    errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
+                                      DTableLL, DTableML, DTableOffb,
+                                      ip, iend-ip);
+    if (ZSTD_isError(errorCode)) return errorCode;
+    ip += errorCode;
+
+    /* Regen sequences */
+    {
+        seq_t sequence;
+        seqState_t seqState;
+
+        memset(&sequence, 0, sizeof(sequence));
+        sequence.offset = 4;
+        seqState.dumps = dumps;
+        seqState.dumpsEnd = dumps + dumpsLength;
+        seqState.prevOffset = 4;
+        errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip);
+        if (ERR_isError(errorCode)) return ERROR(corruption_detected);
+        FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
+        FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
+        FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
+
+        for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (nbSeq>0) ; )
+        {
+            size_t oneSeqSize;
+            nbSeq--;
+            ZSTD_decodeSequence(&sequence, &seqState);
+            oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litLimit_8, base, oend);
+            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+            op += oneSeqSize;
+        }
+
+        /* check if reached exact end */
+        if ( !BIT_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected);   /* requested too much : data is corrupted */
+        if (nbSeq<0) return ERROR(corruption_detected);   /* requested too many sequences : data is corrupted */
+
+        /* last literal segment */
+        {
+            size_t lastLLSize = litEnd - litPtr;
+            if (litPtr > litEnd) return ERROR(corruption_detected);
+            if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
+            if (op != litPtr) memcpy(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+    }
+
+    return op-ostart;
+}
+
+
+static size_t ZSTD_decompressBlock(
+                            void* ctx,
+                            void* dst, size_t maxDstSize,
+                      const void* src, size_t srcSize)
+{
+    /* blockType == blockCompressed */
+    const BYTE* ip = (const BYTE*)src;
+
+    /* Decode literals sub-block */
+    size_t litCSize = ZSTD_decodeLiteralsBlock(ctx, src, srcSize);
+    if (ZSTD_isError(litCSize)) return litCSize;
+    ip += litCSize;
+    srcSize -= litCSize;
+
+    return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize);
+}
+
+
+static size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* iend = ip + srcSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t remainingSize = srcSize;
+    U32 magicNumber;
+    blockProperties_t blockProperties;
+
+    /* Frame Header */
+    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+    magicNumber = MEM_readLE32(src);
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
+    if (ZSTD_isLegacy(magicNumber))
+        return ZSTD_decompressLegacy(dst, maxDstSize, src, srcSize, magicNumber);
+#endif
+    if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
+
+    /* Loop on each block */
+    while (1)
+    {
+        size_t decodedSize=0;
+        size_t cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
+        if (ZSTD_isError(cBlockSize)) return cBlockSize;
+
+        ip += ZSTD_blockHeaderSize;
+        remainingSize -= ZSTD_blockHeaderSize;
+        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+
+        switch(blockProperties.blockType)
+        {
+        case bt_compressed:
+            decodedSize = ZSTD_decompressBlock(ctx, op, oend-op, ip, cBlockSize);
+            break;
+        case bt_raw :
+            decodedSize = ZSTD_copyUncompressedBlock(op, oend-op, ip, cBlockSize);
+            break;
+        case bt_rle :
+            return ERROR(GENERIC);   /* not yet supported */
+            break;
+        case bt_end :
+            /* end of frame */
+            if (remainingSize) return ERROR(srcSize_wrong);
+            break;
+        default:
+            return ERROR(GENERIC);   /* impossible */
+        }
+        if (cBlockSize == 0) break;   /* bt_end */
+
+        if (ZSTD_isError(decodedSize)) return decodedSize;
+        op += decodedSize;
+        ip += cBlockSize;
+        remainingSize -= cBlockSize;
+    }
+
+    return op-ostart;
+}
+
+size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    ZSTD_DCtx ctx;
+    ctx.base = dst;
+    return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
+}
+
+
+/* ******************************
+*  Streaming Decompression API
+********************************/
+
+size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx)
+{
+    dctx->expected = ZSTD_frameHeaderSize;
+    dctx->phase = 0;
+    dctx->previousDstEnd = NULL;
+    dctx->base = NULL;
+    return 0;
+}
+
+ZSTD_DCtx* ZSTD_createDCtx(void)
+{
+    ZSTD_DCtx* dctx = (ZSTD_DCtx*)malloc(sizeof(ZSTD_DCtx));
+    if (dctx==NULL) return NULL;
+    ZSTD_resetDCtx(dctx);
+    return dctx;
+}
+
+size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
+{
+    free(dctx);
+    return 0;
+}
+
+size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx)
+{
+    return dctx->expected;
+}
+
+size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    /* Sanity check */
+    if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
+    if (dst != ctx->previousDstEnd)  /* not contiguous */
+        ctx->base = dst;
+
+    /* Decompress : frame header */
+    if (ctx->phase == 0)
+    {
+        /* Check frame magic header */
+        U32 magicNumber = MEM_readLE32(src);
+        if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+        ctx->phase = 1;
+        ctx->expected = ZSTD_blockHeaderSize;
+        return 0;
+    }
+
+    /* Decompress : block header */
+    if (ctx->phase == 1)
+    {
+        blockProperties_t bp;
+        size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
+        if (ZSTD_isError(blockSize)) return blockSize;
+        if (bp.blockType == bt_end)
+        {
+            ctx->expected = 0;
+            ctx->phase = 0;
+        }
+        else
+        {
+            ctx->expected = blockSize;
+            ctx->bType = bp.blockType;
+            ctx->phase = 2;
+        }
+
+        return 0;
+    }
+
+    /* Decompress : block content */
+    {
+        size_t rSize;
+        switch(ctx->bType)
+        {
+        case bt_compressed:
+            rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
+            break;
+        case bt_raw :
+            rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
+            break;
+        case bt_rle :
+            return ERROR(GENERIC);   /* not yet handled */
+            break;
+        case bt_end :   /* should never happen (filtered at phase 1) */
+            rSize = 0;
+            break;
+        default:
+            return ERROR(GENERIC);
+        }
+        ctx->phase = 1;
+        ctx->expected = ZSTD_blockHeaderSize;
+        ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
+        return rSize;
+    }
+
+}
+
+
diff --git a/lib/zstd.h b/lib/zstd.h
new file mode 100644
index 0000000..96bb184
--- /dev/null
+++ b/lib/zstd.h
@@ -0,0 +1,111 @@
+/*
+    zstd - standard compression library
+    Header File
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#ifndef ZSTD_H
+#define ZSTD_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+*  Version
+***************************************/
+#define ZSTD_VERSION_MAJOR    0    /* for breaking interface changes  */
+#define ZSTD_VERSION_MINOR    4    /* for new (non-breaking) interface capabilities */
+#define ZSTD_VERSION_RELEASE  0    /* for tweaks, bug-fixes, or development */
+#define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
+unsigned ZSTD_versionNumber (void);
+
+
+/* *************************************
+*  Simple functions
+***************************************/
+size_t ZSTD_compress(   void* dst, size_t maxDstSize,
+                  const void* src, size_t srcSize,
+                         int  compressionLevel);
+
+size_t ZSTD_decompress( void* dst, size_t maxOriginalSize,
+                  const void* src, size_t compressedSize);
+
+/**
+ZSTD_compress() :
+    Compresses 'srcSize' bytes from buffer 'src' into buffer 'dst', of maximum size 'dstSize'.
+    Destination buffer must be already allocated.
+    Compression runs faster if maxDstSize >=  ZSTD_compressBound(srcSize).
+    return : the number of bytes written into buffer 'dst'
+             or an error code if it fails (which can be tested using ZSTD_isError())
+
+ZSTD_decompress() :
+    compressedSize : is the exact source size
+    maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
+                      It must be equal or larger than originalSize, otherwise decompression will fail.
+    return : the number of bytes decompressed into destination buffer (<= maxOriginalSize)
+             or an errorCode if it fails (which can be tested using ZSTD_isError())
+*/
+
+
+/* *************************************
+*  Tool functions
+***************************************/
+size_t      ZSTD_compressBound(size_t srcSize);   /** maximum compressed size (worst case scenario) */
+
+/* Error Management */
+unsigned    ZSTD_isError(size_t code);         /** tells if a return value is an error code */
+const char* ZSTD_getErrorName(size_t code);    /** provides error code string */
+
+
+/* *************************************
+*  Advanced functions
+***************************************/
+typedef struct ZSTD_CCtx_s ZSTD_CCtx;   /* incomplete type */
+ZSTD_CCtx* ZSTD_createCCtx(void);
+size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);
+
+/**
+ZSTD_compressCCtx() :
+    Same as ZSTD_compress(), but requires a ZSTD_CCtx working space already allocated
+*/
+size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize, int compressionLevel);
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* ZSTD_H */
diff --git a/lib/zstd_buffered.c b/lib/zstd_buffered.c
new file mode 100644
index 0000000..844c504
--- /dev/null
+++ b/lib/zstd_buffered.c
@@ -0,0 +1,551 @@
+/*
+    Buffered version of Zstd compression library
+    Copyright (C) 2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* The objects defined into this file should be considered experimental.
+ * They are not labelled stable, as their prototype may change in the future.
+ * You can use them for tests, provide feedback, or if you can endure risk of future changes.
+ */
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stdlib.h>
+#include "error.h"
+#include "zstd_static.h"
+#include "zstd_buffered_static.h"
+
+
+/** ************************************************
+*  Streaming compression
+*
+*  A ZBUFF_CCtx object is required to track streaming operation.
+*  Use ZBUFF_createCCtx() and ZBUFF_freeCCtx() to create/release resources.
+*  Use ZBUFF_compressInit() to start a new compression operation.
+*  ZBUFF_CCtx objects can be reused multiple times.
+*
+*  Use ZBUFF_compressContinue() repetitively to consume your input.
+*  *srcSizePtr and *maxDstSizePtr can be any size.
+*  The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr.
+*  Note that it may not consume the entire input, in which case it's up to the caller to call again the function with remaining input.
+*  The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst .
+*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency)
+*            or an error code, which can be tested using ZBUFF_isError().
+*
+*  ZBUFF_compressFlush() can be used to instruct ZBUFF to compress and output whatever remains within its buffer.
+*  Note that it will not output more than *maxDstSizePtr.
+*  Therefore, some content might still be left into its internal buffer if dst buffer is too small.
+*  @return : nb of bytes still present into internal buffer (0 if it's empty)
+*            or an error code, which can be tested using ZBUFF_isError().
+*
+*  ZBUFF_compressEnd() instructs to finish a frame.
+*  It will perform a flush and write frame epilogue.
+*  Similar to ZBUFF_compressFlush(), it may not be able to output the entire internal buffer content if *maxDstSizePtr is too small.
+*  @return : nb of bytes still present into internal buffer (0 if it's empty)
+*            or an error code, which can be tested using ZBUFF_isError().
+*
+*  Hint : recommended buffer sizes (not compulsory)
+*  input : 128 KB block size is the internal unit, it improves latency to use this value.
+*  output : ZSTD_compressBound(128 KB) + 3 + 3 : ensures it's always possible to write/flush/end a full block at best speed.
+* **************************************************/
+
+typedef enum { ZBUFFcs_init, ZBUFFcs_load, ZBUFFcs_flush } ZBUFF_cStage;
+
+/* *** Ressources *** */
+struct ZBUFF_CCtx_s {
+    ZSTD_CCtx* zc;
+    char* inBuff;
+    size_t inBuffSize;
+    size_t inToCompress;
+    size_t inBuffPos;
+    size_t inBuffTarget;
+    size_t blockSize;
+    char* outBuff;
+    size_t outBuffSize;
+    size_t outBuffContentSize;
+    size_t outBuffFlushedSize;
+    ZBUFF_cStage stage;
+};   /* typedef'd tp ZBUFF_CCtx within "zstd_buffered.h" */
+
+ZBUFF_CCtx* ZBUFF_createCCtx(void)
+{
+    ZBUFF_CCtx* zbc = (ZBUFF_CCtx*)malloc(sizeof(ZBUFF_CCtx));
+    if (zbc==NULL) return NULL;
+    memset(zbc, 0, sizeof(*zbc));
+    zbc->zc = ZSTD_createCCtx();
+    return zbc;
+}
+
+size_t ZBUFF_freeCCtx(ZBUFF_CCtx* zbc)
+{
+    if (zbc==NULL) return 0;   /* support free on NULL */
+    ZSTD_freeCCtx(zbc->zc);
+    free(zbc->inBuff);
+    free(zbc->outBuff);
+    free(zbc);
+    return 0;
+}
+
+
+/* *** Initialization *** */
+
+#define MIN(a,b)    ( ((a)<(b)) ? (a) : (b) )
+#define BLOCKSIZE   (128 * 1024)   /* a bit too "magic", should come from reference */
+size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, ZSTD_parameters params)
+{
+    size_t neededInBuffSize;
+
+    ZSTD_validateParams(&params);
+    neededInBuffSize = (size_t)1 << params.windowLog;
+
+    /* allocate buffers */
+    if (zbc->inBuffSize < neededInBuffSize)
+    {
+        zbc->inBuffSize = neededInBuffSize;
+        free(zbc->inBuff);   /* should not be necessary */
+        zbc->inBuff = (char*)malloc(neededInBuffSize);
+        if (zbc->inBuff == NULL) return ERROR(memory_allocation);
+    }
+    zbc->blockSize = MIN(BLOCKSIZE, zbc->inBuffSize);
+    if (zbc->outBuffSize < ZSTD_compressBound(zbc->blockSize)+1)
+    {
+        zbc->outBuffSize = ZSTD_compressBound(zbc->blockSize)+1;
+        free(zbc->outBuff);   /* should not be necessary */
+        zbc->outBuff = (char*)malloc(zbc->outBuffSize);
+        if (zbc->outBuff == NULL) return ERROR(memory_allocation);
+    }
+
+    zbc->outBuffContentSize = ZSTD_compressBegin_advanced(zbc->zc, zbc->outBuff, zbc->outBuffSize, params);
+    if (ZSTD_isError(zbc->outBuffContentSize)) return zbc->outBuffContentSize;
+
+    zbc->inToCompress = 0;
+    zbc->inBuffPos = 0;
+    zbc->inBuffTarget = zbc->blockSize;
+    zbc->outBuffFlushedSize = 0;
+    zbc->stage = ZBUFFcs_flush;   /* starts by flushing the header */
+    return 0;   /* ready to go */
+}
+
+size_t ZBUFF_compressInit(ZBUFF_CCtx* zbc, int compressionLevel)
+{
+    return ZBUFF_compressInit_advanced(zbc, ZSTD_getParams(compressionLevel, 0));
+}
+
+
+
+/* *** Compression *** */
+
+static size_t ZBUFF_limitCopy(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    size_t length = MIN(maxDstSize, srcSize);
+    memcpy(dst, src, length);
+    return length;
+}
+
+static size_t ZBUFF_compressContinue_generic(ZBUFF_CCtx* zbc,
+                              void* dst, size_t* maxDstSizePtr,
+                        const void* src, size_t* srcSizePtr,
+                              int flush)   /* aggregate : wait for full block before compressing */
+{
+    U32 notDone = 1;
+    const char* const istart = (const char*)src;
+    const char* ip = istart;
+    const char* const iend = istart + *srcSizePtr;
+    char* const ostart = (char*)dst;
+    char* op = ostart;
+    char* const oend = ostart + *maxDstSizePtr;
+
+    while (notDone)
+    {
+        switch(zbc->stage)
+        {
+        case ZBUFFcs_init: return ERROR(init_missing);   /* call ZBUFF_compressInit() first ! */
+
+        case ZBUFFcs_load:
+            /* complete inBuffer */
+            {
+                size_t toLoad = zbc->inBuffTarget - zbc->inBuffPos;
+                size_t loaded = ZBUFF_limitCopy(zbc->inBuff + zbc->inBuffPos, toLoad, ip, iend-ip);
+                zbc->inBuffPos += loaded;
+                ip += loaded;
+                if ( (zbc->inBuffPos==zbc->inToCompress) || (!flush && (toLoad != loaded)) )
+                    { notDone = 0; break; }  /* not enough input to get a full block : stop there, wait for more */
+            }
+            /* compress current block (note : this stage cannot be stopped in the middle) */
+            {
+                void* cDst;
+                size_t cSize;
+                size_t iSize = zbc->inBuffPos - zbc->inToCompress;
+                size_t oSize = oend-op;
+                if (oSize >= ZSTD_compressBound(iSize))
+                    cDst = op;   /* compress directly into output buffer (avoid flush stage) */
+                else
+                    cDst = zbc->outBuff, oSize = zbc->outBuffSize;
+                cSize = ZSTD_compressContinue(zbc->zc, cDst, oSize, zbc->inBuff + zbc->inToCompress, iSize);
+                if (ZSTD_isError(cSize)) return cSize;
+                /* prepare next block */
+                zbc->inBuffTarget = zbc->inBuffPos + zbc->blockSize;
+                if (zbc->inBuffTarget > zbc->inBuffSize)
+                    { zbc->inBuffPos = 0; zbc->inBuffTarget = zbc->blockSize; }   /* note : inBuffSize >= blockSize */
+                zbc->inToCompress = zbc->inBuffPos;
+                if (cDst == op) { op += cSize; break; }   /* no need to flush */
+                zbc->outBuffContentSize = cSize;
+                zbc->outBuffFlushedSize = 0;
+                zbc->stage = ZBUFFcs_flush;
+                // break;   /* flush stage follows */
+            }
+
+        case ZBUFFcs_flush:
+            /* flush into dst */
+            {
+                size_t toFlush = zbc->outBuffContentSize - zbc->outBuffFlushedSize;
+                size_t flushed = ZBUFF_limitCopy(op, oend-op, zbc->outBuff + zbc->outBuffFlushedSize, toFlush);
+                op += flushed;
+                zbc->outBuffFlushedSize += flushed;
+                if (toFlush!=flushed)
+                    { notDone = 0; break; } /* not enough space within dst to store compressed block : stop there */
+                zbc->outBuffContentSize = 0;
+                zbc->outBuffFlushedSize = 0;
+                zbc->stage = ZBUFFcs_load;
+                break;
+            }
+        }
+    }
+
+    *srcSizePtr = ip - istart;
+    *maxDstSizePtr = op - ostart;
+    {
+        size_t hintInSize = zbc->inBuffTarget - zbc->inBuffPos;
+        if (hintInSize==0) hintInSize = zbc->blockSize;
+        return hintInSize;
+    }
+}
+
+size_t ZBUFF_compressContinue(ZBUFF_CCtx* zbc,
+                              void* dst, size_t* maxDstSizePtr,
+                        const void* src, size_t* srcSizePtr)
+{ return ZBUFF_compressContinue_generic(zbc, dst, maxDstSizePtr, src, srcSizePtr, 0); }
+
+
+
+/* *** Finalize *** */
+
+size_t ZBUFF_compressFlush(ZBUFF_CCtx* zbc, void* dst, size_t* maxDstSizePtr)
+{
+    size_t srcSize = 0;
+    ZBUFF_compressContinue_generic(zbc, dst, maxDstSizePtr, &srcSize, &srcSize, 1);  /* use a valid src address instead of NULL, as some sanitizer don't like it */
+    return zbc->outBuffContentSize - zbc->outBuffFlushedSize;
+}
+
+
+size_t ZBUFF_compressEnd(ZBUFF_CCtx* zbc, void* dst, size_t* maxDstSizePtr)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + *maxDstSizePtr;
+    size_t outSize = *maxDstSizePtr;
+    size_t epilogueSize, remaining;
+    ZBUFF_compressFlush(zbc, dst, &outSize);    /* flush any remaining inBuff */
+    op += outSize;
+    epilogueSize = ZSTD_compressEnd(zbc->zc, zbc->outBuff + zbc->outBuffContentSize, zbc->outBuffSize - zbc->outBuffContentSize);   /* epilogue into outBuff */
+    zbc->outBuffContentSize += epilogueSize;
+    outSize = oend-op;
+    zbc->stage = ZBUFFcs_flush;
+    remaining = ZBUFF_compressFlush(zbc, op, &outSize);   /* attempt to flush epilogue into dst */
+    op += outSize;
+    if (!remaining) zbc->stage = ZBUFFcs_init;  /* close only if nothing left to flush */
+    *maxDstSizePtr = op-ostart;                 /* tells how many bytes were written */
+    return remaining;
+}
+
+
+
+/** ************************************************
+*  Streaming decompression
+*
+*  A ZBUFF_DCtx object is required to track streaming operation.
+*  Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources.
+*  Use ZBUFF_decompressInit() to start a new decompression operation.
+*  ZBUFF_DCtx objects can be reused multiple times.
+*
+*  Use ZBUFF_decompressContinue() repetitively to consume your input.
+*  *srcSizePtr and *maxDstSizePtr can be any size.
+*  The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr.
+*  Note that it may not consume the entire input, in which case it's up to the caller to call again the function with remaining input.
+*  The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst .
+*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency)
+*            or 0 when a frame is completely decoded
+*            or an error code, which can be tested using ZBUFF_isError().
+*
+*  Hint : recommended buffer sizes (not compulsory)
+*  output : 128 KB block size is the internal unit, it ensures it's always possible to write a full block when it's decoded.
+*  input : just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
+* **************************************************/
+
+typedef enum { ZBUFFds_init, ZBUFFds_readHeader, ZBUFFds_loadHeader, ZBUFFds_decodeHeader,
+               ZBUFFds_read, ZBUFFds_load, ZBUFFds_flush } ZBUFF_dStage;
+
+/* *** Resource management *** */
+
+#define ZSTD_frameHeaderSize_max 5   /* too magical, should come from reference */
+struct ZBUFF_DCtx_s {
+    ZSTD_DCtx* zc;
+    ZSTD_parameters params;
+    char* inBuff;
+    size_t inBuffSize;
+    size_t inPos;
+    char* outBuff;
+    size_t outBuffSize;
+    size_t outStart;
+    size_t outEnd;
+    size_t hPos;
+    ZBUFF_dStage stage;
+    unsigned char headerBuffer[ZSTD_frameHeaderSize_max];
+};   /* typedef'd to ZBUFF_DCtx within "zstd_buffered.h" */
+
+
+ZBUFF_DCtx* ZBUFF_createDCtx(void)
+{
+    ZBUFF_DCtx* zbc = (ZBUFF_DCtx*)malloc(sizeof(ZBUFF_DCtx));
+    if (zbc==NULL) return NULL;
+    memset(zbc, 0, sizeof(*zbc));
+    zbc->zc = ZSTD_createDCtx();
+    zbc->stage = ZBUFFds_init;
+    return zbc;
+}
+
+size_t ZBUFF_freeDCtx(ZBUFF_DCtx* zbc)
+{
+    if (zbc==NULL) return 0;   /* support free on null */
+    ZSTD_freeDCtx(zbc->zc);
+    free(zbc->inBuff);
+    free(zbc->outBuff);
+    free(zbc);
+    return 0;
+}
+
+
+
+/* *** Initialization *** */
+
+size_t ZBUFF_decompressInit(ZBUFF_DCtx* zbc)
+{
+    zbc->stage = ZBUFFds_readHeader;
+    zbc->hPos = zbc->inPos = zbc->outStart = zbc->outEnd = 0;
+    return ZSTD_resetDCtx(zbc->zc);
+}
+
+
+
+/* *** Decompression *** */
+
+size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr)
+{
+    const char* const istart = (const char*)src;
+    const char* ip = istart;
+    const char* const iend = istart + *srcSizePtr;
+    char* const ostart = (char*)dst;
+    char* op = ostart;
+    char* const oend = ostart + *maxDstSizePtr;
+    U32 notDone = 1;
+
+    while (notDone)
+    {
+        switch(zbc->stage)
+        {
+
+        case ZBUFFds_init :
+            return ERROR(init_missing);
+
+        case ZBUFFds_readHeader :
+            /* read header from src */
+            {
+                size_t headerSize = ZSTD_getFrameParams(&(zbc->params), src, *srcSizePtr);
+                if (ZSTD_isError(headerSize)) return headerSize;
+                if (headerSize)
+                {
+                    /* not enough input to decode header : tell how many bytes would be necessary */
+                    memcpy(zbc->headerBuffer+zbc->hPos, src, *srcSizePtr);
+                    zbc->hPos += *srcSizePtr;
+                    *maxDstSizePtr = 0;
+                    zbc->stage = ZBUFFds_loadHeader;
+                    return headerSize - zbc->hPos;
+                }
+                zbc->stage = ZBUFFds_decodeHeader;
+                break;
+            }
+
+        case ZBUFFds_loadHeader:
+            /* complete header from src */
+            {
+                size_t headerSize = ZBUFF_limitCopy(
+                    zbc->headerBuffer + zbc->hPos, ZSTD_frameHeaderSize_max - zbc->hPos,
+                    src, *srcSizePtr);
+                zbc->hPos += headerSize;
+                ip += headerSize;
+                headerSize = ZSTD_getFrameParams(&(zbc->params), zbc->headerBuffer, zbc->hPos);
+                if (ZSTD_isError(headerSize)) return headerSize;
+                if (headerSize)
+                {
+                    /* not enough input to decode header : tell how many bytes would be necessary */
+                    *maxDstSizePtr = 0;
+                    return headerSize - zbc->hPos;
+                }
+                // zbc->stage = ZBUFFds_decodeHeader; break;   /* useless : stage follows */
+            }
+
+        case ZBUFFds_decodeHeader:
+                /* apply header to create / resize buffers */
+                {
+                    size_t neededOutSize = (size_t)1 << zbc->params.windowLog;
+                    size_t neededInSize = BLOCKSIZE;   /* a block is never > BLOCKSIZE */
+                    if (zbc->inBuffSize < neededInSize)
+                    {
+                        free(zbc->inBuff);
+                        zbc->inBuffSize = neededInSize;
+                        zbc->inBuff = (char*)malloc(neededInSize);
+                        if (zbc->inBuff == NULL) return ERROR(memory_allocation);
+                    }
+                    if (zbc->outBuffSize < neededOutSize)
+                    {
+                        free(zbc->outBuff);
+                        zbc->outBuffSize = neededOutSize;
+                        zbc->outBuff = (char*)malloc(neededOutSize);
+                        if (zbc->outBuff == NULL) return ERROR(memory_allocation);
+                    }
+                }
+                if (zbc->hPos)
+                {
+                    /* some data already loaded into headerBuffer : transfer into inBuff */
+                    memcpy(zbc->inBuff, zbc->headerBuffer, zbc->hPos);
+                    zbc->inPos = zbc->hPos;
+                    zbc->hPos = 0;
+                    zbc->stage = ZBUFFds_load;
+                    break;
+                }
+                zbc->stage = ZBUFFds_read;
+
+        case ZBUFFds_read:
+            {
+                size_t neededInSize = ZSTD_nextSrcSizeToDecompress(zbc->zc);
+                if (neededInSize==0)   /* end of frame */
+                {
+                    zbc->stage = ZBUFFds_init;
+                    notDone = 0;
+                    break;
+                }
+                if ((size_t)(iend-ip) >= neededInSize)
+                {
+                    /* directly decode from src */
+                    size_t decodedSize = ZSTD_decompressContinue(zbc->zc,
+                        zbc->outBuff + zbc->outStart, zbc->outBuffSize - zbc->outStart,
+                        ip, neededInSize);
+                    if (ZSTD_isError(decodedSize)) return decodedSize;
+                    ip += neededInSize;
+                    if (!decodedSize) break;   /* this was just a header */
+                    zbc->outEnd = zbc->outStart +  decodedSize;
+                    zbc->stage = ZBUFFds_flush;
+                    break;
+                }
+                if (ip==iend) { notDone = 0; break; }   /* no more input */
+                zbc->stage = ZBUFFds_load;
+            }
+
+        case ZBUFFds_load:
+            {
+                size_t neededInSize = ZSTD_nextSrcSizeToDecompress(zbc->zc);
+                size_t toLoad = neededInSize - zbc->inPos;   /* should always be <= remaining space within inBuff */
+                size_t loadedSize;
+                if (toLoad > zbc->inBuffSize - zbc->inPos) return ERROR(corruption_detected);   /* should never happen */
+                loadedSize = ZBUFF_limitCopy(zbc->inBuff + zbc->inPos, toLoad, ip, iend-ip);
+                ip += loadedSize;
+                zbc->inPos += loadedSize;
+                if (loadedSize < toLoad) { notDone = 0; break; }   /* not enough input, wait for more */
+                {
+                    size_t decodedSize = ZSTD_decompressContinue(zbc->zc,
+                        zbc->outBuff + zbc->outStart, zbc->outBuffSize - zbc->outStart,
+                        zbc->inBuff, neededInSize);
+                    if (ZSTD_isError(decodedSize)) return decodedSize;
+                    zbc->inPos = 0;   /* input is consumed */
+                    if (!decodedSize) { zbc->stage = ZBUFFds_read; break; }   /* this was just a header */
+                    zbc->outEnd = zbc->outStart +  decodedSize;
+                    zbc->stage = ZBUFFds_flush;
+                    // break; /* ZBUFFds_flush follows */
+                }
+            }
+        case ZBUFFds_flush:
+            {
+                size_t toFlushSize = zbc->outEnd - zbc->outStart;
+                size_t flushedSize = ZBUFF_limitCopy(op, oend-op, zbc->outBuff + zbc->outStart, toFlushSize);
+                op += flushedSize;
+                zbc->outStart += flushedSize;
+                if (flushedSize == toFlushSize)
+                {
+                    zbc->stage = ZBUFFds_read;
+                    if (zbc->outStart + BLOCKSIZE > zbc->outBuffSize)
+                        zbc->outStart = zbc->outEnd = 0;
+                    break;
+                }
+                /* cannot flush everything */
+                notDone = 0;
+                break;
+            }
+        }
+    }
+
+    *srcSizePtr = ip-istart;
+    *maxDstSizePtr = op-ostart;
+
+    {
+        size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zbc->zc);
+        if (nextSrcSizeHint > 3) nextSrcSizeHint+= 3;   /* get the next block header while at it */
+        nextSrcSizeHint -= zbc->inPos;   /* already loaded*/
+        return nextSrcSizeHint;
+    }
+}
+
+
+
+
+
+
+
+
+/* *************************************
+*  Tool functions
+***************************************/
+unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); }
+const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
+
+size_t ZBUFF_recommendedCInSize()  { return BLOCKSIZE; }
+size_t ZBUFF_recommendedCOutSize() { return ZSTD_compressBound(BLOCKSIZE) + 6; }
+size_t ZBUFF_recommendedDInSize()  { return BLOCKSIZE + 3; }
+size_t ZBUFF_recommendedDOutSize() { return BLOCKSIZE; }
diff --git a/lib/zstd_buffered.h b/lib/zstd_buffered.h
new file mode 100644
index 0000000..80ba819
--- /dev/null
+++ b/lib/zstd_buffered.h
@@ -0,0 +1,147 @@
+/*
+    Buffered version of Zstd compression library
+    Copyright (C) 2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#ifndef ZSTD_BUFFERED_H
+#define ZSTD_BUFFERED_H
+
+/* The objects defined into this file should be considered experimental.
+ * They are not labelled stable, as their prototype may change in the future.
+ * You can use them for tests, provide feedback, or if you can endure risk of future changes.
+ */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+*  Streaming functions
+***************************************/
+typedef struct ZBUFF_CCtx_s ZBUFF_CCtx;
+ZBUFF_CCtx* ZBUFF_createCCtx(void);
+size_t      ZBUFF_freeCCtx(ZBUFF_CCtx* cctx);
+
+size_t ZBUFF_compressInit(ZBUFF_CCtx* cctx, int compressionLevel);
+size_t ZBUFF_compressContinue(ZBUFF_CCtx* cctx, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr);
+size_t ZBUFF_compressFlush(ZBUFF_CCtx* cctx, void* dst, size_t* maxDstSizePtr);
+size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* maxDstSizePtr);
+
+/** ************************************************
+*  Streaming compression
+*
+*  A ZBUFF_CCtx object is required to track streaming operation.
+*  Use ZBUFF_createCCtx() and ZBUFF_freeCCtx() to create/release resources.
+*  Use ZBUFF_compressInit() to start a new compression operation.
+*  ZBUFF_CCtx objects can be reused multiple times.
+*
+*  Use ZBUFF_compressContinue() repetitively to consume input stream.
+*  *srcSizePtr and *maxDstSizePtr can be any size.
+*  The function will report how many bytes were read or written within *srcSizePtr and *maxDstSizePtr.
+*  Note that it may not consume the entire input, in which case it's up to the caller to present again remaining data.
+*  The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or move dst .
+*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency)
+*            or an error code, which can be tested using ZBUFF_isError().
+*
+*  ZBUFF_compressFlush() can be used to instruct ZBUFF to compress and output whatever remains within its buffer.
+*  Note that it will not output more than *maxDstSizePtr.
+*  Therefore, some content might still be left into its internal buffer if dst buffer is too small.
+*  @return : nb of bytes still present into internal buffer (0 if it's empty)
+*            or an error code, which can be tested using ZBUFF_isError().
+*
+*  ZBUFF_compressEnd() instructs to finish a frame.
+*  It will perform a flush and write frame epilogue.
+*  Note that the epilogue is necessary for decoders to consider a frame completed.
+*  Similar to ZBUFF_compressFlush(), it may not be able to output the entire internal buffer content if *maxDstSizePtr is too small.
+*  In which case, call again ZBUFF_compressFlush() to complete the flush.
+*  @return : nb of bytes still present into internal buffer (0 if it's empty)
+*            or an error code, which can be tested using ZBUFF_isError().
+*
+*  Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedCInSize / ZBUFF_recommendedCOutSize
+*  input : ZBUFF_recommendedCInSize==128 KB block size is the internal unit, it improves latency to use this value.
+*  output : ZBUFF_recommendedCOutSize==ZSTD_compressBound(128 KB) + 3 + 3 : ensures it's always possible to write/flush/end a full block. Skip some buffering.
+*  By using both, you ensure that input will be entirely consumed, and output will always contain the result.
+* **************************************************/
+
+
+typedef struct ZBUFF_DCtx_s ZBUFF_DCtx;
+ZBUFF_DCtx* ZBUFF_createDCtx(void);
+size_t      ZBUFF_freeDCtx(ZBUFF_DCtx* dctx);
+
+size_t ZBUFF_decompressInit(ZBUFF_DCtx* dctx);
+size_t ZBUFF_decompressContinue(ZBUFF_DCtx* dctx, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr);
+
+/** ************************************************
+*  Streaming decompression
+*
+*  A ZBUFF_DCtx object is required to track streaming operation.
+*  Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources.
+*  Use ZBUFF_decompressInit() to start a new decompression operation.
+*  ZBUFF_DCtx objects can be reused multiple times.
+*
+*  Use ZBUFF_decompressContinue() repetitively to consume your input.
+*  *srcSizePtr and *maxDstSizePtr can be any size.
+*  The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr.
+*  Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
+*  The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst.
+*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency)
+*            or 0 when a frame is completely decoded
+*            or an error code, which can be tested using ZBUFF_isError().
+*
+*  Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize / ZBUFF_recommendedDOutSize
+*  output : ZBUFF_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when it's decoded.
+*  input : ZBUFF_recommendedDInSize==128Kb+3; just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
+* **************************************************/
+
+
+/* *************************************
+*  Tool functions
+***************************************/
+unsigned ZBUFF_isError(size_t errorCode);
+const char* ZBUFF_getErrorName(size_t errorCode);
+
+/** The below functions provide recommended buffer sizes for Compression or Decompression operations.
+*   These sizes are not compulsory, they just tend to offer better latency */
+size_t ZBUFF_recommendedCInSize(void);
+size_t ZBUFF_recommendedCOutSize(void);
+size_t ZBUFF_recommendedDInSize(void);
+size_t ZBUFF_recommendedDOutSize(void);
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* ZSTD_BUFFERED_H */
diff --git a/lib/zstd_buffered_static.h b/lib/zstd_buffered_static.h
new file mode 100644
index 0000000..cba1d6f
--- /dev/null
+++ b/lib/zstd_buffered_static.h
@@ -0,0 +1,62 @@
+/*
+    zstd - buffered version of compression library
+    experimental complementary API, for static linking only
+    Copyright (C) 2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#ifndef ZSTD_BUFFERED_STATIC_H
+#define ZSTD_BUFFERED_STATIC_H
+
+/* The objects defined into this file should be considered experimental.
+ * They are not labelled stable, as their prototype may change in the future.
+ * You can use them for tests, provide feedback, or if you can endure risk of future changes.
+ */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include "zstd_static.h"
+#include "zstd_buffered.h"
+
+
+/* *************************************
+*  Advanced Streaming functions
+***************************************/
+size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* cctx, ZSTD_parameters params);
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* ZSTD_BUFFERED_STATIC_H */
diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c
new file mode 100644
index 0000000..5ca4437
--- /dev/null
+++ b/lib/zstd_compress.c
@@ -0,0 +1,2116 @@
+/*
+    ZSTD HC - High Compression Mode of Zstandard
+    Copyright (C) 2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+       - Zstd source repository : https://www.zstd.net
+*/
+
+
+/* *******************************************************
+*  Compiler specifics
+*********************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
+#else
+#  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#  ifdef __GNUC__
+#    define FORCE_INLINE static inline __attribute__((always_inline))
+#  else
+#    define FORCE_INLINE static inline
+#  endif
+#endif
+
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stdlib.h>   /* malloc */
+#include <string.h>   /* memset */
+#include "mem.h"
+#include "fse_static.h"
+#include "huff0.h"
+#include "zstd_static.h"
+#include "zstd_internal.h"
+
+
+/* *************************************
+*  Constants
+***************************************/
+static const U32 g_searchStrength = 8;
+
+
+/* *************************************
+*  Sequence storage
+***************************************/
+typedef struct {
+    void* buffer;
+    U32*  offsetStart;
+    U32*  offset;
+    BYTE* offCodeStart;
+    BYTE* offCode;
+    BYTE* litStart;
+    BYTE* lit;
+    BYTE* litLengthStart;
+    BYTE* litLength;
+    BYTE* matchLengthStart;
+    BYTE* matchLength;
+    BYTE* dumpsStart;
+    BYTE* dumps;
+} seqStore_t;
+
+static void ZSTD_resetSeqStore(seqStore_t* ssPtr)
+{
+    ssPtr->offset = ssPtr->offsetStart;
+    ssPtr->lit = ssPtr->litStart;
+    ssPtr->litLength = ssPtr->litLengthStart;
+    ssPtr->matchLength = ssPtr->matchLengthStart;
+    ssPtr->dumps = ssPtr->dumpsStart;
+}
+
+
+/* *************************************
+*  Context memory management
+***************************************/
+#define WORKPLACESIZE (BLOCKSIZE*3)
+
+struct ZSTD_CCtx_s
+{
+    const BYTE* nextSrc;    /* next block here to continue on current prefix */
+    const BYTE* base;       /* All regular indexes relative to this position */
+    const BYTE* dictBase;   /* extDict indexes relative to this position */
+    U32   dictLimit;        /* below that point, need extDict */
+    U32   lowLimit;         /* below that point, no more data */
+    U32   nextToUpdate;     /* index from which to continue dictionary update */
+    ZSTD_parameters params;
+    void* workSpace;
+    size_t workSpaceSize;
+
+    seqStore_t seqStore;    /* sequences storage ptrs */
+    U32* hashTable;
+    U32* contentTable;
+};
+
+
+ZSTD_CCtx* ZSTD_createCCtx(void)
+{
+    return (ZSTD_CCtx*) calloc(1, sizeof(ZSTD_CCtx));
+}
+
+size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
+{
+    free(cctx->workSpace);
+    free(cctx);
+    return 0;
+}
+
+
+static unsigned ZSTD_highbit(U32 val);
+
+/** ZSTD_validateParams
+    correct params value to remain within authorized range
+    optimize for srcSize if srcSize > 0 */
+void ZSTD_validateParams(ZSTD_parameters* params)
+{
+    const U32 btPlus = (params->strategy == ZSTD_btlazy2);
+
+    /* validate params */
+    if (MEM_32bits()) if (params->windowLog > 25) params->windowLog = 25;   /* 32 bits mode cannot flush > 24 bits */
+    if (params->windowLog   > ZSTD_WINDOWLOG_MAX) params->windowLog = ZSTD_WINDOWLOG_MAX;
+    if (params->windowLog   < ZSTD_WINDOWLOG_MIN) params->windowLog = ZSTD_WINDOWLOG_MIN;
+
+    /* correct params, to use less memory */
+    if ((params->srcSize > 0) && (params->srcSize < (1<<ZSTD_WINDOWLOG_MAX)))
+    {
+        U32 srcLog = ZSTD_highbit((U32)(params->srcSize)-1) + 1;
+        if (params->windowLog > srcLog) params->windowLog = srcLog;
+    }
+
+    if (params->windowLog   < ZSTD_WINDOWLOG_ABSOLUTEMIN) params->windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN;  /* required for frame header */
+    if (params->contentLog  > params->windowLog+btPlus) params->contentLog = params->windowLog+btPlus;   /* <= ZSTD_CONTENTLOG_MAX */
+    if (params->contentLog  < ZSTD_CONTENTLOG_MIN) params->contentLog = ZSTD_CONTENTLOG_MIN;
+    if (params->hashLog     > ZSTD_HASHLOG_MAX) params->hashLog = ZSTD_HASHLOG_MAX;
+    if (params->hashLog     < ZSTD_HASHLOG_MIN) params->hashLog = ZSTD_HASHLOG_MIN;
+    if (params->searchLog   > ZSTD_SEARCHLOG_MAX) params->searchLog = ZSTD_SEARCHLOG_MAX;
+    if (params->searchLog   < ZSTD_SEARCHLOG_MIN) params->searchLog = ZSTD_SEARCHLOG_MIN;
+    if (params->searchLength> ZSTD_SEARCHLENGTH_MAX) params->searchLength = ZSTD_SEARCHLENGTH_MAX;
+    if (params->searchLength< ZSTD_SEARCHLENGTH_MIN) params->searchLength = ZSTD_SEARCHLENGTH_MIN;
+    if ((U32)params->strategy>(U32)ZSTD_btlazy2) params->strategy = ZSTD_btlazy2;
+}
+
+
+static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
+                                       ZSTD_parameters params)
+{
+    /* reserve table memory */
+    {
+        const U32 contentLog = (params.strategy == ZSTD_fast) ? 1 : params.contentLog;
+        const size_t tableSpace = ((1 << contentLog) + (1 << params.hashLog)) * sizeof(U32);
+        const size_t neededSpace = tableSpace + WORKPLACESIZE;
+        if (zc->workSpaceSize < neededSpace)
+        {
+            free(zc->workSpace);
+            zc->workSpaceSize = neededSpace;
+            zc->workSpace = malloc(neededSpace);
+            if (zc->workSpace == NULL) return ERROR(memory_allocation);
+        }
+        memset(zc->workSpace, 0, tableSpace );
+        zc->hashTable = (U32*)(zc->workSpace);
+        zc->contentTable = zc->hashTable + ((size_t)1 << params.hashLog);
+        zc->seqStore.buffer = (void*) (zc->contentTable + ((size_t)1 << contentLog));
+    }
+
+    zc->nextToUpdate = 1;
+    zc->nextSrc = NULL;
+    zc->base = NULL;
+    zc->dictBase = NULL;
+    zc->dictLimit = 0;
+    zc->lowLimit = 0;
+    zc->params = params;
+    zc->seqStore.offsetStart = (U32*) (zc->seqStore.buffer);
+    zc->seqStore.offCodeStart = (BYTE*) (zc->seqStore.offsetStart + (BLOCKSIZE>>2));
+    zc->seqStore.litStart = zc->seqStore.offCodeStart + (BLOCKSIZE>>2);
+    zc->seqStore.litLengthStart =  zc->seqStore.litStart + BLOCKSIZE;
+    zc->seqStore.matchLengthStart = zc->seqStore.litLengthStart + (BLOCKSIZE>>2);
+    zc->seqStore.dumpsStart = zc->seqStore.matchLengthStart + (BLOCKSIZE>>2);
+
+    return 0;
+}
+
+
+/** ZSTD_reduceIndex
+*   rescale indexes to avoid future overflow (indexes are U32) */
+static void ZSTD_reduceIndex (ZSTD_CCtx* zc,
+                        const U32 reducerValue)
+{
+    const U32 contentLog = (zc->params.strategy == ZSTD_fast) ? 1 : zc->params.contentLog;
+    const U32 tableSpaceU32 = (1 << contentLog) + (1 << zc->params.hashLog);
+    U32* table32 = zc->hashTable;
+    U32 index;
+
+    for (index=0 ; index < tableSpaceU32 ; index++)
+    {
+        if (table32[index] < reducerValue) table32[index] = 0;
+        else table32[index] -= reducerValue;
+    }
+}
+
+
+/* *******************************************************
+*  Block entropic compression
+*********************************************************/
+size_t ZSTD_compressBound(size_t srcSize)   /* maximum compressed size */
+{
+    return FSE_compressBound(srcSize) + 12;
+}
+
+
+size_t ZSTD_noCompressBlock (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    BYTE* const ostart = (BYTE* const)dst;
+
+    if (srcSize + ZSTD_blockHeaderSize > maxDstSize) return ERROR(dstSize_tooSmall);
+    memcpy(ostart + ZSTD_blockHeaderSize, src, srcSize);
+
+    /* Build header */
+    ostart[0]  = (BYTE)(srcSize>>16);
+    ostart[1]  = (BYTE)(srcSize>>8);
+    ostart[2]  = (BYTE) srcSize;
+    ostart[0] += (BYTE)(bt_raw<<6);   /* is a raw (uncompressed) block */
+
+    return ZSTD_blockHeaderSize+srcSize;
+}
+
+
+static size_t ZSTD_noCompressLiterals (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    BYTE* const ostart = (BYTE* const)dst;
+
+    if (srcSize + 3 > maxDstSize) return ERROR(dstSize_tooSmall);
+
+    MEM_writeLE32(dst, ((U32)srcSize << 2) | IS_RAW);
+    memcpy(ostart + 3, src, srcSize);
+    return srcSize + 3;
+}
+
+static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    BYTE* const ostart = (BYTE* const)dst;
+
+    (void)maxDstSize;
+    MEM_writeLE32(dst, ((U32)srcSize << 2) | IS_RLE);  /* note : maxDstSize > litHeaderSize > 4 */
+    ostart[3] = *(const BYTE*)src;
+    return 4;
+}
+
+size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 1; }
+
+static size_t ZSTD_compressLiterals (void* dst, size_t maxDstSize,
+                               const void* src, size_t srcSize)
+{
+    const size_t minGain = ZSTD_minGain(srcSize);
+    BYTE* const ostart = (BYTE*)dst;
+    size_t hsize;
+    static const size_t litHeaderSize = 5;
+
+    if (maxDstSize < litHeaderSize+1) return ERROR(dstSize_tooSmall);   /* not enough space for compression */
+
+    hsize = HUF_compress(ostart+litHeaderSize, maxDstSize-litHeaderSize, src, srcSize);
+
+    if ((hsize==0) || (hsize >= srcSize - minGain)) return ZSTD_noCompressLiterals(dst, maxDstSize, src, srcSize);
+    if (hsize==1) return ZSTD_compressRleLiteralsBlock(dst, maxDstSize, src, srcSize);
+
+    /* Build header */
+    {
+        ostart[0]  = (BYTE)(srcSize << 2); /* is a block, is compressed */
+        ostart[1]  = (BYTE)(srcSize >> 6);
+        ostart[2]  = (BYTE)(srcSize >>14);
+        ostart[2] += (BYTE)(hsize << 5);
+        ostart[3]  = (BYTE)(hsize >> 3);
+        ostart[4]  = (BYTE)(hsize >>11);
+    }
+
+    return hsize+litHeaderSize;
+}
+
+
+#define LITERAL_NOENTROPY 63   /* cheap heuristic */
+
+size_t ZSTD_compressSequences(void* dst, size_t maxDstSize,
+                        const seqStore_t* seqStorePtr,
+                              size_t srcSize)
+{
+    U32 count[MaxSeq+1];
+    S16 norm[MaxSeq+1];
+    size_t mostFrequent;
+    U32 max = 255;
+    U32 tableLog = 11;
+    U32 CTable_LitLength  [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL )];
+    U32 CTable_OffsetBits [FSE_CTABLE_SIZE_U32(OffFSELog,MaxOff)];
+    U32 CTable_MatchLength[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML )];
+    U32 LLtype, Offtype, MLtype;   /* compressed, raw or rle */
+    const BYTE* const op_lit_start = seqStorePtr->litStart;
+    const BYTE* const llTable = seqStorePtr->litLengthStart;
+    const BYTE* const llPtr = seqStorePtr->litLength;
+    const BYTE* const mlTable = seqStorePtr->matchLengthStart;
+    const U32*  const offsetTable = seqStorePtr->offsetStart;
+    BYTE* const offCodeTable = seqStorePtr->offCodeStart;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    const size_t nbSeq = llPtr - llTable;
+    const size_t minGain = ZSTD_minGain(srcSize);
+    const size_t maxCSize = srcSize - minGain;
+    BYTE* seqHead;
+
+
+    /* Compress literals */
+    {
+        size_t cSize;
+        size_t litSize = seqStorePtr->lit - op_lit_start;
+
+        if (litSize <= LITERAL_NOENTROPY)
+            cSize = ZSTD_noCompressLiterals(op, maxDstSize, op_lit_start, litSize);
+        else
+            cSize = ZSTD_compressLiterals(op, maxDstSize, op_lit_start, litSize);
+        if (ZSTD_isError(cSize)) return cSize;
+        op += cSize;
+    }
+
+    /* Sequences Header */
+    if ((oend-op) < MIN_SEQUENCES_SIZE)
+        return ERROR(dstSize_tooSmall);
+    MEM_writeLE16(op, (U16)nbSeq); op+=2;
+    seqHead = op;
+
+    /* dumps : contains too large lengths */
+    {
+        size_t dumpsLength = seqStorePtr->dumps - seqStorePtr->dumpsStart;
+        if (dumpsLength < 512)
+        {
+            op[0] = (BYTE)(dumpsLength >> 8);
+            op[1] = (BYTE)(dumpsLength);
+            op += 2;
+        }
+        else
+        {
+            op[0] = 2;
+            op[1] = (BYTE)(dumpsLength>>8);
+            op[2] = (BYTE)(dumpsLength);
+            op += 3;
+        }
+        if ((size_t)(oend-op) < dumpsLength+6) return ERROR(dstSize_tooSmall);
+        memcpy(op, seqStorePtr->dumpsStart, dumpsLength);
+        op += dumpsLength;
+    }
+
+    /* CTable for Literal Lengths */
+    max = MaxLL;
+    mostFrequent = FSE_countFast(count, &max, seqStorePtr->litLengthStart, nbSeq);
+    if ((mostFrequent == nbSeq) && (nbSeq > 2))
+    {
+        *op++ = *(seqStorePtr->litLengthStart);
+        FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
+        LLtype = bt_rle;
+    }
+    else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (LLbits-1))))
+    {
+        FSE_buildCTable_raw(CTable_LitLength, LLbits);
+        LLtype = bt_raw;
+    }
+    else
+    {
+        size_t NCountSize;
+        tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
+        FSE_normalizeCount(norm, tableLog, count, nbSeq, max);
+        NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
+        if (FSE_isError(NCountSize)) return ERROR(GENERIC);
+        op += NCountSize;
+        FSE_buildCTable(CTable_LitLength, norm, max, tableLog);
+        LLtype = bt_compressed;
+    }
+
+    /* CTable for Offsets codes */
+    {
+        /* create Offset codes */
+        size_t i;
+        max = MaxOff;
+        for (i=0; i<nbSeq; i++)
+        {
+            offCodeTable[i] = (BYTE)ZSTD_highbit(offsetTable[i]) + 1;
+            if (offsetTable[i]==0) offCodeTable[i]=0;
+        }
+        mostFrequent = FSE_countFast(count, &max, offCodeTable, nbSeq);
+    }
+    if ((mostFrequent == nbSeq) && (nbSeq > 2))
+    {
+        *op++ = *offCodeTable;
+        FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
+        Offtype = bt_rle;
+    }
+    else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (Offbits-1))))
+    {
+        FSE_buildCTable_raw(CTable_OffsetBits, Offbits);
+        Offtype = bt_raw;
+    }
+    else
+    {
+        size_t NCountSize;
+        tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
+        FSE_normalizeCount(norm, tableLog, count, nbSeq, max);
+        NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
+        if (FSE_isError(NCountSize)) return ERROR(GENERIC);
+        op += NCountSize;
+        FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog);
+        Offtype = bt_compressed;
+    }
+
+    /* CTable for MatchLengths */
+    max = MaxML;
+    mostFrequent = FSE_countFast(count, &max, seqStorePtr->matchLengthStart, nbSeq);
+    if ((mostFrequent == nbSeq) && (nbSeq > 2))
+    {
+        *op++ = *seqStorePtr->matchLengthStart;
+        FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
+        MLtype = bt_rle;
+    }
+    else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (MLbits-1))))
+    {
+        FSE_buildCTable_raw(CTable_MatchLength, MLbits);
+        MLtype = bt_raw;
+    }
+    else
+    {
+        size_t NCountSize;
+        tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
+        FSE_normalizeCount(norm, tableLog, count, nbSeq, max);
+        NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
+        if (FSE_isError(NCountSize)) return ERROR(GENERIC);
+        op += NCountSize;
+        FSE_buildCTable(CTable_MatchLength, norm, max, tableLog);
+        MLtype = bt_compressed;
+    }
+
+    seqHead[0] += (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
+
+    /* Encoding Sequences */
+    {
+        size_t streamSize, errorCode;
+        BIT_CStream_t blockStream;
+        FSE_CState_t stateMatchLength;
+        FSE_CState_t stateOffsetBits;
+        FSE_CState_t stateLitLength;
+        int i;
+
+        errorCode = BIT_initCStream(&blockStream, op, oend-op);
+        if (ERR_isError(errorCode)) return ERROR(dstSize_tooSmall);   /* not enough space remaining */
+        FSE_initCState(&stateMatchLength, CTable_MatchLength);
+        FSE_initCState(&stateOffsetBits, CTable_OffsetBits);
+        FSE_initCState(&stateLitLength, CTable_LitLength);
+
+        for (i=(int)nbSeq-1; i>=0; i--)
+        {
+            BYTE matchLength = mlTable[i];
+            U32  offset = offsetTable[i];
+            BYTE offCode = offCodeTable[i];                                 /* 32b*/  /* 64b*/
+            U32 nbBits = (offCode-1) * (!!offCode);
+            BYTE litLength = llTable[i];                                    /* (7)*/  /* (7)*/
+            FSE_encodeSymbol(&blockStream, &stateMatchLength, matchLength); /* 17 */  /* 17 */
+            if (MEM_32bits()) BIT_flushBits(&blockStream);                  /*  7 */
+            BIT_addBits(&blockStream, offset, nbBits);                      /* 32 */  /* 42 */
+            if (MEM_32bits()) BIT_flushBits(&blockStream);                  /*  7 */
+            FSE_encodeSymbol(&blockStream, &stateOffsetBits, offCode);      /* 16 */  /* 51 */
+            FSE_encodeSymbol(&blockStream, &stateLitLength, litLength);     /* 26 */  /* 61 */
+            BIT_flushBits(&blockStream);                                    /*  7 */  /*  7 */
+        }
+
+        FSE_flushCState(&blockStream, &stateMatchLength);
+        FSE_flushCState(&blockStream, &stateOffsetBits);
+        FSE_flushCState(&blockStream, &stateLitLength);
+
+        streamSize = BIT_closeCStream(&blockStream);
+        if (streamSize==0) return ERROR(dstSize_tooSmall);   /* not enough space */
+        op += streamSize;
+    }
+
+    /* check compressibility */
+    if ((size_t)(op-ostart) >= maxCSize) return 0;
+
+    return op - ostart;
+}
+
+
+/** ZSTD_storeSeq
+    Store a sequence (literal length, literals, offset code and match length) into seqStore_t
+    @offsetCode : distance to match, or 0 == repCode
+    @matchCode : matchLength - MINMATCH
+*/
+MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, size_t offsetCode, size_t matchCode)
+{
+#if 0
+    static const BYTE* g_start = NULL;
+    if (g_start==NULL) g_start = literals;
+    if (literals - g_start == 8695)
+    printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n",
+           (U32)(literals - g_start), (U32)litLength, (U32)matchCode+4, (U32)offsetCode);
+#endif
+
+    /* copy Literals */
+    ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
+    seqStorePtr->lit += litLength;
+
+    /* literal Length */
+    if (litLength >= MaxLL)
+    {
+        *(seqStorePtr->litLength++) = MaxLL;
+        if (litLength<255 + MaxLL)
+            *(seqStorePtr->dumps++) = (BYTE)(litLength - MaxLL);
+        else
+        {
+            *(seqStorePtr->dumps++) = 255;
+            MEM_writeLE32(seqStorePtr->dumps, (U32)litLength); seqStorePtr->dumps += 3;
+        }
+    }
+    else *(seqStorePtr->litLength++) = (BYTE)litLength;
+
+    /* match offset */
+    *(seqStorePtr->offset++) = (U32)offsetCode;
+
+    /* match Length */
+    if (matchCode >= MaxML)
+    {
+        *(seqStorePtr->matchLength++) = MaxML;
+        if (matchCode < 255+MaxML)
+            *(seqStorePtr->dumps++) = (BYTE)(matchCode - MaxML);
+        else
+        {
+            *(seqStorePtr->dumps++) = 255;
+            MEM_writeLE32(seqStorePtr->dumps, (U32)matchCode); seqStorePtr->dumps += 3;
+        }
+    }
+    else *(seqStorePtr->matchLength++) = (BYTE)matchCode;
+}
+
+
+/* *************************************
+*  Match length counter
+***************************************/
+static size_t ZSTD_read_ARCH(const void* p) { size_t r; memcpy(&r, p, sizeof(r)); return r; }
+
+static unsigned ZSTD_highbit(U32 val)
+{
+#   if defined(_MSC_VER)   /* Visual */
+    unsigned long r=0;
+    _BitScanReverse(&r, val);
+    return (unsigned)r;
+#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* GCC Intrinsic */
+    return 31 - __builtin_clz(val);
+#   else   /* Software version */
+    static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+    U32 v = val;
+    int r;
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27];
+    return r;
+#   endif
+}
+
+static unsigned ZSTD_NbCommonBytes (register size_t val)
+{
+    if (MEM_isLittleEndian())
+    {
+        if (MEM_64bits())
+        {
+#       if defined(_MSC_VER) && defined(_WIN64)
+            unsigned long r = 0;
+            _BitScanForward64( &r, (U64)val );
+            return (int)(r>>3);
+#       elif defined(__GNUC__) && (__GNUC__ >= 3)
+            return (__builtin_ctzll((U64)val) >> 3);
+#       else
+            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
+            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
+#       endif
+        }
+        else /* 32 bits */
+        {
+#       if defined(_MSC_VER)
+            unsigned long r=0;
+            _BitScanForward( &r, (U32)val );
+            return (int)(r>>3);
+#       elif defined(__GNUC__) && (__GNUC__ >= 3)
+            return (__builtin_ctz((U32)val) >> 3);
+#       else
+            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
+            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+#       endif
+        }
+    }
+    else   /* Big Endian CPU */
+    {
+        if (MEM_32bits())
+        {
+#       if defined(_MSC_VER) && defined(_WIN64)
+            unsigned long r = 0;
+            _BitScanReverse64( &r, val );
+            return (unsigned)(r>>3);
+#       elif defined(__GNUC__) && (__GNUC__ >= 3)
+            return (__builtin_clzll(val) >> 3);
+#       else
+            unsigned r;
+            const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bits mode */
+            if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
+            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+            r += (!val);
+            return r;
+#       endif
+        }
+        else /* 32 bits */
+        {
+#       if defined(_MSC_VER)
+            unsigned long r = 0;
+            _BitScanReverse( &r, (unsigned long)val );
+            return (unsigned)(r>>3);
+#       elif defined(__GNUC__) && (__GNUC__ >= 3)
+            return (__builtin_clz((U32)val) >> 3);
+#       else
+            unsigned r;
+            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
+            r += (!val);
+            return r;
+#       endif
+        }
+    }
+}
+
+
+static size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
+{
+    const BYTE* const pStart = pIn;
+
+    while ((pIn<pInLimit-(sizeof(size_t)-1)))
+    {
+        size_t diff = ZSTD_read_ARCH(pMatch) ^ ZSTD_read_ARCH(pIn);
+        if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
+        pIn += ZSTD_NbCommonBytes(diff);
+        return (size_t)(pIn - pStart);
+    }
+
+    if (MEM_64bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
+    if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
+    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
+    return (size_t)(pIn - pStart);
+}
+
+/** ZSTD_count_2segments
+*   can count match length with ip & match in potentially 2 different segments.
+*   convention : on reaching mEnd, match count continue starting from iStart
+*/
+static size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
+{
+    size_t matchLength;
+    const BYTE* vEnd = ip + (mEnd - match);
+    if (vEnd > iEnd) vEnd = iEnd;
+    matchLength = ZSTD_count(ip, match, vEnd);
+    if (match + matchLength == mEnd)
+        matchLength += ZSTD_count(ip+matchLength, iStart, iEnd);
+    return matchLength;
+}
+
+
+
+/* *************************************
+*  Hashes
+***************************************/
+
+static const U32 prime4bytes = 2654435761U;
+static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
+static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }
+
+static const U64 prime5bytes = 889523592379ULL;
+static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)((u * prime5bytes) << (64-40) >> (64-h)) ; }
+static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_read64(p), h); }
+
+static const U64 prime6bytes = 227718039650203ULL;
+static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)((u * prime6bytes) << (64-48) >> (64-h)) ; }
+static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_read64(p), h); }
+
+static const U64 prime7bytes = 58295818150454627ULL;
+static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)((u * prime7bytes) << (64-56) >> (64-h)) ; }
+static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_read64(p), h); }
+
+static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
+{
+    switch(mls)
+    {
+    default:
+    case 4: return ZSTD_hash4Ptr(p, hBits);
+    case 5: return ZSTD_hash5Ptr(p, hBits);
+    case 6: return ZSTD_hash6Ptr(p, hBits);
+    case 7: return ZSTD_hash7Ptr(p, hBits);
+    }
+}
+
+/* *************************************
+*  Fast Scan
+***************************************/
+
+FORCE_INLINE
+size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc,
+                                       void* dst, size_t maxDstSize,
+                                 const void* src, size_t srcSize,
+                                 const U32 mls)
+{
+    U32* hashTable = zc->hashTable;
+    const U32 hBits = zc->params.hashLog;
+    seqStore_t* seqStorePtr = &(zc->seqStore);
+    const BYTE* const base = zc->base;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const BYTE* const lowest = base + zc->dictLimit;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+
+    size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE;
+
+
+    /* init */
+    ZSTD_resetSeqStore(seqStorePtr);
+    if (ip < base+4)
+    {
+        hashTable[ZSTD_hashPtr(base+1, hBits, mls)] = 1;
+        hashTable[ZSTD_hashPtr(base+2, hBits, mls)] = 2;
+        hashTable[ZSTD_hashPtr(base+3, hBits, mls)] = 3;
+        ip = base+4;
+    }
+
+    /* Main Search Loop */
+    while (ip < ilimit)  /* < instead of <=, because repcode check at (ip+1) */
+    {
+        size_t mlCode;
+        size_t offset;
+        const size_t h = ZSTD_hashPtr(ip, hBits, mls);
+        const BYTE* match = base + hashTable[h];
+        hashTable[h] = (U32)(ip-base);
+
+        if (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))   /* note : by construction, offset_1 <= (ip-base) */
+        {
+            mlCode = ZSTD_count(ip+1+MINMATCH, ip+1+MINMATCH-offset_1, iend);
+            ip++;
+            offset = 0;
+        }
+        else
+        {
+            if ( (match <= lowest) ||
+                 (MEM_read32(match) != MEM_read32(ip)) )
+            {
+                ip += ((ip-anchor) >> g_searchStrength) + 1;
+                continue;
+            }
+            mlCode = ZSTD_count(ip+MINMATCH, match+MINMATCH, iend);
+            offset = ip-match;
+            while ((ip>anchor) && (match>lowest) && (ip[-1] == match[-1])) { ip--; match--; mlCode++; }  /* catch up */
+            offset_2 = offset_1;
+            offset_1 = offset;
+        }
+
+        /* match found */
+        ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset, mlCode);
+        ip += mlCode + MINMATCH;
+        anchor = ip;
+
+        if (ip <= ilimit)
+        {
+            /* Fill Table */
+            hashTable[ZSTD_hashPtr(ip-(mlCode+MINMATCH)+2, hBits, mls)] = (U32)(ip-(mlCode+MINMATCH)+2-base);  /* here because ip-(mlCode+MINMATCH)+2 could be > iend-8 without ip <= ilimit check*/
+            hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
+            /* check immediate repcode */
+            while ( (ip <= ilimit)
+                 && (MEM_read32(ip) == MEM_read32(ip - offset_2)) )
+            {
+                /* store sequence */
+                size_t rlCode = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend);
+                size_t tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;   /* swap offset_2 <=> offset_1 */
+                hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base);
+                ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rlCode);
+                ip += rlCode+MINMATCH;
+                anchor = ip;
+                continue;   /* faster when present ... (?) */
+            }
+        }
+    }
+
+    /* Last Literals */
+    {
+        size_t lastLLSize = iend - anchor;
+        memcpy(seqStorePtr->lit, anchor, lastLLSize);
+        seqStorePtr->lit += lastLLSize;
+    }
+
+    /* Finale compression stage */
+    return ZSTD_compressSequences(dst, maxDstSize,
+                                  seqStorePtr, srcSize);
+}
+
+
+size_t ZSTD_compressBlock_fast(ZSTD_CCtx* ctx,
+                               void* dst, size_t maxDstSize,
+                         const void* src, size_t srcSize)
+{
+    const U32 mls = ctx->params.searchLength;
+    switch(mls)
+    {
+    default:
+    case 4 :
+        return ZSTD_compressBlock_fast_generic(ctx, dst, maxDstSize, src, srcSize, 4);
+    case 5 :
+        return ZSTD_compressBlock_fast_generic(ctx, dst, maxDstSize, src, srcSize, 5);
+    case 6 :
+        return ZSTD_compressBlock_fast_generic(ctx, dst, maxDstSize, src, srcSize, 6);
+    case 7 :
+        return ZSTD_compressBlock_fast_generic(ctx, dst, maxDstSize, src, srcSize, 7);
+    }
+}
+
+
+//FORCE_INLINE
+size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
+                                          void* dst, size_t maxDstSize,
+                                    const void* src, size_t srcSize,
+                                    const U32 mls)
+{
+    U32* hashTable = ctx->hashTable;
+    const U32 hBits = ctx->params.hashLog;
+    seqStore_t* seqStorePtr = &(ctx->seqStore);
+    const BYTE* const base = ctx->base;
+    const BYTE* const dictBase = ctx->dictBase;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const U32   lowLimit = ctx->lowLimit;
+    const BYTE* const dictStart = dictBase + lowLimit;
+    const U32   dictLimit = ctx->dictLimit;
+    const BYTE* const lowPrefixPtr = base + dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+
+    U32 offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE;
+
+
+    /* init */
+    ZSTD_resetSeqStore(seqStorePtr);
+    {
+        /* skip first 4 positions to avoid read overflow during repcode match check */
+        hashTable[ZSTD_hashPtr(ip+0, hBits, mls)] = (U32)(ip-base+0);
+        hashTable[ZSTD_hashPtr(ip+1, hBits, mls)] = (U32)(ip-base+1);
+        hashTable[ZSTD_hashPtr(ip+2, hBits, mls)] = (U32)(ip-base+2);
+        hashTable[ZSTD_hashPtr(ip+3, hBits, mls)] = (U32)(ip-base+3);
+        ip += 4;
+    }
+
+    /* Main Search Loop */
+    while (ip < ilimit)  /* < instead of <=, because (ip+1) */
+    {
+        const size_t h = ZSTD_hashPtr(ip, hBits, mls);
+        const U32 matchIndex = hashTable[h];
+        const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
+        const BYTE* match = matchBase + matchIndex;
+        const U32 current = (U32)(ip-base);
+        const U32 repIndex = current + 1 - offset_1;
+        const BYTE* repBase = repIndex < dictLimit ? dictBase : base;
+        const BYTE* repMatch = repBase + repIndex;
+        size_t mlCode;
+        U32 offset;
+        hashTable[h] = current;   /* update hash table */
+
+        if ( ((repIndex <= dictLimit-4) || (repIndex >= dictLimit))
+          && (MEM_read32(repMatch) == MEM_read32(ip+1)) )
+        {
+            const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
+            mlCode = ZSTD_count_2segments(ip+1+MINMATCH, repMatch+MINMATCH, iend, repMatchEnd, lowPrefixPtr);
+            ip++;
+            offset = 0;
+        }
+        else
+        {
+            if ( (matchIndex < lowLimit) ||
+                 (MEM_read32(match) != MEM_read32(ip)) )
+            { ip += ((ip-anchor) >> g_searchStrength) + 1; continue; }
+            {
+                const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
+                const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
+                mlCode = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iend, matchEnd, lowPrefixPtr);
+                while ((ip>anchor) && (match>lowMatchPtr) && (ip[-1] == match[-1])) { ip--; match--; mlCode++; }  /* catch up */
+                offset = current - matchIndex;
+                offset_2 = offset_1;
+                offset_1 = offset;
+            }
+        }
+
+        /* found a match : store it */
+        ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset, mlCode);
+        ip += mlCode + MINMATCH;
+        anchor = ip;
+
+        if (ip <= ilimit)
+        {
+            /* Fill Table */
+			hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2;
+            hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
+            /* check immediate repcode */
+            while (ip <= ilimit)
+            {
+                U32 current2 = (U32)(ip-base);
+                const U32 repIndex2 = current2 - offset_2;
+                const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
+                if ( ((repIndex2 <= dictLimit-4) || (repIndex2 >= dictLimit))
+                  && (MEM_read32(repMatch2) == MEM_read32(ip)) )
+                {
+                    const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
+                    size_t repLength2 = ZSTD_count_2segments(ip+MINMATCH, repMatch2+MINMATCH, iend, repEnd2, lowPrefixPtr);
+                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
+                    ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2);
+                    hashTable[ZSTD_hashPtr(ip, hBits, mls)] = current2;
+                    ip += repLength2+MINMATCH;
+                    anchor = ip;
+                    continue;
+                }
+                break;
+            }
+        }
+    }
+
+    /* Last Literals */
+    {
+        size_t lastLLSize = iend - anchor;
+        memcpy(seqStorePtr->lit, anchor, lastLLSize);
+        seqStorePtr->lit += lastLLSize;
+    }
+
+    /* Finale compression stage */
+    return ZSTD_compressSequences(dst, maxDstSize,
+                                  seqStorePtr, srcSize);
+}
+
+
+size_t ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
+                               void* dst, size_t maxDstSize,
+                         const void* src, size_t srcSize)
+{
+    const U32 mls = ctx->params.searchLength;
+    switch(mls)
+    {
+    default:
+    case 4 :
+        return ZSTD_compressBlock_fast_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 4);
+    case 5 :
+        return ZSTD_compressBlock_fast_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 5);
+    case 6 :
+        return ZSTD_compressBlock_fast_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 6);
+    case 7 :
+        return ZSTD_compressBlock_fast_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 7);
+    }
+}
+
+
+/* *************************************
+*  Binary Tree search
+***************************************/
+/** ZSTD_insertBt1 : add one or multiple positions to tree
+*   @ip : assumed <= iend-8
+*   @return : nb of positions added */
+static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares)
+{
+    U32* const hashTable = zc->hashTable;
+    const U32 hashLog = zc->params.hashLog;
+    const size_t h  = ZSTD_hashPtr(ip, hashLog, mls);
+    U32* const bt   = zc->contentTable;
+    const U32 btLog = zc->params.contentLog - 1;
+    const U32 btMask= (1 << btLog) - 1;
+    U32 matchIndex  = hashTable[h];
+    size_t commonLengthSmaller=0, commonLengthLarger=0;
+    const BYTE* const base = zc->base;
+    const BYTE* match = base + matchIndex;
+    U32 current = (U32)(ip-base);
+    const U32 btLow = btMask >= current ? 0 : current - btMask;
+    U32* smallerPtr = bt + 2*(current&btMask);
+    U32* largerPtr  = bt + 2*(current&btMask) + 1;
+    U32 dummy32;   /* to be nullified at the end */
+    const U32 windowLow = zc->lowLimit;
+
+    if ( (current-matchIndex == 1)   /* RLE */
+        && (MEM_read64(match) == MEM_read64(ip)) )
+    {
+        size_t rleLength = ZSTD_count(ip+8, match+8, iend) + 8;
+        return (U32)(rleLength - mls);
+    }
+
+    hashTable[h] = (U32)(ip - base);   /* Update Hash Table */
+
+    while (nbCompares-- && (matchIndex > windowLow))
+    {
+        U32* nextPtr = bt + 2*(matchIndex & btMask);
+        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+
+        match = base + matchIndex;
+        if (match[matchLength] == ip[matchLength])
+            matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
+
+        if (ip+matchLength == iend)   /* equal : no way to know if inf or sup */
+            break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */
+
+        if (match[matchLength] < ip[matchLength])
+        {
+            /* match is smaller than current */
+            *smallerPtr = matchIndex;             /* update smaller idx */
+            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
+            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
+        }
+        else
+        {
+            /* match is larger than current */
+            *largerPtr = matchIndex;
+            commonLengthLarger = matchLength;
+            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+            largerPtr = nextPtr;
+            matchIndex = nextPtr[0];
+        }
+    }
+
+    *smallerPtr = *largerPtr = 0;
+    return 1;
+}
+
+
+FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
+size_t ZSTD_insertBtAndFindBestMatch (
+                        ZSTD_CCtx* zc,
+                        const BYTE* const ip, const BYTE* const iend,
+                        size_t* offsetPtr,
+                        U32 nbCompares, const U32 mls)
+{
+    U32* const hashTable = zc->hashTable;
+    const U32 hashLog = zc->params.hashLog;
+    const size_t h  = ZSTD_hashPtr(ip, hashLog, mls);
+    U32* const bt   = zc->contentTable;
+    const U32 btLog = zc->params.contentLog - 1;
+    const U32 btMask= (1 << btLog) - 1;
+    U32 matchIndex  = hashTable[h];
+    size_t commonLengthSmaller=0, commonLengthLarger=0;
+    const BYTE* const base = zc->base;
+    const U32 current = (U32)(ip-base);
+    const U32 btLow = btMask >= current ? 0 : current - btMask;
+    const U32 windowLow = zc->lowLimit;
+    U32* smallerPtr = bt + 2*(current&btMask);
+    U32* largerPtr  = bt + 2*(current&btMask) + 1;
+    size_t bestLength = 0;
+    U32 dummy32;   /* to be nullified at the end */
+
+    hashTable[h] = (U32)(ip-base);   /* Update Hash Table */
+
+    while (nbCompares-- && (matchIndex > windowLow))
+    {
+        U32* nextPtr = bt + 2*(matchIndex & btMask);
+        const BYTE* match = base + matchIndex;
+        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+
+        if (match[matchLength] == ip[matchLength])
+            matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
+
+        if (matchLength > bestLength)
+        {
+            if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit(current-matchIndex+1) - ZSTD_highbit((U32)offsetPtr[0]+1)) )
+                bestLength = matchLength, *offsetPtr = current - matchIndex;
+            if (ip+matchLength == iend)   /* equal : no way to know if inf or sup */
+                break;   /* drop, to guarantee consistency (miss a little bit of compression) */
+        }
+
+        if (match[matchLength] < ip[matchLength])
+        {
+            /* match is smaller than current */
+            *smallerPtr = matchIndex;             /* update smaller idx */
+            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
+            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
+        }
+        else
+        {
+            /* match is larger than current */
+            *largerPtr = matchIndex;
+            commonLengthLarger = matchLength;
+            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+            largerPtr = nextPtr;
+            matchIndex = nextPtr[0];
+        }
+    }
+
+    *smallerPtr = *largerPtr = 0;
+
+    zc->nextToUpdate = current+1;   /* current has been inserted */
+    return bestLength;
+}
+
+
+static const BYTE* ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
+{
+    const BYTE* const base = zc->base;
+    const U32 target = (U32)(ip - base);
+    U32 idx = zc->nextToUpdate;
+
+    for( ; idx < target ; )
+        idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares);
+
+    zc->nextToUpdate = idx;
+    return base + idx;
+}
+
+
+/** Tree updater, providing best match */
+FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
+size_t ZSTD_BtFindBestMatch (
+                        ZSTD_CCtx* zc,
+                        const BYTE* const ip, const BYTE* const iLimit,
+                        size_t* offsetPtr,
+                        const U32 maxNbAttempts, const U32 mls)
+{
+    const BYTE* nextToUpdate = ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
+    if (nextToUpdate > ip) /* RLE data */
+        { *offsetPtr = 1; return ZSTD_count(ip, ip-1, iLimit); }
+    return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls);
+}
+
+
+FORCE_INLINE size_t ZSTD_BtFindBestMatch_selectMLS (
+                        ZSTD_CCtx* zc,   /* Index table will be updated */
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr,
+                        const U32 maxNbAttempts, const U32 matchLengthSearch)
+{
+    switch(matchLengthSearch)
+    {
+    default :
+    case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
+    case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
+    case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
+    }
+}
+
+
+/** ZSTD_insertBt1_extDict : add one or multiple positions to tree
+*   @ip : assumed <= iend-8
+*   @return : nb of positions added */
+static U32 ZSTD_insertBt1_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares)
+{
+    U32* const hashTable = zc->hashTable;
+    const U32 hashLog = zc->params.hashLog;
+    const size_t h  = ZSTD_hashPtr(ip, hashLog, mls);
+    U32* const bt   = zc->contentTable;
+    const U32 btLog = zc->params.contentLog - 1;
+    const U32 btMask= (1 << btLog) - 1;
+    U32 matchIndex  = hashTable[h];
+    size_t commonLengthSmaller=0, commonLengthLarger=0;
+    const BYTE* const base = zc->base;
+    const BYTE* const dictBase = zc->dictBase;
+    const U32 dictLimit = zc->dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    const BYTE* match = base + matchIndex;
+    U32 current = (U32)(ip-base);
+    const U32 btLow = btMask >= current ? 0 : current - btMask;
+    U32* smallerPtr = bt + 2*(current&btMask);
+    U32* largerPtr  = bt + 2*(current&btMask) + 1;
+    U32 dummy32;   /* to be nullified at the end */
+    const U32 windowLow = zc->lowLimit;
+
+    if ( (current-matchIndex == 1)   /* RLE */
+        && (MEM_read64(match) == MEM_read64(ip)) )
+    {
+        size_t rleLength = ZSTD_count(ip+8, match+8, iend) + 8;
+        return (U32)(rleLength - mls);
+    }
+
+    hashTable[h] = (U32)(ip - base);   /* Update Hash Table */
+
+    while (nbCompares-- && (matchIndex > windowLow))
+    {
+        U32* nextPtr = bt + 2*(matchIndex & btMask);
+        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+
+        if (matchIndex+matchLength >= dictLimit)
+        {
+            match = base + matchIndex;
+            if (match[matchLength] == ip[matchLength])
+                matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
+        }
+        else
+        {
+            match = dictBase + matchIndex;
+            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
+            if (matchIndex+matchLength >= dictLimit)
+				match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
+        }
+
+        if (ip+matchLength == iend)   /* equal : no way to know if inf or sup */
+            break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */
+
+        if (match[matchLength] < ip[matchLength])   /* necessarily within correct buffer */
+        {
+            /* match is smaller than current */
+            *smallerPtr = matchIndex;             /* update smaller idx */
+            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
+            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
+        }
+        else
+        {
+            /* match is larger than current */
+            *largerPtr = matchIndex;
+            commonLengthLarger = matchLength;
+            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+            largerPtr = nextPtr;
+            matchIndex = nextPtr[0];
+        }
+    }
+
+    *smallerPtr = *largerPtr = 0;
+    return 1;
+}
+
+
+static const BYTE* ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
+{
+    const BYTE* const base = zc->base;
+    const U32 target = (U32)(ip - base);
+    U32 idx = zc->nextToUpdate;
+
+    for( ; idx < target ; )
+        idx += ZSTD_insertBt1_extDict(zc, base+idx, mls, iend, nbCompares);
+
+    zc->nextToUpdate = idx;
+    return base + idx;
+}
+
+
+FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
+size_t ZSTD_insertBtAndFindBestMatch_extDict (
+                        ZSTD_CCtx* zc,
+                        const BYTE* const ip, const BYTE* const iend,
+                        size_t* offsetPtr,
+                        U32 nbCompares, const U32 mls)
+{
+    U32* const hashTable = zc->hashTable;
+    const U32 hashLog = zc->params.hashLog;
+    const size_t h  = ZSTD_hashPtr(ip, hashLog, mls);
+    U32* const bt   = zc->contentTable;
+    const U32 btLog = zc->params.contentLog - 1;
+    const U32 btMask= (1 << btLog) - 1;
+    U32 matchIndex  = hashTable[h];
+    size_t commonLengthSmaller=0, commonLengthLarger=0;
+    const BYTE* const base = zc->base;
+    const BYTE* const dictBase = zc->dictBase;
+    const U32 dictLimit = zc->dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    const U32 current = (U32)(ip-base);
+    const U32 btLow = btMask >= current ? 0 : current - btMask;
+    const U32 windowLow = zc->lowLimit;
+    U32* smallerPtr = bt + 2*(current&btMask);
+    U32* largerPtr  = bt + 2*(current&btMask) + 1;
+    size_t bestLength = 0;
+    U32 dummy32;   /* to be nullified at the end */
+
+    hashTable[h] = (U32)(ip-base);   /* Update Hash Table */
+
+    while (nbCompares-- && (matchIndex > windowLow))
+    {
+        U32* nextPtr = bt + 2*(matchIndex & btMask);
+        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+        const BYTE* match;
+
+        if (matchIndex+matchLength >= dictLimit)
+        {
+            match = base + matchIndex;
+            if (match[matchLength] == ip[matchLength])
+                matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
+        }
+        else
+        {
+            match = dictBase + matchIndex;
+            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
+            if (matchIndex+matchLength >= dictLimit)
+				match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
+        }
+
+        if (matchLength > bestLength)
+        {
+            if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit(current-matchIndex+1) - ZSTD_highbit((U32)offsetPtr[0]+1)) )
+                bestLength = matchLength, *offsetPtr = current - matchIndex;
+            if (ip+matchLength == iend)   /* equal : no way to know if inf or sup */
+                break;   /* drop, to guarantee consistency (miss a little bit of compression) */
+        }
+
+        if (match[matchLength] < ip[matchLength])
+        {
+            /* match is smaller than current */
+            *smallerPtr = matchIndex;             /* update smaller idx */
+            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
+            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
+        }
+        else
+        {
+            /* match is larger than current */
+            *largerPtr = matchIndex;
+            commonLengthLarger = matchLength;
+            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+            largerPtr = nextPtr;
+            matchIndex = nextPtr[0];
+        }
+    }
+
+    *smallerPtr = *largerPtr = 0;
+
+    zc->nextToUpdate = current+1;   /* current has been inserted */
+    return bestLength;
+}
+
+/** Tree updater, providing best match */
+FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
+size_t ZSTD_BtFindBestMatch_extDict (
+                        ZSTD_CCtx* zc,
+                        const BYTE* const ip, const BYTE* const iLimit,
+                        size_t* offsetPtr,
+                        const U32 maxNbAttempts, const U32 mls)
+{
+    const BYTE* nextToUpdate = ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
+    if (nextToUpdate > ip) /* RLE data */
+        { *offsetPtr = 1; return ZSTD_count(ip, ip-1, iLimit); }
+    return ZSTD_insertBtAndFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls);
+}
+
+
+FORCE_INLINE size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
+                        ZSTD_CCtx* zc,   /* Index table will be updated */
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr,
+                        const U32 maxNbAttempts, const U32 matchLengthSearch)
+{
+    switch(matchLengthSearch)
+    {
+    default :
+    case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
+    case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
+    case 6 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
+    }
+}
+
+
+/* ***********************
+*  Hash Chain
+*************************/
+
+#define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & mask]
+
+/* Update chains up to ip (excluded)
+   Assumption : always within prefix (ie. not within extDict) */
+static U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
+{
+    U32* const hashTable  = zc->hashTable;
+    const U32 hashLog = zc->params.hashLog;
+    U32* const chainTable = zc->contentTable;
+    const U32 chainMask = (1 << zc->params.contentLog) - 1;
+    const BYTE* const base = zc->base;
+    const U32 target = (U32)(ip - base);
+    U32 idx = zc->nextToUpdate;
+
+    while(idx < target)
+    {
+        size_t h = ZSTD_hashPtr(base+idx, hashLog, mls);
+        NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
+        hashTable[h] = idx;
+        idx++;
+    }
+
+    zc->nextToUpdate = target;
+    return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
+}
+
+
+FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
+size_t ZSTD_HcFindBestMatch_generic (
+                        ZSTD_CCtx* zc,   /* Index table will be updated */
+                        const BYTE* const ip, const BYTE* const iLimit,
+                        size_t* offsetPtr,
+                        const U32 maxNbAttempts, const U32 mls, const U32 extDict)
+{
+    U32* const chainTable = zc->contentTable;
+    const U32 chainSize = (1 << zc->params.contentLog);
+    const U32 chainMask = chainSize-1;
+    const BYTE* const base = zc->base;
+    const BYTE* const dictBase = zc->dictBase;
+    const U32 dictLimit = zc->dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const U32 lowLimit = zc->lowLimit;
+    const U32 current = (U32)(ip-base);
+    const U32 minChain = current > chainSize ? current - chainSize : 0;
+    U32 matchIndex;
+    const BYTE* match;
+    int nbAttempts=maxNbAttempts;
+    size_t ml=MINMATCH-1;
+
+    /* HC4 match finder */
+    matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls);
+
+    while ((matchIndex>lowLimit) && (nbAttempts))
+    {
+        size_t currentMl=0;
+        nbAttempts--;
+        if ((!extDict) || matchIndex >= dictLimit)
+        {
+            match = base + matchIndex;
+            if (match[ml] == ip[ml])   /* potentially better */
+                currentMl = ZSTD_count(ip, match, iLimit);
+        }
+        else
+        {
+            match = dictBase + matchIndex;
+            if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+                currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH;
+        }
+
+        /* save best solution */
+        if (currentMl > ml) { ml = currentMl; *offsetPtr = current - matchIndex; if (ip+currentMl == iLimit) break; /* best possible, and avoid read overflow*/ }
+
+        if (matchIndex <= minChain) break;
+        matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
+    }
+
+    return ml;
+}
+
+
+FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS (
+                        ZSTD_CCtx* zc,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr,
+                        const U32 maxNbAttempts, const U32 matchLengthSearch)
+{
+    switch(matchLengthSearch)
+    {
+    default :
+    case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0);
+    case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0);
+    case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0);
+    }
+}
+
+
+FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
+                        ZSTD_CCtx* zc,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr,
+                        const U32 maxNbAttempts, const U32 matchLengthSearch)
+{
+    switch(matchLengthSearch)
+    {
+    default :
+    case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1);
+    case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1);
+    case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1);
+    }
+}
+
+
+/* *******************************
+*  Common parser - lazy strategy
+*********************************/
+FORCE_INLINE
+size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
+                                     void* dst, size_t maxDstSize, const void* src, size_t srcSize,
+                                     const U32 searchMethod, const U32 depth)
+{
+    seqStore_t* seqStorePtr = &(ctx->seqStore);
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+
+    size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE;
+    const U32 maxSearches = 1 << ctx->params.searchLog;
+    const U32 mls = ctx->params.searchLength;
+
+    typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
+                        size_t* offsetPtr,
+                        U32 maxNbAttempts, U32 matchLengthSearch);
+    searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
+
+    /* init */
+    ZSTD_resetSeqStore(seqStorePtr);
+    if (((ip-ctx->base) - ctx->dictLimit) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE;
+
+    /* Match Loop */
+    while (ip < ilimit)
+    {
+        size_t matchLength=0;
+        size_t offset=0;
+        const BYTE* start=ip+1;
+
+        /* check repCode */
+        if (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))
+        {
+            /* repcode : we take it */
+            matchLength = ZSTD_count(ip+1+MINMATCH, ip+1+MINMATCH-offset_1, iend) + MINMATCH;
+            if (depth==0) goto _storeSequence;
+        }
+
+        {
+            /* first search (depth 0) */
+            size_t offsetFound = 99999999;
+            size_t ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
+            if (ml2 > matchLength)
+                matchLength = ml2, start = ip, offset=offsetFound;
+        }
+
+         if (matchLength < MINMATCH)
+        {
+            ip += ((ip-anchor) >> g_searchStrength) + 1;   /* jump faster over incompressible sections */
+            continue;
+        }
+
+        /* let's try to find a better solution */
+        if (depth>=1)
+        while (ip<ilimit)
+        {
+            ip ++;
+            if ((offset) && (MEM_read32(ip) == MEM_read32(ip - offset_1)))
+            {
+                size_t mlRep = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_1, iend) + MINMATCH;
+                int gain2 = (int)(mlRep * 3);
+                int gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1);
+                if ((mlRep >= MINMATCH) && (gain2 > gain1))
+                    matchLength = mlRep, offset = 0, start = ip;
+            }
+            {
+                size_t offset2=999999;
+                size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
+                int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1));   /* raw approx */
+                int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4);
+                if ((ml2 >= MINMATCH) && (gain2 > gain1))
+                {
+                    matchLength = ml2, offset = offset2, start = ip;
+                    continue;   /* search a better one */
+                }
+            }
+
+            /* let's find an even better one */
+            if ((depth==2) && (ip<ilimit))
+            {
+                ip ++;
+                if ((offset) && (MEM_read32(ip) == MEM_read32(ip - offset_1)))
+                {
+                    size_t ml2 = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_1, iend) + MINMATCH;
+                    int gain2 = (int)(ml2 * 4);
+                    int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1);
+                    if ((ml2 >= MINMATCH) && (gain2 > gain1))
+                        matchLength = ml2, offset = 0, start = ip;
+                }
+                {
+                    size_t offset2=999999;
+                    size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
+                    int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1));   /* raw approx */
+                    int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7);
+                    if ((ml2 >= MINMATCH) && (gain2 > gain1))
+                    {
+                        matchLength = ml2, offset = offset2, start = ip;
+                        continue;
+                    }
+                }
+            }
+            break;  /* nothing found : store previous solution */
+        }
+
+        /* catch up */
+        if (offset)
+        {
+            while ((start>anchor) && (start>ctx->base+offset) && (start[-1] == start[-1-offset]))   /* only search for offset within prefix */
+                { start--; matchLength++; }
+            offset_2 = offset_1; offset_1 = offset;
+        }
+
+        /* store sequence */
+_storeSequence:
+        {
+            size_t litLength = start - anchor;
+            ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH);
+            anchor = ip = start + matchLength;
+        }
+
+        /* check immediate repcode */
+        while ( (ip <= ilimit)
+             && (MEM_read32(ip) == MEM_read32(ip - offset_2)) )
+        {
+            /* store sequence */
+            matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend);
+            offset = offset_2;
+            offset_2 = offset_1;
+            offset_1 = offset;
+            ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength);
+            ip += matchLength+MINMATCH;
+            anchor = ip;
+            continue;   /* faster when present ... (?) */
+        }
+    }
+
+    /* Last Literals */
+    {
+        size_t lastLLSize = iend - anchor;
+        memcpy(seqStorePtr->lit, anchor, lastLLSize);
+        seqStorePtr->lit += lastLLSize;
+    }
+
+    /* Final compression stage */
+    return ZSTD_compressSequences(dst, maxDstSize,
+                                  seqStorePtr, srcSize);
+}
+
+size_t ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 1, 2);
+}
+
+size_t ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 0, 2);
+}
+
+size_t ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 0, 1);
+}
+
+size_t ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 0, 0);
+}
+
+
+FORCE_INLINE
+size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
+                                     void* dst, size_t maxDstSize, const void* src, size_t srcSize,
+                                     const U32 searchMethod, const U32 depth)
+{
+    seqStore_t* seqStorePtr = &(ctx->seqStore);
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+    const BYTE* const base = ctx->base;
+    const U32 dictLimit = ctx->dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    const BYTE* const dictBase = ctx->dictBase;
+    const BYTE* const dictEnd  = dictBase + dictLimit;
+    const BYTE* const dictStart  = dictBase + ctx->lowLimit;
+
+    size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE;
+    const U32 maxSearches = 1 << ctx->params.searchLog;
+    const U32 mls = ctx->params.searchLength;
+
+    typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
+                        size_t* offsetPtr,
+                        U32 maxNbAttempts, U32 matchLengthSearch);
+    searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;
+
+    /* init */
+    ZSTD_resetSeqStore(seqStorePtr);
+    if (((ip-base) - dictLimit) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE;
+
+    /* Match Loop */
+    while (ip < ilimit)
+    {
+        size_t matchLength=0;
+        size_t offset=0;
+        const BYTE* start=ip+1;
+        U32 current = (U32)(ip-base);
+
+        /* check repCode */
+        {
+            const U32 repIndex = (U32)(current+1 - offset_1);
+            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+            const BYTE* const repMatch = repBase + repIndex;
+            if ((U32)((dictLimit-1) - repIndex) >= 3)   /* intentional overflow */
+            if (MEM_read32(ip+1) == MEM_read32(repMatch))
+            {
+                /* repcode detected we should take it */
+                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                matchLength = ZSTD_count_2segments(ip+1+MINMATCH, repMatch+MINMATCH, iend, repEnd, prefixStart) + MINMATCH;
+                if (depth==0) goto _storeSequence;
+            }
+        }
+
+        {
+            /* first search (depth 0) */
+            size_t offsetFound = 99999999;
+            size_t ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
+            if (ml2 > matchLength)
+                matchLength = ml2, start = ip, offset=offsetFound;
+        }
+
+         if (matchLength < MINMATCH)
+        {
+            ip += ((ip-anchor) >> g_searchStrength) + 1;   /* jump faster over incompressible sections */
+            continue;
+        }
+
+        /* let's try to find a better solution */
+        if (depth>=1)
+        while (ip<ilimit)
+        {
+            ip ++;
+            current++;
+            /* check repCode */
+            if (offset)
+            {
+                const U32 repIndex = (U32)(current - offset_1);
+                const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+                const BYTE* const repMatch = repBase + repIndex;
+                if ((U32)((dictLimit-1) - repIndex) >= 3)   /* intentional overflow */
+                if (MEM_read32(ip) == MEM_read32(repMatch))
+                {
+                    /* repcode detected */
+                    const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                    size_t repLength = ZSTD_count_2segments(ip+MINMATCH, repMatch+MINMATCH, iend, repEnd, prefixStart) + MINMATCH;
+                    int gain2 = (int)(repLength * 3);
+                    int gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1);
+                    if ((repLength >= MINMATCH) && (gain2 > gain1))
+                        matchLength = repLength, offset = 0, start = ip;
+                }
+            }
+
+            /* search match, depth 1 */
+            {
+                size_t offset2=999999;
+                size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
+                int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1));   /* raw approx */
+                int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4);
+                if ((ml2 >= MINMATCH) && (gain2 > gain1))
+                {
+                    matchLength = ml2, offset = offset2, start = ip;
+                    continue;   /* search a better one */
+                }
+            }
+
+            /* let's find an even better one */
+            if ((depth==2) && (ip<ilimit))
+            {
+                ip ++;
+                current++;
+                /* check repCode */
+                if (offset)
+                {
+                    const U32 repIndex = (U32)(current - offset_1);
+                    const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+                    const BYTE* const repMatch = repBase + repIndex;
+                    if ((U32)((dictLimit-1) - repIndex) >= 3)   /* intentional overflow */
+                    if (MEM_read32(ip) == MEM_read32(repMatch))
+                    {
+                        /* repcode detected */
+                        const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                        size_t repLength = ZSTD_count_2segments(ip+MINMATCH, repMatch+MINMATCH, iend, repEnd, prefixStart) + MINMATCH;
+                        int gain2 = (int)(repLength * 4);
+                        int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1);
+                        if ((repLength >= MINMATCH) && (gain2 > gain1))
+                            matchLength = repLength, offset = 0, start = ip;
+                    }
+                }
+
+                /* search match, depth 2 */
+                {
+                    size_t offset2=999999;
+                    size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
+                    int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1));   /* raw approx */
+                    int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7);
+                    if ((ml2 >= MINMATCH) && (gain2 > gain1))
+                    {
+                        matchLength = ml2, offset = offset2, start = ip;
+                        continue;
+                    }
+                }
+            }
+            break;  /* nothing found : store previous solution */
+        }
+
+        /* catch up */
+        if (offset)
+        {
+            U32 matchIndex = (U32)((start-base) - offset);
+            const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
+            const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
+            while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
+            offset_2 = offset_1; offset_1 = offset;
+        }
+
+        /* store sequence */
+_storeSequence:
+        {
+            size_t litLength = start - anchor;
+            ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH);
+            anchor = ip = start + matchLength;
+        }
+
+        /* check immediate repcode */
+        while (ip <= ilimit)
+        {
+            const U32 repIndex = (U32)((ip-base) - offset_2);
+            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+            const BYTE* const repMatch = repBase + repIndex;
+            if ((U32)((dictLimit-1) - repIndex) >= 3)   /* intentional overflow */
+            if (MEM_read32(ip) == MEM_read32(repMatch))
+            {
+                /* repcode detected we should take it */
+                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                matchLength = ZSTD_count_2segments(ip+MINMATCH, repMatch+MINMATCH, iend, repEnd, prefixStart) + MINMATCH;
+                offset = offset_2; offset_2 = offset_1; offset_1 = offset;   /* swap offset history */
+                ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
+                ip += matchLength;
+                anchor = ip;
+                continue;   /* faster when present ... (?) */
+            }
+            break;
+        }
+    }
+
+    /* Last Literals */
+    {
+        size_t lastLLSize = iend - anchor;
+        memcpy(seqStorePtr->lit, anchor, lastLLSize);
+        seqStorePtr->lit += lastLLSize;
+    }
+
+    /* Final compression stage */
+    return ZSTD_compressSequences(dst, maxDstSize,
+                                  seqStorePtr, srcSize);
+}
+
+size_t ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 0, 0);
+}
+
+size_t ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 0, 1);
+}
+
+size_t ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 0, 2);
+}
+
+static size_t ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 1, 2);
+}
+
+
+typedef size_t (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+
+static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
+{
+    static const ZSTD_blockCompressor blockCompressor[2][5] = {
+        { ZSTD_compressBlock_fast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy,ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2 },
+        { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict }
+    };
+
+    return blockCompressor[extDict][(U32)strat];
+}
+
+
+size_t ZSTD_compressBlock(ZSTD_CCtx* zc, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    ZSTD_blockCompressor blockCompressor = ZSTD_selectBlockCompressor(zc->params.strategy, zc->lowLimit < zc->dictLimit);
+    if (srcSize < MIN_CBLOCK_SIZE+3) return 0;   /* don't even attempt compression below a certain srcSize */
+    return blockCompressor(zc, dst, maxDstSize, src, srcSize);
+}
+
+
+static size_t ZSTD_compress_generic (ZSTD_CCtx* ctxPtr,
+                                        void* dst, size_t maxDstSize,
+                                  const void* src, size_t srcSize)
+{
+    size_t blockSize = BLOCKSIZE;
+    size_t remaining = srcSize;
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* op = ostart;
+    const U32 maxDist = 1 << ctxPtr->params.windowLog;
+
+    while (remaining)
+    {
+        size_t cSize;
+
+        if (maxDstSize < 3 + MIN_CBLOCK_SIZE) return ERROR(dstSize_tooSmall);   /* not enough space to store compressed block */
+        if (remaining < blockSize) blockSize = remaining;
+
+        if ((U32)(ip+blockSize - (ctxPtr->base + ctxPtr->lowLimit)) > maxDist)
+        {
+            /* respect windowLog contract */
+            ctxPtr->lowLimit = (U32)(ip+blockSize - ctxPtr->base) - maxDist;
+            if (ctxPtr->dictLimit < ctxPtr->lowLimit) ctxPtr->dictLimit = ctxPtr->lowLimit;
+        }
+
+        cSize = ZSTD_compressBlock(ctxPtr, op+3, maxDstSize-3, ip, blockSize);
+        if (ZSTD_isError(cSize)) return cSize;
+
+        if (cSize == 0)
+        {
+            cSize = ZSTD_noCompressBlock(op, maxDstSize, ip, blockSize);   /* block is not compressible */
+        }
+        else
+        {
+            op[0] = (BYTE)(cSize>>16);
+            op[1] = (BYTE)(cSize>>8);
+            op[2] = (BYTE)cSize;
+            op[0] += (BYTE)(bt_compressed << 6); /* is a compressed block */
+            cSize += 3;
+        }
+
+        remaining -= blockSize;
+        maxDstSize -= cSize;
+        ip += blockSize;
+        op += cSize;
+    }
+
+    return op-ostart;
+}
+
+
+size_t ZSTD_compressContinue (ZSTD_CCtx* zc,
+                                 void* dst, size_t dstSize,
+                           const void* src, size_t srcSize)
+{
+    const BYTE* const ip = (const BYTE*) src;
+
+    /* preemptive overflow correction */
+    if ((zc->base > (const BYTE*)src) || (zc->lowLimit > (1<<30) ))
+    {
+        U32 correction = zc->lowLimit-1;
+        ZSTD_reduceIndex(zc, correction);
+        zc->base += correction;
+        zc->dictBase += correction;
+        zc->lowLimit -= correction;
+        zc->dictLimit -= correction;
+        if (zc->nextToUpdate < correction) zc->nextToUpdate = 0;
+        else zc->nextToUpdate -= correction;
+    }
+
+    /* Check if blocks follow each other */
+    if (src != zc->nextSrc)
+    {
+        /* not contiguous */
+        zc->lowLimit = zc->dictLimit;
+        zc->dictLimit = (U32)(zc->nextSrc - zc->base);
+        zc->dictBase = zc->base;
+        zc->base += ip - zc->nextSrc;
+        zc->nextToUpdate = zc->dictLimit;
+    }
+
+    /* input-dictionary overlap */
+    if ((ip+srcSize > zc->dictBase + zc->lowLimit) && (ip < zc->dictBase + zc->dictLimit))
+    {
+        zc->lowLimit = (U32)(ip + srcSize - zc->dictBase);
+        if (zc->lowLimit > zc->dictLimit) zc->lowLimit = zc->dictLimit;
+    }
+
+    zc->nextSrc = ip + srcSize;
+
+    return ZSTD_compress_generic (zc, dst, dstSize, src, srcSize);
+}
+
+
+/** ZSTD_compressBegin_advanced
+*   Write frame header, according to params
+*   @return : nb of bytes written */
+size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* ctx,
+                                   void* dst, size_t maxDstSize,
+                                   ZSTD_parameters params)
+{
+    size_t errorCode;
+
+    ZSTD_validateParams(&params);
+
+    if (maxDstSize < ZSTD_frameHeaderSize_max) return ERROR(dstSize_tooSmall);
+    errorCode = ZSTD_resetCCtx_advanced(ctx, params);
+    if (ZSTD_isError(errorCode)) return errorCode;
+
+    MEM_writeLE32(dst, ZSTD_MAGICNUMBER); /* Write Header */
+    ((BYTE*)dst)[4] = (BYTE)(params.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN);
+    return ZSTD_frameHeaderSize_min;
+}
+
+
+/** ZSTD_getParams
+*   return ZSTD_parameters structure for a selected compression level and srcSize.
+*   srcSizeHint value is optional, select 0 if not known */
+ZSTD_parameters ZSTD_getParams(int compressionLevel, U64 srcSizeHint)
+{
+    ZSTD_parameters result;
+    int tableID = ((srcSizeHint-1) <= 128 KB);   /* intentional underflow for srcSizeHint == 0 */
+    if (compressionLevel<=0) compressionLevel = 1;
+    if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL;
+    result = ZSTD_defaultParameters[tableID][compressionLevel];
+    result.srcSize = srcSizeHint;
+    return result;
+}
+
+
+size_t ZSTD_compressBegin(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, int compressionLevel)
+{
+    return ZSTD_compressBegin_advanced(ctx, dst, maxDstSize, ZSTD_getParams(compressionLevel, 0));
+}
+
+
+/** ZSTD_compressEnd
+*   Write frame epilogue
+*   @return : nb of bytes written into dst (or an error code) */
+size_t ZSTD_compressEnd(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize)
+{
+    BYTE* op = (BYTE*)dst;
+
+    /* Sanity check */
+    (void)ctx;
+    if (maxDstSize < 3) return ERROR(dstSize_tooSmall);
+
+    /* End of frame */
+    op[0] = (BYTE)(bt_end << 6);
+    op[1] = 0;
+    op[2] = 0;
+
+    return 3;
+}
+
+size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
+                               void* dst, size_t maxDstSize,
+                         const void* src, size_t srcSize,
+                               ZSTD_parameters params)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* op = ostart;
+    size_t oSize;
+
+    /* Header */
+    oSize = ZSTD_compressBegin_advanced(ctx, dst, maxDstSize, params);
+    if(ZSTD_isError(oSize)) return oSize;
+    op += oSize;
+    maxDstSize -= oSize;
+
+    /* body (compression) */
+    ctx->base = (const BYTE*)src;
+    oSize = ZSTD_compress_generic (ctx, op,  maxDstSize, src, srcSize);
+    if(ZSTD_isError(oSize)) return oSize;
+    op += oSize;
+    maxDstSize -= oSize;
+
+    /* Close frame */
+    oSize = ZSTD_compressEnd(ctx, op, maxDstSize);
+    if(ZSTD_isError(oSize)) return oSize;
+    op += oSize;
+
+    return (op - ostart);
+}
+
+size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize, int compressionLevel)
+{
+    return ZSTD_compress_advanced(ctx, dst, maxDstSize, src, srcSize, ZSTD_getParams(compressionLevel, srcSize));
+}
+
+size_t ZSTD_compress(void* dst, size_t maxDstSize, const void* src, size_t srcSize, int compressionLevel)
+{
+    size_t result;
+    ZSTD_CCtx ctxBody;
+    memset(&ctxBody, 0, sizeof(ctxBody));
+    result = ZSTD_compressCCtx(&ctxBody, dst, maxDstSize, src, srcSize, compressionLevel);
+    free(ctxBody.workSpace);   /* can't free ctxBody, since it's on stack; free heap content */
+    return result;
+}
diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c
new file mode 100644
index 0000000..e8190bc
--- /dev/null
+++ b/lib/zstd_decompress.c
@@ -0,0 +1,877 @@
+/*
+    zstd - standard compression library
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* ***************************************************************
+*  Tuning parameters
+*****************************************************************/
+/*!
+ * HEAPMODE :
+ * Select how default compression functions will allocate memory for their hash table,
+ * in memory stack (0, fastest), or in memory heap (1, requires malloc())
+ * Note that compression context is fairly large, as a consequence heap memory is recommended.
+ */
+#ifndef ZSTD_HEAPMODE
+#  define ZSTD_HEAPMODE 1
+#endif /* ZSTD_HEAPMODE */
+
+/*!
+*  LEGACY_SUPPORT :
+*  ZSTD_decompress() can decode older formats (starting from zstd 0.1+)
+*/
+#ifndef ZSTD_LEGACY_SUPPORT
+#  define ZSTD_LEGACY_SUPPORT 1
+#endif
+
+
+/* *******************************************************
+*  Includes
+*********************************************************/
+#include <stdlib.h>      /* calloc */
+#include <string.h>      /* memcpy, memmove */
+#include <stdio.h>       /* debug : printf */
+#include "mem.h"         /* low level memory routines */
+#include "zstd_static.h"
+#include "zstd_internal.h"
+#include "fse_static.h"
+#include "huff0.h"
+
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
+#  include "zstd_legacy.h"
+#endif
+
+/* *******************************************************
+*  Compiler specifics
+*********************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
+#else
+#  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#  ifdef __GNUC__
+#    define FORCE_INLINE static inline __attribute__((always_inline))
+#  else
+#    define FORCE_INLINE static inline
+#  endif
+#endif
+
+
+/* *************************************
+*  Local types
+***************************************/
+typedef struct
+{
+    blockType_t blockType;
+    U32 origSize;
+} blockProperties_t;
+
+
+/* *******************************************************
+*  Memory operations
+**********************************************************/
+static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
+
+
+/* *************************************
+*  Error Management
+***************************************/
+unsigned ZSTD_versionNumber (void) { return ZSTD_VERSION_NUMBER; }
+
+/*! ZSTD_isError
+*   tells if a return value is an error code */
+unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
+
+/*! ZSTD_getErrorName
+*   provides error code string (useful for debugging) */
+const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+
+/* *************************************************************
+*   Context management
+***************************************************************/
+typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
+               ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock } ZSTD_dStage;
+
+struct ZSTD_DCtx_s
+{
+    U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
+    U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
+    U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
+    void* previousDstEnd;
+    void* base;
+    void* vBase;
+    void* dictEnd;
+    size_t expected;
+    size_t headerSize;
+    ZSTD_parameters params;
+    blockType_t bType;
+    ZSTD_dStage stage;
+    const BYTE* litPtr;
+    size_t litBufSize;
+    size_t litSize;
+    BYTE litBuffer[BLOCKSIZE + 8 /* margin for wildcopy */];
+    BYTE headerBuffer[ZSTD_frameHeaderSize_max];
+};   /* typedef'd to ZSTD_Dctx within "zstd_static.h" */
+
+size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx)
+{
+    dctx->expected = ZSTD_frameHeaderSize_min;
+    dctx->stage = ZSTDds_getFrameHeaderSize;
+    dctx->previousDstEnd = NULL;
+    dctx->base = NULL;
+    dctx->vBase = NULL;
+    dctx->dictEnd = NULL;
+    return 0;
+}
+
+ZSTD_DCtx* ZSTD_createDCtx(void)
+{
+    ZSTD_DCtx* dctx = (ZSTD_DCtx*)malloc(sizeof(ZSTD_DCtx));
+    if (dctx==NULL) return NULL;
+    ZSTD_resetDCtx(dctx);
+    return dctx;
+}
+
+size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
+{
+    free(dctx);
+    return 0;
+}
+
+
+/* *************************************************************
+*   Decompression section
+***************************************************************/
+/** ZSTD_decodeFrameHeader_Part1
+*   decode the 1st part of the Frame Header, which tells Frame Header size.
+*   srcSize must be == ZSTD_frameHeaderSize_min
+*   @return : the full size of the Frame Header */
+static size_t ZSTD_decodeFrameHeader_Part1(ZSTD_DCtx* zc, const void* src, size_t srcSize)
+{
+    U32 magicNumber;
+    if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong);
+    magicNumber = MEM_readLE32(src);
+    if (magicNumber != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
+    zc->headerSize = ZSTD_frameHeaderSize_min;
+    return zc->headerSize;
+}
+
+
+size_t ZSTD_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize)
+{
+    U32 magicNumber;
+    if (srcSize < ZSTD_frameHeaderSize_min) return ZSTD_frameHeaderSize_max;
+    magicNumber = MEM_readLE32(src);
+    if (magicNumber != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
+    memset(params, 0, sizeof(*params));
+    params->windowLog = (((const BYTE*)src)[4] & 15) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
+    if ((((const BYTE*)src)[4] >> 4) != 0) return ERROR(frameParameter_unsupported);   /* reserved bits */
+    return 0;
+}
+
+/** ZSTD_decodeFrameHeader_Part2
+*   decode the full Frame Header
+*   srcSize must be the size provided by ZSTD_decodeFrameHeader_Part1
+*   @return : 0, or an error code, which can be tested using ZSTD_isError() */
+static size_t ZSTD_decodeFrameHeader_Part2(ZSTD_DCtx* zc, const void* src, size_t srcSize)
+{
+    size_t result;
+    if (srcSize != zc->headerSize) return ERROR(srcSize_wrong);
+    result = ZSTD_getFrameParams(&(zc->params), src, srcSize);
+    if ((MEM_32bits()) && (zc->params.windowLog > 25)) return ERROR(frameParameter_unsupportedBy32bitsImplementation);
+    return result;
+}
+
+
+size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
+{
+    const BYTE* const in = (const BYTE* const)src;
+    BYTE headerFlags;
+    U32 cSize;
+
+    if (srcSize < 3) return ERROR(srcSize_wrong);
+
+    headerFlags = *in;
+    cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
+
+    bpPtr->blockType = (blockType_t)(headerFlags >> 6);
+    bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
+
+    if (bpPtr->blockType == bt_end) return 0;
+    if (bpPtr->blockType == bt_rle) return 1;
+    return cSize;
+}
+
+static size_t ZSTD_copyRawBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
+    memcpy(dst, src, srcSize);
+    return srcSize;
+}
+
+
+/** ZSTD_decompressLiterals
+    @return : nb of bytes read from src, or an error code*/
+static size_t ZSTD_decompressLiterals(void* dst, size_t* maxDstSizePtr,
+                                const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+
+    const size_t litSize = (MEM_readLE32(src) & 0x1FFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+    const size_t litCSize = (MEM_readLE32(ip+2) & 0xFFFFFF) >> 5;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+
+    if (litSize > *maxDstSizePtr) return ERROR(corruption_detected);
+    if (litCSize + 5 > srcSize) return ERROR(corruption_detected);
+
+    if (HUF_isError(HUF_decompress(dst, litSize, ip+5, litCSize))) return ERROR(corruption_detected);
+
+    *maxDstSizePtr = litSize;
+    return litCSize + 5;
+}
+
+
+/** ZSTD_decodeLiteralsBlock
+    @return : nb of bytes read from src (< srcSize ) */
+size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
+                          const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
+{
+    const BYTE* const istart = (const BYTE*) src;
+
+    /* any compressed block with literals segment must be at least this size */
+    if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
+
+    switch(*istart & 3)
+    {
+    /* compressed */
+    case 0:
+        {
+            size_t litSize = BLOCKSIZE;
+            const size_t readSize = ZSTD_decompressLiterals(dctx->litBuffer, &litSize, src, srcSize);
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litBufSize = BLOCKSIZE+8;
+            dctx->litSize = litSize;
+            return readSize;   /* works if it's an error too */
+        }
+    case IS_RAW:
+        {
+            const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+            if (litSize > srcSize-11)   /* risk of reading too far with wildcopy */
+            {
+                if (litSize > srcSize-3) return ERROR(corruption_detected);
+                memcpy(dctx->litBuffer, istart, litSize);
+                dctx->litPtr = dctx->litBuffer;
+                dctx->litBufSize = BLOCKSIZE+8;
+                dctx->litSize = litSize;
+                return litSize+3;
+            }
+            /* direct reference into compressed stream */
+            dctx->litPtr = istart+3;
+            dctx->litBufSize = srcSize-3;
+            dctx->litSize = litSize;
+            return litSize+3;        }
+    case IS_RLE:
+        {
+            const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+            if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
+            memset(dctx->litBuffer, istart[3], litSize);
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litBufSize = BLOCKSIZE+8;
+            dctx->litSize = litSize;
+            return 4;
+        }
+    default:
+        return ERROR(corruption_detected);   /* forbidden nominal case */
+    }
+}
+
+
+size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
+                         FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
+                         const void* src, size_t srcSize)
+{
+    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* ip = istart;
+    const BYTE* const iend = istart + srcSize;
+    U32 LLtype, Offtype, MLtype;
+    U32 LLlog, Offlog, MLlog;
+    size_t dumpsLength;
+
+    /* check */
+    if (srcSize < 5) return ERROR(srcSize_wrong);
+
+    /* SeqHead */
+    *nbSeq = MEM_readLE16(ip); ip+=2;
+    LLtype  = *ip >> 6;
+    Offtype = (*ip >> 4) & 3;
+    MLtype  = (*ip >> 2) & 3;
+    if (*ip & 2)
+    {
+        dumpsLength  = ip[2];
+        dumpsLength += ip[1] << 8;
+        ip += 3;
+    }
+    else
+    {
+        dumpsLength  = ip[1];
+        dumpsLength += (ip[0] & 1) << 8;
+        ip += 2;
+    }
+    *dumpsPtr = ip;
+    ip += dumpsLength;
+    *dumpsLengthPtr = dumpsLength;
+
+    /* check */
+    if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
+
+    /* sequences */
+    {
+        S16 norm[MaxML+1];    /* assumption : MaxML >= MaxLL >= MaxOff */
+        size_t headerSize;
+
+        /* Build DTables */
+        switch(LLtype)
+        {
+        U32 max;
+        case bt_rle :
+            LLlog = 0;
+            FSE_buildDTable_rle(DTableLL, *ip++); break;
+        case bt_raw :
+            LLlog = LLbits;
+            FSE_buildDTable_raw(DTableLL, LLbits); break;
+        default :
+            max = MaxLL;
+            headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
+            if (FSE_isError(headerSize)) return ERROR(GENERIC);
+            if (LLlog > LLFSELog) return ERROR(corruption_detected);
+            ip += headerSize;
+            FSE_buildDTable(DTableLL, norm, max, LLlog);
+        }
+
+        switch(Offtype)
+        {
+        U32 max;
+        case bt_rle :
+            Offlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong);   /* min : "raw", hence no header, but at least xxLog bits */
+            FSE_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */
+            break;
+        case bt_raw :
+            Offlog = Offbits;
+            FSE_buildDTable_raw(DTableOffb, Offbits); break;
+        default :
+            max = MaxOff;
+            headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
+            if (FSE_isError(headerSize)) return ERROR(GENERIC);
+            if (Offlog > OffFSELog) return ERROR(corruption_detected);
+            ip += headerSize;
+            FSE_buildDTable(DTableOffb, norm, max, Offlog);
+        }
+
+        switch(MLtype)
+        {
+        U32 max;
+        case bt_rle :
+            MLlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
+            FSE_buildDTable_rle(DTableML, *ip++); break;
+        case bt_raw :
+            MLlog = MLbits;
+            FSE_buildDTable_raw(DTableML, MLbits); break;
+        default :
+            max = MaxML;
+            headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
+            if (FSE_isError(headerSize)) return ERROR(GENERIC);
+            if (MLlog > MLFSELog) return ERROR(corruption_detected);
+            ip += headerSize;
+            FSE_buildDTable(DTableML, norm, max, MLlog);
+        }
+    }
+
+    return ip-istart;
+}
+
+
+typedef struct {
+    size_t litLength;
+    size_t offset;
+    size_t matchLength;
+} seq_t;
+
+typedef struct {
+    BIT_DStream_t DStream;
+    FSE_DState_t stateLL;
+    FSE_DState_t stateOffb;
+    FSE_DState_t stateML;
+    size_t prevOffset;
+    const BYTE* dumps;
+    const BYTE* dumpsEnd;
+} seqState_t;
+
+
+static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
+{
+    size_t litLength;
+    size_t prevOffset;
+    size_t offset;
+    size_t matchLength;
+    const BYTE* dumps = seqState->dumps;
+    const BYTE* const de = seqState->dumpsEnd;
+
+    /* Literal length */
+    litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
+    prevOffset = litLength ? seq->offset : seqState->prevOffset;
+    if (litLength == MaxLL)
+    {
+        U32 add = *dumps++;
+        if (add < 255) litLength += add;
+        else
+        {
+            litLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            dumps += 3;
+        }
+        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
+    }
+
+    /* Offset */
+    {
+        static const U32 offsetPrefix[MaxOff+1] = {
+                1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
+                512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
+                524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
+        U32 offsetCode, nbBits;
+        offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));   /* <= maxOff, by table construction */
+        if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
+        nbBits = offsetCode - 1;
+        if (offsetCode==0) nbBits = 0;   /* cmove */
+        offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits);
+        if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
+        if (offsetCode==0) offset = prevOffset;   /* cmove */
+        if (offsetCode | !litLength) seqState->prevOffset = seq->offset;   /* cmove */
+    }
+
+    /* MatchLength */
+    matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
+    if (matchLength == MaxML)
+    {
+        U32 add = *dumps++;
+        if (add < 255) matchLength += add;
+        else
+        {
+            matchLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            dumps += 3;
+        }
+        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
+    }
+    matchLength += MINMATCH;
+
+    /* save result */
+    seq->litLength = litLength;
+    seq->offset = offset;
+    seq->matchLength = matchLength;
+    seqState->dumps = dumps;
+}
+
+
+FORCE_INLINE size_t ZSTD_execSequence(BYTE* op,
+                                BYTE* const oend, seq_t sequence,
+                                const BYTE** litPtr, const BYTE* const litLimit_8,
+                                BYTE* const base, BYTE* const vBase, BYTE* const dictEnd)
+{
+    static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
+    static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* substracted */
+    BYTE* const oLitEnd = op + sequence.litLength;
+    const size_t sequenceLength = sequence.litLength + sequence.matchLength;
+    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
+    BYTE* const oend_8 = oend-8;
+    const BYTE* const litEnd = *litPtr + sequence.litLength;
+    const BYTE* match = oLitEnd - sequence.offset;
+
+    /* check */
+    if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall);   /* last match must start at a minimum distance of 8 from oend */
+    if (oMatchEnd > oend) return ERROR(dstSize_tooSmall);   /* overwrite beyond dst buffer */
+    if (litEnd > litLimit_8) return ERROR(corruption_detected);   /* risk read beyond lit buffer */
+
+    /* copy Literals */
+    ZSTD_wildcopy(op, *litPtr, sequence.litLength);   /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
+    op = oLitEnd;
+    *litPtr = litEnd;   /* update for next sequence */
+
+    /* copy Match */
+	/* check */
+	//if (match > oLitEnd) return ERROR(corruption_detected);   /* address space overflow test (is clang optimizer wrongly removing this test ?) */
+	if (sequence.offset > (size_t)oLitEnd) return ERROR(corruption_detected);   /* address space overflow test (this test seems preserved by clang optimizer) */
+
+	if (match < base)
+	{
+		/* offset beyond prefix */
+		if (match < vBase) return ERROR(corruption_detected);
+		match = dictEnd - (base-match);
+		if (match + sequence.matchLength <= dictEnd)
+		{
+			memcpy(oLitEnd, match, sequence.matchLength);
+			return sequenceLength;
+		}
+		/* span extDict & currentPrefixSegment */
+		{
+			size_t length1 = dictEnd - match;
+			memcpy(oLitEnd, match, length1);
+			op = oLitEnd + length1;
+			sequence.matchLength -= length1;
+			match = base;
+		}
+	}
+
+	/* match within prefix */
+	if (sequence.offset < 8)
+	{
+		/* close range match, overlap */
+		const int sub2 = dec64table[sequence.offset];
+		op[0] = match[0];
+		op[1] = match[1];
+		op[2] = match[2];
+		op[3] = match[3];
+		match += dec32table[sequence.offset];
+		ZSTD_copy4(op+4, match);
+		match -= sub2;
+	}
+	else
+	{
+		ZSTD_copy8(op, match);
+	}
+	op += 8; match += 8;
+
+	if (oMatchEnd > oend-12)
+	{
+		if (op < oend_8)
+		{
+			ZSTD_wildcopy(op, match, oend_8 - op);
+			match += oend_8 - op;
+			op = oend_8;
+		}
+		while (op < oMatchEnd) *op++ = *match++;
+	}
+	else
+	{
+		ZSTD_wildcopy(op, match, sequence.matchLength-8);   /* works even if matchLength < 8 */
+	}
+	return sequenceLength;
+}
+
+
+static size_t ZSTD_decompressSequences(
+                               ZSTD_DCtx* dctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize)
+{
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t errorCode, dumpsLength;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    int nbSeq;
+    const BYTE* dumps;
+    U32* DTableLL = dctx->LLTable;
+    U32* DTableML = dctx->MLTable;
+    U32* DTableOffb = dctx->OffTable;
+    BYTE* const base = (BYTE*) (dctx->base);
+    BYTE* const vBase = (BYTE*) (dctx->vBase);
+    BYTE* const dictEnd = (BYTE*) (dctx->dictEnd);
+
+    /* Build Decoding Tables */
+    errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
+                                      DTableLL, DTableML, DTableOffb,
+                                      ip, iend-ip);
+    if (ZSTD_isError(errorCode)) return errorCode;
+    ip += errorCode;
+
+    /* Regen sequences */
+    {
+        seq_t sequence;
+        seqState_t seqState;
+
+        memset(&sequence, 0, sizeof(sequence));
+        sequence.offset = 4;
+        seqState.dumps = dumps;
+        seqState.dumpsEnd = dumps + dumpsLength;
+        seqState.prevOffset = 4;
+        errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip);
+        if (ERR_isError(errorCode)) return ERROR(corruption_detected);
+        FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
+        FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
+        FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
+
+        for ( ; (BIT_reloadDStream(&(seqState.DStream)) < BIT_DStream_completed) ; )
+        {
+            size_t oneSeqSize;
+            nbSeq--;
+            ZSTD_decodeSequence(&sequence, &seqState);
+            oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd);
+            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+            op += oneSeqSize;
+        }
+
+        if (nbSeq<0) return ERROR(corruption_detected);   /* requested too many sequences : data is corrupted */
+
+		/* now BIT_reloadDStream(&(seqState.DStream)) >= BIT_DStream_completed) */
+        for ( ; (BIT_reloadDStream(&(seqState.DStream)) == BIT_DStream_completed) && nbSeq ; )
+        {
+            size_t oneSeqSize;
+            nbSeq--;
+            ZSTD_decodeSequence(&sequence, &seqState);
+            oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd);
+            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+            op += oneSeqSize;
+        }
+
+        /* check if reached exact end */
+        if ( !BIT_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected);   /* DStream should be entirely and precisely consumed; otherwise data is corrupted */
+
+        /* last literal segment */
+        {
+            size_t lastLLSize = litEnd - litPtr;
+            if (litPtr > litEnd) return ERROR(corruption_detected);
+            if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
+            if (op != litPtr) memcpy(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+    }
+
+    return op-ostart;
+}
+
+
+static size_t ZSTD_decompressBlock(
+                            ZSTD_DCtx* dctx,
+                            void* dst, size_t maxDstSize,
+                      const void* src, size_t srcSize)
+{
+    /* blockType == blockCompressed */
+    const BYTE* ip = (const BYTE*)src;
+
+    /* Decode literals sub-block */
+    size_t litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
+    if (ZSTD_isError(litCSize)) return litCSize;
+    ip += litCSize;
+    srcSize -= litCSize;
+
+    return ZSTD_decompressSequences(dctx, dst, maxDstSize, ip, srcSize);
+}
+
+
+size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* iend = ip + srcSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t remainingSize = srcSize;
+    blockProperties_t blockProperties;
+
+
+    /* init */
+    ctx->base = ctx->vBase = ctx->dictEnd = dst;
+
+    /* Frame Header */
+    {
+        size_t frameHeaderSize;
+        if (srcSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
+        {
+            const U32 magicNumber = MEM_readLE32(src);
+            if (ZSTD_isLegacy(magicNumber))
+                return ZSTD_decompressLegacy(dst, maxDstSize, src, srcSize, magicNumber);
+        }
+#endif
+        frameHeaderSize = ZSTD_decodeFrameHeader_Part1(ctx, src, ZSTD_frameHeaderSize_min);
+        if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
+        if (srcSize < frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+        ip += frameHeaderSize; remainingSize -= frameHeaderSize;
+        frameHeaderSize = ZSTD_decodeFrameHeader_Part2(ctx, src, frameHeaderSize);
+        if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
+    }
+
+    /* Loop on each block */
+    while (1)
+    {
+        size_t decodedSize=0;
+        size_t cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
+        if (ZSTD_isError(cBlockSize)) return cBlockSize;
+
+        ip += ZSTD_blockHeaderSize;
+        remainingSize -= ZSTD_blockHeaderSize;
+        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+
+        switch(blockProperties.blockType)
+        {
+        case bt_compressed:
+            decodedSize = ZSTD_decompressBlock(ctx, op, oend-op, ip, cBlockSize);
+            break;
+        case bt_raw :
+            decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize);
+            break;
+        case bt_rle :
+            return ERROR(GENERIC);   /* not yet supported */
+            break;
+        case bt_end :
+            /* end of frame */
+            if (remainingSize) return ERROR(srcSize_wrong);
+            break;
+        default:
+            return ERROR(GENERIC);   /* impossible */
+        }
+        if (cBlockSize == 0) break;   /* bt_end */
+
+        if (ZSTD_isError(decodedSize)) return decodedSize;
+        op += decodedSize;
+        ip += cBlockSize;
+        remainingSize -= cBlockSize;
+    }
+
+    return op-ostart;
+}
+
+size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    ZSTD_DCtx ctx;
+    return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
+}
+
+
+/* ******************************
+*  Streaming Decompression API
+********************************/
+size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx)
+{
+    return dctx->expected;
+}
+
+size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    /* Sanity check */
+    if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
+    if (dst != ctx->previousDstEnd)   /* not contiguous */
+    {
+        if ((dst > ctx->base) && (dst < ctx->previousDstEnd))   /* rolling buffer : new segment into dictionary */
+            ctx->base = (char*)dst;   /* temporary affectation, for vBase calculation */
+        ctx->dictEnd = ctx->previousDstEnd;
+        ctx->vBase = (char*)dst - ((char*)(ctx->previousDstEnd) - (char*)(ctx->base));
+        ctx->base = dst;
+        ctx->previousDstEnd = dst;
+    }
+
+    /* Decompress : frame header; part 1 */
+    switch (ctx->stage)
+    {
+    case ZSTDds_getFrameHeaderSize :
+        {
+            /* get frame header size */
+            if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong);   /* impossible */
+            ctx->headerSize = ZSTD_decodeFrameHeader_Part1(ctx, src, ZSTD_frameHeaderSize_min);
+            if (ZSTD_isError(ctx->headerSize)) return ctx->headerSize;
+            memcpy(ctx->headerBuffer, src, ZSTD_frameHeaderSize_min);
+            if (ctx->headerSize > ZSTD_frameHeaderSize_min)
+            {
+                ctx->expected = ctx->headerSize - ZSTD_frameHeaderSize_min;
+                ctx->stage = ZSTDds_decodeFrameHeader;
+                return 0;
+            }
+            ctx->expected = 0;   /* not necessary to copy more */
+        }
+    case ZSTDds_decodeFrameHeader:
+        {
+            /* get frame header */
+            size_t result;
+            memcpy(ctx->headerBuffer + ZSTD_frameHeaderSize_min, src, ctx->expected);
+            result = ZSTD_decodeFrameHeader_Part2(ctx, ctx->headerBuffer, ctx->headerSize);
+            if (ZSTD_isError(result)) return result;
+            ctx->expected = ZSTD_blockHeaderSize;
+            ctx->stage = ZSTDds_decodeBlockHeader;
+            return 0;
+        }
+    case ZSTDds_decodeBlockHeader:
+        {
+            /* Decode block header */
+            blockProperties_t bp;
+            size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
+            if (ZSTD_isError(blockSize)) return blockSize;
+            if (bp.blockType == bt_end)
+            {
+                ctx->expected = 0;
+                ctx->stage = ZSTDds_getFrameHeaderSize;
+            }
+            else
+            {
+                ctx->expected = blockSize;
+                ctx->bType = bp.blockType;
+                ctx->stage = ZSTDds_decompressBlock;
+            }
+
+            return 0;
+        }
+    case 3:
+        {
+            /* Decompress : block content */
+            size_t rSize;
+            switch(ctx->bType)
+            {
+            case bt_compressed:
+                rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
+                break;
+            case bt_raw :
+                rSize = ZSTD_copyRawBlock(dst, maxDstSize, src, srcSize);
+                break;
+            case bt_rle :
+                return ERROR(GENERIC);   /* not yet handled */
+                break;
+            case bt_end :   /* should never happen (filtered at phase 1) */
+                rSize = 0;
+                break;
+            default:
+                return ERROR(GENERIC);
+            }
+            ctx->stage = ZSTDds_decodeBlockHeader;
+            ctx->expected = ZSTD_blockHeaderSize;
+            ctx->previousDstEnd = (char*)dst + rSize;
+            return rSize;
+        }
+    default:
+        return ERROR(GENERIC);   /* impossible */
+    }
+}
+
+
diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h
new file mode 100644
index 0000000..bddfc92
--- /dev/null
+++ b/lib/zstd_internal.h
@@ -0,0 +1,122 @@
+/*
+    zstd_internal - common functions to include
+    Header File for include
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#ifndef ZSTD_CCOMMON_H_MODULE
+#define ZSTD_CCOMMON_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include "mem.h"
+#include "error.h"
+
+
+/* *************************************
+*  Common macros
+***************************************/
+#define MIN(a,b) ((a)<(b) ? (a) : (b))
+#define MAX(a,b) ((a)>(b) ? (a) : (b))
+
+
+/* *************************************
+*  Common constants
+***************************************/
+#define ZSTD_MAGICNUMBER 0xFD2FB524   /* v0.4 */
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define BLOCKSIZE (128 KB)                 /* define, for static allocation */
+
+static const size_t ZSTD_blockHeaderSize = 3;
+static const size_t ZSTD_frameHeaderSize_min = 5;
+#define ZSTD_frameHeaderSize_max 5         /* define, for static allocation */
+
+#define BIT7 128
+#define BIT6  64
+#define BIT5  32
+#define BIT4  16
+#define BIT1   2
+#define BIT0   1
+
+#define IS_RAW BIT0
+#define IS_RLE BIT1
+
+#define MINMATCH 4
+#define REPCODE_STARTVALUE 4
+
+#define MLbits   7
+#define LLbits   6
+#define Offbits  5
+#define MaxML  ((1<<MLbits) - 1)
+#define MaxLL  ((1<<LLbits) - 1)
+#define MaxOff ((1<<Offbits)- 1)
+#define MLFSELog   10
+#define LLFSELog   10
+#define OffFSELog   9
+#define MaxSeq MAX(MaxLL, MaxML)
+
+#define MIN_SEQUENCES_SIZE (2 /*seqNb*/ + 2 /*dumps*/ + 3 /*seqTables*/ + 1 /*bitStream*/)
+#define MIN_CBLOCK_SIZE (3 /*litCSize*/ + MIN_SEQUENCES_SIZE)
+
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
+
+
+/* ******************************************
+*  Shared functions to include for inlining
+********************************************/
+static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
+
+#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
+
+/*! ZSTD_wildcopy : custom version of memcpy(), can copy up to 7-8 bytes too many */
+static void ZSTD_wildcopy(void* dst, const void* src, size_t length)
+{
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + length;
+    do
+        COPY8(op, ip)
+    while (op < oend);
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif   /* ZSTD_CCOMMON_H_MODULE */
diff --git a/lib/zstd_static.h b/lib/zstd_static.h
new file mode 100644
index 0000000..db31c9f
--- /dev/null
+++ b/lib/zstd_static.h
@@ -0,0 +1,234 @@
+/*
+    zstd - standard compression library
+    Header File for static linking only
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#ifndef ZSTD_STATIC_H
+#define ZSTD_STATIC_H
+
+/* The objects defined into this file should be considered experimental.
+ * They are not labelled stable, as their prototype may change in the future.
+ * You can use them for tests, provide feedback, or if you can endure risk of future changes.
+ */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include "zstd.h"
+#include "mem.h"
+
+
+/* *************************************
+*  Types
+***************************************/
+#define ZSTD_WINDOWLOG_MAX 26
+#define ZSTD_WINDOWLOG_MIN 18
+#define ZSTD_WINDOWLOG_ABSOLUTEMIN 11
+#define ZSTD_CONTENTLOG_MAX (ZSTD_WINDOWLOG_MAX+1)
+#define ZSTD_CONTENTLOG_MIN 4
+#define ZSTD_HASHLOG_MAX 28
+#define ZSTD_HASHLOG_MIN 4
+#define ZSTD_SEARCHLOG_MAX (ZSTD_CONTENTLOG_MAX-1)
+#define ZSTD_SEARCHLOG_MIN 1
+#define ZSTD_SEARCHLENGTH_MAX 7
+#define ZSTD_SEARCHLENGTH_MIN 4
+
+/** from faster to stronger */
+typedef enum { ZSTD_fast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2 } ZSTD_strategy;
+
+typedef struct
+{
+    U64 srcSize;       /* optional : tells how much bytes are present in the frame. Use 0 if not known. */
+    U32 windowLog;     /* largest match distance : larger == more compression, more memory needed during decompression */
+    U32 contentLog;    /* full search segment : larger == more compression, slower, more memory (useless for fast) */
+    U32 hashLog;       /* dispatch table : larger == more memory, faster */
+    U32 searchLog;     /* nb of searches : larger == more compression, slower */
+    U32 searchLength;  /* size of matches : larger == faster decompression, sometimes less compression */
+    ZSTD_strategy strategy;
+} ZSTD_parameters;
+
+
+/* *************************************
+*  Advanced function
+***************************************/
+/** ZSTD_getParams
+*   return ZSTD_parameters structure for a selected compression level and srcSize.
+*   srcSizeHint value is optional, select 0 if not known */
+ZSTD_parameters ZSTD_getParams(int compressionLevel, U64 srcSizeHint);
+
+/** ZSTD_validateParams
+*   correct params value to remain within authorized range */
+void ZSTD_validateParams(ZSTD_parameters* params);
+
+/** ZSTD_compress_advanced
+*   Same as ZSTD_compressCCtx(), with fine-tune control of each compression parameter */
+size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
+                               void* dst, size_t maxDstSize,
+                         const void* src, size_t srcSize,
+                               ZSTD_parameters params);
+
+
+/* **************************************
+*  Streaming functions (bufferless mode)
+****************************************/
+size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize, int compressionLevel);
+size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, ZSTD_parameters params);
+size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize);
+
+/**
+  Streaming compression, bufferless mode
+
+  A ZSTD_CCtx object is required to track streaming operations.
+  Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage it.
+  A ZSTD_CCtx object can be re-used multiple times.
+
+  First operation is to start a new frame.
+  Use ZSTD_compressBegin().
+  You may also prefer the advanced derivative ZSTD_compressBegin_advanced(), for finer parameter control.
+
+  Then, consume your input using ZSTD_compressContinue().
+  The interface is synchronous, so all input will be consumed.
+  You must ensure there is enough space in destination buffer to store compressed data under worst case scenario.
+  Worst case evaluation is provided by ZSTD_compressBound().
+
+  Finish a frame with ZSTD_compressEnd(), which will write the epilogue.
+  Without it, the frame will be considered incomplete by decoders.
+  You can then re-use ZSTD_CCtx to compress new frames.
+*/
+
+
+typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+ZSTD_DCtx* ZSTD_createDCtx(void);
+size_t     ZSTD_resetDCtx(ZSTD_DCtx* dctx);
+size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
+
+size_t ZSTD_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize);
+size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
+size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+/**
+  Streaming decompression, bufferless mode
+
+  A ZSTD_DCtx object is required to track streaming operations.
+  Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
+  A ZSTD_DCtx object can be re-used multiple times. Use ZSTD_resetDCtx() to return to fresh status.
+
+  First operation is to retrieve frame parameters, using ZSTD_getFrameParams().
+  This function doesn't consume its input. It needs enough input data to properly decode the frame header.
+  The objective is to retrieve *params.windowlog, to know minimum amount of memory required during decoding.
+  Result : 0 when successful, it means the ZSTD_parameters structure has been filled.
+           >0 : means there is not enough data into src. Provides the expected size to successfully decode header.
+           errorCode, which can be tested using ZSTD_isError() (For example, if it's not a ZSTD header)
+
+  Then it's possible to start decompression.
+  Use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively.
+  ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+  ZSTD_decompressContinue() requires this exact amount of bytes, or just fails.
+  ZSTD_decompressContinue() needs previous data blocks during decompression, up to (1 << windowlog).
+  They should preferably be located contiguously, prior to current block. Alternatively, a round buffer is also possible.
+
+  @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+
+  A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
+*/
+
+
+/* *************************************
+*  Pre-defined compression levels
+***************************************/
+#define ZSTD_MAX_CLEVEL 20
+static const ZSTD_parameters ZSTD_defaultParameters[2][ZSTD_MAX_CLEVEL+1] = {
+{   /* "default" */
+    /*   W,  C,  H,  S,  L, strat */
+    { 0, 18, 12, 12,  1,  4, ZSTD_fast    },  /* level  0 - never used */
+    { 0, 19, 13, 14,  1,  7, ZSTD_fast    },  /* level  1 */
+    { 0, 19, 15, 16,  1,  6, ZSTD_fast    },  /* level  2 */
+    { 0, 20, 18, 20,  1,  6, ZSTD_fast    },  /* level  3 */
+    { 0, 21, 19, 21,  1,  6, ZSTD_fast    },  /* level  4 */
+    { 0, 20, 14, 18,  3,  5, ZSTD_greedy  },  /* level  5 */
+    { 0, 20, 18, 19,  3,  5, ZSTD_greedy  },  /* level  6 */
+    { 0, 21, 17, 20,  3,  5, ZSTD_lazy    },  /* level  7 */
+    { 0, 21, 19, 20,  3,  5, ZSTD_lazy    },  /* level  8 */
+    { 0, 21, 20, 20,  3,  5, ZSTD_lazy2   },  /* level  9 */
+    { 0, 21, 19, 21,  4,  5, ZSTD_lazy2   },  /* level 10 */
+    { 0, 22, 20, 22,  4,  5, ZSTD_lazy2   },  /* level 11 */
+    { 0, 22, 20, 22,  5,  5, ZSTD_lazy2   },  /* level 12 */
+    { 0, 22, 21, 22,  5,  5, ZSTD_lazy2   },  /* level 13 */
+    { 0, 22, 22, 23,  5,  5, ZSTD_lazy2   },  /* level 14 */
+    { 0, 23, 23, 23,  5,  5, ZSTD_lazy2   },  /* level 15 */
+    { 0, 23, 21, 22,  5,  5, ZSTD_btlazy2 },  /* level 16 */
+    { 0, 23, 24, 23,  4,  5, ZSTD_btlazy2 },  /* level 17 */
+    { 0, 25, 24, 23,  5,  5, ZSTD_btlazy2 },  /* level 18 */
+    { 0, 25, 26, 23,  5,  5, ZSTD_btlazy2 },  /* level 19 */
+    { 0, 26, 27, 25,  9,  5, ZSTD_btlazy2 },  /* level 20 */
+},
+{   /* for srcSize <= 128 KB */
+    /* W,  C,  H,  S,  L, strat */
+    { 0, 17, 12, 12,  1,  4, ZSTD_fast    },  /* level  0 - never used */
+    { 0, 17, 12, 13,  1,  6, ZSTD_fast    },  /* level  1 */
+    { 0, 17, 15, 16,  1,  5, ZSTD_fast    },  /* level  2 */
+    { 0, 17, 16, 17,  1,  5, ZSTD_fast    },  /* level  3 */
+    { 0, 17, 13, 15,  2,  4, ZSTD_greedy  },  /* level  4 */
+    { 0, 17, 15, 17,  3,  4, ZSTD_greedy  },  /* level  5 */
+    { 0, 17, 14, 17,  3,  4, ZSTD_lazy    },  /* level  6 */
+    { 0, 17, 16, 17,  4,  4, ZSTD_lazy    },  /* level  7 */
+    { 0, 17, 16, 17,  4,  4, ZSTD_lazy2   },  /* level  8 */
+    { 0, 17, 17, 16,  5,  4, ZSTD_lazy2   },  /* level  9 */
+    { 0, 17, 17, 16,  6,  4, ZSTD_lazy2   },  /* level 10 */
+    { 0, 17, 17, 16,  7,  4, ZSTD_lazy2   },  /* level 11 */
+    { 0, 17, 17, 16,  8,  4, ZSTD_lazy2   },  /* level 12 */
+    { 0, 17, 18, 16,  4,  4, ZSTD_btlazy2 },  /* level 13 */
+    { 0, 17, 18, 16,  5,  4, ZSTD_btlazy2 },  /* level 14 */
+    { 0, 17, 18, 16,  6,  4, ZSTD_btlazy2 },  /* level 15 */
+    { 0, 17, 18, 16,  7,  4, ZSTD_btlazy2 },  /* level 16 */
+    { 0, 17, 18, 16,  8,  4, ZSTD_btlazy2 },  /* level 17 */
+    { 0, 17, 18, 16,  9,  4, ZSTD_btlazy2 },  /* level 18 */
+    { 0, 17, 18, 16, 10,  4, ZSTD_btlazy2 },  /* level 19 */
+    { 0, 17, 18, 18, 12,  4, ZSTD_btlazy2 },  /* level 20 */
+},
+};
+
+
+/* *************************************
+*  Error management
+***************************************/
+#include "error.h"
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* ZSTD_STATIC_H */
diff --git a/programs/.gitignore b/programs/.gitignore
new file mode 100644
index 0000000..021e893
--- /dev/null
+++ b/programs/.gitignore
@@ -0,0 +1,31 @@
+# local binary (Makefile)
+zstd
+zstd32
+fullbench
+fullbench32
+fuzzer
+fuzzer32
+datagen
+
+# Object files
+*.o
+*.ko
+
+# Libraries
+*.lib
+*.a
+
+# Shared objects (inc. Windows DLLs)
+*.dll
+*.so
+*.so.*
+*.dylib
+
+# Executables
+*.exe
+*.out
+*.app
+
+# Visual solution files
+*.suo
+*.user
diff --git a/programs/COPYING b/programs/COPYING
new file mode 100644
index 0000000..d159169
--- /dev/null
+++ b/programs/COPYING
@@ -0,0 +1,339 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+                            NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/programs/Makefile b/programs/Makefile
new file mode 100644
index 0000000..9d73cb9
--- /dev/null
+++ b/programs/Makefile
@@ -0,0 +1,268 @@
+# ##########################################################################
+# ZSTD programs - Makefile
+# Copyright (C) Yann Collet 2015
+#
+# GPL v2 License
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# You can contact the author at :
+#  - ZSTD source repository : http://code.google.com/p/zstd/
+#  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+# ##########################################################################
+# zstd : Command Line Utility, supporting gzip-like arguments
+# datagen : Synthetic and parametrable data generator, for tests
+# fuzzer  : Test tool, to check zstd integrity on target platform
+# fuzzer32: Same as fuzzer, but forced to compile in 32-bits mode
+# fullbench  : Precisely measure speed for each zstd inner function
+# fullbench32: Same as fullbench, but forced to compile in 32-bits mode
+# ##########################################################################
+
+VERSION?= 0.4.0
+
+DESTDIR?=
+PREFIX ?= /usr/local
+CPPFLAGS= -I../lib -DZSTD_VERSION=\"$(VERSION)\"
+CFLAGS ?= -O3  # -falign-loops=32   # not always beneficial
+CFLAGS += -std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-qual -Wcast-align -Wstrict-prototypes
+FLAGS   = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MOREFLAGS)
+
+BINDIR  = $(PREFIX)/bin
+MANDIR  = $(PREFIX)/share/man/man1
+ZSTDDIR = ../lib
+
+ZSTD_FILES := $(ZSTDDIR)/zstd_compress.c $(ZSTDDIR)/zstd_decompress.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/huff0.c
+ZSTD_LEGACY:= $(ZSTDDIR)/legacy/zstd_v01.c $(ZSTDDIR)/legacy/zstd_v02.c $(ZSTDDIR)/legacy/zstd_v03.c
+
+ifeq ($(ZSTD_LEGACY),disable)
+CPPFLAGS  += -DZSTD_LEGACY_SUPPORT=0
+else
+ZSTD_FILES+= $(ZSTD_LEGACY)
+CPPFLAGS  += -I../lib/legacy -I./legacy -DZSTD_LEGACY_SUPPORT=1
+ZSTD_FILEIO_LEGACY = legacy/fileio_legacy.c
+endif
+
+
+# Define *.exe as extension for Windows systems
+ifneq (,$(filter Windows%,$(OS)))
+EXT =.exe
+VOID = nul
+else
+EXT =
+VOID = /dev/null
+endif
+
+ZBUFFTEST = -T2mn
+
+.PHONY: default all clean install uninstall test test32 test-all
+
+default: zstd
+
+all: zstd zstd32 fullbench fullbench32 fuzzer fuzzer32 zbufftest zbufftest32 paramgrill datagen
+
+zstd  : $(ZSTD_FILES) $(ZSTDDIR)/zstd_buffered.c \
+        xxhash.c bench.c fileio.c zstdcli.c $(ZSTD_FILEIO_LEGACY)
+	$(CC)      $(FLAGS) $^ -o $@$(EXT)
+
+zstd32: $(ZSTD_FILES) $(ZSTDDIR)/zstd_buffered.c \
+        xxhash.c bench.c fileio.c zstdcli.c $(ZSTD_FILEIO_LEGACY)
+	$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
+
+fullbench  : $(ZSTD_FILES) \
+        datagen.c fullbench.c
+	$(CC)      $(FLAGS) $^ -o $@$(EXT)
+
+fullbench32: $(ZSTD_FILES) \
+      datagen.c fullbench.c
+	$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
+
+fuzzer  : $(ZSTD_FILES) \
+      datagen.c xxhash.c fuzzer.c
+	$(CC)      $(FLAGS) $^ -o $@$(EXT)
+
+fuzzer32: $(ZSTD_FILES) \
+      datagen.c xxhash.c fuzzer.c
+	$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
+
+zbufftest  : $(ZSTD_FILES) $(ZSTDDIR)/zstd_buffered.c \
+      datagen.c xxhash.c zbufftest.c
+	$(CC)      $(FLAGS) $^ -o $@$(EXT)
+
+zbufftest32: $(ZSTD_FILES) $(ZSTDDIR)/zstd_buffered.c \
+      datagen.c xxhash.c zbufftest.c
+	$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
+
+paramgrill : $(ZSTD_FILES) \
+      datagen.c xxhash.c paramgrill.c
+	$(CC)      $(FLAGS) $^ -lm -o $@$(EXT)
+
+datagen : datagen.c datagencli.c
+	$(CC)      $(FLAGS) $^ -o $@$(EXT)
+
+clean:
+	@rm -f core *.o tmp* \
+        zstd$(EXT) zstd32$(EXT) \
+        fullbench$(EXT) fullbench32$(EXT) \
+        fuzzer$(EXT) fuzzer32$(EXT) zbufftest$(EXT) zbufftest32$(EXT) \
+	datagen$(EXT) paramgrill$(EXT)
+	@echo Cleaning completed
+
+
+#------------------------------------------------------------------------
+#make install is validated only for Linux, OSX, kFreeBSD and Hurd targets
+ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU))
+
+install: zstd
+	@echo Installing binaries
+	@install -d -m 755 $(DESTDIR)$(BINDIR)/ $(DESTDIR)$(MANDIR)/
+	@install -m 755 zstd$(EXT) $(DESTDIR)$(BINDIR)/zstd$(EXT)
+	@ln -sf zstd$(EXT) $(DESTDIR)$(BINDIR)/zstdcat
+	@ln -sf zstd$(EXT) $(DESTDIR)$(BINDIR)/unzstd
+	@echo Installing man pages
+	@install -m 644 zstd.1 $(DESTDIR)$(MANDIR)/zstd.1
+	@ln -sf zstd.1 $(DESTDIR)$(MANDIR)/zstdcat.1
+	@ln -sf zstd.1 $(DESTDIR)$(MANDIR)/unzstd.1
+	@echo zstd installation completed
+
+uninstall:
+	rm -f $(DESTDIR)$(BINDIR)/zstdcat
+	rm -f $(DESTDIR)$(BINDIR)/unzstd
+	[ -x $(DESTDIR)$(BINDIR)/zstd$(EXT) ] && rm -f $(DESTDIR)$(BINDIR)/zstd$(EXT)
+	rm -f $(DESTDIR)$(MANDIR)/zstdcat.1
+	rm -f $(DESTDIR)$(MANDIR)/unzstd.1
+	[ -f $(DESTDIR)$(MANDIR)/zstd.1 ] && rm -f $(DESTDIR)$(MANDIR)/zstd.1
+	@echo zstd programs successfully uninstalled
+
+test: test-zstd test-fullbench test-fuzzer test-zbuff
+
+test32: test-zstd32 test-fullbench32 test-fuzzer32 test-zbuff32
+
+test-all: test test32 valgrindTest
+
+zstd-playTests: datagen
+	@echo "\n**** frame concatenation **** "
+	@echo "hello " > hello.tmp
+	@echo "world!" > world.tmp
+	@cat hello.tmp world.tmp > helloworld.tmp
+	$(ZSTD) hello.tmp > hello.zstd
+	$(ZSTD) world.tmp > world.zstd
+	@cat hello.zstd world.zstd > helloworld.zstd
+	$(ZSTD) -df helloworld.zstd > result.tmp
+	cat result.tmp
+	sdiff helloworld.tmp result.tmp
+	@rm *.tmp *.zstd
+	@echo frame concatenation test completed
+	@echo "**** flush write error test **** "
+	echo foo | $(ZSTD) > /dev/full; if [ $$? -eq 0 ] ; then echo "write error not detected!"; false; fi
+	echo foo | $(ZSTD) | $(ZSTD) -d > /dev/full; if [ $$? -eq 0 ] ; then echo "write error not detected!"; false; fi
+	@echo "**** zstd round-trip tests **** "
+	@./datagen             | md5sum > tmp1
+	./datagen              | $(ZSTD) -v    | $(ZSTD) -d  | md5sum > tmp2
+	@diff tmp1 tmp2   
+	./datagen              | $(ZSTD) -6 -v | $(ZSTD) -d  | md5sum > tmp2
+	@diff tmp1 tmp2   
+	@./datagen -g270000000 | md5sum > tmp1
+	./datagen -g270000000  | $(ZSTD) -v    | $(ZSTD) -d  | md5sum > tmp2
+	@diff tmp1 tmp2   
+	./datagen -g270000000  | $(ZSTD) -v2   | $(ZSTD) -d  | md5sum > tmp2
+	@diff tmp1 tmp2   
+	./datagen -g270000000  | $(ZSTD) -v3   | $(ZSTD) -d  | md5sum > tmp2
+	@diff tmp1 tmp2   
+	@./datagen -g140000000 -P60| md5sum > tmp1
+	./datagen -g140000000 -P60 | $(ZSTD) -v4   | $(ZSTD) -d  | md5sum > tmp2
+	@diff tmp1 tmp2   
+	./datagen -g140000000 -P60 | $(ZSTD) -v5   | $(ZSTD) -d  | md5sum > tmp2
+	@diff tmp1 tmp2   
+	./datagen -g140000000 -P60 | $(ZSTD) -v6   | $(ZSTD) -d  | md5sum > tmp2
+	@diff tmp1 tmp2   
+	@./datagen -g70000000 -P70 | md5sum > tmp1
+	./datagen -g70000000 -P70  | $(ZSTD) -v7   | $(ZSTD) -d  | md5sum > tmp2
+	@diff tmp1 tmp2   
+	./datagen -g70000000 -P70  | $(ZSTD) -v8   | $(ZSTD) -d  | md5sum > tmp2
+	@diff tmp1 tmp2   
+	./datagen -g70000000 -P70  | $(ZSTD) -v9   | $(ZSTD) -d  | md5sum > tmp2
+	@diff tmp1 tmp2   
+	@./datagen -g35000000 -P75 | md5sum > tmp1
+	./datagen -g35000000 -P75  | $(ZSTD) -v10  | $(ZSTD) -d  | md5sum > tmp2
+	@diff tmp1 tmp2   
+	./datagen -g35000000 -P75  | $(ZSTD) -v11  | $(ZSTD) -d  | md5sum > tmp2
+	@diff tmp1 tmp2   
+	./datagen -g35000000 -P75  | $(ZSTD) -v12  | $(ZSTD) -d  | md5sum > tmp2
+	@diff tmp1 tmp2   
+	@./datagen -g18000000 -P80 | md5sum > tmp1
+	./datagen -g18000000 -P80  | $(ZSTD) -v13  | $(ZSTD) -d  | md5sum > tmp2
+	@diff tmp1 tmp2   
+	./datagen -g18000000 -P80  | $(ZSTD) -v14  | $(ZSTD) -d  | md5sum > tmp2
+	@diff tmp1 tmp2   
+	./datagen -g18000000 -P80  | $(ZSTD) -v15  | $(ZSTD) -d  | md5sum > tmp2
+	@diff tmp1 tmp2   
+	./datagen -g18000000 -P80  | $(ZSTD) -v16  | $(ZSTD) -d  | md5sum > tmp2
+	@diff tmp1 tmp2   
+	./datagen -g18000000 -P80  | $(ZSTD) -v17  | $(ZSTD) -d  | md5sum > tmp2
+	@diff tmp1 tmp2   
+	@./datagen -g50000000 -P94 | md5sum > tmp1
+	./datagen -g50000000 -P94  | $(ZSTD) -v18  | $(ZSTD) -d  | md5sum > tmp2
+	@diff tmp1 tmp2   
+	./datagen -g50000000 -P94  | $(ZSTD) -v19  | $(ZSTD) -d  | md5sum > tmp2
+	@diff tmp1 tmp2   
+	@./datagen -g99000000 -P99 | md5sum > tmp1
+	./datagen -g99000000 -P99  | $(ZSTD) -v20  | $(ZSTD) -d  | md5sum > tmp2
+	@diff tmp1 tmp2   
+	./datagen -g6000000000 -P99| md5sum > tmp1
+	./datagen -g6000000000 -P99| $(ZSTD) -vq   | $(ZSTD) -d  | md5sum > tmp2
+	@diff tmp1 tmp2   
+
+test-zstd: ZSTD = ./zstd
+test-zstd: zstd zstd-playTests
+
+test-zstd32: ZSTD = ./zstd32
+test-zstd32: zstd32 zstd-playTests
+
+test-fullbench: fullbench datagen
+	./fullbench -i1
+	./fullbench -i1 -P0
+
+test-fullbench32: fullbench32 datagen
+	./fullbench32 -i1
+	./fullbench32 -i1 -P0
+
+test-fuzzer: fuzzer
+	./fuzzer
+
+test-fuzzer32: fuzzer32
+	./fuzzer32
+
+test-zbuff: zbufftest
+	./zbufftest $(ZBUFFTEST)
+
+test-zbuff32: zbufftest32
+	./zbufftest32 $(ZBUFFTEST)
+
+valgrindTest: zstd datagen fuzzer fullbench zbufftest
+	@echo "\n ---- valgrind tests : memory analyzer ----"
+	valgrind --leak-check=yes --error-exitcode=1 ./datagen -g50M > $(VOID)
+	./datagen -g16KB > tmp
+	valgrind --leak-check=yes --error-exitcode=1 ./zstd -vf tmp $(VOID)
+	./datagen -g2930KB > tmp
+	valgrind --leak-check=yes --error-exitcode=1 ./zstd -5 -vf tmp tmp2
+	valgrind --leak-check=yes --error-exitcode=1 ./zstd -vdf tmp2 $(VOID)
+	./datagen -g64MB > tmp
+	valgrind --leak-check=yes --error-exitcode=1 ./zstd -vf tmp $(VOID)
+	@rm tmp
+	valgrind --leak-check=yes --error-exitcode=1 ./fuzzer -i1000 -t1
+	valgrind --leak-check=yes --error-exitcode=1 ./fullbench -i1
+	valgrind --leak-check=yes --error-exitcode=1 ./zbufftest -T1mn
+
+endif
diff --git a/programs/bench.c b/programs/bench.c
new file mode 100644
index 0000000..fa7ec55
--- /dev/null
+++ b/programs/bench.c
@@ -0,0 +1,529 @@
+/*
+    bench.c - Demo module to benchmark open-source compression algorithms
+    Copyright (C) Yann Collet 2012-2015
+
+    GPL v2 License
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* **************************************
+*  Compiler Options
+****************************************/
+/* Disable some Visual warning messages */
+#define _CRT_SECURE_NO_WARNINGS                  /* fopen */
+
+/* Unix Large Files support (>4GB) */
+#define _FILE_OFFSET_BITS 64
+#if (defined(__sun__) && (!defined(__LP64__)))   /* Sun Solaris 32-bits requires specific definitions */
+#  define _LARGEFILE_SOURCE
+#elif ! defined(__LP64__)                        /* No point defining Large file for 64 bit */
+#  define _LARGEFILE64_SOURCE
+#endif
+
+/* S_ISREG & gettimeofday() are not supported by MSVC */
+#if defined(_MSC_VER) || defined(_WIN32)
+#  define BMK_LEGACY_TIMER 1
+#endif
+
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stdlib.h>      /* malloc, free */
+#include <string.h>      /* memset */
+#include <stdio.h>       /* fprintf, fopen, ftello64 */
+#include <sys/types.h>   /* stat64 */
+#include <sys/stat.h>    /* stat64 */
+
+/* Use ftime() if gettimeofday() is not available */
+#if defined(BMK_LEGACY_TIMER)
+#  include <sys/timeb.h>   /* timeb, ftime */
+#else
+#  include <sys/time.h>    /* gettimeofday */
+#endif
+
+#include "mem.h"
+#include "zstd.h"
+#include "xxhash.h"
+
+
+/* *************************************
+*  Compiler specifics
+***************************************/
+#if !defined(S_ISREG)
+#  define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
+#endif
+
+
+/* *************************************
+*  Constants
+***************************************/
+#define NBLOOPS    3
+#define TIMELOOP   2500
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+static const size_t maxMemory = sizeof(size_t)==4  ?  (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31));
+#define DEFAULT_CHUNKSIZE   (4 MB)
+
+static U32 g_compressibilityDefault = 50;
+static U32 prime1 = 2654435761U;
+static U32 prime2 = 2246822519U;
+
+
+/* *************************************
+*  Macros
+***************************************/
+#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
+
+
+/* *************************************
+*  Benchmark Parameters
+***************************************/
+static int nbIterations = NBLOOPS;
+static size_t g_blockSize = 0;
+
+void BMK_SetNbIterations(int nbLoops)
+{
+    nbIterations = nbLoops;
+    DISPLAY("- %i iterations -\n", nbIterations);
+}
+
+void BMK_SetBlockSize(size_t blockSize)
+{
+    g_blockSize = blockSize;
+    DISPLAY("using blocks of size %u KB \n", (U32)(blockSize>>10));
+}
+
+
+/* ********************************************************
+*  Private functions
+**********************************************************/
+
+#if defined(BMK_LEGACY_TIMER)
+
+static int BMK_GetMilliStart(void)
+{
+  /* Based on Legacy ftime()
+  *  Rolls over every ~ 12.1 days (0x100000/24/60/60)
+  *  Use GetMilliSpan to correct for rollover */
+  struct timeb tb;
+  int nCount;
+  ftime( &tb );
+  nCount = (int) (tb.millitm + (tb.time & 0xfffff) * 1000);
+  return nCount;
+}
+
+#else
+
+static int BMK_GetMilliStart(void)
+{
+  /* Based on newer gettimeofday()
+  *  Use GetMilliSpan to correct for rollover */
+  struct timeval tv;
+  int nCount;
+  gettimeofday(&tv, NULL);
+  nCount = (int) (tv.tv_usec/1000 + (tv.tv_sec & 0xfffff) * 1000);
+  return nCount;
+}
+
+#endif
+
+
+static int BMK_GetMilliSpan( int nTimeStart )
+{
+  int nSpan = BMK_GetMilliStart() - nTimeStart;
+  if ( nSpan < 0 )
+    nSpan += 0x100000 * 1000;
+  return nSpan;
+}
+
+
+/* ********************************************************
+*  Data generator
+**********************************************************/
+/* will hopefully be converted into ROL instruction by compiler */
+static U32 BMK_rotl32(unsigned val32, unsigned nbBits) { return((val32 << nbBits) | (val32 >> (32 - nbBits))); }
+
+static U32 BMK_rand(U32* src)
+{
+    U32 rand32 = *src;
+    rand32 *= prime1;
+    rand32 += prime2;
+    rand32 = BMK_rotl32(rand32, 13);
+    *src = rand32;
+    return rand32 >> 9;
+}
+
+#define BMK_RAND15BITS  ( BMK_rand(&seed) & 0x7FFF)
+#define BMK_RANDLENGTH  ((BMK_rand(&seed) & 3) ? (BMK_rand(&seed) % 15) : (BMK_rand(&seed) % 510) + 15)
+#define BMK_RANDCHAR    (BYTE)((BMK_rand(&seed) & 63) + '0')
+static void BMK_datagen(void* buffer, size_t bufferSize, double proba, U32 seed)
+{
+    BYTE* BBuffer = (BYTE*)buffer;
+    unsigned pos = 0;
+    U32 P32 = (U32)(32768 * proba);
+
+    /* First Byte */
+    BBuffer[pos++] = BMK_RANDCHAR;
+
+    while (pos < bufferSize)
+    {
+        /* Select : Literal (noise) or copy (within 64K) */
+        if (BMK_RAND15BITS < P32)
+        {
+            /* Match */
+            size_t match, end;
+            unsigned length = BMK_RANDLENGTH + 4;
+            unsigned offset = BMK_RAND15BITS + 1;
+            if (offset > pos) offset = pos;
+            match = pos - offset;
+            end = pos + length;
+            if (end > bufferSize) end = bufferSize;
+            while (pos < end) BBuffer[pos++] = BBuffer[match++];
+        }
+        else
+        {
+            /* Literal */
+            size_t end;
+            unsigned length = BMK_RANDLENGTH;
+            end = pos + length;
+            if (end > bufferSize) end = bufferSize;
+            while (pos < end) BBuffer[pos++] = BMK_RANDCHAR;
+        }
+    }
+}
+
+
+/* ********************************************************
+*  Bench functions
+**********************************************************/
+typedef struct
+{
+    char*  srcPtr;
+    size_t srcSize;
+    char*  cPtr;
+    size_t cRoom;
+    size_t cSize;
+    char*  resPtr;
+    size_t resSize;
+} blockParam_t;
+
+typedef size_t (*compressor_t) (void* dst, size_t maxDstSize, const void* src, size_t srcSize, int compressionLevel);
+
+#define MIN(a,b) ((a)<(b) ? (a) : (b))
+
+static int BMK_benchMem(void* srcBuffer, size_t srcSize, const char* fileName, int cLevel)
+{
+    const size_t blockSize = (g_blockSize ? g_blockSize : srcSize) + (!srcSize);   /* avoid div by 0 */
+    const U32 nbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize);
+    blockParam_t* const blockTable = (blockParam_t*) malloc(nbBlocks * sizeof(blockParam_t));
+    const size_t maxCompressedSize = (size_t)nbBlocks * ZSTD_compressBound(blockSize);
+    void* const compressedBuffer = malloc(maxCompressedSize);
+    void* const resultBuffer = malloc(srcSize);
+    const compressor_t compressor = ZSTD_compress;
+    U64 crcOrig;
+
+    /* init */
+    if (strlen(fileName)>16)
+        fileName += strlen(fileName)-16;
+
+    /* Memory allocation & restrictions */
+    if (!compressedBuffer || !resultBuffer || !blockTable)
+    {
+        DISPLAY("\nError: not enough memory!\n");
+        free(compressedBuffer);
+        free(resultBuffer);
+        free(blockTable);
+        return 12;
+    }
+
+    /* Calculating input Checksum */
+    crcOrig = XXH64(srcBuffer, srcSize, 0);
+
+    /* Init blockTable data */
+    {
+        U32 i;
+        size_t remaining = srcSize;
+        char* srcPtr = (char*)srcBuffer;
+        char* cPtr = (char*)compressedBuffer;
+        char* resPtr = (char*)resultBuffer;
+        for (i=0; i<nbBlocks; i++)
+        {
+            size_t thisBlockSize = MIN(remaining, blockSize);
+            blockTable[i].srcPtr = srcPtr;
+            blockTable[i].cPtr = cPtr;
+            blockTable[i].resPtr = resPtr;
+            blockTable[i].srcSize = thisBlockSize;
+            blockTable[i].cRoom = ZSTD_compressBound(thisBlockSize);
+            srcPtr += thisBlockSize;
+            cPtr += blockTable[i].cRoom;
+            resPtr += thisBlockSize;
+            remaining -= thisBlockSize;
+        }
+    }
+
+    /* warmimg up memory */
+    BMK_datagen(compressedBuffer, maxCompressedSize, 0.10, 1);
+
+    /* Bench */
+    {
+        int loopNb;
+        size_t cSize = 0;
+        double fastestC = 100000000., fastestD = 100000000.;
+        double ratio = 0.;
+        U64 crcCheck = 0;
+
+        DISPLAY("\r%79s\r", "");
+        for (loopNb = 1; loopNb <= nbIterations; loopNb++)
+        {
+            int nbLoops;
+            int milliTime;
+            U32 blockNb;
+
+            /* Compression */
+            DISPLAY("%2i-%-17.17s :%10u ->\r", loopNb, fileName, (U32)srcSize);
+            memset(compressedBuffer, 0xE5, maxCompressedSize);
+
+            nbLoops = 0;
+            milliTime = BMK_GetMilliStart();
+            while (BMK_GetMilliStart() == milliTime);
+            milliTime = BMK_GetMilliStart();
+            while (BMK_GetMilliSpan(milliTime) < TIMELOOP)
+            {
+                for (blockNb=0; blockNb<nbBlocks; blockNb++)
+                    blockTable[blockNb].cSize = compressor(blockTable[blockNb].cPtr,  blockTable[blockNb].cRoom, blockTable[blockNb].srcPtr,blockTable[blockNb].srcSize, cLevel);
+                nbLoops++;
+            }
+            milliTime = BMK_GetMilliSpan(milliTime);
+
+            cSize = 0;
+            for (blockNb=0; blockNb<nbBlocks; blockNb++)
+                cSize += blockTable[blockNb].cSize;
+            if ((double)milliTime < fastestC*nbLoops) fastestC = (double)milliTime / nbLoops;
+            ratio = (double)srcSize / (double)cSize;
+            DISPLAY("%2i-%-17.17s :%10i ->%10i (%5.3f),%6.1f MB/s\r", loopNb, fileName, (int)srcSize, (int)cSize, ratio, (double)srcSize / fastestC / 1000.);
+
+#if 1
+            /* Decompression */
+            memset(resultBuffer, 0xD6, srcSize);
+
+            nbLoops = 0;
+            milliTime = BMK_GetMilliStart();
+            while (BMK_GetMilliStart() == milliTime);
+            milliTime = BMK_GetMilliStart();
+            for ( ; BMK_GetMilliSpan(milliTime) < TIMELOOP; nbLoops++)
+            {
+                for (blockNb=0; blockNb<nbBlocks; blockNb++)
+                    blockTable[blockNb].resSize = ZSTD_decompress(blockTable[blockNb].resPtr, blockTable[blockNb].srcSize,
+                                                                  blockTable[blockNb].cPtr, blockTable[blockNb].cSize);
+            }
+            milliTime = BMK_GetMilliSpan(milliTime);
+
+            if ((double)milliTime < fastestD*nbLoops) fastestD = (double)milliTime / nbLoops;
+            DISPLAY("%2i-%-17.17s :%10i ->%10i (%5.3f),%6.1f MB/s ,%6.1f MB/s\r", loopNb, fileName, (int)srcSize, (int)cSize, ratio, (double)srcSize / fastestC / 1000., (double)srcSize / fastestD / 1000.);
+
+            /* CRC Checking */
+            crcCheck = XXH64(resultBuffer, srcSize, 0);
+            if (crcOrig!=crcCheck)
+            {
+                unsigned u;
+                unsigned eBlockSize = (unsigned)(MIN(65536*2, blockSize));
+                DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", fileName, (unsigned)crcOrig, (unsigned)crcCheck);
+                for (u=0; u<srcSize; u++)
+                {
+                    if (((BYTE*)srcBuffer)[u] != ((BYTE*)resultBuffer)[u])
+                    {
+                        printf("Decoding error at pos %u (block %u, pos %u) \n", u, u / eBlockSize, u % eBlockSize);
+                        break;
+                    }
+                }
+                break;
+            }
+#endif
+        }
+
+        if (crcOrig == crcCheck)
+            DISPLAY("%2i-%-17.17s :%10i ->%10i (%5.3f),%6.1f MB/s ,%6.1f MB/s \n", cLevel, fileName, (int)srcSize, (int)cSize, ratio, (double)srcSize / fastestC / 1000., (double)srcSize / fastestD / 1000.);
+    }
+
+    /* End cleaning */
+    free(compressedBuffer);
+    free(resultBuffer);
+    return 0;
+}
+
+
+static U64 BMK_GetFileSize(const char* infilename)
+{
+    int r;
+#if defined(_MSC_VER)
+    struct _stat64 statbuf;
+    r = _stat64(infilename, &statbuf);
+#else
+    struct stat statbuf;
+    r = stat(infilename, &statbuf);
+#endif
+    if (r || !S_ISREG(statbuf.st_mode)) return 0;   /* No good... */
+    return (U64)statbuf.st_size;
+}
+
+static size_t BMK_findMaxMem(U64 requiredMem)
+{
+    size_t step = 64 MB;
+    BYTE* testmem = NULL;
+
+    requiredMem = (((requiredMem >> 26) + 1) << 26);
+    requiredMem += 2 * step;
+    if (requiredMem > maxMemory) requiredMem = maxMemory;
+
+    while (!testmem)
+    {
+        requiredMem -= step;
+        testmem = (BYTE*)malloc((size_t)requiredMem);
+    }
+
+    free(testmem);
+    return (size_t)(requiredMem - step);
+}
+
+static int BMK_benchOneFile(const char* inFileName, int cLevel)
+{
+    FILE*  inFile;
+    U64    inFileSize;
+    size_t benchedSize, readSize;
+    void* srcBuffer;
+    int result=0;
+
+    /* Check file existence */
+    inFile = fopen(inFileName, "rb");
+    if (inFile == NULL)
+    {
+        DISPLAY("Pb opening %s\n", inFileName);
+        return 11;
+    }
+
+    /* Memory allocation & restrictions */
+    inFileSize = BMK_GetFileSize(inFileName);
+    benchedSize = BMK_findMaxMem(inFileSize * 3) / 3;
+    if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
+    if (benchedSize < inFileSize)
+        DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", inFileName, (int)(benchedSize >> 20));
+    srcBuffer = malloc(benchedSize);
+    if (!srcBuffer)
+    {
+        DISPLAY("\nError: not enough memory!\n");
+        fclose(inFile);
+        return 12;
+    }
+
+    /* Fill input buffer */
+    DISPLAY("Loading %s...       \r", inFileName);
+    readSize = fread(srcBuffer, 1, benchedSize, inFile);
+    fclose(inFile);
+
+    if (readSize != benchedSize)
+    {
+        DISPLAY("\nError: problem reading file '%s' !!    \n", inFileName);
+        free(srcBuffer);
+        return 13;
+    }
+
+    /* Bench */
+    if (cLevel<0)
+    {
+        int l;
+        for (l=1; l <= -cLevel; l++)
+            result = BMK_benchMem(srcBuffer, benchedSize, inFileName, l);
+    }
+    else
+        result = BMK_benchMem(srcBuffer, benchedSize, inFileName, cLevel);
+
+    /* clean up */
+    free(srcBuffer);
+    DISPLAY("\n");
+    return result;
+}
+
+
+static int BMK_syntheticTest(int cLevel, double compressibility)
+{
+    size_t benchedSize = 10000000;
+    void* srcBuffer = malloc(benchedSize);
+    int result=0;
+    char name[20] = {0};
+
+    /* Memory allocation */
+    if (!srcBuffer)
+    {
+        DISPLAY("\nError: not enough memory!\n");
+        free(srcBuffer);
+        return 12;
+    }
+
+    /* Fill input buffer */
+    BMK_datagen(srcBuffer, benchedSize, compressibility, 0);
+
+    /* Bench */
+#ifdef _MSC_VER
+    sprintf_s(name, 20, "Synthetic %2u%%", (unsigned)(compressibility*100));
+#else
+    snprintf (name, 20, "Synthetic %2u%%", (unsigned)(compressibility*100));
+#endif
+    /* Bench */
+    if (cLevel<0)
+    {
+        int l;
+        for (l=1; l <= -cLevel; l++)
+            result = BMK_benchMem(srcBuffer, benchedSize, name, l);
+    }
+    else
+        result = BMK_benchMem(srcBuffer, benchedSize, name, cLevel);
+
+    /* End */
+    free(srcBuffer);
+    DISPLAY("\n");
+    return result;
+}
+
+
+int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, unsigned cLevel)
+{
+    double compressibility = (double)g_compressibilityDefault / 100;
+
+    if (nbFiles == 0)
+    {
+        BMK_syntheticTest(cLevel, compressibility);
+    }
+    else
+    {
+        /* Loop for each file */
+        unsigned fileIdx = 0;
+        while (fileIdx<nbFiles)
+        {
+            BMK_benchOneFile(fileNamesTable[fileIdx], cLevel);
+            fileIdx++;
+        }
+    }
+    return 0;
+}
+
diff --git a/programs/bench.h b/programs/bench.h
new file mode 100644
index 0000000..3cc67b4
--- /dev/null
+++ b/programs/bench.h
@@ -0,0 +1,35 @@
+/*
+    bench.h - Demo program to benchmark open-source compression algorithm
+    Copyright (C) Yann Collet 2012-2015
+
+    GPL v2 License
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    You can contact the author at :
+    - LZ4 source repository : http://code.google.com/p/lz4/
+    - LZ4 public forum : https://group.google.com/forum/#!forum/lz4c
+*/
+#pragma once
+
+
+/* Main function */
+int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, unsigned cLevel);
+
+/* Set Parameters */
+void BMK_SetNbIterations(int nbLoops);
+void BMK_SetBlockSize(size_t blockSize);
+
+
diff --git a/programs/datagen.c b/programs/datagen.c
new file mode 100644
index 0000000..f5aa9c5
--- /dev/null
+++ b/programs/datagen.c
@@ -0,0 +1,234 @@
+/*
+    datagen.c - compressible data generator test tool
+    Copyright (C) Yann Collet 2012-2015
+
+    GPL v2 License
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    You can contact the author at :
+   - ZSTD source repository : https://github.com/Cyan4973/zstd
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/**************************************
+*  Includes
+**************************************/
+#include <stdlib.h>    /* malloc */
+#include <stdio.h>     /* FILE, fwrite */
+#include <string.h>    /* memcpy */
+
+
+/**************************************
+*  Basic Types
+**************************************/
+#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)   /* C99 */
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+#else
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+#endif
+
+
+/**************************************
+*  OS-specific Includes
+**************************************/
+#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
+#  include <fcntl.h>   /* _O_BINARY */
+#  include <io.h>      /* _setmode, _isatty */
+#  define SET_BINARY_MODE(file) {int unused = _setmode(_fileno(file), _O_BINARY); (void)unused; }
+#else
+#  define SET_BINARY_MODE(file)
+#endif
+
+
+/**************************************
+*  Constants
+**************************************/
+#define KB *(1 <<10)
+
+#define PRIME1   2654435761U
+#define PRIME2   2246822519U
+
+
+/**************************************
+*  Local types
+**************************************/
+#define LTLOG 13
+#define LTSIZE (1<<LTLOG)
+#define LTMASK (LTSIZE-1)
+typedef BYTE litDistribTable[LTSIZE];
+
+
+
+/*********************************************************
+*  Local Functions
+*********************************************************/
+#define RDG_rotl32(x,r) ((x << r) | (x >> (32 - r)))
+static unsigned int RDG_rand(U32* src)
+{
+    U32 rand32 = *src;
+    rand32 *= PRIME1;
+    rand32 ^= PRIME2;
+    rand32  = RDG_rotl32(rand32, 13);
+    *src = rand32;
+    return rand32;
+}
+
+
+static void RDG_fillLiteralDistrib(litDistribTable lt, double ld)
+{
+    U32 i = 0;
+    BYTE character = '0';
+    BYTE firstChar = '(';
+    BYTE lastChar = '}';
+
+    if (ld==0.0)
+    {
+        character = 0;
+        firstChar = 0;
+        lastChar =255;
+    }
+    while (i<LTSIZE)
+    {
+        U32 weight = (U32)((double)(LTSIZE - i) * ld) + 1;
+        U32 end;
+        if (weight + i > LTSIZE) weight = LTSIZE-i;
+        end = i + weight;
+        while (i < end) lt[i++] = character;
+        character++;
+        if (character > lastChar) character = firstChar;
+    }
+}
+
+
+static BYTE RDG_genChar(U32* seed, const litDistribTable lt)
+{
+    U32 id = RDG_rand(seed) & LTMASK;
+    return (lt[id]);
+}
+
+
+#define RDG_RAND15BITS  ((RDG_rand(seed) >> 3) & 0x7FFF)
+#define RDG_RANDLENGTH  ( ((RDG_rand(seed) >> 7) & 7) ? (RDG_rand(seed) & 15) : (RDG_rand(seed) & 511) + 15)
+void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, litDistribTable lt, unsigned* seedPtr)
+{
+    BYTE* buffPtr = (BYTE*)buffer;
+    const U32 matchProba32 = (U32)(32768 * matchProba);
+    size_t pos = prefixSize;
+    U32* seed = seedPtr;
+    U32 prevOffset = 1;
+
+    /* special case : sparse content */
+    while (matchProba >= 1.0)
+    {
+        size_t size0 = RDG_rand(seed) & 3;
+        size0  = (size_t)1 << (16 + size0 * 2);
+        size0 += RDG_rand(seed) & (size0-1);   /* because size0 is power of 2*/
+        if (buffSize < pos + size0)
+        {
+            memset(buffPtr+pos, 0, buffSize-pos);
+            return;
+        }
+        memset(buffPtr+pos, 0, size0);
+        pos += size0;
+        buffPtr[pos-1] = RDG_genChar(seed, lt);
+        continue;
+    }
+
+    /* init */
+    if (pos==0) buffPtr[0] = RDG_genChar(seed, lt), pos=1;
+
+    /* Generate compressible data */
+    while (pos < buffSize)
+    {
+        /* Select : Literal (char) or Match (within 32K) */
+        if (RDG_RAND15BITS < matchProba32)
+        {
+            /* Copy (within 32K) */
+            size_t match;
+            size_t d;
+            int length = RDG_RANDLENGTH + 4;
+            U32 offset = RDG_RAND15BITS + 1;
+            U32 repeatOffset = (RDG_rand(seed) & 15) == 2;
+            if (repeatOffset) offset = prevOffset;
+            if (offset > pos) offset = (U32)pos;
+            match = pos - offset;
+            d = pos + length;
+            if (d > buffSize) d = buffSize;
+            while (pos < d) buffPtr[pos++] = buffPtr[match++];   /* correctly manages overlaps */
+        }
+        else
+        {
+            /* Literal (noise) */
+            size_t d;
+            size_t length = RDG_RANDLENGTH;
+            d = pos + length;
+            if (d > buffSize) d = buffSize;
+            while (pos < d) buffPtr[pos++] = RDG_genChar(seed, lt);
+        }
+    }
+}
+
+
+void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed)
+{
+    litDistribTable lt;
+    if (litProba==0.0) litProba = matchProba / 4.5;
+    RDG_fillLiteralDistrib(lt, litProba);
+    RDG_genBlock(buffer, size, 0, matchProba, lt, &seed);
+}
+
+
+#define RDG_DICTSIZE  (32 KB)
+#define RDG_BLOCKSIZE (128 KB)
+void RDG_genStdout(unsigned long long size, double matchProba, double litProba, unsigned seed)
+{
+    BYTE* buff = (BYTE*)malloc(RDG_DICTSIZE + RDG_BLOCKSIZE);
+    U64 total = 0;
+    size_t genBlockSize = RDG_BLOCKSIZE;
+    litDistribTable lt;
+
+    /* init */
+    if (buff==NULL) { fprintf(stdout, "not enough memory\n"); exit(1); }
+    if (litProba==0.0) litProba = matchProba / 4.5;
+    RDG_fillLiteralDistrib(lt, litProba);
+    SET_BINARY_MODE(stdout);
+
+    /* Generate initial dict */
+    RDG_genBlock(buff, RDG_DICTSIZE, 0, matchProba, lt, &seed);
+
+    /* Generate compressible data */
+    while (total < size)
+    {
+        RDG_genBlock(buff, RDG_DICTSIZE+RDG_BLOCKSIZE, RDG_DICTSIZE, matchProba, lt, &seed);
+        if (size-total < RDG_BLOCKSIZE) genBlockSize = (size_t)(size-total);
+        total += genBlockSize;
+        fwrite(buff, 1, genBlockSize, stdout);
+        /* update dict */
+        memcpy(buff, buff + RDG_BLOCKSIZE, RDG_DICTSIZE);
+    }
+
+    // cleanup
+    free(buff);
+}
diff --git a/programs/datagen.h b/programs/datagen.h
new file mode 100644
index 0000000..03b06ca
--- /dev/null
+++ b/programs/datagen.h
@@ -0,0 +1,40 @@
+/*
+    datagen.h - compressible data generator header
+    Copyright (C) Yann Collet 2012-2015
+
+    GPL v2 License
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    You can contact the author at :
+   - ZSTD source repository : https://github.com/Cyan4973/zstd
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+
+#include <stddef.h>   /* size_t */
+
+void RDG_genStdout(unsigned long long size, double matchProba, double litProba, unsigned seed);
+void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed);
+/* RDG_genBuffer
+   Generate 'size' bytes of compressible data into 'buffer'.
+   Compressibility can be controlled using 'matchProba', which is floating point value between 0 and 1.
+   'LitProba' is optional, it affect variability of individual bytes. If litProba==0.0, default value will be used.
+   Generated data pattern can be modified using different 'seed'.
+   For a triplet (matchProba, litProba, seed), the function always generate the same content.
+
+   RDG_genStdout
+   Same as RDG_genBuffer, but generates data into stdout
+*/
diff --git a/programs/datagencli.c b/programs/datagencli.c
new file mode 100644
index 0000000..ce581ba
--- /dev/null
+++ b/programs/datagencli.c
@@ -0,0 +1,190 @@
+/*
+    datagencli.c
+    compressible data command line generator
+    Copyright (C) Yann Collet 2012-2015
+
+    GPL v2 License
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    You can contact the author at :
+   - ZSTD source repository : https://github.com/Cyan4973/zstd
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/**************************************
+*  Includes
+**************************************/
+#include <stdio.h>     /* fprintf, stderr */
+#include "datagen.h"   /* RDG_generate */
+
+
+/**************************************
+*  Basic Types
+**************************************/
+#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)   /* C99 */
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+#else
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+#endif
+
+
+/**************************************
+*  Constants
+**************************************/
+#ifndef ZSTD_VERSION
+#  define ZSTD_VERSION "r1"
+#endif
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define SIZE_DEFAULT (64 KB)
+#define SEED_DEFAULT 0
+#define COMPRESSIBILITY_DEFAULT 50
+
+
+/**************************************
+*  Macros
+**************************************/
+#define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
+#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
+static unsigned displayLevel = 2;
+
+
+/*********************************************************
+*  Command line
+*********************************************************/
+static int usage(char* programName)
+{
+    DISPLAY( "Compressible data generator\n");
+    DISPLAY( "Usage :\n");
+    DISPLAY( "      %s [args]\n", programName);
+    DISPLAY( "\n");
+    DISPLAY( "Arguments :\n");
+    DISPLAY( " -g#    : generate # data (default:%i)\n", SIZE_DEFAULT);
+    DISPLAY( " -s#    : Select seed (default:%i)\n", SEED_DEFAULT);
+    DISPLAY( " -P#    : Select compressibility in %% (default:%i%%)\n", COMPRESSIBILITY_DEFAULT);
+    DISPLAY( " -h     : display help and exit\n");
+    return 0;
+}
+
+
+int main(int argc, char** argv)
+{
+    int argNb;
+    double proba = (double)COMPRESSIBILITY_DEFAULT / 100;
+    double litProba = 0.0;
+    U64 size = SIZE_DEFAULT;
+    U32 seed = SEED_DEFAULT;
+    char* programName;
+
+    /* Check command line */
+    programName = argv[0];
+    for(argNb=1; argNb<argc; argNb++)
+    {
+        char* argument = argv[argNb];
+
+        if(!argument) continue;   /* Protection if argument empty */
+
+        /* Handle commands. Aggregated commands are allowed */
+        if (*argument=='-')
+        {
+            argument++;
+            while (*argument!=0)
+            {
+                switch(*argument)
+                {
+                case 'h':
+                    return usage(programName);
+                case 'g':
+                    argument++;
+                    size=0;
+                    while ((*argument>='0') && (*argument<='9'))
+                    {
+                        size *= 10;
+                        size += *argument - '0';
+                        argument++;
+                    }
+                    if (*argument=='K') { size <<= 10; argument++; }
+                    if (*argument=='M') { size <<= 20; argument++; }
+                    if (*argument=='G') { size <<= 30; argument++; }
+                    if (*argument=='B') { argument++; }
+                    break;
+                case 's':
+                    argument++;
+                    seed=0;
+                    while ((*argument>='0') && (*argument<='9'))
+                    {
+                        seed *= 10;
+                        seed += *argument - '0';
+                        argument++;
+                    }
+                    break;
+                case 'P':
+                    argument++;
+                    proba=0.0;
+                    while ((*argument>='0') && (*argument<='9'))
+                    {
+                        proba *= 10;
+                        proba += *argument - '0';
+                        argument++;
+                    }
+                    if (proba>100.) proba=100.;
+                    proba /= 100.;
+                    break;
+                case 'L':   /* hidden argument : Literal distribution probability */
+                    argument++;
+                    litProba=0.;
+                    while ((*argument>='0') && (*argument<='9'))
+                    {
+                        litProba *= 10;
+                        litProba += *argument - '0';
+                        argument++;
+                    }
+                    if (litProba>100.) litProba=100.;
+                    litProba /= 100.;
+                    break;
+                case 'v':
+                    displayLevel = 4;
+                    argument++;
+                    break;
+                default:
+                    return usage(programName);
+                }
+            }
+
+        }
+    }
+
+    DISPLAYLEVEL(4, "Data Generator %s \n", ZSTD_VERSION);
+    DISPLAYLEVEL(3, "Seed = %u \n", seed);
+    if (proba!=COMPRESSIBILITY_DEFAULT) DISPLAYLEVEL(3, "Compressibility : %i%%\n", (U32)(proba*100));
+
+    RDG_genStdout(size, proba, litProba, seed);
+    DISPLAYLEVEL(1, "\n");
+
+    return 0;
+}
diff --git a/programs/fileio.c b/programs/fileio.c
new file mode 100644
index 0000000..e84d633
--- /dev/null
+++ b/programs/fileio.c
@@ -0,0 +1,419 @@
+/*
+  fileio.c - File i/o handler
+  Copyright (C) Yann Collet 2013-2015
+
+  GPL v2 License
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+  You can contact the author at :
+  - zstd source repository : https://github.com/Cyan4973/zstd
+  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+/*
+  Note : this is stand-alone program.
+  It is not part of ZSTD compression library, it is a user program of ZSTD library.
+  The license of ZSTD library is BSD.
+  The license of this file is GPLv2.
+*/
+
+/* *************************************
+*  Tuning options
+***************************************/
+#ifndef ZSTD_LEGACY_SUPPORT
+/**LEGACY_SUPPORT :
+*  decompressor can decode older formats (starting from Zstd 0.1+) */
+#  define ZSTD_LEGACY_SUPPORT 1
+#endif
+
+
+/* *************************************
+*  Compiler Options
+***************************************/
+/* Disable some Visual warning messages */
+#ifdef _MSC_VER
+#  define _CRT_SECURE_NO_WARNINGS
+#  define _CRT_SECURE_NO_DEPRECATE     /* VS2005 */
+#  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */
+#endif
+
+#define _FILE_OFFSET_BITS 64   /* Large file support on 32-bits unix */
+#define _POSIX_SOURCE 1        /* enable fileno() within <stdio.h> on unix */
+
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stdio.h>      /* fprintf, fopen, fread, _fileno, stdin, stdout */
+#include <stdlib.h>     /* malloc, free */
+#include <string.h>     /* strcmp, strlen */
+#include <time.h>       /* clock */
+#include <errno.h>      /* errno */
+#include <sys/types.h>  /* stat64 */
+#include <sys/stat.h>   /* stat64 */
+#include "mem.h"
+#include "fileio.h"
+#include "zstd_static.h"   /* ZSTD_magicNumber */
+#include "zstd_buffered_static.h"
+
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
+#  include "zstd_legacy.h"    /* legacy */
+#  include "fileio_legacy.h"  /* legacy */
+#endif
+
+
+/* *************************************
+*  OS-specific Includes
+***************************************/
+#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
+#  include <fcntl.h>    /* _O_BINARY */
+#  include <io.h>       /* _setmode, _isatty */
+#  ifdef __MINGW32__
+   // int _fileno(FILE *stream);   /* seems no longer useful /* MINGW somehow forgets to include this windows declaration into <stdio.h> */
+#  endif
+#  define SET_BINARY_MODE(file) { int unused = _setmode(_fileno(file), _O_BINARY); (void)unused; }
+#  define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
+#else
+#  include <unistd.h>   /* isatty */
+#  define SET_BINARY_MODE(file)
+#  define IS_CONSOLE(stdStream) isatty(fileno(stdStream))
+#endif
+
+#if !defined(S_ISREG)
+#  define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
+#endif
+
+
+/* *************************************
+*  Constants
+***************************************/
+#define KB *(1U<<10)
+#define MB *(1U<<20)
+#define GB *(1U<<30)
+
+#define _1BIT  0x01
+#define _2BITS 0x03
+#define _3BITS 0x07
+#define _4BITS 0x0F
+#define _6BITS 0x3F
+#define _8BITS 0xFF
+
+#define BIT6  0x40
+#define BIT7  0x80
+
+#define BLOCKSIZE      (128 KB)
+#define ROLLBUFFERSIZE (BLOCKSIZE*8*64)
+
+#define FIO_FRAMEHEADERSIZE 5        /* as a define, because needed to allocated table on stack */
+#define FSE_CHECKSUM_SEED        0
+
+#define CACHELINE 64
+
+
+/* *************************************
+*  Macros
+***************************************/
+#define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
+#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
+static U32 g_displayLevel = 2;   /* 0 : no display;   1: errors;   2 : + result + interaction + warnings;   3 : + progression;   4 : + information */
+
+#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
+            if ((FIO_GetMilliSpan(g_time) > refreshRate) || (g_displayLevel>=4)) \
+            { g_time = clock(); DISPLAY(__VA_ARGS__); \
+            if (g_displayLevel>=4) fflush(stdout); } }
+static const unsigned refreshRate = 150;
+static clock_t g_time = 0;
+
+
+/* *************************************
+*  Local Parameters
+***************************************/
+static U32 g_overwrite = 0;
+
+void FIO_overwriteMode(void) { g_overwrite=1; }
+void FIO_setNotificationLevel(unsigned level) { g_displayLevel=level; }
+
+
+/* *************************************
+*  Exceptions
+***************************************/
+#ifndef DEBUG
+#  define DEBUG 0
+#endif
+#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
+#define EXM_THROW(error, ...)                                             \
+{                                                                         \
+    DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
+    DISPLAYLEVEL(1, "Error %i : ", error);                                \
+    DISPLAYLEVEL(1, __VA_ARGS__);                                         \
+    DISPLAYLEVEL(1, "\n");                                                \
+    exit(error);                                                          \
+}
+
+
+/* *************************************
+*  Functions
+***************************************/
+static unsigned FIO_GetMilliSpan(clock_t nPrevious)
+{
+    clock_t nCurrent = clock();
+    unsigned nSpan = (unsigned)(((nCurrent - nPrevious) * 1000) / CLOCKS_PER_SEC);
+    return nSpan;
+}
+
+
+static void FIO_getFileHandles(FILE** pfinput, FILE** pfoutput, const char* input_filename, const char* output_filename)
+{
+    if (!strcmp (input_filename, stdinmark))
+    {
+        DISPLAYLEVEL(4,"Using stdin for input\n");
+        *pfinput = stdin;
+        SET_BINARY_MODE(stdin);
+    }
+    else
+    {
+        *pfinput = fopen(input_filename, "rb");
+    }
+
+    if (!strcmp (output_filename, stdoutmark))
+    {
+        DISPLAYLEVEL(4,"Using stdout for output\n");
+        *pfoutput = stdout;
+        SET_BINARY_MODE(stdout);
+    }
+    else
+    {
+        /* Check if destination file already exists */
+        *pfoutput=0;
+        if (strcmp(output_filename,nulmark)) *pfoutput = fopen( output_filename, "rb" );
+        if (*pfoutput!=0)
+        {
+            fclose(*pfoutput);
+            if (!g_overwrite)
+            {
+                char ch;
+                if (g_displayLevel <= 1)   /* No interaction possible */
+                    EXM_THROW(11, "Operation aborted : %s already exists", output_filename);
+                DISPLAYLEVEL(2, "Warning : %s already exists\n", output_filename);
+                DISPLAYLEVEL(2, "Overwrite ? (Y/N) : ");
+                ch = (char)getchar();
+                if ((ch!='Y') && (ch!='y')) EXM_THROW(11, "Operation aborted : %s already exists", output_filename);
+            }
+        }
+        *pfoutput = fopen( output_filename, "wb" );
+    }
+
+    if ( *pfinput==0 ) EXM_THROW(12, "Pb opening src : %s", input_filename);
+    if ( *pfoutput==0) EXM_THROW(13, "Pb opening dst : %s", output_filename);
+}
+
+
+static U64 FIO_getFileSize(const char* infilename)
+{
+    int r;
+#if defined(_MSC_VER)
+    struct _stat64 statbuf;
+    r = _stat64(infilename, &statbuf);
+#else
+    struct stat statbuf;
+    r = stat(infilename, &statbuf);
+#endif
+    if (r || !S_ISREG(statbuf.st_mode)) return 0;
+    return (U64)statbuf.st_size;
+}
+
+
+unsigned long long FIO_compressFilename(const char* output_filename, const char* input_filename, int cLevel)
+{
+    U64 filesize = 0;
+    U64 compressedfilesize = 0;
+    BYTE* inBuff;
+    BYTE* outBuff;
+    size_t inBuffSize = ZBUFF_recommendedCInSize();
+    size_t outBuffSize = ZBUFF_recommendedCOutSize();
+    FILE* finput;
+    FILE* foutput;
+    size_t sizeCheck, errorCode;
+    ZBUFF_CCtx* ctx;
+
+    /* Allocate Memory */
+    ctx = ZBUFF_createCCtx();
+    inBuff  = (BYTE*)malloc(inBuffSize);
+    outBuff = (BYTE*)malloc(outBuffSize);
+    if (!inBuff || !outBuff || !ctx) EXM_THROW(21, "Allocation error : not enough memory");
+
+    /* init */
+    FIO_getFileHandles(&finput, &foutput, input_filename, output_filename);
+    filesize = FIO_getFileSize(input_filename);
+    errorCode = ZBUFF_compressInit_advanced(ctx, ZSTD_getParams(cLevel, filesize));
+    if (ZBUFF_isError(errorCode)) EXM_THROW(22, "Error initializing compression");
+    filesize = 0;
+
+    /* Main compression loop */
+    while (1)
+    {
+        size_t inSize;
+
+        /* Fill input Buffer */
+        inSize = fread(inBuff, (size_t)1, inBuffSize, finput);
+        if (inSize==0) break;
+        filesize += inSize;
+        DISPLAYUPDATE(2, "\rRead : %u MB  ", (U32)(filesize>>20));
+
+        {
+            /* Compress (buffered streaming ensures appropriate formatting) */
+            size_t usedInSize = inSize;
+            size_t cSize = outBuffSize;
+            size_t result = ZBUFF_compressContinue(ctx, outBuff, &cSize, inBuff, &usedInSize);
+            if (ZBUFF_isError(result))
+                EXM_THROW(23, "Compression error : %s ", ZBUFF_getErrorName(result));
+            if (inSize != usedInSize)
+                /* inBuff should be entirely consumed since buffer sizes are recommended ones */
+                EXM_THROW(24, "Compression error : input block not fully consumed");
+
+            /* Write cBlock */
+            sizeCheck = fwrite(outBuff, 1, cSize, foutput);
+            if (sizeCheck!=cSize) EXM_THROW(25, "Write error : cannot write compressed block into %s", output_filename);
+            compressedfilesize += cSize;
+        }
+
+        DISPLAYUPDATE(2, "\rRead : %u MB  ==> %.2f%%   ", (U32)(filesize>>20), (double)compressedfilesize/filesize*100);
+    }
+
+    /* End of Frame */
+    {
+        size_t cSize = outBuffSize;
+        size_t result = ZBUFF_compressEnd(ctx, outBuff, &cSize);
+        if (result!=0) EXM_THROW(26, "Compression error : cannot create frame end");
+
+        sizeCheck = fwrite(outBuff, 1, cSize, foutput);
+        if (sizeCheck!=cSize) EXM_THROW(27, "Write error : cannot write frame end into %s", output_filename);
+        compressedfilesize += cSize;
+    }
+
+    /* Status */
+    DISPLAYLEVEL(2, "\r%79s\r", "");
+    DISPLAYLEVEL(2,"Compressed %llu bytes into %llu bytes ==> %.2f%%\n",
+        (unsigned long long) filesize, (unsigned long long) compressedfilesize, (double)compressedfilesize/filesize*100);
+
+    /* clean */
+    free(inBuff);
+    free(outBuff);
+    ZBUFF_freeCCtx(ctx);
+    fclose(finput);
+    if (fclose(foutput)) EXM_THROW(28, "Write error : cannot properly close %s", output_filename);
+
+    return compressedfilesize;
+}
+
+
+unsigned long long FIO_decompressFrame(FILE* foutput, FILE* finput,
+                                       BYTE* inBuff, size_t inBuffSize, size_t alreadyLoaded,
+                                       BYTE* outBuff, size_t outBuffSize,
+                                       ZBUFF_DCtx* dctx)
+{
+    U64    frameSize = 0;
+    size_t readSize=alreadyLoaded;
+
+    /* Main decompression Loop */
+    ZBUFF_decompressInit(dctx);
+    while (1)
+    {
+        /* Decode */
+        size_t sizeCheck;
+        size_t inSize=readSize, decodedSize=outBuffSize;
+        size_t inStart=0;
+        size_t toRead = ZBUFF_decompressContinue(dctx, outBuff, &decodedSize, inBuff+inStart, &inSize);
+        if (ZBUFF_isError(toRead)) EXM_THROW(36, "Decoding error : %s", ZBUFF_getErrorName(toRead));
+        readSize -= inSize;
+        inStart += inSize;
+
+        /* Write block */
+        sizeCheck = fwrite(outBuff, 1, decodedSize, foutput);
+        if (sizeCheck != decodedSize) EXM_THROW(37, "Write error : unable to write data block to destination file");
+        frameSize += decodedSize;
+        DISPLAYUPDATE(2, "\rDecoded : %u MB...     ", (U32)(frameSize>>20) );
+
+        if (toRead == 0) break;
+        if (readSize) continue;   /* still some data left within inBuff */
+
+        /* Fill input buffer */
+        if (toRead > inBuffSize) EXM_THROW(34, "too large block");
+        readSize = fread(inBuff, 1, toRead, finput);
+        if (readSize != toRead) EXM_THROW(35, "Read error");
+    }
+
+    return frameSize;
+}
+
+
+unsigned long long FIO_decompressFilename(const char* output_filename, const char* input_filename)
+{
+    FILE* finput, *foutput;
+    BYTE* inBuff=NULL;
+    size_t inBuffSize = ZBUFF_recommendedDInSize();
+    BYTE* outBuff=NULL;
+    size_t outBuffSize = ZBUFF_recommendedDOutSize();
+    U64   filesize = 0;
+    size_t toRead;
+
+
+    /* Init */
+    ZBUFF_DCtx* dctx = ZBUFF_createDCtx();
+    FIO_getFileHandles(&finput, &foutput, input_filename, output_filename);
+
+    /* Allocate Memory (if needed) */
+    inBuff  = (BYTE*)malloc(inBuffSize);
+    outBuff  = (BYTE*)malloc(outBuffSize);
+    if (!inBuff || !outBuff) EXM_THROW(33, "Allocation error : not enough memory");
+
+    /* for each frame */
+    for ( ; ; )
+    {
+        toRead = 0;
+
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
+        {
+            size_t sizeCheck;
+            /* check magic number -> version */
+            toRead = 4;
+            sizeCheck = fread(inBuff, (size_t)1, toRead, finput);
+            if (sizeCheck==0) break;   /* no more input */
+            if (sizeCheck != toRead) EXM_THROW(31, "Read error : cannot read header");
+            if (ZSTD_isLegacy(MEM_readLE32(inBuff)))
+            {
+                filesize += FIO_decompressLegacyFrame(foutput, finput, MEM_readLE32(inBuff));
+                continue;
+            }
+        }
+#endif   /* ZSTD_LEGACY_SUPPORT */
+
+        filesize += FIO_decompressFrame(foutput, finput, inBuff, inBuffSize, toRead, outBuff, outBuffSize, dctx);
+    }
+
+    DISPLAYLEVEL(2, "\r%79s\r", "");
+    DISPLAYLEVEL(2, "Decoded %llu bytes   \n", (long long unsigned)filesize);
+
+    /* clean */
+    free(inBuff);
+    free(outBuff);
+    ZBUFF_freeDCtx(dctx);
+    fclose(finput);
+    if (fclose(foutput)) EXM_THROW(38, "Write error : cannot properly close %s", output_filename);
+
+    return filesize;
+}
+
+
diff --git a/programs/fileio.h b/programs/fileio.h
new file mode 100644
index 0000000..037c819
--- /dev/null
+++ b/programs/fileio.h
@@ -0,0 +1,69 @@
+/*
+  fileio.h - file i/o handler
+  Copyright (C) Yann Collet 2013-2015
+
+  GPL v2 License
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+  You can contact the author at :
+  - ZSTD source repository : https://github.com/Cyan4973/zstd
+  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* *************************************
+*  Special i/o constants
+**************************************/
+#define nullString "null"
+#define stdinmark "-"
+#define stdoutmark "-"
+#ifdef _WIN32
+#  define nulmark "nul"
+#else
+#  define nulmark "/dev/null"
+#endif
+
+
+/* *************************************
+*  Parameters
+***************************************/
+void FIO_overwriteMode(void);
+void FIO_setNotificationLevel(unsigned level);
+
+
+/* *************************************
+*  Stream/File functions
+***************************************/
+unsigned long long FIO_compressFilename (const char* outfilename, const char* infilename, int compressionLevel);
+unsigned long long FIO_decompressFilename (const char* outfilename, const char* infilename);
+/**
+FIO_compressFilename :
+    @result : size of compressed file
+
+FIO_decompressFilename :
+    @result : size of regenerated file
+*/
+
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/programs/fullbench.c b/programs/fullbench.c
new file mode 100644
index 0000000..cb48220
--- /dev/null
+++ b/programs/fullbench.c
@@ -0,0 +1,590 @@
+/*
+    fullbench.c - Detailed bench program for zstd
+    Copyright (C) Yann Collet 2014-2015
+
+    GPL v2 License
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/**************************************
+*  Compiler Options
+**************************************/
+/* Disable some Visual warning messages */
+#define _CRT_SECURE_NO_WARNINGS
+#define _CRT_SECURE_NO_DEPRECATE     /* VS2005 */
+
+/* Unix Large Files support (>4GB) */
+#if (defined(__sun__) && (!defined(__LP64__)))   /* Sun Solaris 32-bits requires specific definitions */
+#  define _LARGEFILE_SOURCE
+#  define _FILE_OFFSET_BITS 64
+#elif ! defined(__LP64__)                        /* No point defining Large file for 64 bit */
+#  define _LARGEFILE64_SOURCE
+#endif
+
+/* S_ISREG & gettimeofday() are not supported by MSVC */
+#if defined(_MSC_VER) || defined(_WIN32)
+#  define BMK_LEGACY_TIMER 1
+#endif
+
+
+/**************************************
+*  Includes
+**************************************/
+#include <stdlib.h>       /* malloc */
+#include <stdio.h>        /* fprintf, fopen, ftello64 */
+#include <sys/types.h>    /* stat64 */
+#include <sys/stat.h>     /* stat64 */
+#include <string.h>       /* strcmp */
+
+/* Use ftime() if gettimeofday() is not available on your target */
+#if defined(BMK_LEGACY_TIMER)
+#  include <sys/timeb.h>  /* timeb, ftime */
+#else
+#  include <sys/time.h>   /* gettimeofday */
+#endif
+
+#include "mem.h"
+#include "zstd_static.h"
+#include "fse_static.h"
+#include "datagen.h"
+
+
+/**************************************
+*  Compiler Options
+**************************************/
+/* S_ISREG & gettimeofday() are not supported by MSVC */
+#if !defined(S_ISREG)
+#  define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
+#endif
+
+
+/**************************************
+*  Constants
+**************************************/
+#define PROGRAM_DESCRIPTION "Zstandard speed analyzer"
+#ifndef ZSTD_VERSION
+#  define ZSTD_VERSION ""
+#endif
+#define AUTHOR "Yann Collet"
+#define WELCOME_MESSAGE "*** %s %s %i-bits, by %s (%s) ***\n", PROGRAM_DESCRIPTION, ZSTD_VERSION, (int)(sizeof(void*)*8), AUTHOR, __DATE__
+
+
+#define KB *(1<<10)
+#define MB *(1<<20)
+
+#define NBLOOPS    6
+#define TIMELOOP   2500
+
+#define KNUTH      2654435761U
+#define MAX_MEM    (1984 MB)
+#define DEFAULT_CHUNKSIZE   (4<<20)
+
+#define COMPRESSIBILITY_DEFAULT 0.50
+static const size_t sampleSize = 10000000;
+
+
+/**************************************
+*  Macros
+**************************************/
+#define DISPLAY(...)  fprintf(stderr, __VA_ARGS__)
+
+
+/**************************************
+*  Benchmark Parameters
+**************************************/
+static int nbIterations = NBLOOPS;
+static double g_compressibility = COMPRESSIBILITY_DEFAULT;
+
+void BMK_SetNbIterations(int nbLoops)
+{
+    nbIterations = nbLoops;
+    DISPLAY("- %i iterations -\n", nbIterations);
+}
+
+
+/*********************************************************
+*  Private functions
+*********************************************************/
+
+#if defined(BMK_LEGACY_TIMER)
+
+static int BMK_GetMilliStart(void)
+{
+  /* Based on Legacy ftime()
+  *  Rolls over every ~ 12.1 days (0x100000/24/60/60)
+  *  Use GetMilliSpan to correct for rollover */
+  struct timeb tb;
+  int nCount;
+  ftime( &tb );
+  nCount = (int) (tb.millitm + (tb.time & 0xfffff) * 1000);
+  return nCount;
+}
+
+#else
+
+static int BMK_GetMilliStart(void)
+{
+  /* Based on newer gettimeofday()
+  *  Use GetMilliSpan to correct for rollover */
+  struct timeval tv;
+  int nCount;
+  gettimeofday(&tv, NULL);
+  nCount = (int) (tv.tv_usec/1000 + (tv.tv_sec & 0xfffff) * 1000);
+  return nCount;
+}
+
+#endif
+
+
+static int BMK_GetMilliSpan( int nTimeStart )
+{
+  int nSpan = BMK_GetMilliStart() - nTimeStart;
+  if ( nSpan < 0 )
+    nSpan += 0x100000 * 1000;
+  return nSpan;
+}
+
+
+static size_t BMK_findMaxMem(U64 requiredMem)
+{
+    size_t step = 64 MB;
+    BYTE* testmem=NULL;
+
+    requiredMem = (((requiredMem >> 26) + 1) << 26);
+    if (requiredMem > MAX_MEM) requiredMem = MAX_MEM;
+
+    requiredMem += 2*step;
+    while (!testmem)
+    {
+        requiredMem -= step;
+        testmem = (BYTE*) malloc ((size_t)requiredMem);
+    }
+
+    free (testmem);
+    return (size_t) (requiredMem - step);
+}
+
+
+static U64 BMK_GetFileSize(char* infilename)
+{
+    int r;
+#if defined(_MSC_VER)
+    struct _stat64 statbuf;
+    r = _stat64(infilename, &statbuf);
+#else
+    struct stat statbuf;
+    r = stat(infilename, &statbuf);
+#endif
+    if (r || !S_ISREG(statbuf.st_mode)) return 0;   /* No good... */
+    return (U64)statbuf.st_size;
+}
+
+
+/*********************************************************
+*  Benchmark wrappers
+*********************************************************/
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
+typedef struct
+{
+    blockType_t blockType;
+    U32 unusedBits;
+    U32 origSize;
+} blockProperties_t;
+
+static size_t g_cSize = 0;
+static ZSTD_DCtx* g_dctxPtr = NULL;
+
+extern size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr);
+extern size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr, FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, const void* src, size_t srcSize);
+
+size_t local_ZSTD_compress(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize)
+{
+    (void)buff2;
+    return ZSTD_compress(dst, dstSize, src, srcSize, 1);
+}
+
+size_t local_ZSTD_decompress(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize)
+{
+    (void)src; (void)srcSize;
+    return ZSTD_decompress(dst, dstSize, buff2, g_cSize);
+}
+
+extern size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* ctx, const void* src, size_t srcSize);
+size_t local_ZSTD_decodeLiteralsBlock(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize)
+{
+    (void)src; (void)srcSize; (void)dst; (void)dstSize;
+    return ZSTD_decodeLiteralsBlock((ZSTD_DCtx*)g_dctxPtr, buff2, g_cSize);
+}
+
+size_t local_ZSTD_decodeSeqHeaders(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize)
+{
+    U32 DTableML[1<<11], DTableLL[1<<10], DTableOffb[1<<9];
+    const BYTE* dumps;
+    size_t length;
+    int nbSeq;
+    (void)src; (void)srcSize; (void)dst; (void)dstSize;
+    return ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &length, DTableLL, DTableML, DTableOffb, buff2, g_cSize);
+}
+
+
+
+
+/*********************************************************
+*  Bench functions
+*********************************************************/
+size_t benchMem(void* src, size_t srcSize, U32 benchNb)
+{
+    BYTE*  dstBuff;
+    size_t dstBuffSize;
+    BYTE*  buff2;
+    int loopNb;
+    const char* benchName;
+    size_t (*benchFunction)(void* dst, size_t dstSize, void* verifBuff, const void* src, size_t srcSize);
+    double bestTime = 100000000.;
+    size_t errorCode = 0;
+
+    /* Selection */
+    switch(benchNb)
+    {
+    case 1:
+        benchFunction = local_ZSTD_compress; benchName = "ZSTD_compress";
+        break;
+    case 11:
+        benchFunction = local_ZSTD_decompress; benchName = "ZSTD_decompress";
+        break;
+    case 31:
+        benchFunction = local_ZSTD_decodeLiteralsBlock; benchName = "ZSTD_decodeLiteralsBlock";
+        break;
+    case 32:
+        benchFunction = local_ZSTD_decodeSeqHeaders; benchName = "ZSTD_decodeSeqHeaders";
+        break;
+    default :
+        return 0;
+    }
+
+    /* Allocation */
+    dstBuffSize = ZSTD_compressBound(srcSize);
+    dstBuff = (BYTE*)malloc(dstBuffSize);
+    buff2 = (BYTE*)malloc(dstBuffSize);
+    g_dctxPtr = ZSTD_createDCtx();
+    if ((!dstBuff) || (!buff2))
+    {
+        DISPLAY("\nError: not enough memory!\n");
+        free(dstBuff); free(buff2);
+        return 12;
+    }
+
+    /* Preparation */
+    switch(benchNb)
+    {
+    case 11:
+        g_cSize = ZSTD_compress(buff2, dstBuffSize, src, srcSize, 1);
+        break;
+    case 31:  /* ZSTD_decodeLiteralsBlock */
+        {
+            blockProperties_t bp;
+            g_cSize = ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, 1);
+            ZSTD_getcBlockSize(dstBuff+4, dstBuffSize, &bp);  /* Get 1st block type */
+            if (bp.blockType != bt_compressed)
+            {
+                DISPLAY("ZSTD_decodeLiteralsBlock : impossible to test on this sample (not compressible)\n");
+                goto _cleanOut;
+            }
+            memcpy(buff2, dstBuff+8, g_cSize-8);
+            srcSize = srcSize > 128 KB ? 128 KB : srcSize;    /* speed relative to block */
+            break;
+        }
+    case 32:   /* ZSTD_decodeSeqHeaders */
+        {
+            blockProperties_t bp;
+            const BYTE* ip = dstBuff;
+            const BYTE* iend;
+            size_t blockSize;
+            ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, 1);   /* it would be better to use direct block compression here */
+            ip += 5;   /* Skip frame Header */
+            blockSize = ZSTD_getcBlockSize(ip, dstBuffSize, &bp);   /* Get 1st block type */
+            if (bp.blockType != bt_compressed)
+            {
+                DISPLAY("ZSTD_decodeSeqHeaders : impossible to test on this sample (not compressible)\n");
+                goto _cleanOut;
+            }
+            iend = ip + 3 + blockSize;   /* End of first block */
+            ip += 3;                     /* skip block header */
+            ip += ZSTD_decodeLiteralsBlock(g_dctxPtr, ip, iend-ip);  /* skip literal segment */
+            g_cSize = iend-ip;
+            memcpy(buff2, ip, g_cSize);   /* copy rest of block (it starts by SeqHeader) */
+            srcSize = srcSize > 128 KB ? 128 KB : srcSize;   /* speed relative to block */
+            break;
+        }
+
+    /* test functions */
+    /* by convention, test functions can be added > 100 */
+
+    default : ;
+    }
+
+    { size_t i; for (i=0; i<dstBuffSize; i++) dstBuff[i]=(BYTE)i; }     /* warming up memory */
+
+    for (loopNb = 1; loopNb <= nbIterations; loopNb++)
+    {
+        double averageTime;
+        int milliTime;
+        U32 nbRounds=0;
+
+        DISPLAY("%2i- %-30.30s : \r", loopNb, benchName);
+
+        milliTime = BMK_GetMilliStart();
+        while(BMK_GetMilliStart() == milliTime);
+        milliTime = BMK_GetMilliStart();
+        while(BMK_GetMilliSpan(milliTime) < TIMELOOP)
+        {
+            errorCode = benchFunction(dstBuff, dstBuffSize, buff2, src, srcSize);
+            if (ZSTD_isError(errorCode)) { DISPLAY("ERROR ! %s() => %s !! \n", benchName, ZSTD_getErrorName(errorCode)); exit(1); }
+            nbRounds++;
+        }
+        milliTime = BMK_GetMilliSpan(milliTime);
+
+        averageTime = (double)milliTime / nbRounds;
+        if (averageTime < bestTime) bestTime = averageTime;
+        DISPLAY("%2i- %-30.30s : %7.1f MB/s  (%9u)\r", loopNb, benchName, (double)srcSize / bestTime / 1000., (U32)errorCode);
+    }
+
+    DISPLAY("%2u- %-30.30s : %7.1f MB/s  (%9u)\n", benchNb, benchName, (double)srcSize / bestTime / 1000., (U32)errorCode);
+
+_cleanOut:
+    free(dstBuff);
+    free(buff2);
+    ZSTD_freeDCtx(g_dctxPtr);
+    return 0;
+}
+
+
+int benchSample(U32 benchNb)
+{
+    char* origBuff;
+    size_t benchedSize = sampleSize;
+    const char* name = "Sample 10MiB";
+
+    /* Allocation */
+    origBuff = (char*) malloc((size_t)benchedSize);
+    if(!origBuff)
+    {
+        DISPLAY("\nError: not enough memory!\n");
+        return 12;
+    }
+
+    /* Fill buffer */
+    RDG_genBuffer(origBuff, benchedSize, g_compressibility, 0.0, 0);
+
+    /* bench */
+    DISPLAY("\r%79s\r", "");
+    DISPLAY(" %s : \n", name);
+    if (benchNb)
+        benchMem(origBuff, benchedSize, benchNb);
+    else
+        for (benchNb=0; benchNb<100; benchNb++) benchMem(origBuff, benchedSize, benchNb);
+
+    free(origBuff);
+    return 0;
+}
+
+
+int benchFiles(char** fileNamesTable, int nbFiles, U32 benchNb)
+{
+    int fileIdx=0;
+
+    /* Loop for each file */
+    while (fileIdx<nbFiles)
+    {
+        FILE* inFile;
+        char* inFileName;
+        U64   inFileSize;
+        size_t benchedSize;
+        size_t readSize;
+        char* origBuff;
+
+        /* Check file existence */
+        inFileName = fileNamesTable[fileIdx++];
+        inFile = fopen( inFileName, "rb" );
+        if (inFile==NULL)
+        {
+            DISPLAY( "Pb opening %s\n", inFileName);
+            return 11;
+        }
+
+        // Memory allocation & restrictions
+        inFileSize = BMK_GetFileSize(inFileName);
+        benchedSize = (size_t) BMK_findMaxMem(inFileSize*3) / 3;
+        if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
+        if (benchedSize < inFileSize)
+        {
+            DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", inFileName, (int)(benchedSize>>20));
+        }
+
+        /* Alloc */
+        origBuff = (char*) malloc((size_t)benchedSize);
+        if(!origBuff)
+        {
+            DISPLAY("\nError: not enough memory!\n");
+            fclose(inFile);
+            return 12;
+        }
+
+        /* Fill input buffer */
+        DISPLAY("Loading %s...       \r", inFileName);
+        readSize = fread(origBuff, 1, benchedSize, inFile);
+        fclose(inFile);
+
+        if(readSize != benchedSize)
+        {
+            DISPLAY("\nError: problem reading file '%s' !!    \n", inFileName);
+            free(origBuff);
+            return 13;
+        }
+
+        /* bench */
+        DISPLAY("\r%79s\r", "");
+        DISPLAY(" %s : \n", inFileName);
+        if (benchNb)
+            benchMem(origBuff, benchedSize, benchNb);
+        else
+            for (benchNb=0; benchNb<100; benchNb++) benchMem(origBuff, benchedSize, benchNb);
+    }
+
+    return 0;
+}
+
+
+int usage(char* exename)
+{
+    DISPLAY( "Usage :\n");
+    DISPLAY( "      %s [arg] file1 file2 ... fileX\n", exename);
+    DISPLAY( "Arguments :\n");
+    DISPLAY( " -H/-h  : Help (this text + advanced options)\n");
+    return 0;
+}
+
+int usage_advanced(void)
+{
+    DISPLAY( "\nAdvanced options :\n");
+    DISPLAY( " -b#    : test only function # \n");
+    DISPLAY( " -i#    : iteration loops [1-9](default : %i)\n", NBLOOPS);
+    DISPLAY( " -P#    : sample compressibility (default : %.1f%%)\n", COMPRESSIBILITY_DEFAULT * 100);
+    return 0;
+}
+
+int badusage(char* exename)
+{
+    DISPLAY("Wrong parameters\n");
+    usage(exename);
+    return 0;
+}
+
+int main(int argc, char** argv)
+{
+    int i,
+        filenamesStart=0,
+        result;
+    char* exename=argv[0];
+    char* input_filename=0;
+    U32 benchNb = 0, main_pause = 0;
+
+    // Welcome message
+    DISPLAY(WELCOME_MESSAGE);
+
+    if (argc<1) { badusage(exename); return 1; }
+
+    for(i=1; i<argc; i++)
+    {
+        char* argument = argv[i];
+
+        if(!argument) continue;   // Protection if argument empty
+
+        // Decode command (note : aggregated commands are allowed)
+        if (argument[0]=='-')
+        {
+            while (argument[1]!=0)
+            {
+                argument ++;
+
+                switch(argument[0])
+                {
+                    /* Display help on usage */
+                case 'h' :
+                case 'H': usage(exename); usage_advanced(); return 0;
+
+                    /* Pause at the end (hidden option) */
+                case 'p': main_pause = 1; break;
+
+                    /* Select specific algorithm to bench */
+                case 'b':
+                    benchNb = 0;
+                    while ((argument[1]>= '0') && (argument[1]<= '9'))
+                    {
+                        benchNb *= 10;
+                        benchNb += argument[1] - '0';
+                        argument++;
+                    }
+                    break;
+
+                    /* Modify Nb Iterations */
+                case 'i':
+                    if ((argument[1] >='0') && (argument[1] <='9'))
+                    {
+                        int iters = argument[1] - '0';
+                        BMK_SetNbIterations(iters);
+                        argument++;
+                    }
+                    break;
+
+                    /* Select specific algorithm to bench */
+                case 'P':
+                    {
+                        U32 proba32 = 0;
+                        while ((argument[1]>= '0') && (argument[1]<= '9'))
+                        {
+                            proba32 *= 10;
+                            proba32 += argument[1] - '0';
+                            argument++;
+                        }
+                        g_compressibility = (double)proba32 / 100.;
+                    }
+                    break;
+
+                    /* Unknown command */
+                default : badusage(exename); return 1;
+                }
+            }
+            continue;
+        }
+
+        /* first provided filename is input */
+        if (!input_filename) { input_filename=argument; filenamesStart=i; continue; }
+    }
+
+    if (filenamesStart==0)
+        result = benchSample(benchNb);
+    else result = benchFiles(argv+filenamesStart, argc-filenamesStart, benchNb);
+
+    if (main_pause) { int unused; printf("press enter...\n"); unused = getchar(); (void)unused; }
+
+    return result;
+}
+
diff --git a/programs/fuzzer.c b/programs/fuzzer.c
new file mode 100644
index 0000000..793af7f
--- /dev/null
+++ b/programs/fuzzer.c
@@ -0,0 +1,596 @@
+/*
+    Fuzzer test tool for zstd
+    Copyright (C) Yann Collet 2014-2105
+
+    GPL v2 License
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    You can contact the author at :
+    - ZSTD source repository : https://github.com/Cyan4973/zstd
+    - ZSTD public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/**************************************
+*  Compiler specific
+**************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define _CRT_SECURE_NO_WARNINGS     /* fgets */
+#  pragma warning(disable : 4127)     /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4146)     /* disable: C4146: minus unsigned expression */
+#endif
+
+#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-braces"   /* GCC bug 53119 : doesn't accept { 0 } as initializer (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119) */
+#  pragma GCC diagnostic ignored "-Wmissing-field-initializers"   /* GCC bug 53119 : doesn't accept { 0 } as initializer (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119) */
+#endif
+
+
+/**************************************
+*  Includes
+**************************************/
+#include <stdlib.h>      /* free */
+#include <stdio.h>       /* fgets, sscanf */
+#include <sys/timeb.h>   /* timeb */
+#include <string.h>      /* strcmp */
+#include "zstd_static.h"
+#include "datagen.h"     /* RDG_genBuffer */
+#include "xxhash.h"      /* XXH64 */
+#include "mem.h"
+
+
+/**************************************
+ Constants
+**************************************/
+#ifndef ZSTD_VERSION
+#  define ZSTD_VERSION ""
+#endif
+
+#define KB *(1U<<10)
+#define MB *(1U<<20)
+#define GB *(1U<<30)
+
+static const U32 nbTestsDefault = 30000;
+#define COMPRESSIBLE_NOISE_LENGTH (10 MB)
+#define FUZ_COMPRESSIBILITY_DEFAULT 50
+static const U32 prime1 = 2654435761U;
+static const U32 prime2 = 2246822519U;
+
+
+
+/**************************************
+*  Display Macros
+**************************************/
+#define DISPLAY(...)          fprintf(stderr, __VA_ARGS__)
+#define DISPLAYLEVEL(l, ...)  if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
+static U32 g_displayLevel = 2;
+
+#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
+            if ((FUZ_GetMilliSpan(g_time) > g_refreshRate) || (g_displayLevel>=4)) \
+            { g_time = FUZ_GetMilliStart(); DISPLAY(__VA_ARGS__); \
+            if (g_displayLevel>=4) fflush(stdout); } }
+static const U32 g_refreshRate = 150;
+static U32 g_time = 0;
+
+
+/*********************************************************
+*  Fuzzer functions
+*********************************************************/
+#define MAX(a,b) ((a)>(b)?(a):(b))
+
+static U32 FUZ_GetMilliStart(void)
+{
+    struct timeb tb;
+    U32 nCount;
+    ftime( &tb );
+    nCount = (U32) (((tb.time & 0xFFFFF) * 1000) +  tb.millitm);
+    return nCount;
+}
+
+
+static U32 FUZ_GetMilliSpan(U32 nTimeStart)
+{
+    U32 nCurrent = FUZ_GetMilliStart();
+    U32 nSpan = nCurrent - nTimeStart;
+    if (nTimeStart > nCurrent)
+        nSpan += 0x100000 * 1000;
+    return nSpan;
+}
+
+
+#  define FUZ_rotl32(x,r) ((x << r) | (x >> (32 - r)))
+unsigned int FUZ_rand(unsigned int* src)
+{
+    U32 rand32 = *src;
+    rand32 *= prime1;
+    rand32 += prime2;
+    rand32  = FUZ_rotl32(rand32, 13);
+    *src = rand32;
+    return rand32 >> 5;
+}
+
+
+static unsigned FUZ_highbit32(U32 v32)
+{
+    unsigned nbBits = 0;
+    if (v32==0) return 0;
+    while (v32)
+    {
+        v32 >>= 1;
+        nbBits ++;
+    }
+    return nbBits;
+}
+
+
+static int basicUnitTests(U32 seed, double compressibility)
+{
+    int testResult = 0;
+    void* CNBuffer;
+    void* compressedBuffer;
+    void* decodedBuffer;
+    U32 randState = seed;
+    size_t result, cSize;
+    U32 testNb=0;
+
+    /* Create compressible test buffer */
+    CNBuffer = malloc(COMPRESSIBLE_NOISE_LENGTH);
+    compressedBuffer = malloc(ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH));
+    decodedBuffer = malloc(COMPRESSIBLE_NOISE_LENGTH);
+    if (!CNBuffer || !compressedBuffer || !decodedBuffer)
+    {
+        DISPLAY("Not enough memory, aborting\n");
+        testResult = 1;
+        goto _end;
+    }
+    RDG_genBuffer(CNBuffer, COMPRESSIBLE_NOISE_LENGTH, compressibility, 0., randState);
+
+    /* Basic tests */
+    DISPLAYLEVEL(4, "test%3i : compress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH);
+    result = ZSTD_compress(compressedBuffer, ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH), CNBuffer, COMPRESSIBLE_NOISE_LENGTH, 1);
+    if (ZSTD_isError(result)) goto _output_error;
+    cSize = result;
+    DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/COMPRESSIBLE_NOISE_LENGTH*100);
+
+    DISPLAYLEVEL(4, "test%3i : decompress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH);
+    result = ZSTD_decompress(decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, compressedBuffer, cSize);
+    if (ZSTD_isError(result)) goto _output_error;
+    DISPLAYLEVEL(4, "OK \n");
+
+    {
+        size_t i;
+        DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++);
+        for (i=0; i<COMPRESSIBLE_NOISE_LENGTH; i++)
+        {
+            if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;;
+        }
+        DISPLAYLEVEL(4, "OK \n");
+    }
+
+    DISPLAYLEVEL(4, "test%3i : decompress with 1 missing byte : ", testNb++);
+    result = ZSTD_decompress(decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, compressedBuffer, cSize-1);
+    if (!ZSTD_isError(result)) goto _output_error;
+    if (result != ERROR(srcSize_wrong)) goto _output_error;
+    DISPLAYLEVEL(4, "OK \n");
+
+    DISPLAYLEVEL(4, "test%3i : decompress with 1 too much byte : ", testNb++);
+    result = ZSTD_decompress(decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, compressedBuffer, cSize+1);
+    if (!ZSTD_isError(result)) goto _output_error;
+    if (result != ERROR(srcSize_wrong)) goto _output_error;
+    DISPLAYLEVEL(4, "OK \n");
+
+    /* Decompression defense tests */
+    DISPLAYLEVEL(4, "test%3i : Check input length for magic number : ", testNb++);
+    result = ZSTD_decompress(decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, CNBuffer, 3);
+    if (!ZSTD_isError(result)) goto _output_error;
+    if (result != ERROR(srcSize_wrong)) goto _output_error;
+    DISPLAYLEVEL(4, "OK \n");
+
+    DISPLAYLEVEL(4, "test%3i : Check magic Number : ", testNb++);
+    ((char*)(CNBuffer))[0] = 1;
+    result = ZSTD_decompress(decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, CNBuffer, 4);
+    if (!ZSTD_isError(result)) goto _output_error;
+    DISPLAYLEVEL(4, "OK \n");
+
+    /* long rle test */
+    {
+        size_t sampleSize = 0;
+        DISPLAYLEVEL(4, "test%3i : Long RLE test : ", testNb++);
+        RDG_genBuffer(CNBuffer, sampleSize, compressibility, 0., randState);
+        memset((char*)CNBuffer+sampleSize, 'B', 256 KB - 1);
+        sampleSize += 256 KB - 1;
+        RDG_genBuffer((char*)CNBuffer+sampleSize, 96 KB, compressibility, 0., randState);
+        sampleSize += 96 KB;
+        cSize = ZSTD_compress(compressedBuffer, ZSTD_compressBound(sampleSize), CNBuffer, sampleSize, 1);
+        if (ZSTD_isError(cSize)) goto _output_error;
+        result = ZSTD_decompress(decodedBuffer, sampleSize, compressedBuffer, cSize);
+        if (ZSTD_isError(result)) goto _output_error;
+        if (result!=sampleSize) goto _output_error;
+        DISPLAYLEVEL(4, "OK \n");
+    }
+
+_end:
+    free(CNBuffer);
+    free(compressedBuffer);
+    free(decodedBuffer);
+    return testResult;
+
+_output_error:
+    testResult = 1;
+    DISPLAY("Error detected in Unit tests ! \n");
+    goto _end;
+}
+
+
+static size_t findDiff(const void* buf1, const void* buf2, size_t max)
+{
+    const BYTE* b1 = (const BYTE*)buf1;
+    const BYTE* b2 = (const BYTE*)buf2;
+    size_t i;
+    for (i=0; i<max; i++)
+    {
+        if (b1[i] != b2[i]) break;
+    }
+    return i;
+}
+
+#   define CHECK(cond, ...) if (cond) { DISPLAY("Error => "); DISPLAY(__VA_ARGS__); \
+                            DISPLAY(" (seed %u, test nb %u)  \n", seed, testNb); goto _output_error; }
+
+static const U32 maxSrcLog = 23;
+static const U32 maxSampleLog = 22;
+
+int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibility)
+{
+    BYTE* cNoiseBuffer[5];
+    BYTE* srcBuffer;
+    BYTE* cBuffer;
+    BYTE* dstBuffer;
+    size_t srcBufferSize = (size_t)1<<maxSrcLog;
+    size_t dstBufferSize = (size_t)1<<maxSampleLog;
+    size_t cBufferSize   = ZSTD_compressBound(dstBufferSize);
+    U32 result = 0;
+    U32 testNb = 0;
+    U32 coreSeed = seed, lseed = 0;
+    ZSTD_CCtx* ctx;
+
+    /* allocation */
+    ctx = ZSTD_createCCtx();
+    cNoiseBuffer[0] = (BYTE*)malloc (srcBufferSize);
+    cNoiseBuffer[1] = (BYTE*)malloc (srcBufferSize);
+    cNoiseBuffer[2] = (BYTE*)malloc (srcBufferSize);
+    cNoiseBuffer[3] = (BYTE*)malloc (srcBufferSize);
+    cNoiseBuffer[4] = (BYTE*)malloc (srcBufferSize);
+    dstBuffer = (BYTE*)malloc (dstBufferSize);
+    cBuffer   = (BYTE*)malloc (cBufferSize);
+    CHECK (!cNoiseBuffer[0] || !cNoiseBuffer[1] || !cNoiseBuffer[2] || !dstBuffer || !cBuffer || !ctx,
+           "Not enough memory, fuzzer tests cancelled");
+
+    /* Create initial samples */
+    RDG_genBuffer(cNoiseBuffer[0], srcBufferSize, 0.00, 0., coreSeed);    /* pure noise */
+    RDG_genBuffer(cNoiseBuffer[1], srcBufferSize, 0.05, 0., coreSeed);    /* barely compressible */
+    RDG_genBuffer(cNoiseBuffer[2], srcBufferSize, compressibility, 0., coreSeed);
+    RDG_genBuffer(cNoiseBuffer[3], srcBufferSize, 0.95, 0., coreSeed);    /* highly compressible */
+    RDG_genBuffer(cNoiseBuffer[4], srcBufferSize, 1.00, 0., coreSeed);    /* sparse content */
+    srcBuffer = cNoiseBuffer[2];
+
+    /* catch up testNb */
+    for (testNb=1; testNb < startTest; testNb++)
+        FUZ_rand(&coreSeed);
+
+    /* test loop */
+    for ( ; testNb <= nbTests; testNb++ )
+    {
+        size_t sampleSize, sampleStart;
+        size_t cSize, dSize, dSupSize;
+        U32 sampleSizeLog, buffNb, cLevelMod;
+        U64 crcOrig, crcDest;
+        int cLevel;
+        BYTE* sampleBuffer;
+
+        /* init */
+        DISPLAYUPDATE(2, "\r%6u/%6u   ", testNb, nbTests);
+        FUZ_rand(&coreSeed);
+        lseed = coreSeed ^ prime1;
+        buffNb = FUZ_rand(&lseed) & 127;
+        if (buffNb & 7) buffNb=2;
+        else
+        {
+            buffNb >>= 3;
+            if (buffNb & 7)
+            {
+                const U32 tnb[2] = { 1, 3 };
+                buffNb = tnb[buffNb >> 3];
+            }
+            else
+            {
+                const U32 tnb[2] = { 0, 4 };
+                buffNb = tnb[buffNb >> 3];
+            }
+        }
+        srcBuffer = cNoiseBuffer[buffNb];
+        sampleSizeLog = FUZ_rand(&lseed) % maxSampleLog;
+        sampleSize = (size_t)1 << sampleSizeLog;
+        sampleSize += FUZ_rand(&lseed) & (sampleSize-1);
+        sampleStart = FUZ_rand(&lseed) % (srcBufferSize - sampleSize);
+
+        /* create sample buffer (to catch read error with valgrind & sanitizers)  */
+        sampleBuffer = (BYTE*)malloc(sampleSize);
+        CHECK (sampleBuffer==NULL, "not enough memory for sample buffer");
+        memcpy(sampleBuffer, srcBuffer + sampleStart, sampleSize);
+        crcOrig = XXH64(sampleBuffer, sampleSize, 0);
+
+        /* compression test */
+        cLevelMod = MAX(1, 38 - (int)(MAX(9, sampleSizeLog) * 2));   /* use high compression levels with small samples, for speed */
+        cLevel = (FUZ_rand(&lseed) % cLevelMod) +1;
+        cSize = ZSTD_compressCCtx(ctx, cBuffer, cBufferSize, sampleBuffer, sampleSize, cLevel);
+        CHECK(ZSTD_isError(cSize), "ZSTD_compressCCtx failed");
+
+        /* compression failure test : too small dest buffer */
+        if (cSize > 3)
+        {
+            size_t errorCode;
+            const size_t missing = (FUZ_rand(&lseed) % (cSize-2)) + 1;   /* no problem, as cSize > 4 (frameHeaderSizer) */
+            const size_t tooSmallSize = cSize - missing;
+            static const U32 endMark = 0x4DC2B1A9;
+            U32 endCheck;
+            memcpy(dstBuffer+tooSmallSize, &endMark, 4);
+            errorCode = ZSTD_compressCCtx(ctx, dstBuffer, tooSmallSize, sampleBuffer, sampleSize, cLevel);
+            CHECK(!ZSTD_isError(errorCode), "ZSTD_compressCCtx should have failed ! (buffer too small : %u < %u)", (U32)tooSmallSize, (U32)cSize);
+            memcpy(&endCheck, dstBuffer+tooSmallSize, 4);
+            CHECK(endCheck != endMark, "ZSTD_compressCCtx : dst buffer overflow");
+        }
+
+        /* successfull decompression tests*/
+        dSupSize = (FUZ_rand(&lseed) & 1) ? 0 : (FUZ_rand(&lseed) & 31) + 1;
+        dSize = ZSTD_decompress(dstBuffer, sampleSize + dSupSize, cBuffer, cSize);
+        CHECK(dSize != sampleSize, "ZSTD_decompress failed (%s) (srcSize : %u ; cSize : %u)", ZSTD_getErrorName(dSize), (U32)sampleSize, (U32)cSize);
+        crcDest = XXH64(dstBuffer, sampleSize, 0);
+        CHECK(crcOrig != crcDest, "decompression result corrupted (pos %u / %u)", (U32)findDiff(sampleBuffer, dstBuffer, sampleSize), (U32)sampleSize);
+
+        free(sampleBuffer);   /* no longer useful after this point */
+
+        /* truncated src decompression test */
+        {
+            size_t errorCode;
+            const size_t missing = (FUZ_rand(&lseed) % (cSize-2)) + 1;   /* no problem, as cSize > 4 (frameHeaderSizer) */
+            const size_t tooSmallSize = cSize - missing;
+            void* cBufferTooSmall = malloc(tooSmallSize);   /* valgrind will catch overflows */
+            CHECK(cBufferTooSmall == NULL, "not enough memory !");
+            memcpy(cBufferTooSmall, cBuffer, tooSmallSize);
+            errorCode = ZSTD_decompress(dstBuffer, dstBufferSize, cBufferTooSmall, tooSmallSize);
+            CHECK(!ZSTD_isError(errorCode), "ZSTD_decompress should have failed ! (truncated src buffer)");
+            free(cBufferTooSmall);
+        }
+
+        /* too small dst decompression test */
+        if (sampleSize > 3)
+        {
+            size_t errorCode;
+            const size_t missing = (FUZ_rand(&lseed) % (sampleSize-2)) + 1;   /* no problem, as cSize > 4 (frameHeaderSizer) */
+            const size_t tooSmallSize = sampleSize - missing;
+            static const BYTE token = 0xA9;
+            dstBuffer[tooSmallSize] = token;
+            errorCode = ZSTD_decompress(dstBuffer, tooSmallSize, cBuffer, cSize);
+            CHECK(!ZSTD_isError(errorCode), "ZSTD_decompress should have failed : %u > %u (dst buffer too small)", (U32)errorCode, (U32)tooSmallSize);
+            CHECK(dstBuffer[tooSmallSize] != token, "ZSTD_decompress : dst buffer overflow");
+        }
+
+        /* noisy src decompression test */
+        if (cSize > 6)
+        {
+            const U32 maxNbBits = FUZ_highbit32((U32)(cSize-4));
+            size_t pos = 4;   /* preserve magic number (too easy to detect) */
+            U32 nbBits = FUZ_rand(&lseed) % maxNbBits;
+            size_t mask = (1<<nbBits) - 1;
+            size_t skipLength = FUZ_rand(&lseed) & mask;
+            pos += skipLength;
+
+            while (pos < cSize)
+            {
+                /* add noise */
+                size_t noiseStart, noiseLength;
+                nbBits = FUZ_rand(&lseed) % maxNbBits;
+                if (nbBits>0) nbBits--;
+                mask = (1<<nbBits) - 1;
+                noiseLength = (FUZ_rand(&lseed) & mask) + 1;
+                if ( pos+noiseLength > cSize ) noiseLength = cSize-pos;
+                noiseStart = FUZ_rand(&lseed) % (srcBufferSize - noiseLength);
+                memcpy(cBuffer + pos, srcBuffer + noiseStart, noiseLength);
+                pos += noiseLength;
+
+                /* keep some original src */
+                nbBits = FUZ_rand(&lseed) % maxNbBits;
+                mask = (1<<nbBits) - 1;
+                skipLength = FUZ_rand(&lseed) & mask;
+                pos += skipLength;
+            }
+
+            /* decompress noisy source */
+            {
+                U32 noiseSrc = FUZ_rand(&lseed) % 5;
+                const U32 endMark = 0xA9B1C3D6;
+                U32 endCheck;
+                size_t errorCode;
+                srcBuffer = cNoiseBuffer[noiseSrc];
+                memcpy(dstBuffer+sampleSize, &endMark, 4);
+                errorCode = ZSTD_decompress(dstBuffer, sampleSize, cBuffer, cSize);
+                /* result *may* be an unlikely success, but even then, it must strictly respect dest buffer boundaries */
+                CHECK((!ZSTD_isError(errorCode)) && (errorCode>sampleSize),
+                      "ZSTD_decompress on noisy src : result is too large : %u > %u (dst buffer)", (U32)errorCode, (U32)sampleSize);
+                memcpy(&endCheck, dstBuffer+sampleSize, 4);
+                CHECK(endMark!=endCheck, "ZSTD_decompress on noisy src : dst buffer overflow");
+            }
+        }
+    }
+    DISPLAY("\rAll fuzzer tests completed   \n");
+
+_cleanup:
+    ZSTD_freeCCtx(ctx);
+    free(cNoiseBuffer[0]);
+    free(cNoiseBuffer[1]);
+    free(cNoiseBuffer[2]);
+    free(cNoiseBuffer[3]);
+    free(cNoiseBuffer[4]);
+    free(cBuffer);
+    free(dstBuffer);
+    return result;
+
+_output_error:
+    result = 1;
+    goto _cleanup;
+}
+
+
+/*********************************************************
+*  Command line
+*********************************************************/
+int FUZ_usage(char* programName)
+{
+    DISPLAY( "Usage :\n");
+    DISPLAY( "      %s [args]\n", programName);
+    DISPLAY( "\n");
+    DISPLAY( "Arguments :\n");
+    DISPLAY( " -i#    : Nb of tests (default:%u) \n", nbTestsDefault);
+    DISPLAY( " -s#    : Select seed (default:prompt user)\n");
+    DISPLAY( " -t#    : Select starting test number (default:0)\n");
+    DISPLAY( " -P#    : Select compressibility in %% (default:%i%%)\n", FUZ_COMPRESSIBILITY_DEFAULT);
+    DISPLAY( " -v     : verbose\n");
+    DISPLAY( " -p     : pause at the end\n");
+    DISPLAY( " -h     : display help and exit\n");
+    return 0;
+}
+
+
+int main(int argc, char** argv)
+{
+    U32 seed=0;
+    int seedset=0;
+    int argNb;
+    int nbTests = nbTestsDefault;
+    int testNb = 0;
+    int proba = FUZ_COMPRESSIBILITY_DEFAULT;
+    int result=0;
+    U32 mainPause = 0;
+    char* programName;
+
+    /* Check command line */
+    programName = argv[0];
+    for(argNb=1; argNb<argc; argNb++)
+    {
+        char* argument = argv[argNb];
+
+        if(!argument) continue;   /* Protection if argument empty */
+
+        /* Handle commands. Aggregated commands are allowed */
+        if (argument[0]=='-')
+        {
+            argument++;
+
+            while (*argument!=0)
+            {
+                switch(*argument)
+                {
+                case 'h':
+                    return FUZ_usage(programName);
+                case 'v':
+                    argument++;
+                    g_displayLevel=4;
+                    break;
+                case 'q':
+                    argument++;
+                    g_displayLevel--;
+                    break;
+                case 'p': /* pause at the end */
+                    argument++;
+                    mainPause = 1;
+                    break;
+
+                case 'i':
+                    argument++;
+                    nbTests=0;
+                    while ((*argument>='0') && (*argument<='9'))
+                    {
+                        nbTests *= 10;
+                        nbTests += *argument - '0';
+                        argument++;
+                    }
+                    break;
+
+                case 's':
+                    argument++;
+                    seed=0;
+                    seedset=1;
+                    while ((*argument>='0') && (*argument<='9'))
+                    {
+                        seed *= 10;
+                        seed += *argument - '0';
+                        argument++;
+                    }
+                    break;
+
+                case 't':
+                    argument++;
+                    testNb=0;
+                    while ((*argument>='0') && (*argument<='9'))
+                    {
+                        testNb *= 10;
+                        testNb += *argument - '0';
+                        argument++;
+                    }
+                    break;
+
+                case 'P':   /* compressibility % */
+                    argument++;
+                    proba=0;
+                    while ((*argument>='0') && (*argument<='9'))
+                    {
+                        proba *= 10;
+                        proba += *argument - '0';
+                        argument++;
+                    }
+                    if (proba<0) proba=0;
+                    if (proba>100) proba=100;
+                    break;
+
+                default:
+                    return FUZ_usage(programName);
+                }
+            }
+        }
+    }
+
+    /* Get Seed */
+    DISPLAY("Starting zstd tester (%i-bits, %s)\n", (int)(sizeof(size_t)*8), ZSTD_VERSION);
+
+    if (!seedset) seed = FUZ_GetMilliStart() % 10000;
+    DISPLAY("Seed = %u\n", seed);
+    if (proba!=FUZ_COMPRESSIBILITY_DEFAULT) DISPLAY("Compressibility : %i%%\n", proba);
+
+    if (nbTests<=0) nbTests=1;
+
+    if (testNb==0) result = basicUnitTests(0, ((double)proba) / 100);  /* constant seed for predictability */
+    if (!result)
+        result = fuzzerTests(seed, nbTests, testNb, ((double)proba) / 100);
+    if (mainPause)
+    {
+        int unused;
+        DISPLAY("Press Enter \n");
+        unused = getchar();
+        (void)unused;
+    }
+    return result;
+}
diff --git a/programs/legacy/fileio_legacy.c b/programs/legacy/fileio_legacy.c
new file mode 100644
index 0000000..f5c4241
--- /dev/null
+++ b/programs/legacy/fileio_legacy.c
@@ -0,0 +1,345 @@
+/*
+  fileio.c - File i/o handler
+  Copyright (C) Yann Collet 2013-2015
+
+  GPL v2 License
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+  You can contact the author at :
+  - zstd source repository : https://github.com/Cyan4973/zstd
+  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+/*
+  Note : this is stand-alone program.
+  It is not part of ZSTD compression library, it is a user program of ZSTD library.
+  The license of ZSTD library is BSD.
+  The license of this file is GPLv2.
+*/
+
+/* *************************************
+*  Compiler Options
+***************************************/
+/* Disable some Visual warning messages */
+#ifdef _MSC_VER
+#  define _CRT_SECURE_NO_WARNINGS
+#  define _CRT_SECURE_NO_DEPRECATE     /* VS2005 */
+#  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */
+#endif
+
+#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+#define _FILE_OFFSET_BITS 64   /* Large file support on 32-bits unix */
+#define _POSIX_SOURCE 1        /* enable fileno() within <stdio.h> on unix */
+
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stdio.h>     /* fprintf, fopen, fread, _fileno, stdin, stdout */
+#include <stdlib.h>    /* malloc, free */
+#include <string.h>    /* strcmp, strlen */
+#include <time.h>      /* clock */
+#include <errno.h>     /* errno */
+#include "mem.h"
+#include "fileio_legacy.h"
+#include "zstd_legacy.h"  /* legacy support */
+
+
+/* *************************************
+*  OS-specific Includes
+***************************************/
+#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
+#  include <fcntl.h>    /* _O_BINARY */
+#  include <io.h>       /* _setmode, _isatty */
+#  ifdef __MINGW32__
+   /* int _fileno(FILE *stream);   // seems no longer useful // MINGW somehow forgets to include this windows declaration into <stdio.h> */
+#  endif
+#  define SET_BINARY_MODE(file) { int unused = _setmode(_fileno(file), _O_BINARY); (void)unused; }
+#  define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
+#else
+#  include <unistd.h>   /* isatty */
+#  define SET_BINARY_MODE(file)
+#  define IS_CONSOLE(stdStream) isatty(fileno(stdStream))
+#endif
+
+
+/* *************************************
+*  Constants
+***************************************/
+#define KB *(1U<<10)
+#define MB *(1U<<20)
+#define GB *(1U<<30)
+
+#define _1BIT  0x01
+#define _2BITS 0x03
+#define _3BITS 0x07
+#define _4BITS 0x0F
+#define _6BITS 0x3F
+#define _8BITS 0xFF
+
+#define BIT6  0x40
+#define BIT7  0x80
+
+#define FIO_FRAMEHEADERSIZE 5        /* as a define, because needed to allocated table on stack */
+#define FSE_CHECKSUM_SEED        0
+
+#define CACHELINE 64
+
+
+/* *************************************
+*  Macros
+***************************************/
+#define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
+#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
+static U32 g_displayLevel = 1;   /* 0 : no display;   1: errors;   2 : + result + interaction + warnings;   3 : + progression;   4 : + information */
+
+#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
+            if ((FIO_GetMilliSpan(g_time) > refreshRate) || (g_displayLevel>=4)) \
+            { g_time = clock(); DISPLAY(__VA_ARGS__); \
+            if (g_displayLevel>=4) fflush(stdout); } }
+static const unsigned refreshRate = 150;
+static clock_t g_time = 0;
+
+
+/* *************************************
+*  Local Parameters
+***************************************/
+void FIO_legacy_setNotificationLevel(unsigned level) { g_displayLevel=level; }
+
+
+/* *************************************
+*  Exceptions
+***************************************/
+#ifndef DEBUG
+#  define DEBUG 0
+#endif
+#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
+#define EXM_THROW(error, ...)                                             \
+{                                                                         \
+    DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
+    DISPLAYLEVEL(1, "Error %i : ", error);                                \
+    DISPLAYLEVEL(1, __VA_ARGS__);                                         \
+    DISPLAYLEVEL(1, "\n");                                                \
+    exit(error);                                                          \
+}
+
+
+/* *************************************
+*  Functions
+***************************************/
+static unsigned FIO_GetMilliSpan(clock_t nPrevious)
+{
+    clock_t nCurrent = clock();
+    unsigned nSpan = (unsigned)(((nCurrent - nPrevious) * 1000) / CLOCKS_PER_SEC);
+    return nSpan;
+}
+
+
+unsigned long long FIOv01_decompressFrame(FILE* foutput, FILE* finput)
+{
+    size_t outBuffSize = 512 KB;
+    BYTE* outBuff = (BYTE*)malloc(outBuffSize);
+    size_t inBuffSize = 128 KB + 8;
+    BYTE inBuff[128 KB + 8];
+    BYTE* op = outBuff;
+    BYTE* const oend = outBuff + outBuffSize;
+    U64   filesize = 0;
+    size_t toRead;
+    size_t sizeCheck;
+    ZSTDv01_Dctx* dctx = ZSTDv01_createDCtx();
+
+
+    /* init */
+    if (outBuff==NULL) EXM_THROW(41, "Error : not enough memory to decode legacy frame");
+
+    /* restore header, already read from input */
+    MEM_writeLE32(inBuff, ZSTDv01_magicNumberLE);
+    sizeCheck = ZSTDv01_decompressContinue(dctx, NULL, 0, inBuff, sizeof(ZSTDv01_magicNumberLE));   /* Decode frame header */
+    if (ZSTDv01_isError(sizeCheck)) EXM_THROW(42, "Error decoding legacy header");
+
+    /* Main decompression Loop */
+    toRead = ZSTDv01_nextSrcSizeToDecompress(dctx);
+    while (toRead)
+    {
+        size_t readSize, decodedSize;
+
+        /* Fill input buffer */
+        if (toRead > inBuffSize)
+            EXM_THROW(43, "too large block");
+        readSize = fread(inBuff, 1, toRead, finput);
+        if (readSize != toRead)
+            EXM_THROW(44, "Read error");
+
+        /* Decode block */
+        decodedSize = ZSTDv01_decompressContinue(dctx, op, oend-op, inBuff, readSize);
+        if (ZSTDv01_isError(decodedSize)) EXM_THROW(45, "Decoding error : input corrupted");
+
+        if (decodedSize)   /* not a header */
+        {
+            /* Write block */
+            sizeCheck = fwrite(op, 1, decodedSize, foutput);
+            if (sizeCheck != decodedSize) EXM_THROW(46, "Write error : unable to write data block to destination file");
+            filesize += decodedSize;
+            op += decodedSize;
+            if (op==oend) op = outBuff;
+            DISPLAYUPDATE(2, "\rDecoded : %u MB...     ", (U32)(filesize>>20) );
+        }
+
+        /* prepare for next Block */
+        toRead = ZSTDv01_nextSrcSizeToDecompress(dctx);
+    }
+
+    /* release resources */
+    free(outBuff);
+    free(dctx);
+    return filesize;
+}
+
+
+unsigned long long FIOv02_decompressFrame(FILE* foutput, FILE* finput)
+{
+    size_t outBuffSize = 512 KB;
+    BYTE* outBuff = (BYTE*)malloc(outBuffSize);
+    size_t inBuffSize = 128 KB + 8;
+    BYTE inBuff[128 KB + 8];
+    BYTE* op = outBuff;
+    BYTE* const oend = outBuff + outBuffSize;
+    U64   filesize = 0;
+    size_t toRead;
+    size_t sizeCheck;
+    ZSTDv02_Dctx* dctx = ZSTDv02_createDCtx();
+
+    /* init */
+    if (outBuff==NULL) EXM_THROW(41, "Error : not enough memory to decode legacy frame");
+
+    /* restore header, already read from input */
+    MEM_writeLE32(inBuff, ZSTDv02_magicNumber);
+    sizeCheck = ZSTDv02_decompressContinue(dctx, NULL, 0, inBuff, sizeof(ZSTDv02_magicNumber));   /* Decode frame header */
+    if (ZSTDv02_isError(sizeCheck)) EXM_THROW(42, "Error decoding legacy header");
+
+    /* Main decompression Loop */
+    toRead = ZSTDv02_nextSrcSizeToDecompress(dctx);
+    while (toRead)
+    {
+        size_t readSize, decodedSize;
+
+        /* Fill input buffer */
+        if (toRead > inBuffSize)
+            EXM_THROW(43, "too large block");
+        readSize = fread(inBuff, 1, toRead, finput);
+        if (readSize != toRead)
+            EXM_THROW(44, "Read error");
+
+        /* Decode block */
+        decodedSize = ZSTDv02_decompressContinue(dctx, op, oend-op, inBuff, readSize);
+        if (ZSTDv02_isError(decodedSize)) EXM_THROW(45, "Decoding error : input corrupted");
+
+        if (decodedSize)   /* not a header */
+        {
+            /* Write block */
+            sizeCheck = fwrite(op, 1, decodedSize, foutput);
+            if (sizeCheck != decodedSize) EXM_THROW(46, "Write error : unable to write data block to destination file");
+            filesize += decodedSize;
+            op += decodedSize;
+            if (op==oend) op = outBuff;
+            DISPLAYUPDATE(2, "\rDecoded : %u MB...     ", (U32)(filesize>>20) );
+        }
+
+        /* prepare for next Block */
+        toRead = ZSTDv02_nextSrcSizeToDecompress(dctx);
+    }
+
+    /* release resources */
+    free(outBuff);
+    free(dctx);
+    return filesize;
+}
+
+
+unsigned long long FIOv03_decompressFrame(FILE* foutput, FILE* finput)
+{
+    size_t outBuffSize = 512 KB;
+    BYTE* outBuff = (BYTE*)malloc(outBuffSize);
+    size_t inBuffSize = 128 KB + 8;
+    BYTE inBuff[128 KB + 8];
+    BYTE* op = outBuff;
+    BYTE* const oend = outBuff + outBuffSize;
+    U64   filesize = 0;
+    size_t toRead;
+    size_t sizeCheck;
+    ZSTDv03_Dctx* dctx = ZSTDv03_createDCtx();
+
+    /* init */
+    if (outBuff==NULL) EXM_THROW(41, "Error : not enough memory to decode legacy frame");
+
+    /* restore header, already read from input */
+    MEM_writeLE32(inBuff, ZSTDv03_magicNumber);
+    sizeCheck = ZSTDv03_decompressContinue(dctx, NULL, 0, inBuff, sizeof(ZSTDv03_magicNumber));   /* Decode frame header */
+    if (ZSTDv03_isError(sizeCheck)) EXM_THROW(42, "Error decoding legacy header");
+
+    /* Main decompression Loop */
+    toRead = ZSTDv03_nextSrcSizeToDecompress(dctx);
+    while (toRead)
+    {
+        size_t readSize, decodedSize;
+
+        /* Fill input buffer */
+        if (toRead > inBuffSize)
+            EXM_THROW(43, "too large block");
+        readSize = fread(inBuff, 1, toRead, finput);
+        if (readSize != toRead)
+            EXM_THROW(44, "Read error");
+
+        /* Decode block */
+        decodedSize = ZSTDv03_decompressContinue(dctx, op, oend-op, inBuff, readSize);
+        if (ZSTDv03_isError(decodedSize)) EXM_THROW(45, "Decoding error : input corrupted");
+
+        if (decodedSize)   /* not a header */
+        {
+            /* Write block */
+            sizeCheck = fwrite(op, 1, decodedSize, foutput);
+            if (sizeCheck != decodedSize) EXM_THROW(46, "Write error : unable to write data block to destination file");
+            filesize += decodedSize;
+            op += decodedSize;
+            if (op==oend) op = outBuff;
+            DISPLAYUPDATE(2, "\rDecoded : %u MB...     ", (U32)(filesize>>20) );
+        }
+
+        /* prepare for next Block */
+        toRead = ZSTDv03_nextSrcSizeToDecompress(dctx);
+    }
+
+    /* release resources */
+    free(outBuff);
+    free(dctx);
+    return filesize;
+}
+
+
+unsigned long long FIO_decompressLegacyFrame(FILE* foutput, FILE* finput, U32 magicNumberLE)
+{
+	switch(magicNumberLE)
+	{
+		case ZSTDv01_magicNumberLE :
+			return FIOv01_decompressFrame(foutput, finput);
+		case ZSTDv02_magicNumber :
+			return FIOv02_decompressFrame(foutput, finput);
+		case ZSTDv03_magicNumber :
+			return FIOv03_decompressFrame(foutput, finput);
+		default :
+		    return ERROR(prefix_unknown);
+	}
+}
diff --git a/programs/legacy/fileio_legacy.h b/programs/legacy/fileio_legacy.h
new file mode 100644
index 0000000..a938433
--- /dev/null
+++ b/programs/legacy/fileio_legacy.h
@@ -0,0 +1,46 @@
+/*
+  fileio.h - file i/o handler
+  Copyright (C) Yann Collet 2013-2015
+
+  GPL v2 License
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+  You can contact the author at :
+  - ZSTD source repository : https://github.com/Cyan4973/zstd
+  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* *************************************
+*  Parameters
+***************************************/
+void FIO_legacy_setNotificationLevel(unsigned level);
+
+
+/* *************************************
+*  Stream/File functions
+***************************************/
+unsigned long long FIO_decompressLegacyFrame(FILE* foutput, FILE* finput, U32 magicNumberLE);
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/programs/paramgrill.c b/programs/paramgrill.c
new file mode 100644
index 0000000..346a9c3
--- /dev/null
+++ b/programs/paramgrill.c
@@ -0,0 +1,1178 @@
+/*
+    paramgrill.c - parameter tester for zstd_hc
+    Copyright (C) Yann Collet 2015
+
+    GPL v2 License
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/**************************************
+*  Compiler Options
+**************************************/
+/* Disable some Visual warning messages */
+#define _CRT_SECURE_NO_WARNINGS
+#define _CRT_SECURE_NO_DEPRECATE     /* VS2005 */
+
+/* Unix Large Files support (>4GB) */
+#if (defined(__sun__) && (!defined(__LP64__)))   /* Sun Solaris 32-bits requires specific definitions */
+#  define _LARGEFILE_SOURCE
+#  define _FILE_OFFSET_BITS 64
+#elif ! defined(__LP64__)                        /* No point defining Large file for 64 bit */
+#  define _LARGEFILE64_SOURCE
+#endif
+
+/* S_ISREG & gettimeofday() are not supported by MSVC */
+#if defined(_MSC_VER) || defined(_WIN32)
+#  define BMK_LEGACY_TIMER 1
+#endif
+
+#if defined(_MSC_VER)
+#  define snprintf _snprintf    /* snprintf unsupported by Visual <= 2012 */
+#endif
+
+
+/**************************************
+*  Includes
+**************************************/
+#include <stdlib.h>       /* malloc */
+#include <stdio.h>        /* fprintf, fopen, ftello64 */
+#include <sys/types.h>    /* stat64 */
+#include <sys/stat.h>     /* stat64 */
+#include <string.h>       /* strcmp */
+#include <math.h>         /* log */
+
+/* Use ftime() if gettimeofday() is not available on your target */
+#if defined(BMK_LEGACY_TIMER)
+#  include <sys/timeb.h>  /* timeb, ftime */
+#else
+#  include <sys/time.h>   /* gettimeofday */
+#endif
+
+#include "mem.h"
+#include "zstd_static.h"
+#include "datagen.h"
+#include "xxhash.h"
+
+
+/**************************************
+*  Compiler Options
+**************************************/
+/* S_ISREG & gettimeofday() are not supported by MSVC */
+#if !defined(S_ISREG)
+#  define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
+#endif
+
+
+/**************************************
+*  Constants
+**************************************/
+#define PROGRAM_DESCRIPTION "ZSTD_HC parameters tester"
+#ifndef ZSTD_VERSION
+#  define ZSTD_VERSION ""
+#endif
+#define AUTHOR "Yann Collet"
+#define WELCOME_MESSAGE "*** %s %s %i-bits, by %s (%s) ***\n", PROGRAM_DESCRIPTION, ZSTD_VERSION, (int)(sizeof(void*)*8), AUTHOR, __DATE__
+
+
+#define KB *(1<<10)
+#define MB *(1<<20)
+#define GB *(1ULL<<30)
+
+#define NBLOOPS    2
+#define TIMELOOP   2000
+
+static const size_t maxMemory = (sizeof(size_t)==4)  ?  (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31));
+#define DEFAULT_CHUNKSIZE   (4<<20)
+
+#define COMPRESSIBILITY_DEFAULT 0.50
+static const size_t sampleSize = 10000000;
+
+static const int g_grillDuration = 50000000;   /* about 13 hours */
+static const int g_maxParamTime = 15000;   /* 15 sec */
+static const int g_maxVariationTime = 60000;   /* 60 sec */
+static const int g_maxNbVariations = 64;
+
+
+/**************************************
+*  Macros
+**************************************/
+#define DISPLAY(...)  fprintf(stderr, __VA_ARGS__)
+
+
+/**************************************
+*  Benchmark Parameters
+**************************************/
+static U32 g_nbIterations = NBLOOPS;
+static double g_compressibility = COMPRESSIBILITY_DEFAULT;
+static U32 g_blockSize = 0;
+static U32 g_rand = 1;
+static U32 g_singleRun = 0;
+static U32 g_target = 0;
+static U32 g_noSeed = 0;
+static ZSTD_parameters g_params = { 0, 0, 0, 0, 0, 0, ZSTD_greedy };
+
+void BMK_SetNbIterations(int nbLoops)
+{
+    g_nbIterations = nbLoops;
+    DISPLAY("- %u iterations -\n", g_nbIterations);
+}
+
+
+/*********************************************************
+*  Private functions
+*********************************************************/
+
+#if defined(BMK_LEGACY_TIMER)
+
+static int BMK_GetMilliStart(void)
+{
+  /* Based on Legacy ftime()
+  *  Rolls over every ~ 12.1 days (0x100000/24/60/60)
+  *  Use GetMilliSpan to correct for rollover */
+  struct timeb tb;
+  int nCount;
+  ftime( &tb );
+  nCount = (int) (tb.millitm + (tb.time & 0xfffff) * 1000);
+  return nCount;
+}
+
+#else
+
+static int BMK_GetMilliStart(void)
+{
+  /* Based on newer gettimeofday()
+  *  Use GetMilliSpan to correct for rollover */
+  struct timeval tv;
+  int nCount;
+  gettimeofday(&tv, NULL);
+  nCount = (int) (tv.tv_usec/1000 + (tv.tv_sec & 0xfffff) * 1000);
+  return nCount;
+}
+
+#endif
+
+
+static int BMK_GetMilliSpan( int nTimeStart )
+{
+  int nSpan = BMK_GetMilliStart() - nTimeStart;
+  if ( nSpan < 0 )
+    nSpan += 0x100000 * 1000;
+  return nSpan;
+}
+
+
+static size_t BMK_findMaxMem(U64 requiredMem)
+{
+    size_t step = 64 MB;
+    BYTE* testmem=NULL;
+
+    requiredMem = (((requiredMem >> 26) + 1) << 26);
+    if (requiredMem > maxMemory) requiredMem = maxMemory;
+
+    requiredMem += 2*step;
+    while (!testmem)
+    {
+        requiredMem -= step;
+        testmem = (BYTE*) malloc ((size_t)requiredMem);
+    }
+
+    free (testmem);
+    return (size_t) (requiredMem - step);
+}
+
+
+static U64 BMK_GetFileSize(char* infilename)
+{
+    int r;
+#if defined(_MSC_VER)
+    struct _stat64 statbuf;
+    r = _stat64(infilename, &statbuf);
+#else
+    struct stat statbuf;
+    r = stat(infilename, &statbuf);
+#endif
+    if (r || !S_ISREG(statbuf.st_mode)) return 0;   /* No good... */
+    return (U64)statbuf.st_size;
+}
+
+#  define FUZ_rotl32(x,r) ((x << r) | (x >> (32 - r)))
+U32 FUZ_rand(U32* src)
+{
+    const U32 prime1 = 2654435761U;
+    const U32 prime2 = 2246822519U;
+    U32 rand32 = *src;
+    rand32 *= prime1;
+    rand32 += prime2;
+    rand32  = FUZ_rotl32(rand32, 13);
+    *src = rand32;
+    return rand32 >> 5;
+}
+
+
+/*********************************************************
+*  Bench functions
+*********************************************************/
+typedef struct {
+    size_t cSize;
+    U32 cSpeed;
+    U32 dSpeed;
+} BMK_result_t;
+
+typedef struct
+{
+    const char* srcPtr;
+    size_t srcSize;
+    char*  cPtr;
+    size_t cRoom;
+    size_t cSize;
+    char*  resPtr;
+    size_t resSize;
+} blockParam_t;
+
+
+#define MIN(a,b)  ( (a) < (b) ? (a) : (b) )
+
+static size_t BMK_benchParam(BMK_result_t* resultPtr,
+                             const void* srcBuffer, size_t srcSize,
+                             ZSTD_CCtx* ctx,
+                             const ZSTD_parameters params)
+{
+    const size_t blockSize = g_blockSize ? g_blockSize : srcSize;
+    const U32 nbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize);
+    blockParam_t* const blockTable = (blockParam_t*) malloc(nbBlocks * sizeof(blockParam_t));
+    const size_t maxCompressedSize = (size_t)nbBlocks * ZSTD_compressBound(blockSize);
+    void* const compressedBuffer = malloc(maxCompressedSize);
+    void* const resultBuffer = malloc(srcSize);
+    U32 Wlog = params.windowLog;
+    U32 Clog = params.contentLog;
+    U32 Hlog = params.hashLog;
+    U32 Slog = params.searchLog;
+    U32 Slength = params.searchLength;
+    ZSTD_strategy strat = params.strategy;
+    char name[30] = { 0 };
+    U64 crcOrig;
+
+    /* Memory allocation & restrictions */
+    snprintf(name, 30, "Sw%02uc%02uh%02us%02ul%1ut%1u", Wlog, Clog, Hlog, Slog, Slength, strat);
+    if (!compressedBuffer || !resultBuffer || !blockTable)
+    {
+        DISPLAY("\nError: not enough memory!\n");
+        free(compressedBuffer);
+        free(resultBuffer);
+        free(blockTable);
+        return 12;
+    }
+
+    /* Calculating input Checksum */
+    crcOrig = XXH64(srcBuffer, srcSize, 0);
+
+    /* Init blockTable data */
+    {
+        U32 i;
+        size_t remaining = srcSize;
+        const char* srcPtr = (const char*)srcBuffer;
+        char* cPtr = (char*)compressedBuffer;
+        char* resPtr = (char*)resultBuffer;
+        for (i=0; i<nbBlocks; i++)
+        {
+            size_t thisBlockSize = MIN(remaining, blockSize);
+            blockTable[i].srcPtr = srcPtr;
+            blockTable[i].cPtr = cPtr;
+            blockTable[i].resPtr = resPtr;
+            blockTable[i].srcSize = thisBlockSize;
+            blockTable[i].cRoom = ZSTD_compressBound(thisBlockSize);
+            srcPtr += thisBlockSize;
+            cPtr += blockTable[i].cRoom;
+            resPtr += thisBlockSize;
+            remaining -= thisBlockSize;
+        }
+    }
+
+    /* warmimg up memory */
+    RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.10, 1);
+
+    /* Bench */
+    {
+        U32 loopNb;
+        size_t cSize = 0;
+        double fastestC = 100000000., fastestD = 100000000.;
+        double ratio = 0.;
+        U64 crcCheck = 0;
+        const int startTime =BMK_GetMilliStart();
+
+        DISPLAY("\r%79s\r", "");
+        for (loopNb = 1; loopNb <= g_nbIterations; loopNb++)
+        {
+            int nbLoops;
+            int milliTime;
+            U32 blockNb;
+            const int totalTime = BMK_GetMilliSpan(startTime);
+
+            /* early break (slow params) */
+            if (totalTime > g_maxParamTime) break;
+
+            /* Compression */
+            DISPLAY("\r%1u-%s : %9u ->", loopNb, name, (U32)srcSize);
+            memset(compressedBuffer, 0xE5, maxCompressedSize);
+
+            nbLoops = 0;
+            milliTime = BMK_GetMilliStart();
+            while (BMK_GetMilliStart() == milliTime);
+            milliTime = BMK_GetMilliStart();
+            while (BMK_GetMilliSpan(milliTime) < TIMELOOP)
+            {
+                for (blockNb=0; blockNb<nbBlocks; blockNb++)
+                    blockTable[blockNb].cSize = ZSTD_compress_advanced(ctx,
+                                                    blockTable[blockNb].cPtr,  blockTable[blockNb].cRoom,
+                                                    blockTable[blockNb].srcPtr, blockTable[blockNb].srcSize,
+                                                    params);
+                nbLoops++;
+            }
+            milliTime = BMK_GetMilliSpan(milliTime);
+
+            cSize = 0;
+            for (blockNb=0; blockNb<nbBlocks; blockNb++)
+                cSize += blockTable[blockNb].cSize;
+            if ((double)milliTime < fastestC*nbLoops) fastestC = (double)milliTime / nbLoops;
+            ratio = (double)srcSize / (double)cSize;
+            DISPLAY("\r");
+            DISPLAY("%1u-%s : %9u ->", loopNb, name, (U32)srcSize);
+            DISPLAY(" %9u (%4.3f),%7.1f MB/s", (U32)cSize, ratio, (double)srcSize / fastestC / 1000.);
+            resultPtr->cSize = cSize;
+            resultPtr->cSpeed = (U32)((double)srcSize / fastestC);
+
+#if 1
+            /* Decompression */
+            memset(resultBuffer, 0xD6, srcSize);
+
+            nbLoops = 0;
+            milliTime = BMK_GetMilliStart();
+            while (BMK_GetMilliStart() == milliTime);
+            milliTime = BMK_GetMilliStart();
+            for ( ; BMK_GetMilliSpan(milliTime) < TIMELOOP; nbLoops++)
+            {
+                for (blockNb=0; blockNb<nbBlocks; blockNb++)
+                    blockTable[blockNb].resSize = ZSTD_decompress(blockTable[blockNb].resPtr, blockTable[blockNb].srcSize,
+                                                                  blockTable[blockNb].cPtr, blockTable[blockNb].cSize);
+            }
+            milliTime = BMK_GetMilliSpan(milliTime);
+
+            if ((double)milliTime < fastestD*nbLoops) fastestD = (double)milliTime / nbLoops;
+            DISPLAY("\r");
+            DISPLAY("%1u-%s : %9u -> ", loopNb, name, (U32)srcSize);
+            DISPLAY("%9u (%4.3f),%7.1f MB/s, ", (U32)cSize, ratio, (double)srcSize / fastestC / 1000.);
+            DISPLAY("%7.1f MB/s", (double)srcSize / fastestD / 1000.);
+            resultPtr->dSpeed = (U32)((double)srcSize / fastestD);
+
+            /* CRC Checking */
+            crcCheck = XXH64(resultBuffer, srcSize, 0);
+            if (crcOrig!=crcCheck)
+            {
+                unsigned u;
+                unsigned eBlockSize = (unsigned)(MIN(65536*2, blockSize));
+                DISPLAY("\n!!! WARNING !!! Invalid Checksum : %x != %x\n", (unsigned)crcOrig, (unsigned)crcCheck);
+                for (u=0; u<srcSize; u++)
+                {
+                    if (((const BYTE*)srcBuffer)[u] != ((BYTE*)resultBuffer)[u])
+                    {
+                        printf("Decoding error at pos %u (block %u, pos %u) \n", u, u / eBlockSize, u % eBlockSize);
+                        break;
+                    }
+                }
+                break;
+            }
+#endif
+        }
+    }
+
+    /* End cleaning */
+    DISPLAY("\r");
+    free(compressedBuffer);
+    free(resultBuffer);
+    return 0;
+}
+
+
+const char* g_stratName[] = { "ZSTD_fast   ",
+                              "ZSTD_greedy ",
+                              "ZSTD_lazy   ",
+                              "ZSTD_lazy2  ",
+                              "ZSTD_btlazy2" };
+
+static void BMK_printWinner(FILE* f, U32 cLevel, BMK_result_t result, ZSTD_parameters params, size_t srcSize)
+{
+    DISPLAY("\r%79s\r", "");
+    fprintf(f,"    {%3u,%3u,%3u,%3u,%3u,%3u, %s },  ",
+            0, params.windowLog, params.contentLog, params.hashLog, params.searchLog, params.searchLength,
+            g_stratName[(U32)(params.strategy)]);
+    fprintf(f,
+            "/* level %2u */   /* R:%5.3f at %5.1f MB/s - %5.1f MB/s */\n",
+            cLevel, (double)srcSize / result.cSize, (double)result.cSpeed / 1000., (double)result.dSpeed / 1000.);
+}
+
+
+static U32 g_cSpeedTarget[ZSTD_MAX_CLEVEL+1] = { 0 };
+
+typedef struct {
+    BMK_result_t result;
+    ZSTD_parameters params;
+} winnerInfo_t;
+
+static void BMK_printWinners2(FILE* f, const winnerInfo_t* winners, size_t srcSize)
+{
+    int cLevel;
+
+    fprintf(f, "\n /* Proposed configurations : */ \n");
+    fprintf(f, "#define ZSTD_MAX_CLEVEL %2u \n", ZSTD_MAX_CLEVEL);
+    fprintf(f, "static const ZSTD_parameters ZSTD_defaultParameters[ZSTD_MAX_CLEVEL+1] = {\n");
+    fprintf(f, "    /* l,  W,  C,  H,  S,  L, strat */ \n");
+
+    for (cLevel=0; cLevel <= ZSTD_MAX_CLEVEL; cLevel++)
+        BMK_printWinner(f, cLevel, winners[cLevel].result, winners[cLevel].params, srcSize);
+}
+
+
+static void BMK_printWinners(FILE* f, const winnerInfo_t* winners, size_t srcSize)
+{
+    fseek(f, 0, SEEK_SET);
+    BMK_printWinners2(f, winners, srcSize);
+    fflush(f);
+    BMK_printWinners2(stdout, winners, srcSize);
+}
+
+
+static int BMK_seed(winnerInfo_t* winners, const ZSTD_parameters params,
+              const void* srcBuffer, size_t srcSize,
+                    ZSTD_CCtx* ctx)
+{
+    BMK_result_t testResult;
+    int better = 0;
+    int cLevel;
+
+    BMK_benchParam(&testResult, srcBuffer, srcSize, ctx, params);
+
+    for (cLevel = 1; cLevel <= ZSTD_MAX_CLEVEL; cLevel++)
+    {
+        if (testResult.cSpeed < g_cSpeedTarget[cLevel])
+            continue;   /* not fast enough for this level */
+        if (winners[cLevel].result.cSize==0)
+        {
+            /* first solution for this cLevel */
+            winners[cLevel].result = testResult;
+            winners[cLevel].params = params;
+            BMK_printWinner(stdout, cLevel, testResult, params, srcSize);
+            better = 1;
+            continue;
+        }
+
+        if ((double)testResult.cSize <= ((double)winners[cLevel].result.cSize * (1. + (0.02 / cLevel))) )
+        {
+            /* Validate solution is "good enough" */
+            double W_ratio = (double)srcSize / testResult.cSize;
+            double O_ratio = (double)srcSize / winners[cLevel].result.cSize;
+            double W_ratioNote = log (W_ratio);
+            double O_ratioNote = log (O_ratio);
+            size_t W_DMemUsed = (1 << params.windowLog) + (16 KB);
+            size_t O_DMemUsed = (1 << winners[cLevel].params.windowLog) + (16 KB);
+            double W_DMemUsed_note = W_ratioNote * ( 40 + 9*cLevel) - log((double)W_DMemUsed);
+            double O_DMemUsed_note = O_ratioNote * ( 40 + 9*cLevel) - log((double)O_DMemUsed);
+
+            size_t W_CMemUsed = (1 << params.windowLog) + 4 * (1 << params.hashLog) +
+                                ((params.strategy==ZSTD_fast) ? 0 : 4 * (1 << params.contentLog));
+            size_t O_CMemUsed = (1 << winners[cLevel].params.windowLog) + 4 * (1 << winners[cLevel].params.hashLog) +
+                                ((winners[cLevel].params.strategy==ZSTD_fast) ? 0 :  4 * (1 << winners[cLevel].params.contentLog));
+            double W_CMemUsed_note = W_ratioNote * ( 50 + 13*cLevel) - log((double)W_CMemUsed);
+            double O_CMemUsed_note = O_ratioNote * ( 50 + 13*cLevel) - log((double)O_CMemUsed);
+
+            double W_CSpeed_note = W_ratioNote * ( 30 + 10*cLevel) + log((double)testResult.cSpeed);
+            double O_CSpeed_note = O_ratioNote * ( 30 + 10*cLevel) + log((double)winners[cLevel].result.cSpeed);
+
+            double W_DSpeed_note = W_ratioNote * ( 20 + 2*cLevel) + log((double)testResult.dSpeed);
+            double O_DSpeed_note = O_ratioNote * ( 20 + 2*cLevel) + log((double)winners[cLevel].result.dSpeed);
+
+
+            if (W_DMemUsed_note < O_DMemUsed_note)
+            {
+                /* uses too much Decompression memory for too little benefit */
+                if (W_ratio > O_ratio)
+                DISPLAY ("Decompression Memory : %5.3f @ %4.1f MB  vs  %5.3f @ %4.1f MB   : not enough for level %i\n",
+                         W_ratio, (double)(W_DMemUsed) / 1024 / 1024,
+                         O_ratio, (double)(O_DMemUsed) / 1024 / 1024,   cLevel);
+                continue;
+            }
+            if (W_CMemUsed_note < O_CMemUsed_note)
+            {
+                /* uses too much memory for compression for too little benefit */
+                if (W_ratio > O_ratio)
+                DISPLAY ("Compression Memory : %5.3f @ %4.1f MB  vs  %5.3f @ %4.1f MB   : not enough for level %i\n",
+                         W_ratio, (double)(W_CMemUsed) / 1024 / 1024,
+                         O_ratio, (double)(O_CMemUsed) / 1024 / 1024,   cLevel);
+                continue;
+            }
+            if (W_CSpeed_note   < O_CSpeed_note  )
+            {
+                /* too large compression speed difference for the compression benefit */
+                if (W_ratio > O_ratio)
+                DISPLAY ("Compression Speed : %5.3f @ %4.1f MB/s  vs  %5.3f @ %4.1f MB/s   : not enough for level %i\n",
+                         W_ratio, (double)(testResult.cSpeed) / 1000.,
+                         O_ratio, (double)(winners[cLevel].result.cSpeed) / 1000.,   cLevel);
+                continue;
+            }
+            if (W_DSpeed_note   < O_DSpeed_note  )
+            {
+                /* too large decompression speed difference for the compression benefit */
+                if (W_ratio > O_ratio)
+                DISPLAY ("Decompression Speed : %5.3f @ %4.1f MB/s  vs  %5.3f @ %4.1f MB/s   : not enough for level %i\n",
+                         W_ratio, (double)(testResult.dSpeed) / 1000.,
+                         O_ratio, (double)(winners[cLevel].result.dSpeed) / 1000.,   cLevel);
+                continue;
+            }
+
+            if (W_ratio < O_ratio)
+                DISPLAY("Solution %4.3f selected over %4.3f at level %i, due to better secondary statistics \n", W_ratio, O_ratio, cLevel);
+
+            winners[cLevel].result = testResult;
+            winners[cLevel].params = params;
+            BMK_printWinner(stdout, cLevel, testResult, params, srcSize);
+
+            better = 1;
+        }
+
+    }
+
+    return better;
+}
+
+
+/* nullified useless params, to ensure count stats */
+static ZSTD_parameters* sanitizeParams(ZSTD_parameters params)
+{
+    g_params = params;
+    if (params.strategy == ZSTD_fast)
+    {
+        g_params.contentLog = 0;
+        g_params.searchLog = 0;
+    }
+    return &g_params;
+}
+
+
+static void paramVariation(ZSTD_parameters* p)
+{
+    U32 nbChanges = (FUZ_rand(&g_rand) & 3) + 1;
+    for (; nbChanges; nbChanges--)
+    {
+        const U32 changeID = FUZ_rand(&g_rand) % 12;
+        switch(changeID)
+        {
+        case 0:
+            p->contentLog++; break;
+        case 1:
+            p->contentLog--; break;
+        case 2:
+            p->hashLog++; break;
+        case 3:
+            p->hashLog--; break;
+        case 4:
+            p->searchLog++; break;
+        case 5:
+            p->searchLog--; break;
+        case 6:
+            p->windowLog++; break;
+        case 7:
+            p->windowLog--; break;
+        case 8:
+            p->searchLength++; break;
+        case 9:
+            p->searchLength--; break;
+        case 10:
+            p->strategy = (ZSTD_strategy)(((U32)p->strategy)+1); break;
+        case 11:
+            p->strategy = (ZSTD_strategy)(((U32)p->strategy)-1); break;
+        }
+    }
+    ZSTD_validateParams(p);
+}
+
+
+#define PARAMTABLELOG   25
+#define PARAMTABLESIZE (1<<PARAMTABLELOG)
+#define PARAMTABLEMASK (PARAMTABLESIZE-1)
+static BYTE g_alreadyTested[PARAMTABLESIZE] = {0};   /* init to zero */
+
+#define NB_TESTS_PLAYED(p) \
+    g_alreadyTested[(XXH64(sanitizeParams(p), sizeof(p), 0) >> 3) & PARAMTABLEMASK]
+
+
+#define MAX(a,b)   ( (a) > (b) ? (a) : (b) )
+
+static void playAround(FILE* f, winnerInfo_t* winners,
+                       ZSTD_parameters params,
+                       const void* srcBuffer, size_t srcSize,
+                       ZSTD_CCtx* ctx)
+{
+    int nbVariations = 0;
+    const int startTime = BMK_GetMilliStart();
+
+    while (BMK_GetMilliSpan(startTime) < g_maxVariationTime)
+    {
+        ZSTD_parameters p = params;
+
+        if (nbVariations++ > g_maxNbVariations) break;
+        paramVariation(&p);
+
+        /* exclude faster if already played params */
+        if (FUZ_rand(&g_rand) & ((1 << NB_TESTS_PLAYED(p))-1))
+            continue;
+
+        /* test */
+        NB_TESTS_PLAYED(p)++;
+        if (!BMK_seed(winners, p, srcBuffer, srcSize, ctx)) continue;
+
+        /* improvement found => search more */
+        BMK_printWinners(f, winners, srcSize);
+        playAround(f, winners, p, srcBuffer, srcSize, ctx);
+    }
+
+}
+
+
+static void potentialRandomParams(ZSTD_parameters* p, U32 inverseChance)
+{
+    U32 chance = (FUZ_rand(&g_rand) % (inverseChance+1));
+    if (!chance)
+    {
+        /* totally random entry */
+        p->contentLog = FUZ_rand(&g_rand) % (ZSTD_CONTENTLOG_MAX+1 - ZSTD_CONTENTLOG_MIN) + ZSTD_CONTENTLOG_MIN;
+        p->hashLog    = FUZ_rand(&g_rand) % (ZSTD_HASHLOG_MAX+1 - ZSTD_HASHLOG_MIN) + ZSTD_HASHLOG_MIN;
+        p->searchLog  = FUZ_rand(&g_rand) % (ZSTD_SEARCHLOG_MAX+1 - ZSTD_SEARCHLOG_MIN) + ZSTD_SEARCHLOG_MIN;
+        p->windowLog  = FUZ_rand(&g_rand) % (ZSTD_WINDOWLOG_MAX+1 - ZSTD_WINDOWLOG_MIN) + ZSTD_WINDOWLOG_MIN;
+        p->searchLength=FUZ_rand(&g_rand) % (ZSTD_SEARCHLENGTH_MAX+1 - ZSTD_SEARCHLENGTH_MIN) + ZSTD_SEARCHLENGTH_MIN;
+        p->strategy   = (ZSTD_strategy) (FUZ_rand(&g_rand) % (ZSTD_btlazy2+1));
+        ZSTD_validateParams(p);
+    }
+}
+
+static void BMK_selectRandomStart(
+                       FILE* f, winnerInfo_t* winners,
+                       const void* srcBuffer, size_t srcSize,
+                       ZSTD_CCtx* ctx)
+{
+    U32 id = (FUZ_rand(&g_rand) % (ZSTD_MAX_CLEVEL+1));
+    if ((id==0) || (winners[id].params.windowLog==0))
+    {
+        /* totally random entry */
+        ZSTD_parameters p;
+        potentialRandomParams(&p, 1);
+        playAround(f, winners, p, srcBuffer, srcSize, ctx);
+    }
+    else
+        playAround(f, winners, winners[id].params, srcBuffer, srcSize, ctx);
+}
+
+
+static void BMK_benchMem(void* srcBuffer, size_t srcSize)
+{
+    ZSTD_CCtx* ctx = ZSTD_createCCtx();
+    ZSTD_parameters params;
+    winnerInfo_t winners[ZSTD_MAX_CLEVEL+1];
+    int i;
+    const char* rfName = "grillResults.txt";
+    FILE* f;
+    const size_t blockSize = g_blockSize ? g_blockSize : srcSize;
+
+    if (g_singleRun)
+    {
+        BMK_result_t testResult;
+        g_params.srcSize = blockSize;
+        ZSTD_validateParams(&g_params);
+        BMK_benchParam(&testResult, srcBuffer, srcSize, ctx, g_params);
+        DISPLAY("\n");
+        return;
+    }
+
+    /* init */
+    memset(winners, 0, sizeof(winners));
+    f = fopen(rfName, "w");
+    if (f==NULL) { DISPLAY("error opening %s \n", rfName); exit(1); }
+
+    if (g_target)
+        g_cSpeedTarget[1] = g_target * 1000;
+    else
+    {
+        /* baseline config for level 1 */
+        BMK_result_t testResult;
+        params = ZSTD_getParams(1, blockSize);
+        BMK_benchParam(&testResult, srcBuffer, srcSize, ctx, params);
+        g_cSpeedTarget[1] = (testResult.cSpeed * 31) >> 5;
+    }
+
+    /* establish speed objectives (relative to level 1) */
+    for (i=2; i<=ZSTD_MAX_CLEVEL; i++)
+        g_cSpeedTarget[i] = (g_cSpeedTarget[i-1] * 25) >> 5;
+
+    /* populate initial solution */
+    {
+        const int maxSeeds = g_noSeed ? 1 : ZSTD_MAX_CLEVEL;
+        for (i=1; i<=maxSeeds; i++)
+        {
+            params = ZSTD_getParams(i, blockSize);
+            BMK_seed(winners, params, srcBuffer, srcSize, ctx);
+        }
+    }
+    BMK_printWinners(f, winners, srcSize);
+
+    /* start tests */
+    {
+        const int milliStart = BMK_GetMilliStart();
+        do
+        {
+            BMK_selectRandomStart(f, winners, srcBuffer, srcSize, ctx);
+        } while (BMK_GetMilliSpan(milliStart) < g_grillDuration);
+    }
+
+    /* end summary */
+    BMK_printWinners(f, winners, srcSize);
+    DISPLAY("grillParams operations completed \n");
+
+    /* clean up*/
+    fclose(f);
+    ZSTD_freeCCtx(ctx);
+}
+
+
+static int benchSample(void)
+{
+    char* origBuff;
+    size_t benchedSize = sampleSize;
+    const char* name = "Sample 10MiB";
+
+    /* Allocation */
+    origBuff = (char*) malloc((size_t)benchedSize);
+    if(!origBuff)
+    {
+        DISPLAY("\nError: not enough memory!\n");
+        return 12;
+    }
+
+    /* Fill buffer */
+    RDG_genBuffer(origBuff, benchedSize, g_compressibility, 0.0, 0);
+
+    /* bench */
+    DISPLAY("\r%79s\r", "");
+    DISPLAY("using %s %i%%: \n", name, (int)(g_compressibility*100));
+    BMK_benchMem(origBuff, benchedSize);
+
+    free(origBuff);
+    return 0;
+}
+
+
+int benchFiles(char** fileNamesTable, int nbFiles)
+{
+    int fileIdx=0;
+
+    /* Loop for each file */
+    while (fileIdx<nbFiles)
+    {
+        FILE* inFile;
+        char* inFileName;
+        U64   inFileSize;
+        size_t benchedSize;
+        size_t readSize;
+        char* origBuff;
+
+        /* Check file existence */
+        inFileName = fileNamesTable[fileIdx++];
+        inFile = fopen( inFileName, "rb" );
+        if (inFile==NULL)
+        {
+            DISPLAY( "Pb opening %s\n", inFileName);
+            return 11;
+        }
+
+        /* Memory allocation & restrictions */
+        inFileSize = BMK_GetFileSize(inFileName);
+        benchedSize = (size_t) BMK_findMaxMem(inFileSize*3) / 3;
+        if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
+        if (benchedSize < inFileSize)
+        {
+            DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", inFileName, (int)(benchedSize>>20));
+        }
+
+        /* Alloc */
+        origBuff = (char*) malloc((size_t)benchedSize);
+        if(!origBuff)
+        {
+            DISPLAY("\nError: not enough memory!\n");
+            fclose(inFile);
+            return 12;
+        }
+
+        /* Fill input buffer */
+        DISPLAY("Loading %s...       \r", inFileName);
+        readSize = fread(origBuff, 1, benchedSize, inFile);
+        fclose(inFile);
+
+        if(readSize != benchedSize)
+        {
+            DISPLAY("\nError: problem reading file '%s' !!    \n", inFileName);
+            free(origBuff);
+            return 13;
+        }
+
+        /* bench */
+        DISPLAY("\r%79s\r", "");
+        DISPLAY("using %s : \n", inFileName);
+        BMK_benchMem(origBuff, benchedSize);
+    }
+
+    return 0;
+}
+
+
+int optimizeForSize(char* inFileName)
+{
+    FILE* inFile;
+    U64   inFileSize;
+    size_t benchedSize;
+    size_t readSize;
+    char* origBuff;
+
+    /* Check file existence */
+    inFile = fopen( inFileName, "rb" );
+    if (inFile==NULL)
+    {
+        DISPLAY( "Pb opening %s\n", inFileName);
+        return 11;
+    }
+
+    /* Memory allocation & restrictions */
+    inFileSize = BMK_GetFileSize(inFileName);
+    benchedSize = (size_t) BMK_findMaxMem(inFileSize*3) / 3;
+    if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
+    if (benchedSize < inFileSize)
+    {
+        DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", inFileName, (int)(benchedSize>>20));
+    }
+
+    /* Alloc */
+    origBuff = (char*) malloc((size_t)benchedSize);
+    if(!origBuff)
+    {
+        DISPLAY("\nError: not enough memory!\n");
+        fclose(inFile);
+        return 12;
+    }
+
+    /* Fill input buffer */
+    DISPLAY("Loading %s...       \r", inFileName);
+    readSize = fread(origBuff, 1, benchedSize, inFile);
+    fclose(inFile);
+
+    if(readSize != benchedSize)
+    {
+        DISPLAY("\nError: problem reading file '%s' !!    \n", inFileName);
+        free(origBuff);
+        return 13;
+    }
+
+    /* bench */
+    DISPLAY("\r%79s\r", "");
+    DISPLAY("optimizing for %s : \n", inFileName);
+
+    {
+        ZSTD_CCtx* ctx = ZSTD_createCCtx();
+        ZSTD_parameters params;
+        winnerInfo_t winner;
+        BMK_result_t candidate;
+        const size_t blockSize = g_blockSize ? g_blockSize : benchedSize;
+        int i;
+
+        /* init */
+        memset(&winner, 0, sizeof(winner));
+        winner.result.cSize = (size_t)(-1);
+
+        /* find best solution from default params */
+        {
+            const int maxSeeds = g_noSeed ? 1 : ZSTD_MAX_CLEVEL;
+            for (i=1; i<=maxSeeds; i++)
+            {
+                params = ZSTD_getParams(i, blockSize);
+                BMK_benchParam(&candidate, origBuff, benchedSize, ctx, params);
+                if ( (candidate.cSize < winner.result.cSize)
+                   ||((candidate.cSize == winner.result.cSize) && (candidate.cSpeed > winner.result.cSpeed)) )
+                {
+                    winner.params = params;
+                    winner.result = candidate;
+                    BMK_printWinner(stdout, i, winner.result, winner.params, benchedSize);
+                }
+            }
+        }
+        BMK_printWinner(stdout, 99, winner.result, winner.params, benchedSize);
+
+        /* start tests */
+        {
+            const int milliStart = BMK_GetMilliStart();
+            do
+            {
+                params = winner.params;
+                paramVariation(&params);
+                potentialRandomParams(&params, 16);
+
+                /* exclude faster if already played set of params */
+                if (FUZ_rand(&g_rand) & ((1 << NB_TESTS_PLAYED(params))-1)) continue;
+
+                /* test */
+                NB_TESTS_PLAYED(params)++;
+                BMK_benchParam(&candidate, origBuff, benchedSize, ctx, params);
+
+                /* improvement found => new winner */
+                if ( (candidate.cSize < winner.result.cSize)
+                   ||((candidate.cSize == winner.result.cSize) && (candidate.cSpeed > winner.result.cSpeed)) )
+                {
+                    winner.params = params;
+                    winner.result = candidate;
+                    BMK_printWinner(stdout, 99, winner.result, winner.params, benchedSize);
+                }
+
+            } while (BMK_GetMilliSpan(milliStart) < g_grillDuration);
+        }
+
+        /* end summary */
+        BMK_printWinner(stdout, 99, winner.result, winner.params, benchedSize);
+        DISPLAY("grillParams size - optimizer completed \n");
+
+        /* clean up*/
+        ZSTD_freeCCtx(ctx);
+    }
+
+    return 0;
+}
+
+
+int usage(char* exename)
+{
+    DISPLAY( "Usage :\n");
+    DISPLAY( "      %s [arg] file\n", exename);
+    DISPLAY( "Arguments :\n");
+    DISPLAY( " file : path to the file used as reference (if none, generates a compressible sample)\n");
+    DISPLAY( " -H/-h  : Help (this text + advanced options)\n");
+    return 0;
+}
+
+int usage_advanced(void)
+{
+    DISPLAY( "\nAdvanced options :\n");
+    DISPLAY( " -i#    : iteration loops [1-9](default : %i)\n", NBLOOPS);
+    DISPLAY( " -B#    : cut input into blocks of size # (default : single block)\n");
+    DISPLAY( " -P#    : generated sample compressibility (default : %.1f%%)\n", COMPRESSIBILITY_DEFAULT * 100);
+    return 0;
+}
+
+int badusage(char* exename)
+{
+    DISPLAY("Wrong parameters\n");
+    usage(exename);
+    return 1;
+}
+
+int main(int argc, char** argv)
+{
+    int i,
+        filenamesStart=0,
+        result;
+    char* exename=argv[0];
+    char* input_filename=0;
+    U32 optimizer = 0;
+    U32 main_pause = 0;
+
+    /* Welcome message */
+    DISPLAY(WELCOME_MESSAGE);
+
+    if (argc<1) { badusage(exename); return 1; }
+
+    for(i=1; i<argc; i++)
+    {
+        char* argument = argv[i];
+
+        if(!argument) continue;   /* Protection if argument empty */
+
+        if(!strcmp(argument,"--no-seed")) { g_noSeed = 1; continue; }
+
+        /* Decode command (note : aggregated commands are allowed) */
+        if (argument[0]=='-')
+        {
+            argument++;
+
+            while (argument[0]!=0)
+            {
+
+                switch(argument[0])
+                {
+                    /* Display help on usage */
+                case 'h' :
+                case 'H': usage(exename); usage_advanced(); return 0;
+
+                    /* Pause at the end (hidden option) */
+                case 'p': main_pause = 1; argument++; break;
+
+                    /* Modify Nb Iterations */
+                case 'i':
+                    argument++;
+                    if ((argument[0] >='0') && (argument[0] <='9'))
+                        g_nbIterations = *argument++ - '0';
+                    break;
+
+                    /* Sample compressibility (when no file provided) */
+                case 'P':
+                    argument++;
+                    {
+                        U32 proba32 = 0;
+                        while ((argument[0]>= '0') && (argument[0]<= '9'))
+                        {
+                            proba32 *= 10;
+                            proba32 += argument[0] - '0';
+                            argument++;
+                        }
+                        g_compressibility = (double)proba32 / 100.;
+                    }
+                    break;
+
+                case 'O':
+                    argument++;
+                    optimizer=1;
+                    break;
+
+                    /* Run Single conf */
+                case 'S':
+                    g_singleRun = 1;
+                    argument++;
+                    g_params = ZSTD_getParams(2, g_blockSize);
+                    for ( ; ; )
+                    {
+                        switch(*argument)
+                        {
+                        case 'w':
+                            g_params.windowLog = 0;
+                            argument++;
+                            while ((*argument>= '0') && (*argument<='9'))
+                                g_params.windowLog *= 10, g_params.windowLog += *argument++ - '0';
+                            continue;
+                        case 'c':
+                            g_params.contentLog = 0;
+                            argument++;
+                            while ((*argument>= '0') && (*argument<='9'))
+                                g_params.contentLog *= 10, g_params.contentLog += *argument++ - '0';
+                            continue;
+                        case 'h':
+                            g_params.hashLog = 0;
+                            argument++;
+                            while ((*argument>= '0') && (*argument<='9'))
+                                g_params.hashLog *= 10, g_params.hashLog += *argument++ - '0';
+                            continue;
+                        case 's':
+                            g_params.searchLog = 0;
+                            argument++;
+                            while ((*argument>= '0') && (*argument<='9'))
+                                g_params.searchLog *= 10, g_params.searchLog += *argument++ - '0';
+                            continue;
+                        case 'l':  /* search length */
+                            g_params.searchLength = 0;
+                            argument++;
+                            while ((*argument>= '0') && (*argument<='9'))
+                                g_params.searchLength *= 10, g_params.searchLength += *argument++ - '0';
+                            continue;
+                        case 't':  /* strategy */
+                            g_params.strategy = (ZSTD_strategy)0;
+                            argument++;
+                            while ((*argument>= '0') && (*argument<='9'))
+                            {
+                                g_params.strategy = (ZSTD_strategy)((U32)g_params.strategy *10);
+                                g_params.strategy = (ZSTD_strategy)((U32)g_params.strategy + *argument++ - '0');
+                            }
+                            continue;
+                        case 'L':
+                            {
+                                int cLevel = 0;
+                                argument++;
+                                while ((*argument>= '0') && (*argument<='9'))
+                                    cLevel *= 10, cLevel += *argument++ - '0';
+                                g_params = ZSTD_getParams(cLevel, g_blockSize);
+                                continue;
+                            }
+                        default : ;
+                        }
+                        break;
+                    }
+                    break;
+
+                    /* target level1 speed objective, in MB/s */
+                case 'T':
+                    argument++;
+                    g_target = 0;
+                    while ((*argument >= '0') && (*argument <= '9'))
+                    {
+                        g_target *= 10;
+                        g_target += *argument - '0';
+                        argument++;
+                    }
+                    break;
+
+                    /* cut input into blocks */
+                case 'B':
+                    {
+                        g_blockSize = 0;
+                        argument++;
+                        while ((*argument >='0') && (*argument <='9'))
+                            g_blockSize *= 10, g_blockSize += *argument++ - '0';
+                        if (*argument=='K') g_blockSize<<=10, argument++;  /* allows using KB notation */
+                        if (*argument=='M') g_blockSize<<=20, argument++;
+                        if (*argument=='B') argument++;
+                        DISPLAY("using %u KB block size \n", g_blockSize>>10);
+                    }
+                    break;
+
+                    /* Unknown command */
+                default : return badusage(exename);
+                }
+            }
+            continue;
+        }
+
+        /* first provided filename is input */
+        if (!input_filename) { input_filename=argument; filenamesStart=i; continue; }
+    }
+
+    if (filenamesStart==0)
+        result = benchSample();
+    else
+    {
+        if (optimizer)
+            result = optimizeForSize(input_filename);
+        else
+            result = benchFiles(argv+filenamesStart, argc-filenamesStart);
+    }
+
+    if (main_pause) { int unused; printf("press enter...\n"); unused = getchar(); (void)unused; }
+
+    return result;
+}
+
diff --git a/programs/xxhash.c b/programs/xxhash.c
new file mode 100644
index 0000000..511d994
--- /dev/null
+++ b/programs/xxhash.c
@@ -0,0 +1,962 @@
+/*
+xxHash - Fast Hash algorithm
+Copyright (C) 2012-2015, Yann Collet
+
+BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+You can contact the author at :
+- xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+
+/**************************************
+*  Tuning parameters
+**************************************/
+/* XXH_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The below switch allow to select different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violate C standard.
+ *            It can generate buggy code on targets which generate assembly depending on alignment.
+ *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://stackoverflow.com/a/32095106/646947 for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define XXH_FORCE_MEMORY_ACCESS 2
+#  elif defined(__INTEL_COMPILER) || \
+  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+#    define XXH_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+/* XXH_ACCEPT_NULL_INPUT_POINTER :
+ * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
+ * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
+ * By default, this option is disabled. To enable it, uncomment below define :
+ */
+/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */
+
+/* XXH_FORCE_NATIVE_FORMAT :
+ * By default, xxHash library provides endian-independant Hash values, based on little-endian convention.
+ * Results are therefore identical for little-endian and big-endian CPU.
+ * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
+ * Should endian-independance be of no importance for your application, you may set the #define below to 1,
+ * to improve speed for Big-endian CPU.
+ * This option has no impact on Little_Endian CPU.
+ */
+#define XXH_FORCE_NATIVE_FORMAT 0
+
+/* XXH_USELESS_ALIGN_BRANCH :
+ * This is a minor performance trick, only useful with lots of very small keys.
+ * It means : don't make a test between aligned/unaligned, because performance will be the same.
+ * It saves one initial branch per hash.
+ */
+#if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+#  define XXH_USELESS_ALIGN_BRANCH 1
+#endif
+
+
+/**************************************
+*  Compiler Specific Options
+***************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */
+#  define FORCE_INLINE static __forceinline
+#else
+#  if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#    ifdef __GNUC__
+#      define FORCE_INLINE static inline __attribute__((always_inline))
+#    else
+#      define FORCE_INLINE static inline
+#    endif
+#  else
+#    define FORCE_INLINE static
+#  endif /* __STDC_VERSION__ */
+#endif
+
+
+/**************************************
+*  Includes & Memory related functions
+***************************************/
+#include "xxhash.h"
+/* Modify the local functions below should you wish to use some other memory routines */
+/* for malloc(), free() */
+#include <stdlib.h>
+static void* XXH_malloc(size_t s) { return malloc(s); }
+static void  XXH_free  (void* p)  { free(p); }
+/* for memcpy() */
+#include <string.h>
+static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
+
+
+/**************************************
+*  Basic Types
+***************************************/
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+# include <stdint.h>
+  typedef uint8_t  BYTE;
+  typedef uint16_t U16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+#else
+  typedef unsigned char      BYTE;
+  typedef unsigned short     U16;
+  typedef unsigned int       U32;
+  typedef   signed int       S32;
+  typedef unsigned long long U64;
+#endif
+
+
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+
+/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
+static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; }
+static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign;
+
+static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+
+#else
+
+/* portable and safe solution. Generally efficient.
+ * see : http://stackoverflow.com/a/32095106/646947
+ */
+
+static U32 XXH_read32(const void* memPtr)
+{
+    U32 val;
+    memcpy(&val, memPtr, sizeof(val));
+    return val;
+}
+
+static U64 XXH_read64(const void* memPtr)
+{
+    U64 val;
+    memcpy(&val, memPtr, sizeof(val));
+    return val;
+}
+
+#endif // XXH_FORCE_DIRECT_MEMORY_ACCESS
+
+
+/******************************************
+*  Compiler-specific Functions and Macros
+******************************************/
+#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
+#if defined(_MSC_VER)
+#  define XXH_rotl32(x,r) _rotl(x,r)
+#  define XXH_rotl64(x,r) _rotl64(x,r)
+#else
+#  define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
+#  define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
+#endif
+
+#if defined(_MSC_VER)     /* Visual Studio */
+#  define XXH_swap32 _byteswap_ulong
+#  define XXH_swap64 _byteswap_uint64
+#elif GCC_VERSION >= 403
+#  define XXH_swap32 __builtin_bswap32
+#  define XXH_swap64 __builtin_bswap64
+#else
+static U32 XXH_swap32 (U32 x)
+{
+    return  ((x << 24) & 0xff000000 ) |
+            ((x <<  8) & 0x00ff0000 ) |
+            ((x >>  8) & 0x0000ff00 ) |
+            ((x >> 24) & 0x000000ff );
+}
+static U64 XXH_swap64 (U64 x)
+{
+    return  ((x << 56) & 0xff00000000000000ULL) |
+            ((x << 40) & 0x00ff000000000000ULL) |
+            ((x << 24) & 0x0000ff0000000000ULL) |
+            ((x << 8)  & 0x000000ff00000000ULL) |
+            ((x >> 8)  & 0x00000000ff000000ULL) |
+            ((x >> 24) & 0x0000000000ff0000ULL) |
+            ((x >> 40) & 0x000000000000ff00ULL) |
+            ((x >> 56) & 0x00000000000000ffULL);
+}
+#endif
+
+
+/***************************************
+*  Architecture Macros
+***************************************/
+typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
+
+/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example one the compiler command line */
+#ifndef XXH_CPU_LITTLE_ENDIAN
+    static const int one = 1;
+#   define XXH_CPU_LITTLE_ENDIAN   (*(const char*)(&one))
+#endif
+
+
+/*****************************
+*  Memory reads
+*****************************/
+typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
+
+FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+{
+    if (align==XXH_unaligned)
+        return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
+    else
+        return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr);
+}
+
+FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
+{
+    return XXH_readLE32_align(ptr, endian, XXH_unaligned);
+}
+
+FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+{
+    if (align==XXH_unaligned)
+        return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
+    else
+        return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr);
+}
+
+FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
+{
+    return XXH_readLE64_align(ptr, endian, XXH_unaligned);
+}
+
+
+/***************************************
+*  Macros
+***************************************/
+#define XXH_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1/(!!(c)) }; }    /* use only *after* variable declarations */
+
+
+/***************************************
+*  Constants
+***************************************/
+#define PRIME32_1   2654435761U
+#define PRIME32_2   2246822519U
+#define PRIME32_3   3266489917U
+#define PRIME32_4    668265263U
+#define PRIME32_5    374761393U
+
+#define PRIME64_1 11400714785074694791ULL
+#define PRIME64_2 14029467366897019727ULL
+#define PRIME64_3  1609587929392839161ULL
+#define PRIME64_4  9650029242287828579ULL
+#define PRIME64_5  2870177450012600261ULL
+
+
+/*****************************
+*  Simple Hash Functions
+*****************************/
+FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
+{
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* bEnd = p + len;
+    U32 h32;
+#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (p==NULL)
+    {
+        len=0;
+        bEnd=p=(const BYTE*)(size_t)16;
+    }
+#endif
+
+    if (len>=16)
+    {
+        const BYTE* const limit = bEnd - 16;
+        U32 v1 = seed + PRIME32_1 + PRIME32_2;
+        U32 v2 = seed + PRIME32_2;
+        U32 v3 = seed + 0;
+        U32 v4 = seed - PRIME32_1;
+
+        do
+        {
+            v1 += XXH_get32bits(p) * PRIME32_2;
+            v1 = XXH_rotl32(v1, 13);
+            v1 *= PRIME32_1;
+            p+=4;
+            v2 += XXH_get32bits(p) * PRIME32_2;
+            v2 = XXH_rotl32(v2, 13);
+            v2 *= PRIME32_1;
+            p+=4;
+            v3 += XXH_get32bits(p) * PRIME32_2;
+            v3 = XXH_rotl32(v3, 13);
+            v3 *= PRIME32_1;
+            p+=4;
+            v4 += XXH_get32bits(p) * PRIME32_2;
+            v4 = XXH_rotl32(v4, 13);
+            v4 *= PRIME32_1;
+            p+=4;
+        }
+        while (p<=limit);
+
+        h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
+    }
+    else
+    {
+        h32  = seed + PRIME32_5;
+    }
+
+    h32 += (U32) len;
+
+    while (p+4<=bEnd)
+    {
+        h32 += XXH_get32bits(p) * PRIME32_3;
+        h32  = XXH_rotl32(h32, 17) * PRIME32_4 ;
+        p+=4;
+    }
+
+    while (p<bEnd)
+    {
+        h32 += (*p) * PRIME32_5;
+        h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
+        p++;
+    }
+
+    h32 ^= h32 >> 15;
+    h32 *= PRIME32_2;
+    h32 ^= h32 >> 13;
+    h32 *= PRIME32_3;
+    h32 ^= h32 >> 16;
+
+    return h32;
+}
+
+
+unsigned int XXH32 (const void* input, size_t len, unsigned int seed)
+{
+#if 0
+    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+    XXH32_state_t state;
+    XXH32_reset(&state, seed);
+    XXH32_update(&state, input, len);
+    return XXH32_digest(&state);
+#else
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+#  if !defined(XXH_USELESS_ALIGN_BRANCH)
+    if ((((size_t)input) & 3) == 0)   /* Input is 4-bytes aligned, leverage the speed benefit */
+    {
+        if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+            return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+        else
+            return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+    }
+#  endif
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+    else
+        return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
+
+FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
+{
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* bEnd = p + len;
+    U64 h64;
+#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (p==NULL)
+    {
+        len=0;
+        bEnd=p=(const BYTE*)(size_t)32;
+    }
+#endif
+
+    if (len>=32)
+    {
+        const BYTE* const limit = bEnd - 32;
+        U64 v1 = seed + PRIME64_1 + PRIME64_2;
+        U64 v2 = seed + PRIME64_2;
+        U64 v3 = seed + 0;
+        U64 v4 = seed - PRIME64_1;
+
+        do
+        {
+            v1 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v1 = XXH_rotl64(v1, 31);
+            v1 *= PRIME64_1;
+            v2 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v2 = XXH_rotl64(v2, 31);
+            v2 *= PRIME64_1;
+            v3 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v3 = XXH_rotl64(v3, 31);
+            v3 *= PRIME64_1;
+            v4 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v4 = XXH_rotl64(v4, 31);
+            v4 *= PRIME64_1;
+        }
+        while (p<=limit);
+
+        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+
+        v1 *= PRIME64_2;
+        v1 = XXH_rotl64(v1, 31);
+        v1 *= PRIME64_1;
+        h64 ^= v1;
+        h64 = h64 * PRIME64_1 + PRIME64_4;
+
+        v2 *= PRIME64_2;
+        v2 = XXH_rotl64(v2, 31);
+        v2 *= PRIME64_1;
+        h64 ^= v2;
+        h64 = h64 * PRIME64_1 + PRIME64_4;
+
+        v3 *= PRIME64_2;
+        v3 = XXH_rotl64(v3, 31);
+        v3 *= PRIME64_1;
+        h64 ^= v3;
+        h64 = h64 * PRIME64_1 + PRIME64_4;
+
+        v4 *= PRIME64_2;
+        v4 = XXH_rotl64(v4, 31);
+        v4 *= PRIME64_1;
+        h64 ^= v4;
+        h64 = h64 * PRIME64_1 + PRIME64_4;
+    }
+    else
+    {
+        h64  = seed + PRIME64_5;
+    }
+
+    h64 += (U64) len;
+
+    while (p+8<=bEnd)
+    {
+        U64 k1 = XXH_get64bits(p);
+        k1 *= PRIME64_2;
+        k1 = XXH_rotl64(k1,31);
+        k1 *= PRIME64_1;
+        h64 ^= k1;
+        h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
+        p+=8;
+    }
+
+    if (p+4<=bEnd)
+    {
+        h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;
+        h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+        p+=4;
+    }
+
+    while (p<bEnd)
+    {
+        h64 ^= (*p) * PRIME64_5;
+        h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+        p++;
+    }
+
+    h64 ^= h64 >> 33;
+    h64 *= PRIME64_2;
+    h64 ^= h64 >> 29;
+    h64 *= PRIME64_3;
+    h64 ^= h64 >> 32;
+
+    return h64;
+}
+
+
+unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
+{
+#if 0
+    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+    XXH64_state_t state;
+    XXH64_reset(&state, seed);
+    XXH64_update(&state, input, len);
+    return XXH64_digest(&state);
+#else
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+#  if !defined(XXH_USELESS_ALIGN_BRANCH)
+    if ((((size_t)input) & 7)==0)   /* Input is aligned, let's leverage the speed advantage */
+    {
+        if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+            return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+        else
+            return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+    }
+#  endif
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+    else
+        return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
+
+/****************************************************
+*  Advanced Hash Functions
+****************************************************/
+
+/*** Allocation ***/
+typedef struct
+{
+    U64 total_len;
+    U32 seed;
+    U32 v1;
+    U32 v2;
+    U32 v3;
+    U32 v4;
+    U32 mem32[4];   /* defined as U32 for alignment */
+    U32 memsize;
+} XXH_istate32_t;
+
+typedef struct
+{
+    U64 total_len;
+    U64 seed;
+    U64 v1;
+    U64 v2;
+    U64 v3;
+    U64 v4;
+    U64 mem64[4];   /* defined as U64 for alignment */
+    U32 memsize;
+} XXH_istate64_t;
+
+
+XXH32_state_t* XXH32_createState(void)
+{
+    XXH_STATIC_ASSERT(sizeof(XXH32_state_t) >= sizeof(XXH_istate32_t));   /* A compilation error here means XXH32_state_t is not large enough */
+    return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
+}
+XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
+{
+    XXH_free(statePtr);
+    return XXH_OK;
+}
+
+XXH64_state_t* XXH64_createState(void)
+{
+    XXH_STATIC_ASSERT(sizeof(XXH64_state_t) >= sizeof(XXH_istate64_t));   /* A compilation error here means XXH64_state_t is not large enough */
+    return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
+}
+XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
+{
+    XXH_free(statePtr);
+    return XXH_OK;
+}
+
+
+/*** Hash feed ***/
+
+XXH_errorcode XXH32_reset(XXH32_state_t* state_in, unsigned int seed)
+{
+    XXH_istate32_t* state = (XXH_istate32_t*) state_in;
+    state->seed = seed;
+    state->v1 = seed + PRIME32_1 + PRIME32_2;
+    state->v2 = seed + PRIME32_2;
+    state->v3 = seed + 0;
+    state->v4 = seed - PRIME32_1;
+    state->total_len = 0;
+    state->memsize = 0;
+    return XXH_OK;
+}
+
+XXH_errorcode XXH64_reset(XXH64_state_t* state_in, unsigned long long seed)
+{
+    XXH_istate64_t* state = (XXH_istate64_t*) state_in;
+    state->seed = seed;
+    state->v1 = seed + PRIME64_1 + PRIME64_2;
+    state->v2 = seed + PRIME64_2;
+    state->v3 = seed + 0;
+    state->v4 = seed - PRIME64_1;
+    state->total_len = 0;
+    state->memsize = 0;
+    return XXH_OK;
+}
+
+
+FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
+{
+    XXH_istate32_t* state = (XXH_istate32_t *) state_in;
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* const bEnd = p + len;
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (input==NULL) return XXH_ERROR;
+#endif
+
+    state->total_len += len;
+
+    if (state->memsize + len < 16)   /* fill in tmp buffer */
+    {
+        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
+        state->memsize += (U32)len;
+        return XXH_OK;
+    }
+
+    if (state->memsize)   /* some data left from previous update */
+    {
+        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
+        {
+            const U32* p32 = state->mem32;
+            state->v1 += XXH_readLE32(p32, endian) * PRIME32_2;
+            state->v1 = XXH_rotl32(state->v1, 13);
+            state->v1 *= PRIME32_1;
+            p32++;
+            state->v2 += XXH_readLE32(p32, endian) * PRIME32_2;
+            state->v2 = XXH_rotl32(state->v2, 13);
+            state->v2 *= PRIME32_1;
+            p32++;
+            state->v3 += XXH_readLE32(p32, endian) * PRIME32_2;
+            state->v3 = XXH_rotl32(state->v3, 13);
+            state->v3 *= PRIME32_1;
+            p32++;
+            state->v4 += XXH_readLE32(p32, endian) * PRIME32_2;
+            state->v4 = XXH_rotl32(state->v4, 13);
+            state->v4 *= PRIME32_1;
+            p32++;
+        }
+        p += 16-state->memsize;
+        state->memsize = 0;
+    }
+
+    if (p <= bEnd-16)
+    {
+        const BYTE* const limit = bEnd - 16;
+        U32 v1 = state->v1;
+        U32 v2 = state->v2;
+        U32 v3 = state->v3;
+        U32 v4 = state->v4;
+
+        do
+        {
+            v1 += XXH_readLE32(p, endian) * PRIME32_2;
+            v1 = XXH_rotl32(v1, 13);
+            v1 *= PRIME32_1;
+            p+=4;
+            v2 += XXH_readLE32(p, endian) * PRIME32_2;
+            v2 = XXH_rotl32(v2, 13);
+            v2 *= PRIME32_1;
+            p+=4;
+            v3 += XXH_readLE32(p, endian) * PRIME32_2;
+            v3 = XXH_rotl32(v3, 13);
+            v3 *= PRIME32_1;
+            p+=4;
+            v4 += XXH_readLE32(p, endian) * PRIME32_2;
+            v4 = XXH_rotl32(v4, 13);
+            v4 *= PRIME32_1;
+            p+=4;
+        }
+        while (p<=limit);
+
+        state->v1 = v1;
+        state->v2 = v2;
+        state->v3 = v3;
+        state->v4 = v4;
+    }
+
+    if (p < bEnd)
+    {
+        XXH_memcpy(state->mem32, p, bEnd-p);
+        state->memsize = (int)(bEnd-p);
+    }
+
+    return XXH_OK;
+}
+
+XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
+    else
+        return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+
+
+FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state_in, XXH_endianess endian)
+{
+    const XXH_istate32_t* state = (const XXH_istate32_t*) state_in;
+    const BYTE * p = (const BYTE*)state->mem32;
+    const BYTE* bEnd = (const BYTE*)(state->mem32) + state->memsize;
+    U32 h32;
+
+    if (state->total_len >= 16)
+    {
+        h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
+    }
+    else
+    {
+        h32  = state->seed + PRIME32_5;
+    }
+
+    h32 += (U32) state->total_len;
+
+    while (p+4<=bEnd)
+    {
+        h32 += XXH_readLE32(p, endian) * PRIME32_3;
+        h32  = XXH_rotl32(h32, 17) * PRIME32_4;
+        p+=4;
+    }
+
+    while (p<bEnd)
+    {
+        h32 += (*p) * PRIME32_5;
+        h32 = XXH_rotl32(h32, 11) * PRIME32_1;
+        p++;
+    }
+
+    h32 ^= h32 >> 15;
+    h32 *= PRIME32_2;
+    h32 ^= h32 >> 13;
+    h32 *= PRIME32_3;
+    h32 ^= h32 >> 16;
+
+    return h32;
+}
+
+
+unsigned int XXH32_digest (const XXH32_state_t* state_in)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH32_digest_endian(state_in, XXH_littleEndian);
+    else
+        return XXH32_digest_endian(state_in, XXH_bigEndian);
+}
+
+
+FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
+{
+    XXH_istate64_t * state = (XXH_istate64_t *) state_in;
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* const bEnd = p + len;
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (input==NULL) return XXH_ERROR;
+#endif
+
+    state->total_len += len;
+
+    if (state->memsize + len < 32)   /* fill in tmp buffer */
+    {
+        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
+        state->memsize += (U32)len;
+        return XXH_OK;
+    }
+
+    if (state->memsize)   /* some data left from previous update */
+    {
+        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
+        {
+            const U64* p64 = state->mem64;
+            state->v1 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v1 = XXH_rotl64(state->v1, 31);
+            state->v1 *= PRIME64_1;
+            p64++;
+            state->v2 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v2 = XXH_rotl64(state->v2, 31);
+            state->v2 *= PRIME64_1;
+            p64++;
+            state->v3 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v3 = XXH_rotl64(state->v3, 31);
+            state->v3 *= PRIME64_1;
+            p64++;
+            state->v4 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v4 = XXH_rotl64(state->v4, 31);
+            state->v4 *= PRIME64_1;
+            p64++;
+        }
+        p += 32-state->memsize;
+        state->memsize = 0;
+    }
+
+    if (p+32 <= bEnd)
+    {
+        const BYTE* const limit = bEnd - 32;
+        U64 v1 = state->v1;
+        U64 v2 = state->v2;
+        U64 v3 = state->v3;
+        U64 v4 = state->v4;
+
+        do
+        {
+            v1 += XXH_readLE64(p, endian) * PRIME64_2;
+            v1 = XXH_rotl64(v1, 31);
+            v1 *= PRIME64_1;
+            p+=8;
+            v2 += XXH_readLE64(p, endian) * PRIME64_2;
+            v2 = XXH_rotl64(v2, 31);
+            v2 *= PRIME64_1;
+            p+=8;
+            v3 += XXH_readLE64(p, endian) * PRIME64_2;
+            v3 = XXH_rotl64(v3, 31);
+            v3 *= PRIME64_1;
+            p+=8;
+            v4 += XXH_readLE64(p, endian) * PRIME64_2;
+            v4 = XXH_rotl64(v4, 31);
+            v4 *= PRIME64_1;
+            p+=8;
+        }
+        while (p<=limit);
+
+        state->v1 = v1;
+        state->v2 = v2;
+        state->v3 = v3;
+        state->v4 = v4;
+    }
+
+    if (p < bEnd)
+    {
+        XXH_memcpy(state->mem64, p, bEnd-p);
+        state->memsize = (int)(bEnd-p);
+    }
+
+    return XXH_OK;
+}
+
+XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
+    else
+        return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+
+
+FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state_in, XXH_endianess endian)
+{
+    const XXH_istate64_t * state = (const XXH_istate64_t *) state_in;
+    const BYTE * p = (const BYTE*)state->mem64;
+    const BYTE* bEnd = (const BYTE*)state->mem64 + state->memsize;
+    U64 h64;
+
+    if (state->total_len >= 32)
+    {
+        U64 v1 = state->v1;
+        U64 v2 = state->v2;
+        U64 v3 = state->v3;
+        U64 v4 = state->v4;
+
+        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+
+        v1 *= PRIME64_2;
+        v1 = XXH_rotl64(v1, 31);
+        v1 *= PRIME64_1;
+        h64 ^= v1;
+        h64 = h64*PRIME64_1 + PRIME64_4;
+
+        v2 *= PRIME64_2;
+        v2 = XXH_rotl64(v2, 31);
+        v2 *= PRIME64_1;
+        h64 ^= v2;
+        h64 = h64*PRIME64_1 + PRIME64_4;
+
+        v3 *= PRIME64_2;
+        v3 = XXH_rotl64(v3, 31);
+        v3 *= PRIME64_1;
+        h64 ^= v3;
+        h64 = h64*PRIME64_1 + PRIME64_4;
+
+        v4 *= PRIME64_2;
+        v4 = XXH_rotl64(v4, 31);
+        v4 *= PRIME64_1;
+        h64 ^= v4;
+        h64 = h64*PRIME64_1 + PRIME64_4;
+    }
+    else
+    {
+        h64  = state->seed + PRIME64_5;
+    }
+
+    h64 += (U64) state->total_len;
+
+    while (p+8<=bEnd)
+    {
+        U64 k1 = XXH_readLE64(p, endian);
+        k1 *= PRIME64_2;
+        k1 = XXH_rotl64(k1,31);
+        k1 *= PRIME64_1;
+        h64 ^= k1;
+        h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
+        p+=8;
+    }
+
+    if (p+4<=bEnd)
+    {
+        h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
+        h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+        p+=4;
+    }
+
+    while (p<bEnd)
+    {
+        h64 ^= (*p) * PRIME64_5;
+        h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+        p++;
+    }
+
+    h64 ^= h64 >> 33;
+    h64 *= PRIME64_2;
+    h64 ^= h64 >> 29;
+    h64 *= PRIME64_3;
+    h64 ^= h64 >> 32;
+
+    return h64;
+}
+
+
+unsigned long long XXH64_digest (const XXH64_state_t* state_in)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH64_digest_endian(state_in, XXH_littleEndian);
+    else
+        return XXH64_digest_endian(state_in, XXH_bigEndian);
+}
+
+
diff --git a/programs/xxhash.h b/programs/xxhash.h
new file mode 100644
index 0000000..c60aa61
--- /dev/null
+++ b/programs/xxhash.h
@@ -0,0 +1,192 @@
+/*
+   xxHash - Extremely Fast Hash algorithm
+   Header File
+   Copyright (C) 2012-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+/* Notice extracted from xxHash homepage :
+
+xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
+It also successfully passes all tests from the SMHasher suite.
+
+Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
+
+Name            Speed       Q.Score   Author
+xxHash          5.4 GB/s     10
+CrapWow         3.2 GB/s      2       Andrew
+MumurHash 3a    2.7 GB/s     10       Austin Appleby
+SpookyHash      2.0 GB/s     10       Bob Jenkins
+SBox            1.4 GB/s      9       Bret Mulvey
+Lookup3         1.2 GB/s      9       Bob Jenkins
+SuperFastHash   1.2 GB/s      1       Paul Hsieh
+CityHash64      1.05 GB/s    10       Pike & Alakuijala
+FNV             0.55 GB/s     5       Fowler, Noll, Vo
+CRC32           0.43 GB/s     9
+MD5-32          0.33 GB/s    10       Ronald L. Rivest
+SHA1-32         0.28 GB/s    10
+
+Q.Score is a measure of quality of the hash function.
+It depends on successfully passing SMHasher test set.
+10 is a perfect score.
+
+A 64-bits version, named XXH64, is available since r35.
+It offers much better speed, but for 64-bits applications only.
+Name     Speed on 64 bits    Speed on 32 bits
+XXH64       13.8 GB/s            1.9 GB/s
+XXH32        6.8 GB/s            6.0 GB/s
+*/
+
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*****************************
+*  Definitions
+*****************************/
+#include <stddef.h>   /* size_t */
+typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
+
+
+/*****************************
+*  Namespace Emulation
+*****************************/
+/* Motivations :
+
+If you need to include xxHash into your library,
+but wish to avoid xxHash symbols to be present on your library interface
+in an effort to avoid potential name collision if another library also includes xxHash,
+
+you can use XXH_NAMESPACE, which will automatically prefix any symbol from xxHash
+with the value of XXH_NAMESPACE (so avoid to keep it NULL, and avoid numeric values).
+
+Note that no change is required within the calling program :
+it can still call xxHash functions using their regular name.
+They will be automatically translated by this header.
+*/
+#ifdef XXH_NAMESPACE
+#  define XXH_CAT(A,B) A##B
+#  define XXH_NAME2(A,B) XXH_CAT(A,B)
+#  define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
+#  define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
+#  define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
+#  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
+#  define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
+#  define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
+#  define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
+#  define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
+#  define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
+#  define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
+#  define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
+#  define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
+#endif
+
+
+/*****************************
+*  Simple Hash Functions
+*****************************/
+
+unsigned int       XXH32 (const void* input, size_t length, unsigned seed);
+unsigned long long XXH64 (const void* input, size_t length, unsigned long long seed);
+
+/*
+XXH32() :
+    Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input".
+    The memory between input & input+length must be valid (allocated and read-accessible).
+    "seed" can be used to alter the result predictably.
+    This function successfully passes all SMHasher tests.
+    Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
+XXH64() :
+    Calculate the 64-bits hash of sequence of length "len" stored at memory address "input".
+    Faster on 64-bits systems. Slower on 32-bits systems.
+*/
+
+
+
+/*****************************
+*  Advanced Hash Functions
+*****************************/
+typedef struct { long long ll[ 6]; } XXH32_state_t;
+typedef struct { long long ll[11]; } XXH64_state_t;
+
+/*
+These structures allow static allocation of XXH states.
+States must then be initialized using XXHnn_reset() before first use.
+
+If you prefer dynamic allocation, please refer to functions below.
+*/
+
+XXH32_state_t* XXH32_createState(void);
+XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
+
+XXH64_state_t* XXH64_createState(void);
+XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
+
+/*
+These functions create and release memory for XXH state.
+States must then be initialized using XXHnn_reset() before first use.
+*/
+
+
+XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, unsigned seed);
+XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
+unsigned int  XXH32_digest (const XXH32_state_t* statePtr);
+
+XXH_errorcode      XXH64_reset  (XXH64_state_t* statePtr, unsigned long long seed);
+XXH_errorcode      XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
+unsigned long long XXH64_digest (const XXH64_state_t* statePtr);
+
+/*
+These functions calculate the xxHash of an input provided in multiple smaller packets,
+as opposed to an input provided as a single block.
+
+XXH state space must first be allocated, using either static or dynamic method provided above.
+
+Start a new hash by initializing state with a seed, using XXHnn_reset().
+
+Then, feed the hash state by calling XXHnn_update() as many times as necessary.
+Obviously, input must be valid, meaning allocated and read accessible.
+The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
+
+Finally, you can produce a hash anytime, by using XXHnn_digest().
+This function returns the final nn-bits hash.
+You can nonetheless continue feeding the hash state with more input,
+and therefore get some new hashes, by calling again XXHnn_digest().
+
+When you are done, don't forget to free XXH state space, using typically XXHnn_freeState().
+*/
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/programs/zbufftest.c b/programs/zbufftest.c
new file mode 100644
index 0000000..5903935
--- /dev/null
+++ b/programs/zbufftest.c
@@ -0,0 +1,600 @@
+/*
+    Fuzzer test tool for zstd_buffered
+    Copyright (C) Yann Collet 2105
+
+    GPL v2 License
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    You can contact the author at :
+    - ZSTD source repository : https://github.com/Cyan4973/zstd
+    - ZSTD public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/**************************************
+*  Compiler specific
+**************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define _CRT_SECURE_NO_WARNINGS     /* fgets */
+#  pragma warning(disable : 4127)     /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4146)     /* disable: C4146: minus unsigned expression */
+#endif
+
+
+/**************************************
+*  Includes
+**************************************/
+#include <stdlib.h>      /* free */
+#include <stdio.h>       /* fgets, sscanf */
+#include <sys/timeb.h>   /* timeb */
+#include <string.h>      /* strcmp */
+#include "mem.h"
+#include "zstd_buffered.h"
+#include "zstd.h"        /* ZSTD_compressBound() */
+#include "datagen.h"     /* RDG_genBuffer */
+#include "xxhash.h"      /* XXH64 */
+
+
+/**************************************
+ Constants
+**************************************/
+#ifndef ZSTD_VERSION
+#  define ZSTD_VERSION ""
+#endif
+
+#define KB *(1U<<10)
+#define MB *(1U<<20)
+#define GB *(1U<<30)
+
+static const U32 nbTestsDefault = 10000;
+#define COMPRESSIBLE_NOISE_LENGTH (10 MB)
+#define FUZ_COMPRESSIBILITY_DEFAULT 50
+static const U32 prime1 = 2654435761U;
+static const U32 prime2 = 2246822519U;
+
+
+
+/**************************************
+*  Display Macros
+**************************************/
+#define DISPLAY(...)          fprintf(stderr, __VA_ARGS__)
+#define DISPLAYLEVEL(l, ...)  if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
+static U32 g_displayLevel = 2;
+
+#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
+            if ((FUZ_GetMilliSpan(g_displayTime) > g_refreshRate) || (g_displayLevel>=4)) \
+            { g_displayTime = FUZ_GetMilliStart(); DISPLAY(__VA_ARGS__); \
+            if (g_displayLevel>=4) fflush(stdout); } }
+static const U32 g_refreshRate = 150;
+static U32 g_displayTime = 0;
+
+static U32 g_testTime = 0;
+
+
+/*********************************************************
+*  Fuzzer functions
+*********************************************************/
+#define MAX(a,b) ((a)>(b)?(a):(b))
+
+static U32 FUZ_GetMilliStart(void)
+{
+    struct timeb tb;
+    U32 nCount;
+    ftime( &tb );
+    nCount = (U32) (((tb.time & 0xFFFFF) * 1000) +  tb.millitm);
+    return nCount;
+}
+
+
+static U32 FUZ_GetMilliSpan(U32 nTimeStart)
+{
+    U32 nCurrent = FUZ_GetMilliStart();
+    U32 nSpan = nCurrent - nTimeStart;
+    if (nTimeStart > nCurrent)
+        nSpan += 0x100000 * 1000;
+    return nSpan;
+}
+
+
+#  define FUZ_rotl32(x,r) ((x << r) | (x >> (32 - r)))
+unsigned int FUZ_rand(unsigned int* src)
+{
+    U32 rand32 = *src;
+    rand32 *= prime1;
+    rand32 += prime2;
+    rand32  = FUZ_rotl32(rand32, 13);
+    *src = rand32;
+    return rand32 >> 5;
+}
+
+
+/*
+static unsigned FUZ_highbit32(U32 v32)
+{
+    unsigned nbBits = 0;
+    if (v32==0) return 0;
+    for ( ; v32 ; v32>>=1) nbBits++;
+    return nbBits;
+}
+*/
+
+static int basicUnitTests(U32 seed, double compressibility)
+{
+    int testResult = 0;
+    void* CNBuffer;
+    size_t CNBufferSize = COMPRESSIBLE_NOISE_LENGTH;
+    void* compressedBuffer;
+    size_t compressedBufferSize = ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH);
+    void* decodedBuffer;
+    size_t decodedBufferSize = CNBufferSize;
+    U32 randState = seed;
+    size_t result, cSize, readSize, genSize;
+    U32 testNb=0;
+    ZBUFF_CCtx* zc = ZBUFF_createCCtx();
+    ZBUFF_DCtx* zd = ZBUFF_createDCtx();
+
+    /* Create compressible test buffer */
+    CNBuffer = malloc(CNBufferSize);
+    compressedBuffer = malloc(compressedBufferSize);
+    decodedBuffer = malloc(decodedBufferSize);
+    if (!CNBuffer || !compressedBuffer || !decodedBuffer || !zc || !zd)
+    {
+        DISPLAY("Not enough memory, aborting\n");
+        goto _output_error;
+    }
+    RDG_genBuffer(CNBuffer, CNBufferSize, compressibility, 0., randState);
+
+    /* Basic compression test */
+    DISPLAYLEVEL(4, "test%3i : compress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH);
+    ZBUFF_compressInit(zc, 1);
+    readSize = CNBufferSize;
+    genSize = compressedBufferSize;
+    result = ZBUFF_compressContinue(zc, compressedBuffer, &genSize, CNBuffer, &readSize);
+    if (ZBUFF_isError(result)) goto _output_error;
+    if (readSize != CNBufferSize) goto _output_error;   /* entire input should be consumed */
+    cSize = genSize;
+    genSize = compressedBufferSize - cSize;
+    result = ZBUFF_compressEnd(zc, ((char*)compressedBuffer)+cSize, &genSize);
+    if (result != 0) goto _output_error;   /* error, or some data not flushed */
+    cSize += genSize;
+    DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/COMPRESSIBLE_NOISE_LENGTH*100);
+
+    /* Basic decompression test */
+    DISPLAYLEVEL(4, "test%3i : decompress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH);
+    ZBUFF_decompressInit(zd);
+    readSize = cSize;
+    genSize = CNBufferSize;
+    result = ZBUFF_decompressContinue(zd, decodedBuffer, &genSize, compressedBuffer, &readSize);
+    if (result != 0) goto _output_error;  /* should reach end of frame == 0; otherwise, some data left, or an error */
+    if (genSize != CNBufferSize) goto _output_error;   /* should regenerate the same amount */
+    if (readSize != cSize) goto _output_error;   /* should have read the entire frame */
+    DISPLAYLEVEL(4, "OK \n");
+
+    /* check regenerated data is byte exact */
+    {
+        size_t i;
+        DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++);
+        for (i=0; i<CNBufferSize; i++)
+        {
+            if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;;
+        }
+        DISPLAYLEVEL(4, "OK \n");
+    }
+
+_end:
+    ZBUFF_freeCCtx(zc);
+    ZBUFF_freeDCtx(zd);
+    free(CNBuffer);
+    free(compressedBuffer);
+    free(decodedBuffer);
+    return testResult;
+
+_output_error:
+    testResult = 1;
+    DISPLAY("Error detected in Unit tests ! \n");
+    goto _end;
+}
+
+
+static size_t findDiff(const void* buf1, const void* buf2, size_t max)
+{
+    const BYTE* b1 = (const BYTE*)buf1;
+    const BYTE* b2 = (const BYTE*)buf2;
+    size_t i;
+    for (i=0; i<max; i++)
+    {
+        if (b1[i] != b2[i]) break;
+    }
+    return i;
+}
+
+#define MIN(a,b)   ( (a) < (b) ? (a) : (b) )
+
+#define CHECK(cond, ...) if (cond) { DISPLAY("Error => "); DISPLAY(__VA_ARGS__); \
+                         DISPLAY(" (seed %u, test nb %u)  \n", seed, testNb); goto _output_error; }
+
+static const U32 maxSrcLog = 24;
+static const U32 maxSampleLog = 19;
+
+int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibility)
+{
+    BYTE* cNoiseBuffer[5];
+    BYTE* srcBuffer;
+    size_t srcBufferSize = (size_t)1<<maxSrcLog;
+    BYTE* copyBuffer;
+    size_t copyBufferSize = srcBufferSize + (1<<maxSampleLog);
+    BYTE* cBuffer;
+    size_t cBufferSize   = ZSTD_compressBound(srcBufferSize);
+    BYTE* dstBuffer;
+    size_t dstBufferSize = srcBufferSize;
+    U32 result = 0;
+    U32 testNb = 0;
+    U32 coreSeed = seed, lseed = 0;
+    ZBUFF_CCtx* zc;
+    ZBUFF_DCtx* zd;
+    XXH64_state_t crc64;
+    U32 startTime = FUZ_GetMilliStart();
+
+    /* allocation */
+    zc = ZBUFF_createCCtx();
+    zd = ZBUFF_createDCtx();
+    cNoiseBuffer[0] = (BYTE*)malloc (srcBufferSize);
+    cNoiseBuffer[1] = (BYTE*)malloc (srcBufferSize);
+    cNoiseBuffer[2] = (BYTE*)malloc (srcBufferSize);
+    cNoiseBuffer[3] = (BYTE*)malloc (srcBufferSize);
+    cNoiseBuffer[4] = (BYTE*)malloc (srcBufferSize);
+    copyBuffer= (BYTE*)malloc (copyBufferSize);
+    dstBuffer = (BYTE*)malloc (dstBufferSize);
+    cBuffer   = (BYTE*)malloc (cBufferSize);
+    CHECK (!cNoiseBuffer[0] || !cNoiseBuffer[1] || !cNoiseBuffer[2] || !cNoiseBuffer[3] || !cNoiseBuffer[4] ||
+           !copyBuffer || !dstBuffer || !cBuffer || !zc || !zd,
+           "Not enough memory, fuzzer tests cancelled");
+
+    /* Create initial samples */
+    RDG_genBuffer(cNoiseBuffer[0], srcBufferSize, 0.00, 0., coreSeed);    /* pure noise */
+    RDG_genBuffer(cNoiseBuffer[1], srcBufferSize, 0.05, 0., coreSeed);    /* barely compressible */
+    RDG_genBuffer(cNoiseBuffer[2], srcBufferSize, compressibility, 0., coreSeed);
+    RDG_genBuffer(cNoiseBuffer[3], srcBufferSize, 0.95, 0., coreSeed);    /* highly compressible */
+    RDG_genBuffer(cNoiseBuffer[4], srcBufferSize, 1.00, 0., coreSeed);    /* sparse content */
+    srcBuffer = cNoiseBuffer[2];
+    memset(copyBuffer, 0x65, copyBufferSize);
+    memcpy(copyBuffer, srcBuffer, MIN(copyBufferSize,srcBufferSize));   /* make copyBuffer considered initialized */
+
+    /* catch up testNb */
+    for (testNb=1; testNb < startTest; testNb++)
+        FUZ_rand(&coreSeed);
+
+    /* test loop */
+    for ( ; (testNb <= nbTests) || (FUZ_GetMilliSpan(startTime) < g_testTime); testNb++ )
+    {
+        size_t sampleSize, sampleStart;
+        size_t cSize;
+        size_t maxTestSize, totalTestSize, readSize, totalCSize, genSize, totalGenSize;
+        size_t errorCode;
+        U32 sampleSizeLog, buffNb, n, nbChunks;
+        U64 crcOrig, crcDest;
+
+        /* init */
+        DISPLAYUPDATE(2, "\r%6u", testNb);
+        if (nbTests >= testNb) DISPLAYUPDATE(2, "/%6u   ", nbTests);
+        FUZ_rand(&coreSeed);
+        lseed = coreSeed ^ prime1;
+        buffNb = FUZ_rand(&lseed) & 127;
+        if (buffNb & 7) buffNb=2;   /* select buffer */
+        else
+        {
+            buffNb >>= 3;
+            if (buffNb & 7)
+            {
+                const U32 tnb[2] = { 1, 3 };
+                buffNb = tnb[buffNb >> 3];
+            }
+            else
+            {
+                const U32 tnb[2] = { 0, 4 };
+                buffNb = tnb[buffNb >> 3];
+            }
+        }
+        srcBuffer = cNoiseBuffer[buffNb];
+
+        /* Multi - segments compression test */
+        XXH64_reset(&crc64, 0);
+        nbChunks = (FUZ_rand(&lseed) & 127) + 2;
+        sampleSizeLog = FUZ_rand(&lseed) % maxSrcLog;
+        maxTestSize = (size_t)1 << sampleSizeLog;
+        maxTestSize += FUZ_rand(&lseed) & (maxTestSize-1);
+        ZBUFF_compressInit(zc, (FUZ_rand(&lseed) % (20 - (sampleSizeLog/3))) + 1);
+        totalTestSize = 0;
+        cSize = 0;
+        for (n=0; n<nbChunks; n++)
+        {
+            sampleSizeLog = FUZ_rand(&lseed) % maxSampleLog;
+            sampleSize = (size_t)1 << sampleSizeLog;
+            sampleSize += FUZ_rand(&lseed) & (sampleSize-1);
+            sampleStart = FUZ_rand(&lseed) % (srcBufferSize - sampleSize);
+            readSize = sampleSize;
+
+            /* random size output buffer */
+            sampleSizeLog = FUZ_rand(&lseed) % maxSampleLog;
+            sampleSize = (size_t)1 << sampleSizeLog;
+            sampleSize += FUZ_rand(&lseed) & (sampleSize-1);
+            genSize = MIN (cBufferSize - cSize, sampleSize);
+
+            errorCode = ZBUFF_compressContinue(zc, cBuffer+cSize, &genSize, srcBuffer+sampleStart, &readSize);
+            CHECK (ZBUFF_isError(errorCode), "compression error : %s", ZBUFF_getErrorName(errorCode));
+
+            XXH64_update(&crc64, srcBuffer+sampleStart, readSize);
+            memcpy(copyBuffer+totalTestSize, srcBuffer+sampleStart, readSize);
+            cSize += genSize;
+            totalTestSize += readSize;
+
+            if ((FUZ_rand(&lseed) & 15) == 0)
+            {
+                /* add a few random flushes operations, to mess around */
+                sampleSizeLog = FUZ_rand(&lseed) % maxSampleLog;
+                sampleSize = (size_t)1 << sampleSizeLog;
+                sampleSize += FUZ_rand(&lseed) & (sampleSize-1);
+                genSize = MIN (cBufferSize - cSize, sampleSize);
+                errorCode = ZBUFF_compressFlush(zc, cBuffer+cSize, &genSize);
+                CHECK (ZBUFF_isError(errorCode), "flush error : %s", ZBUFF_getErrorName(errorCode));
+                cSize += genSize;
+            }
+
+            if (totalTestSize > maxTestSize) break;
+        }
+        genSize = cBufferSize - cSize;
+        errorCode = ZBUFF_compressEnd(zc, cBuffer+cSize, &genSize);
+        CHECK (ZBUFF_isError(errorCode), "compression error : %s", ZBUFF_getErrorName(errorCode));
+        CHECK (errorCode != 0, "frame epilogue not fully consumed");
+        cSize += genSize;
+        crcOrig = XXH64_digest(&crc64);
+
+        /* multi - fragments decompression test */
+        ZBUFF_decompressInit(zd);
+        totalCSize = 0;
+        totalGenSize = 0;
+        while (totalCSize < cSize)
+        {
+            sampleSizeLog  = FUZ_rand(&lseed) % maxSampleLog;
+            sampleSize  = (size_t)1 << sampleSizeLog;
+            sampleSize += FUZ_rand(&lseed) & (sampleSize-1);
+            readSize = sampleSize;
+            sampleSizeLog  = FUZ_rand(&lseed) % maxSampleLog;
+            sampleSize  = (size_t)1 << sampleSizeLog;
+            sampleSize += FUZ_rand(&lseed) & (sampleSize-1);
+            genSize = MIN(sampleSize, dstBufferSize - totalGenSize);
+            errorCode = ZBUFF_decompressContinue(zd, dstBuffer+totalGenSize, &genSize, cBuffer+totalCSize, &readSize);
+            CHECK (ZBUFF_isError(errorCode), "decompression error : %s", ZBUFF_getErrorName(errorCode));
+            totalGenSize += genSize;
+            totalCSize += readSize;
+        }
+        CHECK (errorCode != 0, "frame not fully decoded");
+        CHECK (totalGenSize != totalTestSize, "decompressed data : wrong size")
+        CHECK (totalCSize != cSize, "compressed data should be fully read")
+        crcDest = XXH64(dstBuffer, totalTestSize, 0);
+        if (crcDest!=crcOrig) findDiff(copyBuffer, dstBuffer, totalTestSize);
+        CHECK (crcDest!=crcOrig, "decompressed data corrupted");
+
+        /* noisy/erroneous src decompression test */
+        /* add some noise */
+        nbChunks = (FUZ_rand(&lseed) & 7) + 2;
+        for (n=0; n<nbChunks; n++)
+        {
+            size_t cStart;
+
+            sampleSizeLog = FUZ_rand(&lseed) % maxSampleLog;
+            sampleSize = (size_t)1 << sampleSizeLog;
+            sampleSize += FUZ_rand(&lseed) & (sampleSize-1);
+            if (sampleSize > cSize/3) sampleSize = cSize/3;
+            sampleStart = FUZ_rand(&lseed) % (srcBufferSize - sampleSize);
+            cStart = FUZ_rand(&lseed) % (cSize - sampleSize);
+
+            memcpy(cBuffer+cStart, srcBuffer+sampleStart, sampleSize);
+        }
+
+        /* try decompression on noisy data */
+        ZBUFF_decompressInit(zd);
+        totalCSize = 0;
+        totalGenSize = 0;
+        while ( (totalCSize < cSize) && (totalGenSize < dstBufferSize) )
+        {
+            sampleSizeLog  = FUZ_rand(&lseed) % maxSampleLog;
+            sampleSize  = (size_t)1 << sampleSizeLog;
+            sampleSize += FUZ_rand(&lseed) & (sampleSize-1);
+            readSize = sampleSize;
+            sampleSizeLog  = FUZ_rand(&lseed) % maxSampleLog;
+            sampleSize  = (size_t)1 << sampleSizeLog;
+            sampleSize += FUZ_rand(&lseed) & (sampleSize-1);
+            genSize = MIN(sampleSize, dstBufferSize - totalGenSize);
+            errorCode = ZBUFF_decompressContinue(zd, dstBuffer+totalGenSize, &genSize, cBuffer+totalCSize, &readSize);
+            if (ZBUFF_isError(errorCode)) break;   /* error correctly detected */
+            totalGenSize += genSize;
+            totalCSize += readSize;
+        }
+    }
+    DISPLAY("\r%u fuzzer tests completed   \n", testNb);
+
+_cleanup:
+    ZBUFF_freeCCtx(zc);
+    ZBUFF_freeDCtx(zd);
+    free(cNoiseBuffer[0]);
+    free(cNoiseBuffer[1]);
+    free(cNoiseBuffer[2]);
+    free(cNoiseBuffer[3]);
+    free(cNoiseBuffer[4]);
+    free(copyBuffer);
+    free(cBuffer);
+    free(dstBuffer);
+    return result;
+
+_output_error:
+    result = 1;
+    goto _cleanup;
+}
+
+
+/*********************************************************
+*  Command line
+*********************************************************/
+int FUZ_usage(char* programName)
+{
+    DISPLAY( "Usage :\n");
+    DISPLAY( "      %s [args]\n", programName);
+    DISPLAY( "\n");
+    DISPLAY( "Arguments :\n");
+    DISPLAY( " -i#    : Nb of tests (default:%u) \n", nbTestsDefault);
+    DISPLAY( " -s#    : Select seed (default:prompt user)\n");
+    DISPLAY( " -t#    : Select starting test number (default:0)\n");
+    DISPLAY( " -P#    : Select compressibility in %% (default:%i%%)\n", FUZ_COMPRESSIBILITY_DEFAULT);
+    DISPLAY( " -v     : verbose\n");
+    DISPLAY( " -p     : pause at the end\n");
+    DISPLAY( " -h     : display help and exit\n");
+    return 0;
+}
+
+
+int main(int argc, char** argv)
+{
+    U32 seed=0;
+    int seedset=0;
+    int argNb;
+    int nbTests = nbTestsDefault;
+    int testNb = 0;
+    int proba = FUZ_COMPRESSIBILITY_DEFAULT;
+    int result=0;
+    U32 mainPause = 0;
+    char* programName;
+
+    /* Check command line */
+    programName = argv[0];
+    for(argNb=1; argNb<argc; argNb++)
+    {
+        char* argument = argv[argNb];
+
+        if(!argument) continue;   /* Protection if argument empty */
+
+        /* Handle commands. Aggregated commands are allowed */
+        if (argument[0]=='-')
+        {
+            argument++;
+
+            while (*argument!=0)
+            {
+                switch(*argument)
+                {
+                case 'h':
+                    return FUZ_usage(programName);
+                case 'v':
+                    argument++;
+                    g_displayLevel=4;
+                    break;
+                case 'q':
+                    argument++;
+                    g_displayLevel--;
+                    break;
+                case 'p': /* pause at the end */
+                    argument++;
+                    mainPause = 1;
+                    break;
+
+                case 'i':
+                    argument++;
+                    nbTests=0; g_testTime=0;
+                    while ((*argument>='0') && (*argument<='9'))
+                    {
+                        nbTests *= 10;
+                        nbTests += *argument - '0';
+                        argument++;
+                    }
+                    break;
+
+                case 'T':
+                    argument++;
+                    nbTests=0; g_testTime=0;
+                    while ((*argument>='0') && (*argument<='9'))
+                    {
+                        g_testTime *= 10;
+                        g_testTime += *argument - '0';
+                        argument++;
+                    }
+                    if (*argument=='m') g_testTime *=60, argument++;
+                    if (*argument=='n') argument++;
+                    g_testTime *= 1000;
+                    break;
+
+                case 's':
+                    argument++;
+                    seed=0;
+                    seedset=1;
+                    while ((*argument>='0') && (*argument<='9'))
+                    {
+                        seed *= 10;
+                        seed += *argument - '0';
+                        argument++;
+                    }
+                    break;
+
+                case 't':
+                    argument++;
+                    testNb=0;
+                    while ((*argument>='0') && (*argument<='9'))
+                    {
+                        testNb *= 10;
+                        testNb += *argument - '0';
+                        argument++;
+                    }
+                    break;
+
+                case 'P':   /* compressibility % */
+                    argument++;
+                    proba=0;
+                    while ((*argument>='0') && (*argument<='9'))
+                    {
+                        proba *= 10;
+                        proba += *argument - '0';
+                        argument++;
+                    }
+                    if (proba<0) proba=0;
+                    if (proba>100) proba=100;
+                    break;
+
+                default:
+                    return FUZ_usage(programName);
+                }
+            }
+        }
+    }
+
+    /* Get Seed */
+    DISPLAY("Starting zstd_buffered tester (%i-bits, %s)\n", (int)(sizeof(size_t)*8), ZSTD_VERSION);
+
+    if (!seedset) seed = FUZ_GetMilliStart() % 10000;
+    DISPLAY("Seed = %u\n", seed);
+    if (proba!=FUZ_COMPRESSIBILITY_DEFAULT) DISPLAY("Compressibility : %i%%\n", proba);
+
+    if (nbTests<=0) nbTests=1;
+
+    if (testNb==0) result = basicUnitTests(0, ((double)proba) / 100);  /* constant seed for predictability */
+    if (!result)
+        result = fuzzerTests(seed, nbTests, testNb, ((double)proba) / 100);
+    if (mainPause)
+    {
+        int unused;
+        DISPLAY("Press Enter \n");
+        unused = getchar();
+        (void)unused;
+    }
+    return result;
+}
diff --git a/programs/zstd.1 b/programs/zstd.1
new file mode 100644
index 0000000..fdc8cc4
--- /dev/null
+++ b/programs/zstd.1
@@ -0,0 +1,78 @@
+\"
+\" zstd.1: This is a manual page for 'zstd' program. This file is part of the
+\" zstd <http://www.zstd.net/> project.
+\" Author: Yann Collet
+\"
+
+\" No hyphenation
+.hy 0
+.nr HY 0
+
+.TH zstd "1" "2015-08-22" "zstd" "User Commands"
+.SH NAME
+\fBzstd, unzstd, zstdcat\fR - Compress or decompress .zst files
+
+.SH SYNOPSIS
+.TP 5
+\fBzstd\fR [\fBOPTIONS\fR] [-|INPUT-FILE] <OUTPUT-FILE>
+.PP
+.B unzstd
+is equivalent to
+.BR "zstd \-d" 
+.br
+.B zstdcat
+is equivalent to
+.BR "zstd \-dc" 
+.br
+
+.SH DESCRIPTION
+.PP
+\fBzstd\fR is a fast lossless compression algorithm.
+It is based on the \fBLZ77\fR family, with FSE & huff0 entropy stage.
+zstd offers compression speed > 200 MB/s per core.
+It also features a fast decoder, with speed > 500 MB/s per core.
+\fBzstd\fR supports the following options :
+
+.SH OPTIONS
+.TP
+.B \-1
+ fast compression (default)
+.TP
+.B \-d
+ decompression
+.TP
+.B \-f
+ overwrite output without prompting
+.TP
+.BR \-h/\-H ", " --help
+ display help/long help and exit
+.TP
+.BR \-V ", " --version
+ display Version number and exit
+.TP
+.BR \-v ", " --verbose
+ verbose mode
+.TP
+.B \-q
+ suppress warnings; specify twice to suppress errors too
+.TP
+.B \-c
+ force write to standard output, even if it is the console
+.TP
+.B \-t
+ test compressed file integrity
+.TP
+.B \-z
+ force compression
+.TP
+.B \-b
+ benchmark file(s)
+.TP
+.B \-i#
+ iteration loops [1-9](default : 3), benchmark mode only
+
+.SH BUGS
+Report bugs at:- https://github.com/Cyan4973/zstd/issues
+
+.SH AUTHOR
+Yann Collet
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
new file mode 100644
index 0000000..418fec7
--- /dev/null
+++ b/programs/zstdcli.c
@@ -0,0 +1,386 @@
+/*
+  zstdcli - Command Line Interface (cli) for zstd
+  Copyright (C) Yann Collet 2014-2015
+
+  GPL v2 License
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+  You can contact the author at :
+  - zstd source repository : https://github.com/Cyan4973/zstd
+  - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+/*
+  Note : this is user program.
+  It is not part of zstd compression library.
+  The license of this compression CLI program is GPLv2.
+  The license of zstd library is BSD.
+*/
+
+
+/**************************************
+*  Compiler Options
+**************************************/
+#define _CRT_SECURE_NO_WARNINGS  /* Visual : removes warning from strcpy */
+#define _POSIX_SOURCE 1          /* triggers fileno() within <stdio.h> on unix */
+
+
+/**************************************
+*  Includes
+**************************************/
+#include <stdio.h>    /* fprintf, getchar */
+#include <stdlib.h>   /* exit, calloc, free */
+#include <string.h>   /* strcmp, strlen */
+#include "bench.h"    /* BMK_benchFiles, BMK_SetNbIterations */
+#include "fileio.h"
+
+
+/**************************************
+*  OS-specific Includes
+**************************************/
+#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
+#  include <fcntl.h>    // _O_BINARY
+#  include <io.h>       // _setmode, _isatty
+#  ifdef __MINGW32__
+   /* int _fileno(FILE *stream);   // seems no longer useful // MINGW somehow forgets to include this windows declaration into <stdio.h> */
+#  endif
+#  define SET_BINARY_MODE(file) _setmode(_fileno(file), _O_BINARY)
+#  define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
+#else
+#  include <unistd.h>   // isatty
+#  define SET_BINARY_MODE(file)
+#  define IS_CONSOLE(stdStream) isatty(fileno(stdStream))
+#endif
+
+
+/**************************************
+*  Constants
+**************************************/
+#define COMPRESSOR_NAME "zstd command line interface"
+#ifndef ZSTD_VERSION
+#  define ZSTD_VERSION "v0.4.0"
+#endif
+#define AUTHOR "Yann Collet"
+#define WELCOME_MESSAGE "*** %s %i-bits %s, by %s (%s) ***\n", COMPRESSOR_NAME, (int)(sizeof(void*)*8), ZSTD_VERSION, AUTHOR, __DATE__
+#define ZSTD_EXTENSION ".zst"
+#define ZSTD_CAT "zstdcat"
+#define ZSTD_UNZSTD "unzstd"
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+
+/**************************************
+*  Display Macros
+**************************************/
+#define DISPLAY(...)           fprintf(displayOut, __VA_ARGS__)
+#define DISPLAYLEVEL(l, ...)   if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
+static FILE* displayOut;
+static unsigned displayLevel = 2;   // 0 : no display  // 1: errors  // 2 : + result + interaction + warnings ;  // 3 : + progression;  // 4 : + information
+
+
+/**************************************
+*  Exceptions
+**************************************/
+#define DEBUG 0
+#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
+#define EXM_THROW(error, ...)                                             \
+{                                                                         \
+    DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
+    DISPLAYLEVEL(1, "Error %i : ", error);                                \
+    DISPLAYLEVEL(1, __VA_ARGS__);                                         \
+    DISPLAYLEVEL(1, "\n");                                                \
+    exit(error);                                                          \
+}
+
+
+/**************************************
+*  Command Line
+**************************************/
+static int usage(const char* programName)
+{
+    DISPLAY( "Usage :\n");
+    DISPLAY( "      %s [arg] [input] [output]\n", programName);
+    DISPLAY( "\n");
+    DISPLAY( "input   : a filename\n");
+    DISPLAY( "          with no FILE, or when FILE is - , read standard input\n");
+    DISPLAY( "Arguments :\n");
+    DISPLAY( " -1     : Fast compression (default) \n");
+    DISPLAY( " -9     : High compression \n");
+    DISPLAY( " -d     : decompression (default for %s extension)\n", ZSTD_EXTENSION);
+    //DISPLAY( " -z     : force compression\n");
+    DISPLAY( " -f     : overwrite output without prompting \n");
+    DISPLAY( " -h/-H  : display help/long help and exit\n");
+    return 0;
+}
+
+static int usage_advanced(const char* programName)
+{
+    DISPLAY(WELCOME_MESSAGE);
+    usage(programName);
+    DISPLAY( "\n");
+    DISPLAY( "Advanced arguments :\n");
+    DISPLAY( " -V     : display Version number and exit\n");
+    DISPLAY( " -v     : verbose mode\n");
+    DISPLAY( " -q     : suppress warnings; specify twice to suppress errors too\n");
+    DISPLAY( " -c     : force write to standard output, even if it is the console\n");
+    //DISPLAY( " -t     : test compressed file integrity\n");
+    DISPLAY( "Benchmark arguments :\n");
+    DISPLAY( " -b#    : benchmark file(s), using # compression level (default : 1) \n");
+    DISPLAY( " -B#    : cut file into independent blocks of size # (default : no block)\n");
+    DISPLAY( " -i#    : iteration loops [1-9](default : 3)\n");
+    DISPLAY( " -r#    : test all compression levels from 1 to # (default : disabled)\n");
+    return 0;
+}
+
+static int badusage(const char* programName)
+{
+    DISPLAYLEVEL(1, "Incorrect parameters\n");
+    if (displayLevel >= 1) usage(programName);
+    return 1;
+}
+
+
+static void waitEnter(void)
+{
+    int unused;
+    DISPLAY("Press enter to continue...\n");
+    unused = getchar();
+    (void)unused;
+}
+
+
+int main(int argCount, const char** argv)
+{
+    int i,
+        bench=0,
+        decode=0,
+        forceStdout=0,
+        main_pause=0,
+        rangeBench = 1;
+    unsigned fileNameStart = 0;
+    unsigned nbFiles = 0;
+    unsigned cLevel = 1;
+    const char* programName = argv[0];
+    const char* inFileName = NULL;
+    const char* outFileName = NULL;
+    char* dynNameSpace = NULL;
+    const char extension[] = ZSTD_EXTENSION;
+
+    displayOut = stderr;
+    /* Pick out basename component. Don't rely on stdlib because of conflicting behavior. */
+    for (i = (int)strlen(programName); i > 0; i--)
+    {
+        if (programName[i] == '/') { i++; break; }
+    }
+    programName += i;
+
+    /* zstdcat preset behavior */
+    if (!strcmp(programName, ZSTD_CAT)) { decode=1; forceStdout=1; displayLevel=1; outFileName=stdoutmark; }
+
+    /* unzstd preset behavior */
+    if (!strcmp(programName, ZSTD_UNZSTD))
+        decode=1;
+
+    /* command switches */
+    for(i=1; i<argCount; i++)
+    {
+        const char* argument = argv[i];
+
+        if(!argument) continue;   /* Protection if argument empty */
+
+        /* long commands (--long-word) */
+        if (!strcmp(argument, "--version")) { displayOut=stdout; DISPLAY(WELCOME_MESSAGE); return 0; }
+        if (!strcmp(argument, "--help")) { displayOut=stdout; return usage_advanced(programName); }
+        if (!strcmp(argument, "--verbose")) { displayLevel=4; continue; }
+
+        /* Decode commands (note : aggregated commands are allowed) */
+        if (argument[0]=='-')
+        {
+            /* '-' means stdin/stdout */
+            if (argument[1]==0)
+            {
+                if (!inFileName) inFileName=stdinmark;
+                else outFileName=stdoutmark;
+                continue;
+            }
+
+            argument++;
+
+            while (argument[0]!=0)
+            {
+                /* compression Level */
+                if ((*argument>='0') && (*argument<='9'))
+                {
+                    cLevel = 0;
+                    while ((*argument >= '0') && (*argument <= '9'))
+                    {
+                        cLevel *= 10;
+                        cLevel += *argument - '0';
+                        argument++;
+                    }
+                    continue;
+                }
+
+                switch(argument[0])
+                {
+                    /* Display help */
+                case 'V': displayOut=stdout; DISPLAY(WELCOME_MESSAGE); return 0;   /* Version Only */
+                case 'H':
+                case 'h': displayOut=stdout; return usage_advanced(programName);
+
+                    /* Compression (default) */
+                //case 'z': forceCompress = 1; break;
+
+                    /* Decoding */
+                case 'd': decode=1; argument++; break;
+
+                    /* Force stdout, even if stdout==console */
+                case 'c': forceStdout=1; outFileName=stdoutmark; displayLevel=1; argument++; break;
+
+                    // Test
+                //case 't': decode=1; LZ4IO_setOverwrite(1); output_filename=nulmark; break;
+
+                    /* Overwrite */
+                case 'f': FIO_overwriteMode(); argument++; break;
+
+                    /* Verbose mode */
+                case 'v': displayLevel=4; argument++; break;
+
+                    /* Quiet mode */
+                case 'q': displayLevel--; argument++; break;
+
+                    /* keep source file (default anyway, so useless; only for xz/lzma compatibility) */
+                case 'k': argument++; break;
+
+                    /* Benchmark */
+                case 'b': bench=1; argument++; break;
+
+                    /* Modify Nb Iterations (benchmark only) */
+                case 'i':
+                    {
+                        int iters= 0;
+                        argument++;
+                        while ((*argument >='0') && (*argument <='9'))
+                            iters *= 10, iters += *argument++ - '0';
+                        BMK_SetNbIterations(iters);
+                    }
+                    break;
+
+                    /* cut input into blocks (benchmark only) */
+                case 'B':
+                    {
+                        size_t bSize = 0;
+                        argument++;
+                        while ((*argument >='0') && (*argument <='9'))
+                            bSize *= 10, bSize += *argument++ - '0';
+                        if (*argument=='K') bSize<<=10, argument++;  /* allows using KB notation */
+                        if (*argument=='M') bSize<<=20, argument++;
+                        if (*argument=='B') argument++;
+                        BMK_SetBlockSize(bSize);
+                    }
+                    break;
+
+                    /* range bench (benchmark only) */
+                case 'r':
+                        rangeBench = -1;
+                        argument++;
+                        break;
+
+                    /* Pause at the end (hidden option) */
+                case 'p': main_pause=1; argument++; break;
+
+                    /* unknown command */
+                default : return badusage(programName);
+                }
+            }
+            continue;
+        }
+
+        /* first provided filename is input */
+        if (!inFileName) { inFileName = argument; fileNameStart = i; nbFiles = argCount-i; continue; }
+
+        /* second provided filename is output */
+        if (!outFileName)
+        {
+            outFileName = argument;
+            if (!strcmp (outFileName, nullString)) outFileName = nulmark;
+            continue;
+        }
+    }
+
+    /* Welcome message (if verbose) */
+    DISPLAYLEVEL(3, WELCOME_MESSAGE);
+
+    /* No input filename ==> use stdin */
+    if(!inFileName) { inFileName=stdinmark; }
+
+    /* Check if input defined as console; trigger an error in this case */
+    if (!strcmp(inFileName, stdinmark) && IS_CONSOLE(stdin) ) return badusage(programName);
+
+    /* Check if benchmark is selected */
+    if (bench) { BMK_benchFiles(argv+fileNameStart, nbFiles, cLevel*rangeBench); goto _end; }
+
+    /* No output filename ==> try to select one automatically (when possible) */
+    while (!outFileName)
+    {
+        if (!IS_CONSOLE(stdout)) { outFileName=stdoutmark; break; }   /* Default to stdout whenever possible (i.e. not a console) */
+        if (!decode)   /* compression to file */
+        {
+            size_t l = strlen(inFileName);
+            dynNameSpace = (char*)calloc(1,l+5);
+            if (dynNameSpace==NULL) { DISPLAY("not enough memory\n"); exit(1); }
+            strcpy(dynNameSpace, inFileName);
+            strcpy(dynNameSpace+l, ZSTD_EXTENSION);
+            outFileName = dynNameSpace;
+            DISPLAYLEVEL(2, "Compressed filename will be : %s \n", outFileName);
+            break;
+        }
+        /* decompression to file (automatic name will work only if input filename has correct format extension) */
+        {
+            size_t filenameSize = strlen(inFileName);
+            if (strcmp(inFileName + (filenameSize-4), extension))
+            {
+                 DISPLAYLEVEL(1, "unknown suffix - cannot determine destination filename\n");
+                 return badusage(programName);
+            }
+            dynNameSpace = (char*)calloc(1,filenameSize+1);
+            if (dynNameSpace==NULL) { DISPLAY("not enough memory\n"); exit(1); }
+            outFileName = dynNameSpace;
+            strcpy(dynNameSpace, inFileName);
+            dynNameSpace[filenameSize-4]=0;
+            DISPLAYLEVEL(2, "Decoding file %s \n", outFileName);
+        }
+    }
+
+    /* Check if output is defined as console; trigger an error in this case */
+    if (!strcmp(outFileName,stdoutmark) && IS_CONSOLE(stdout) && !forceStdout) return badusage(programName);
+
+    /* No warning message in pure pipe mode (stdin + stdout) */
+    if (!strcmp(inFileName, stdinmark) && !strcmp(outFileName,stdoutmark) && (displayLevel==2)) displayLevel=1;
+
+    /* IO Stream/File */
+    FIO_setNotificationLevel(displayLevel);
+    if (decode)
+        FIO_decompressFilename(outFileName, inFileName);
+    else
+        FIO_compressFilename(outFileName, inFileName, cLevel);
+
+_end:
+    if (main_pause) waitEnter();
+    free(dynNameSpace);
+    return 0;
+}
diff --git a/visual/2012/fullbench/fullbench.vcxproj b/visual/2012/fullbench/fullbench.vcxproj
new file mode 100644
index 0000000..9a36e5b
--- /dev/null
+++ b/visual/2012/fullbench/fullbench.vcxproj
@@ -0,0 +1,188 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{61ABD629-1CC8-4FD7-9281-6B8DBB9D3DF8}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>fullbench</RootNamespace>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v110</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v110</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v110</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v110</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <IncludePath>$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\legacy;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <IncludePath>$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <IncludePath>$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <IncludePath>$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize25000 %(AdditionalOptions)</AdditionalOptions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize19000 %(AdditionalOptions)</AdditionalOptions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <EnablePREfast>true</EnablePREfast>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <AdditionalOptions>/analyze:stacksize19000 %(AdditionalOptions)</AdditionalOptions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize19000 %(AdditionalOptions)</AdditionalOptions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\lib\fse.c" />
+    <ClCompile Include="..\..\..\lib\huff0.c" />
+    <ClCompile Include="..\..\..\lib\legacy\zstd_v01.c" />
+    <ClCompile Include="..\..\..\lib\legacy\zstd_v02.c" />
+    <ClCompile Include="..\..\..\lib\legacy\zstd_v03.c" />
+    <ClCompile Include="..\..\..\lib\zstd_compress.c" />
+    <ClCompile Include="..\..\..\lib\zstd_decompress.c" />
+    <ClCompile Include="..\..\..\programs\datagen.c" />
+    <ClCompile Include="..\..\..\programs\fullbench.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\lib\fse.h" />
+    <ClInclude Include="..\..\..\lib\fse_static.h" />
+    <ClInclude Include="..\..\..\lib\huff0.h" />
+    <ClInclude Include="..\..\..\lib\huff0_static.h" />
+    <ClInclude Include="..\..\..\lib\legacy\zstd_legacy.h" />
+    <ClInclude Include="..\..\..\lib\legacy\zstd_v01.h" />
+    <ClInclude Include="..\..\..\lib\legacy\zstd_v02.h" />
+    <ClInclude Include="..\..\..\lib\legacy\zstd_v03.h" />
+    <ClInclude Include="..\..\..\lib\zstd.h" />
+    <ClInclude Include="..\..\..\lib\zstd_static.h" />
+    <ClInclude Include="..\..\..\programs\datagen.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/visual/2012/fullbench/fullbench.vcxproj.filters b/visual/2012/fullbench/fullbench.vcxproj.filters
new file mode 100644
index 0000000..c3db197
--- /dev/null
+++ b/visual/2012/fullbench/fullbench.vcxproj.filters
@@ -0,0 +1,81 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Fichiers sources">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Fichiers d%27en-tête">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Fichiers de ressources">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\lib\fse.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\programs\fullbench.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\programs\datagen.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\lib\legacy\zstd_v01.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\lib\huff0.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\lib\legacy\zstd_v02.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\lib\zstd_compress.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\lib\zstd_decompress.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\lib\legacy\zstd_v03.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\lib\fse.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\fse_static.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\zstd.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\zstd_static.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\programs\datagen.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\legacy\zstd_v01.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\huff0.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\huff0_static.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\legacy\zstd_legacy.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\legacy\zstd_v02.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\legacy\zstd_v03.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/visual/2012/fuzzer/fuzzer.vcxproj b/visual/2012/fuzzer/fuzzer.vcxproj
new file mode 100644
index 0000000..63ab472
--- /dev/null
+++ b/visual/2012/fuzzer/fuzzer.vcxproj
@@ -0,0 +1,191 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{6FD4352B-346C-4703-96EA-D4A8B9A6976E}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>fuzzer</RootNamespace>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v110</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v110</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v110</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v110</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <IncludePath>$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\legacy;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <IncludePath>$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <IncludePath>$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <IncludePath>$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize25000 %(AdditionalOptions)</AdditionalOptions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize19000 %(AdditionalOptions)</AdditionalOptions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <EnablePREfast>true</EnablePREfast>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <AdditionalOptions>/analyze:stacksize19000 %(AdditionalOptions)</AdditionalOptions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize19000 %(AdditionalOptions)</AdditionalOptions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\lib\fse.c" />
+    <ClCompile Include="..\..\..\lib\huff0.c" />
+    <ClCompile Include="..\..\..\lib\legacy\zstd_v01.c" />
+    <ClCompile Include="..\..\..\lib\legacy\zstd_v02.c" />
+    <ClCompile Include="..\..\..\lib\legacy\zstd_v03.c" />
+    <ClCompile Include="..\..\..\lib\zstd_compress.c" />
+    <ClCompile Include="..\..\..\lib\zstd_decompress.c" />
+    <ClCompile Include="..\..\..\programs\datagen.c" />
+    <ClCompile Include="..\..\..\programs\fuzzer.c" />
+    <ClCompile Include="..\..\..\programs\xxhash.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\lib\fse.h" />
+    <ClInclude Include="..\..\..\lib\fse_static.h" />
+    <ClInclude Include="..\..\..\lib\huff0.h" />
+    <ClInclude Include="..\..\..\lib\huff0_static.h" />
+    <ClInclude Include="..\..\..\lib\legacy\zstd_legacy.h" />
+    <ClInclude Include="..\..\..\lib\legacy\zstd_v01.h" />
+    <ClInclude Include="..\..\..\lib\legacy\zstd_v02.h" />
+    <ClInclude Include="..\..\..\lib\legacy\zstd_v03.h" />
+    <ClInclude Include="..\..\..\lib\zstd.h" />
+    <ClInclude Include="..\..\..\lib\zstd_internal.h" />
+    <ClInclude Include="..\..\..\lib\zstd_static.h" />
+    <ClInclude Include="..\..\..\programs\datagen.h" />
+    <ClInclude Include="..\..\..\programs\xxhash.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/visual/2012/fuzzer/fuzzer.vcxproj.filters b/visual/2012/fuzzer/fuzzer.vcxproj.filters
new file mode 100644
index 0000000..500da87
--- /dev/null
+++ b/visual/2012/fuzzer/fuzzer.vcxproj.filters
@@ -0,0 +1,90 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Fichiers sources">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Fichiers d%27en-tête">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Fichiers de ressources">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\lib\fse.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\programs\fuzzer.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\programs\xxhash.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\programs\datagen.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\lib\legacy\zstd_v01.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\lib\huff0.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\lib\legacy\zstd_v02.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\lib\zstd_compress.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\lib\zstd_decompress.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\lib\legacy\zstd_v03.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\lib\fse.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\fse_static.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\zstd.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\zstd_static.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\programs\xxhash.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\programs\datagen.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\legacy\zstd_v01.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\huff0.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\huff0_static.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\zstd_internal.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\legacy\zstd_legacy.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\legacy\zstd_v02.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\legacy\zstd_v03.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/visual/2012/zstd.sln b/visual/2012/zstd.sln
new file mode 100644
index 0000000..2b5eb98
--- /dev/null
+++ b/visual/2012/zstd.sln
@@ -0,0 +1,46 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Express 2012 for Windows Desktop
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "zstd", "zstd\zstd.vcxproj", "{4E52A41A-F33B-4C7A-8C36-A1A6B4F4277C}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "fuzzer", "fuzzer\fuzzer.vcxproj", "{6FD4352B-346C-4703-96EA-D4A8B9A6976E}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "fullbench", "fullbench\fullbench.vcxproj", "{61ABD629-1CC8-4FD7-9281-6B8DBB9D3DF8}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Win32 = Debug|Win32
+		Debug|x64 = Debug|x64
+		Release|Win32 = Release|Win32
+		Release|x64 = Release|x64
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{4E52A41A-F33B-4C7A-8C36-A1A6B4F4277C}.Debug|Win32.ActiveCfg = Debug|Win32
+		{4E52A41A-F33B-4C7A-8C36-A1A6B4F4277C}.Debug|Win32.Build.0 = Debug|Win32
+		{4E52A41A-F33B-4C7A-8C36-A1A6B4F4277C}.Debug|x64.ActiveCfg = Debug|x64
+		{4E52A41A-F33B-4C7A-8C36-A1A6B4F4277C}.Debug|x64.Build.0 = Debug|x64
+		{4E52A41A-F33B-4C7A-8C36-A1A6B4F4277C}.Release|Win32.ActiveCfg = Release|Win32
+		{4E52A41A-F33B-4C7A-8C36-A1A6B4F4277C}.Release|Win32.Build.0 = Release|Win32
+		{4E52A41A-F33B-4C7A-8C36-A1A6B4F4277C}.Release|x64.ActiveCfg = Release|x64
+		{4E52A41A-F33B-4C7A-8C36-A1A6B4F4277C}.Release|x64.Build.0 = Release|x64
+		{6FD4352B-346C-4703-96EA-D4A8B9A6976E}.Debug|Win32.ActiveCfg = Debug|Win32
+		{6FD4352B-346C-4703-96EA-D4A8B9A6976E}.Debug|Win32.Build.0 = Debug|Win32
+		{6FD4352B-346C-4703-96EA-D4A8B9A6976E}.Debug|x64.ActiveCfg = Debug|x64
+		{6FD4352B-346C-4703-96EA-D4A8B9A6976E}.Debug|x64.Build.0 = Debug|x64
+		{6FD4352B-346C-4703-96EA-D4A8B9A6976E}.Release|Win32.ActiveCfg = Release|Win32
+		{6FD4352B-346C-4703-96EA-D4A8B9A6976E}.Release|Win32.Build.0 = Release|Win32
+		{6FD4352B-346C-4703-96EA-D4A8B9A6976E}.Release|x64.ActiveCfg = Release|x64
+		{6FD4352B-346C-4703-96EA-D4A8B9A6976E}.Release|x64.Build.0 = Release|x64
+		{61ABD629-1CC8-4FD7-9281-6B8DBB9D3DF8}.Debug|Win32.ActiveCfg = Debug|Win32
+		{61ABD629-1CC8-4FD7-9281-6B8DBB9D3DF8}.Debug|Win32.Build.0 = Debug|Win32
+		{61ABD629-1CC8-4FD7-9281-6B8DBB9D3DF8}.Debug|x64.ActiveCfg = Debug|x64
+		{61ABD629-1CC8-4FD7-9281-6B8DBB9D3DF8}.Debug|x64.Build.0 = Debug|x64
+		{61ABD629-1CC8-4FD7-9281-6B8DBB9D3DF8}.Release|Win32.ActiveCfg = Release|Win32
+		{61ABD629-1CC8-4FD7-9281-6B8DBB9D3DF8}.Release|Win32.Build.0 = Release|Win32
+		{61ABD629-1CC8-4FD7-9281-6B8DBB9D3DF8}.Release|x64.ActiveCfg = Release|x64
+		{61ABD629-1CC8-4FD7-9281-6B8DBB9D3DF8}.Release|x64.Build.0 = Release|x64
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
diff --git a/visual/2012/zstd/zstd.vcxproj b/visual/2012/zstd/zstd.vcxproj
new file mode 100644
index 0000000..f952462
--- /dev/null
+++ b/visual/2012/zstd/zstd.vcxproj
@@ -0,0 +1,198 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\lib\fse.c" />
+    <ClCompile Include="..\..\..\lib\huff0.c" />
+    <ClCompile Include="..\..\..\lib\legacy\zstd_v01.c" />
+    <ClCompile Include="..\..\..\lib\legacy\zstd_v02.c" />
+    <ClCompile Include="..\..\..\lib\legacy\zstd_v03.c" />
+    <ClCompile Include="..\..\..\lib\zstd_buffered.c" />
+    <ClCompile Include="..\..\..\lib\zstd_compress.c" />
+    <ClCompile Include="..\..\..\lib\zstd_decompress.c" />
+    <ClCompile Include="..\..\..\programs\bench.c" />
+    <ClCompile Include="..\..\..\programs\fileio.c" />
+    <ClCompile Include="..\..\..\programs\legacy\fileio_legacy.c" />
+    <ClCompile Include="..\..\..\programs\xxhash.c" />
+    <ClCompile Include="..\..\..\programs\zstdcli.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\lib\fse.h" />
+    <ClInclude Include="..\..\..\lib\fse_static.h" />
+    <ClInclude Include="..\..\..\lib\huff0.h" />
+    <ClInclude Include="..\..\..\lib\huff0_static.h" />
+    <ClInclude Include="..\..\..\lib\legacy\zstd_legacy.h" />
+    <ClInclude Include="..\..\..\lib\legacy\zstd_v01.h" />
+    <ClInclude Include="..\..\..\lib\legacy\zstd_v02.h" />
+    <ClInclude Include="..\..\..\lib\legacy\zstd_v03.h" />
+    <ClInclude Include="..\..\..\lib\zstd.h" />
+    <ClInclude Include="..\..\..\lib\zstd_buffered.h" />
+    <ClInclude Include="..\..\..\lib\zstd_buffered_static.h" />
+    <ClInclude Include="..\..\..\lib\zstd_internal.h" />
+    <ClInclude Include="..\..\..\lib\zstd_static.h" />
+    <ClInclude Include="..\..\..\programs\bench.h" />
+    <ClInclude Include="..\..\..\programs\fileio.h" />
+    <ClInclude Include="..\..\..\programs\legacy\fileio_legacy.h" />
+    <ClInclude Include="..\..\..\programs\xxhash.h" />
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{4E52A41A-F33B-4C7A-8C36-A1A6B4F4277C}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>zstd</RootNamespace>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v110</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v110</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v110</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v110</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <IncludePath>$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\legacy;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <IncludePath>$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <IncludePath>$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <IncludePath>$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>false</RunCodeAnalysis>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize25000 %(AdditionalOptions)</AdditionalOptions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>false</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize19000 %(AdditionalOptions)</AdditionalOptions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <EnablePREfast>true</EnablePREfast>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <AdditionalOptions>/analyze:stacksize19000 %(AdditionalOptions)</AdditionalOptions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>false</TreatWarningAsError>
+      <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/visual/2012/zstd/zstd.vcxproj.filters b/visual/2012/zstd/zstd.vcxproj.filters
new file mode 100644
index 0000000..442ecae
--- /dev/null
+++ b/visual/2012/zstd/zstd.vcxproj.filters
@@ -0,0 +1,111 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Fichiers sources">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Fichiers d%27en-tête">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Fichiers de ressources">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\lib\fse.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\programs\bench.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\programs\fileio.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\programs\xxhash.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\programs\zstdcli.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\lib\legacy\zstd_v01.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\lib\huff0.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\lib\legacy\zstd_v02.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\programs\legacy\fileio_legacy.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\lib\zstd_compress.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\lib\zstd_decompress.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\lib\zstd_buffered.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\lib\legacy\zstd_v03.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\lib\fse.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\fse_static.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\zstd.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\zstd_static.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\programs\bench.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\programs\fileio.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\programs\xxhash.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\legacy\zstd_v01.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\huff0.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\huff0_static.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\zstd_internal.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\legacy\zstd_legacy.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\legacy\zstd_v02.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\programs\legacy\fileio_legacy.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\zstd_buffered.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\zstd_buffered_static.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\legacy\zstd_v03.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/libzstd.git



More information about the debian-med-commit mailing list