Commit 158738ea authored by Linus Torvalds

Merge tag 'zstd-linus-v6.2' of https://github.com/terrelln/linux

Pull zstd updates from Nick Terrell:
 "Update the kernel to upstream zstd v1.5.2 [0]. Specifically to the tag
  v1.5.2-kernel [1] which includes several cherrypicked fixes for the
  kernel on top of v1.5.2.

  Excepting the MAINTAINERS change, all the changes in this merge can
  be regenerated by:

    git clone https://github.com/facebook/zstd
    cd zstd/contrib/linux-kernel
    git checkout v1.5.2-kernel
    LINUX=/path/to/linux/repo make import

  Additionally, this includes several minor typo fixes, all of which
  have been applied upstream, so they will be preserved on the next
  import"

Link: https://github.com/facebook/zstd/releases/tag/v1.5.2 [0]
Link: https://github.com/facebook/zstd/tree/v1.5.2-kernel [1]
Link: https://lore.kernel.org/lkml/20221024202606.404049-1-nickrterrell@gmail.com/
Link: https://github.com/torvalds/linux/commit/637a642f5ca5e850186bb64ac75ebb0f124b458d

* tag 'zstd-linus-v6.2' of https://github.com/terrelln/linux:
  zstd: import usptream v1.5.2
  zstd: Move zstd-common module exports to zstd_common_module.c
  lib: zstd: Fix comment typo
  lib: zstd: fix repeated words in comments
  MAINTAINERS: git://github -> https://github.com for terrelln
  lib: zstd: clean up double word in comment.
parents aeba12b2 70d822cf
@@ -23073,7 +23073,7 @@ ZSTD
 M:	Nick Terrell <terrelln@fb.com>
 S:	Maintained
 B:	https://github.com/facebook/zstd/issues
-T:	git git://github.com/terrelln/linux.git
+T:	git https://github.com/terrelln/linux.git
 F:	include/linux/zstd*
 F:	lib/zstd/
 F:	lib/decompress_unzstd.c
...
@@ -35,6 +35,7 @@ zstd_decompress-y := \
 	decompress/zstd_decompress_block.o \
 
 zstd_common-y := \
+	zstd_common_module.o \
 	common/debug.o \
 	common/entropy_common.o \
 	common/error_private.o \
...
@@ -313,7 +313,16 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 c
     U32 const regMask = sizeof(bitContainer)*8 - 1;
     /* if start > regMask, bitstream is corrupted, and result is undefined */
     assert(nbBits < BIT_MASK_SIZE);
+    /* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better
+     * than accessing memory. When bmi2 instruction is not present, we consider
+     * such cpus old (pre-Haswell, 2013) and their performance is not of that
+     * importance.
+     */
+#if defined(__x86_64__) || defined(_M_X86)
+    return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
+#else
     return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
+#endif
 }
 
 MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
...
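As background on the hunk above, here is a minimal standalone sketch of what BIT_getMiddleBits() computes, with illustrative names (get_middle_bits is not a zstd function). On x86-64 with BMI2 available, compilers lower the ((1 << nbBits) - 1) mask to a single bzhi instruction, which is why the new x86 branch can skip the BIT_mask[] table load:

    #include <stdint.h>
    #include <stdio.h>

    /* illustrative restatement of BIT_getMiddleBits() on a 64-bit container */
    static uint64_t get_middle_bits(uint64_t bitContainer, unsigned start, unsigned nbBits)
    {
        unsigned const regMask = sizeof(bitContainer)*8 - 1;
        /* with BMI2, this mask compiles down to one bzhi instruction */
        return (bitContainer >> (start & regMask)) & (((uint64_t)1 << nbBits) - 1);
    }

    int main(void)
    {
        /* 8 bits starting at bit 8 of 0xABCD1234 -> 0x12 */
        printf("0x%llx\n", (unsigned long long)get_middle_bits(0xABCD1234u, 8, 8));
        return 0;
    }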
@@ -11,6 +11,8 @@
 #ifndef ZSTD_COMPILER_H
 #define ZSTD_COMPILER_H
 
+#include "portability_macros.h"
+
 /*-*******************************************************
 *  Compiler specifics
 *********************************************************/
@@ -34,7 +36,7 @@
 /*
   On MSVC qsort requires that functions passed into it use the __cdecl calling conversion(CC).
-  This explictly marks such functions as __cdecl so that the code will still compile
+  This explicitly marks such functions as __cdecl so that the code will still compile
   if a CC other than __cdecl has been made the default.
 */
 #define WIN_CDECL
@@ -70,25 +72,13 @@
 /* target attribute */
-#ifndef __has_attribute
-  #define __has_attribute(x) 0  /* Compatibility with non-clang compilers. */
-#endif
 #define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
 
-/* Enable runtime BMI2 dispatch based on the CPU.
- * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
- * We test for bmi1 & bmi2. lzcnt is included in bmi1.
+/* Target attribute for BMI2 dynamic dispatch.
+ * Enable lzcnt, bmi, and bmi2.
  */
-#ifndef DYNAMIC_BMI2
-  #if ((defined(__clang__) && __has_attribute(__target__)) \
-      || (defined(__GNUC__) \
-          && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
-      && (defined(__x86_64__) || defined(_M_X86)) \
-      && !defined(__BMI2__)
-  #  define DYNAMIC_BMI2 1
-  #else
-  #  define DYNAMIC_BMI2 0
-  #endif
-#endif
+#define BMI2_TARGET_ATTRIBUTE TARGET_ATTRIBUTE("lzcnt,bmi,bmi2")
 /* prefetch
  * can be disabled, by declaring NO_PREFETCH build macro */
@@ -115,8 +105,9 @@
 }
 
 /* vectorization
- * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
-#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__)
+ * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax,
+ * and some compilers, like Intel ICC and MCST LCC, do not support it at all. */
+#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) && !defined(__LCC__)
 # if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
 #  define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
 # else
@@ -134,20 +125,18 @@
 #define LIKELY(x) (__builtin_expect((x), 1))
 #define UNLIKELY(x) (__builtin_expect((x), 0))
 
+#if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
+#  define ZSTD_UNREACHABLE { assert(0), __builtin_unreachable(); }
+#else
+#  define ZSTD_UNREACHABLE { assert(0); }
+#endif
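A hedged usage sketch for the new ZSTD_UNREACHABLE macro (the function below is hypothetical, assuming the definition above is in scope): it marks a branch the author believes is dead, asserts in debug builds, and lets the optimizer discard the path in release builds:

    #include <assert.h>

    static int low_bit_plus_one(int x)
    {
        switch (x & 1) {
        case 0: return 1;
        case 1: return 2;
        }
        ZSTD_UNREACHABLE;   /* x & 1 is always 0 or 1 */
    }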
 /* disable warnings */
 
 /*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/
 
-/* compat. with non-clang compilers */
-#ifndef __has_builtin
-#  define __has_builtin(x) 0
-#endif
-
-/* compat. with non-clang compilers */
-#ifndef __has_feature
-#  define __has_feature(x) 0
-#endif
+/* compile time determination of SIMD support */
 /* C-language Attributes are added in C23. */
 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
@@ -168,10 +157,28 @@
 */
 #define ZSTD_FALLTHROUGH fallthrough
 
-/* detects whether we are being compiled under msan */
+/*-**************************************************************
+*  Alignment check
+*****************************************************************/
+
+/* this test was initially positioned in mem.h,
+ * but this file is removed (or replaced) for linux kernel
+ * so it's now hosted in compiler.h,
+ * which remains valid for both user & kernel spaces.
+ */
+
+#ifndef ZSTD_ALIGNOF
+/* covers gcc, clang & MSVC */
+/* note : this section must come first, before C11,
+ * due to a limitation in the kernel source generator */
+#  define ZSTD_ALIGNOF(T) __alignof(T)
+#endif /* ZSTD_ALIGNOF */
+
+/*-**************************************************************
+*  Sanitizer
+*****************************************************************/
+
+/* detects whether we are being compiled under asan */
 
 #endif /* ZSTD_COMPILER_H */
@@ -15,7 +15,6 @@
 /* *************************************
 *  Dependencies
 ***************************************/
-#include <linux/module.h>
 #include "mem.h"
 #include "error_private.h"       /* ERR_*, ERROR */
 #define FSE_STATIC_LINKING_ONLY  /* FSE_MIN_TABLELOG */
@@ -213,7 +212,7 @@ static size_t FSE_readNCount_body_default(
 }
 
 #if DYNAMIC_BMI2
-TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2(
+BMI2_TARGET_ATTRIBUTE static size_t FSE_readNCount_body_bmi2(
         short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
         const void* headerBuffer, size_t hbSize)
 {
@@ -240,7 +239,7 @@ size_t FSE_readNCount(
 {
     return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0);
 }
-EXPORT_SYMBOL_GPL(FSE_readNCount);
 
 /*! HUF_readStats() :
     Read compact Huffman tree, saved by HUF_writeCTable().
@@ -256,7 +255,6 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
     U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
     return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0);
 }
-EXPORT_SYMBOL_GPL(HUF_readStats);
 
 FORCE_INLINE_TEMPLATE size_t
 HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
@@ -296,7 +294,7 @@ HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
     ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
     weightTotal = 0;
     {   U32 n; for (n=0; n<oSize; n++) {
-            if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
+            if (huffWeight[n] > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
             rankStats[huffWeight[n]]++;
             weightTotal += (1 << huffWeight[n]) >> 1;
     }   }
@@ -334,7 +332,7 @@ static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* r
 }
 
 #if DYNAMIC_BMI2
-static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+static BMI2_TARGET_ATTRIBUTE size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
                      U32* nbSymbolsPtr, U32* tableLogPtr,
                      const void* src, size_t srcSize,
                      void* workSpace, size_t wkspSize)
@@ -357,4 +355,3 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
     (void)bmi2;
     return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
 }
-EXPORT_SYMBOL_GPL(HUF_readStats_wksp);
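The hunks above all apply the same DYNAMIC_BMI2 dispatch pattern. Here is a minimal sketch of that pattern under stated assumptions (gcc/clang on x86-64; the work* names are illustrative, not zstd functions): the shared body is inlined into two wrappers, one compiled with the BMI2 target attribute, and a runtime CPU flag selects the variant.

    #include <stddef.h>

    #define BMI2_TARGET_ATTRIBUTE __attribute__((__target__("lzcnt,bmi,bmi2")))

    /* shared body; zstd marks this FORCE_INLINE_TEMPLATE so each wrapper
     * receives its own specialized copy */
    static inline size_t work_body(const unsigned char* src, size_t n)
    {
        size_t sum = 0, i;
        for (i = 0; i < n; i++) sum += src[i];
        return sum;
    }

    static size_t work_default(const unsigned char* src, size_t n)
    {
        return work_body(src, n);
    }

    BMI2_TARGET_ATTRIBUTE
    static size_t work_bmi2(const unsigned char* src, size_t n)
    {
        return work_body(src, n);   /* same body, compiled with lzcnt/bmi/bmi2 */
    }

    size_t work(const unsigned char* src, size_t n, int bmi2)
    {
        return bmi2 ? work_bmi2(src, n) : work_default(src, n);
    }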
@@ -18,8 +18,10 @@
 /* ****************************************
 *  Dependencies
 ******************************************/
-#include "zstd_deps.h"          /* size_t */
 #include <linux/zstd_errors.h>  /* enum list */
+#include "compiler.h"
+#include "debug.h"
+#include "zstd_deps.h"          /* size_t */
 
 /* ****************************************
@@ -62,5 +64,82 @@ ERR_STATIC const char* ERR_getErrorName(size_t code)
     return ERR_getErrorString(ERR_getErrorCode(code));
 }
+
+/*
+ * Ignore: this is an internal helper.
+ *
+ * This is a helper function to help force C99-correctness during compilation.
+ * Under strict compilation modes, variadic macro arguments can't be empty.
+ * However, variadic function arguments can be. Using a function therefore lets
+ * us statically check that at least one (string) argument was passed,
+ * independent of the compilation flags.
+ */
+static INLINE_KEYWORD UNUSED_ATTR
+void _force_has_format_string(const char *format, ...) {
+  (void)format;
+}
+
+/*
+ * Ignore: this is an internal helper.
+ *
+ * We want to force this function invocation to be syntactically correct, but
+ * we don't want to force runtime evaluation of its arguments.
+ */
+#define _FORCE_HAS_FORMAT_STRING(...) \
+  if (0) { \
+    _force_has_format_string(__VA_ARGS__); \
+  }
+
+#define ERR_QUOTE(str) #str
+
+/*
+ * Return the specified error if the condition evaluates to true.
+ *
+ * In debug modes, prints additional information.
+ * In order to do that (particularly, printing the conditional that failed),
+ * this can't just wrap RETURN_ERROR().
+ */
+#define RETURN_ERROR_IF(cond, err, ...) \
+  if (cond) { \
+    RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
+           __FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \
+    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+    RAWLOG(3, ": " __VA_ARGS__); \
+    RAWLOG(3, "\n"); \
+    return ERROR(err); \
+  }
+
+/*
+ * Unconditionally return the specified error.
+ *
+ * In debug modes, prints additional information.
+ */
+#define RETURN_ERROR(err, ...) \
+  do { \
+    RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
+           __FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \
+    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+    RAWLOG(3, ": " __VA_ARGS__); \
+    RAWLOG(3, "\n"); \
+    return ERROR(err); \
+  } while(0);
+
+/*
+ * If the provided expression evaluates to an error code, returns that error code.
+ *
+ * In debug modes, prints additional information.
+ */
+#define FORWARD_IF_ERROR(err, ...) \
+  do { \
+    size_t const err_code = (err); \
+    if (ERR_isError(err_code)) { \
+      RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
+             __FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \
+      _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+      RAWLOG(3, ": " __VA_ARGS__); \
+      RAWLOG(3, "\n"); \
+      return err_code; \
+    } \
+  } while(0);
+
 #endif /* ERROR_H_MODULE */
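A hedged usage sketch for the macros above (copy_block and checked_copy are illustrative helpers, not zstd functions; dstSize_tooSmall is one of the real ZSTD_error_* codes that ERROR() expands from):

    static size_t copy_block(void* dst, size_t dstCapacity,
                             const void* src, size_t srcSize)
    {
        RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall,
                        "need %u bytes, have %u",
                        (unsigned)srcSize, (unsigned)dstCapacity);
        ZSTD_memcpy(dst, src, srcSize);
        return srcSize;
    }

    static size_t checked_copy(void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize)
    {
        size_t const r = copy_block(dst, dstCapacity, src, srcSize);
        FORWARD_IF_ERROR(r, "copy_block failed");   /* early-returns the error code */
        return r;
    }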
@@ -333,8 +333,9 @@ size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
 /* FSE_buildCTable_wksp() :
  *  Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
  * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`.
+ *  See FSE_buildCTable_wksp() for breakdown of workspace usage.
  */
-#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (maxSymbolValue + 2 + (1ull << (tableLog - 2)))
+#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (((maxSymbolValue + 2) + (1ull << (tableLog)))/2 + sizeof(U64)/sizeof(U32) /* additional 8 bytes for potential table overwrite */)
 #define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog))
 size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
...
@@ -365,7 +365,7 @@ static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, co
 }
 
 #if DYNAMIC_BMI2
-TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
+BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
 {
     return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
 }
...
@@ -86,9 +86,9 @@ HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
 /* HUF_compress4X_wksp() :
  *  Same as HUF_compress2(), but uses externally allocated `workSpace`.
- * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */
-#define HUF_WORKSPACE_SIZE ((6 << 10) + 256)
-#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
+ * `workspace` must be at least as large as HUF_WORKSPACE_SIZE */
+#define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */)
+#define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64))
 HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
                                            const void* src, size_t srcSize,
                                            unsigned maxSymbolValue, unsigned tableLog,
@@ -113,11 +113,11 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
 /* *** Constants *** */
-#define HUF_TABLELOG_MAX      12      /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
+#define HUF_TABLELOG_MAX      12      /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */
 #define HUF_TABLELOG_DEFAULT  11      /* default tableLog value when none specified */
 #define HUF_SYMBOLVALUE_MAX  255
 
-#define HUF_TABLELOG_ABSOLUTEMAX  15  /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
+#define HUF_TABLELOG_ABSOLUTEMAX  12  /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
 #if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
 #  error "HUF_TABLELOG_MAX is too large !"
 #endif
@@ -133,15 +133,11 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
 /* static allocation of HUF's Compression Table */
 /* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */
-struct HUF_CElt_s {
-  U16  val;
-  BYTE nbBits;
-};   /* typedef'd to HUF_CElt */
-typedef struct HUF_CElt_s HUF_CElt;   /* consider it an incomplete type */
-#define HUF_CTABLE_SIZE_U32(maxSymbolValue)   ((maxSymbolValue)+1)   /* Use tables of U32, for proper alignment */
-#define HUF_CTABLE_SIZE(maxSymbolValue)       (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32))
+typedef size_t HUF_CElt;   /* consider it an incomplete type */
+#define HUF_CTABLE_SIZE_ST(maxSymbolValue)   ((maxSymbolValue)+2)   /* Use tables of size_t, for proper alignment */
+#define HUF_CTABLE_SIZE(maxSymbolValue)      (HUF_CTABLE_SIZE_ST(maxSymbolValue) * sizeof(size_t))
 #define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
-    HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */
+    HUF_CElt name[HUF_CTABLE_SIZE_ST(maxSymbolValue)] /* no final ; */
 
 /* static allocation of HUF's DTable */
 typedef U32 HUF_DTable;
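With this hunk, each HUF_CElt is a single machine word instead of a packed {U16 val; BYTE nbBits;} struct, so entries are naturally aligned and can be read or written in one access. From the caller's side, static allocation is unchanged; a usage sketch:

    /* declares (HUF_SYMBOLVALUE_MAX+2) size_t entries, enough for any byte alphabet */
    HUF_CREATE_STATIC_CTABLE(hufTable, HUF_SYMBOLVALUE_MAX);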
@@ -191,6 +187,7 @@ size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSym
 size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
 size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
 size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
 int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
@@ -203,12 +200,13 @@ typedef enum {
  *  Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
  *  If it uses hufTable it does not modify hufTable or repeat.
  *  If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
- *  If preferRepeat then the old table will always be used if valid. */
+ *  If preferRepeat then the old table will always be used if valid.
+ *  If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
 size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              unsigned maxSymbolValue, unsigned tableLog,
                              void* workSpace, size_t wkspSize,    /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
-                             HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
+                             HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible);
/* HUF_buildCTable_wksp() : /* HUF_buildCTable_wksp() :
* Same as HUF_buildCTable(), but using externally allocated scratch buffer. * Same as HUF_buildCTable(), but using externally allocated scratch buffer.
...@@ -246,11 +244,10 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, ...@@ -246,11 +244,10 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,
* Loading a CTable saved with HUF_writeCTable() */ * Loading a CTable saved with HUF_writeCTable() */
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights); size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights);
/* HUF_getNbBits() : /* HUF_getNbBitsFromCTable() :
* Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
* Note 1 : is not inlined, as HUF_CElt definition is private * Note 1 : is not inlined, as HUF_CElt definition is private */
* Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */ U32 HUF_getNbBitsFromCTable(const HUF_CElt* symbolTable, U32 symbolValue);
U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue);
 
 /*
  * HUF_decompress() does the following:
@@ -302,18 +299,20 @@ size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* c
 /* ====================== */
 size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
-size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);  /*< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
+size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);  /*< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U64 U64 */
 size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
 
 /* HUF_compress1X_repeat() :
  *  Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
  *  If it uses hufTable it does not modify hufTable or repeat.
  *  If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
- *  If preferRepeat then the old table will always be used if valid. */
+ *  If preferRepeat then the old table will always be used if valid.
+ *  If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
 size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              unsigned maxSymbolValue, unsigned tableLog,
                              void* workSpace, size_t wkspSize,   /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
-                             HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
+                             HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible);
 
 size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
 #ifndef HUF_FORCE_DECOMPRESS_X1
@@ -351,6 +350,9 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds
 #ifndef HUF_FORCE_DECOMPRESS_X2
 size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
 #endif
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
+#endif
 
 #endif /* HUF_STATIC_LINKING_ONLY */
@@ -30,6 +30,8 @@
 *  Basic Types
 *****************************************************************/
 typedef uint8_t  BYTE;
+typedef uint8_t  U8;
+typedef int8_t   S8;
 typedef uint16_t U16;
 typedef int16_t  S16;
 typedef uint32_t U32;
...
/*
* Copyright (c) Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_PORTABILITY_MACROS_H
#define ZSTD_PORTABILITY_MACROS_H
/*
 * This header file contains macro definitions to support portability.
 * This header is shared between C and ASM code, so it MUST only
 * contain macro definitions. It MUST not contain any C code.
 *
 * This header ONLY defines macros to detect platforms/feature support.
 *
 */
/* compat. with non-clang compilers */
#ifndef __has_attribute
#define __has_attribute(x) 0
#endif
/* compat. with non-clang compilers */
#ifndef __has_builtin
# define __has_builtin(x) 0
#endif
/* compat. with non-clang compilers */
#ifndef __has_feature
# define __has_feature(x) 0
#endif
/* detects whether we are being compiled under msan */
/* detects whether we are being compiled under asan */
/* detects whether we are being compiled under dfsan */
/* Mark the internal assembly functions as hidden */
#ifdef __ELF__
# define ZSTD_HIDE_ASM_FUNCTION(func) .hidden func
#else
# define ZSTD_HIDE_ASM_FUNCTION(func)
#endif
/* Enable runtime BMI2 dispatch based on the CPU.
* Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
*/
#ifndef DYNAMIC_BMI2
#if ((defined(__clang__) && __has_attribute(__target__)) \
|| (defined(__GNUC__) \
&& (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
&& (defined(__x86_64__) || defined(_M_X64)) \
&& !defined(__BMI2__)
# define DYNAMIC_BMI2 1
#else
# define DYNAMIC_BMI2 0
#endif
#endif
/*
* Only enable assembly for GNUC comptabile compilers,
* because other platforms may not support GAS assembly syntax.
*
* Only enable assembly for Linux / MacOS, other platforms may
* work, but they haven't been tested. This could likely be
* extended to BSD systems.
*
* Disable assembly when MSAN is enabled, because MSAN requires
* 100% of code to be instrumented to work.
*/
#define ZSTD_ASM_SUPPORTED 1
/*
 * Determines whether we should enable assembly for x86-64
 * with BMI2.
 *
 * Enable if all of the following conditions hold:
 * - ASM hasn't been explicitly disabled by defining ZSTD_DISABLE_ASM
 * - Assembly is supported
 * - We are compiling for x86-64 and either:
 *   - DYNAMIC_BMI2 is enabled
 *   - BMI2 is supported at compile time
 */
#if !defined(ZSTD_DISABLE_ASM) && \
    ZSTD_ASM_SUPPORTED && \
    defined(__x86_64__) && \
    (DYNAMIC_BMI2 || defined(__BMI2__))
# define ZSTD_ENABLE_ASM_X86_64_BMI2 1
#else
# define ZSTD_ENABLE_ASM_X86_64_BMI2 0
#endif

#endif /* ZSTD_PORTABILITY_MACROS_H */
@@ -13,7 +13,6 @@
 /*-*************************************
 *  Dependencies
 ***************************************/
-#include <linux/module.h>
 #define ZSTD_DEPS_NEED_MALLOC
 #include "zstd_deps.h"   /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
 #include "error_private.h"
@@ -36,17 +35,14 @@ const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; }
 *  tells if a return value is an error code
 *  symbol is required for external callers */
 unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
-EXPORT_SYMBOL_GPL(ZSTD_isError);
 
 /*! ZSTD_getErrorName() :
 *   provides error code string from function result (useful for debugging) */
 const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); }
-EXPORT_SYMBOL_GPL(ZSTD_getErrorName);
 
 /*! ZSTD_getError() :
 *   convert a `size_t` function result into a proper ZSTD_errorCode enum */
 ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
-EXPORT_SYMBOL_GPL(ZSTD_getErrorCode);
 
 /*! ZSTD_getErrorString() :
 *   provides error code string from enum */
@@ -63,7 +59,6 @@ void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
         return customMem.customAlloc(customMem.opaque, size);
     return ZSTD_malloc(size);
 }
-EXPORT_SYMBOL_GPL(ZSTD_customMalloc);
 
 void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
 {
@@ -76,7 +71,6 @@ void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
     }
     return ZSTD_calloc(1, size);
 }
-EXPORT_SYMBOL_GPL(ZSTD_customCalloc);
 
 void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
 {
@@ -87,7 +81,3 @@ void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
             ZSTD_free(ptr);
     }
 }
-EXPORT_SYMBOL_GPL(ZSTD_customFree);
-
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("Zstd Common");
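The EXPORT_SYMBOL_GPL() and MODULE_*() lines removed above and in entropy_common.c move into the new zstd_common_module.c, so the imported upstream files need no kernel-specific edits. A sketch of its shape, reconstructed from the exports deleted in this diff (the SPDX line and include list are assumptions):

    // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
    #include <linux/module.h>

    #include "error_private.h"   /* assumed: pulls in declarations of the symbols below */

    EXPORT_SYMBOL_GPL(FSE_readNCount);
    EXPORT_SYMBOL_GPL(HUF_readStats);
    EXPORT_SYMBOL_GPL(HUF_readStats_wksp);
    EXPORT_SYMBOL_GPL(ZSTD_isError);
    EXPORT_SYMBOL_GPL(ZSTD_getErrorName);
    EXPORT_SYMBOL_GPL(ZSTD_getErrorCode);
    EXPORT_SYMBOL_GPL(ZSTD_customMalloc);
    EXPORT_SYMBOL_GPL(ZSTD_customCalloc);
    EXPORT_SYMBOL_GPL(ZSTD_customFree);

    MODULE_LICENSE("Dual BSD/GPL");
    MODULE_DESCRIPTION("Zstd Common");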
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_CLEVELS_H
#define ZSTD_CLEVELS_H
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressionParameters */
#include <linux/zstd.h>
/*-===== Pre-defined compression levels =====-*/
#define ZSTD_MAX_CLEVEL 22
__attribute__((__unused__))
static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
{ /* "default" - for any srcSize > 256 KB */
/* W, C, H, S, L, TL, strat */
{ 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */
{ 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */
{ 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */
{ 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */
{ 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */
{ 21, 18, 19, 3, 5, 2, ZSTD_greedy }, /* level 5 */
{ 21, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6 */
{ 21, 19, 20, 4, 5, 8, ZSTD_lazy }, /* level 7 */
{ 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 8 */
{ 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */
{ 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 10 */
{ 22, 21, 22, 6, 5, 16, ZSTD_lazy2 }, /* level 11 */
{ 22, 22, 23, 6, 5, 32, ZSTD_lazy2 }, /* level 12 */
{ 22, 22, 22, 4, 5, 32, ZSTD_btlazy2 }, /* level 13 */
{ 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */
{ 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */
{ 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */
{ 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */
{ 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */
{ 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */
{ 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */
{ 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */
{ 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */
},
{ /* for srcSize <= 256 KB */
/* W, C, H, S, L, T, strat */
{ 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
{ 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */
{ 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */
{ 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */
{ 18, 16, 17, 3, 5, 2, ZSTD_greedy }, /* level 4.*/
{ 18, 17, 18, 5, 5, 2, ZSTD_greedy }, /* level 5.*/
{ 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/
{ 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */
{ 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
{ 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
{ 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
{ 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/
{ 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/
{ 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */
{ 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/
{ 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/
{ 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/
{ 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/
{ 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/
{ 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/
{ 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/
{ 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/
{ 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/
},
{ /* for srcSize <= 128 KB */
/* W, C, H, S, L, T, strat */
{ 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
{ 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */
{ 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */
{ 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */
{ 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */
{ 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */
{ 17, 16, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */
{ 17, 16, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */
{ 17, 16, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
{ 17, 16, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
{ 17, 16, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
{ 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */
{ 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */
{ 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/
{ 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/
{ 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/
{ 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/
{ 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/
{ 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/
{ 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/
{ 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/
{ 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/
{ 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/
},
{ /* for srcSize <= 16 KB */
/* W, C, H, S, L, T, strat */
{ 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
{ 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */
{ 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */
{ 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */
{ 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */
{ 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/
{ 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */
{ 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */
{ 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/
{ 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/
{ 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/
{ 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/
{ 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/
{ 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/
{ 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/
{ 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/
{ 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/
{ 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/
{ 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/
{ 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/
{ 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/
{ 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/
{ 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/
},
};
#endif /* ZSTD_CLEVELS_H */
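The seven columns map onto the fields of ZSTD_compressionParameters, in order: W=windowLog, C=chainLog, H=hashLog, S=searchLog, L=minMatch, TL=targetLength, strat=strategy. Spelled out, default-table level 3 above is:

    static const ZSTD_compressionParameters cParams_level3 = {
        .windowLog    = 21,   /* W: 2 MB window */
        .chainLog     = 16,   /* C */
        .hashLog      = 17,   /* H */
        .searchLog    = 1,    /* S */
        .minMatch     = 5,    /* L */
        .targetLength = 0,    /* TL */
        .strategy     = ZSTD_dfast,
    };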
@@ -75,13 +75,14 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
     void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
     FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
     U32 const step = FSE_TABLESTEP(tableSize);
+    U32 const maxSV1 = maxSymbolValue+1;
 
-    U32* cumul = (U32*)workSpace;
-    FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2));
+    U16* cumul = (U16*)workSpace;   /* size = maxSV1 */
+    FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1));  /* size = tableSize */
 
     U32 highThreshold = tableSize-1;
 
-    if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */
+    assert(((size_t)workSpace & 1) == 0);  /* Must be 2 bytes-aligned */
     if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
     /* CTable header */
     tableU16[-2] = (U16) tableLog;
@@ -98,20 +99,61 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
     /* symbol start positions */
     {   U32 u;
         cumul[0] = 0;
-        for (u=1; u <= maxSymbolValue+1; u++) {
+        for (u=1; u <= maxSV1; u++) {
             if (normalizedCounter[u-1]==-1) {  /* Low proba symbol */
                 cumul[u] = cumul[u-1] + 1;
                 tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
             } else {
-                cumul[u] = cumul[u-1] + normalizedCounter[u-1];
+                assert(normalizedCounter[u-1] >= 0);
+                cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1];
+                assert(cumul[u] >= cumul[u-1]);  /* no overflow */
         }   }
-        cumul[maxSymbolValue+1] = tableSize+1;
+        cumul[maxSV1] = (U16)(tableSize+1);
     }
     /* Spread symbols */
-    {   U32 position = 0;
+    if (highThreshold == tableSize - 1) {
+        /* Case for no low prob count symbols. Lay down 8 bytes at a time
+         * to reduce branch misses since we are operating on a small block
+         */
+        BYTE* const spread = tableSymbol + tableSize; /* size = tableSize + 8 (may write beyond tableSize) */
+        {   U64 const add = 0x0101010101010101ull;
+            size_t pos = 0;
+            U64 sv = 0;
+            U32 s;
+            for (s=0; s<maxSV1; ++s, sv += add) {
+                int i;
+                int const n = normalizedCounter[s];
+                MEM_write64(spread + pos, sv);
+                for (i = 8; i < n; i += 8) {
+                    MEM_write64(spread + pos + i, sv);
+                }
+                assert(n>=0);
+                pos += (size_t)n;
+            }
+        }
+        /* Spread symbols across the table. Lack of lowprob symbols means that
+         * we don't need variable sized inner loop, so we can unroll the loop and
+         * reduce branch misses.
+         */
+        {   size_t position = 0;
+            size_t s;
+            size_t const unroll = 2; /* Experimentally determined optimal unroll */
+            assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
+            for (s = 0; s < (size_t)tableSize; s += unroll) {
+                size_t u;
+                for (u = 0; u < unroll; ++u) {
+                    size_t const uPosition = (position + (u * step)) & tableMask;
+                    tableSymbol[uPosition] = spread[s + u];
+                }
+                position = (position + (unroll * step)) & tableMask;
+            }
+            assert(position == 0); /* Must have initialized all positions */
+        }
+    } else {
+        U32 position = 0;
         U32 symbol;
-        for (symbol=0; symbol<=maxSymbolValue; symbol++) {
+        for (symbol=0; symbol<maxSV1; symbol++) {
             int nbOccurrences;
             int const freq = normalizedCounter[symbol];
             for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
@@ -120,7 +162,6 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
                 while (position > highThreshold)
                     position = (position + step) & tableMask;   /* Low proba area */
         }   }
-
         assert(position==0);  /* Must have initialized all positions */
     }
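Both spreading paths rely on the same invariant: FSE_TABLESTEP(tableSize) = (tableSize>>1) + (tableSize>>3) + 3 is odd for every legal table size (FSE_MIN_TABLELOG is 5), hence coprime with the power-of-two tableSize, so stepping by it modulo tableSize visits every slot exactly once, which is what assert(position == 0) verifies. A small standalone check with illustrative values:

    #include <stdio.h>

    int main(void)
    {
        unsigned const tableSize = 32;   /* 1 << tableLog, with tableLog = 5 */
        unsigned const step = (tableSize >> 1) + (tableSize >> 3) + 3;   /* 23, odd */
        unsigned const mask = tableSize - 1;
        unsigned position = 0, visited = 0;
        do {
            visited++;
            position = (position + step) & mask;
        } while (position != 0);
        printf("step=%u visits %u of %u slots\n", step, visited, tableSize);   /* 32 of 32 */
        return 0;
    }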
@@ -144,16 +185,17 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
         case -1:
         case  1:
             symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
-            symbolTT[s].deltaFindState = total - 1;
+            assert(total <= INT_MAX);
+            symbolTT[s].deltaFindState = (int)(total - 1);
             total ++;
             break;
         default :
-            {
-                U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
-                U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
+            assert(normalizedCounter[s] > 1);
+            {   U32 const maxBitsOut = tableLog - BIT_highbit32 ((U32)normalizedCounter[s]-1);
+                U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
                 symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
-                symbolTT[s].deltaFindState = total - normalizedCounter[s];
-                total +=  normalizedCounter[s];
+                symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
+                total +=  (unsigned)normalizedCounter[s];
     }   }
 
 #if 0  /* debug : symbol costs */
@@ -164,8 +206,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
                 symbol, normalizedCounter[symbol],
                 FSE_getMaxNbBits(symbolTT, symbol),
                 (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
-        }
-    }
+    }   }
 #endif
 
     return 0;
@@ -173,16 +214,18 @@
 #ifndef FSE_COMMONDEFS_ONLY
 
 /*-**************************************************************
 *  FSE NCount encoding
 ****************************************************************/
 size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
 {
-    size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
+    size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
+                                   + 4 /* bitCount initialized at 4 */
+                                   + 2 /* first two symbols may use one additional bit each */) / 8)
+                                   + 1 /* round up to whole nb bytes */
+                                   + 2 /* additional two bytes for bitstream flush */;
     return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND;  /* maxSymbolValue==0 ? use default */
 }
...
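Worked example of the revised bound: for maxSymbolValue=255 and tableLog=11, the old expression gave (256*11)/8 + 3 = 355 bytes, and the new one gives (256*11 + 4 + 2)/8 + 1 + 2 = 352 + 3 = 355 bytes as well; the result happens to match here, but the constants are now derived from the actual bitstream layout instead of being folded into an opaque +3.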
@@ -73,7 +73,8 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                               void* entropyWorkspace, size_t entropyWorkspaceSize,
-                        const int bmi2)
+                        const int bmi2,
+                        unsigned suspectUncompressible)
 {
     size_t const minGain = ZSTD_minGain(srcSize, strategy);
     size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
@@ -105,11 +106,11 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
                 HUF_compress1X_repeat(
                     ostart+lhSize, dstCapacity-lhSize, src, srcSize,
                     HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
-                    (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) :
+                    (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible) :
                 HUF_compress4X_repeat(
                     ostart+lhSize, dstCapacity-lhSize, src, srcSize,
                     HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
-                    (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
+                    (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible);
         if (repeat != HUF_repeat_none) {
             /* reused the existing table */
             DEBUGLOG(5, "Reusing previous huffman table");
@@ -117,7 +118,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
         }
     }
 
-    if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
+    if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
         ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
         return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
     }
...
@@ -18,12 +18,14 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src,
 size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 
+/* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
 size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
                               ZSTD_hufCTables_t* nextHuf,
                               ZSTD_strategy strategy, int disableLiteralCompression,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                               void* entropyWorkspace, size_t entropyWorkspaceSize,
-                        const int bmi2);
+                        const int bmi2,
+                        unsigned suspectUncompressible);
 
 #endif /* ZSTD_COMPRESS_LITERALS_H */
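For background (based on upstream v1.5.2 behaviour, not spelled out in this hunk): when suspectUncompressible is set and the input is large enough, HUF_compress first histograms small samples from the beginning and end of the block and returns 0 (store uncompressed) when the sample statistics look incompressible, saving the cost of building a Huffman table on already-compressed data.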
@@ -85,6 +85,8 @@ static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t
 {
     unsigned cost = 0;
     unsigned s;
+
+    assert(total > 0);
     for (s = 0; s <= max; ++s) {
         unsigned norm = (unsigned)((256 * count[s]) / total);
         if (count[s] != 0 && norm == 0)
@@ -273,10 +275,11 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
     assert(nbSeq_1 > 1);
     assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp));
     (void)entropyWorkspaceSize;
-    FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "");
-    {   size_t const NCountSize = FSE_writeNCount(op, oend - op, wksp->norm, max, tableLog);   /* overflow protected */
+    FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "FSE_normalizeCount failed");
+    assert(oend >= op);
+    {   size_t const NCountSize = FSE_writeNCount(op, (size_t)(oend - op), wksp->norm, max, tableLog);   /* overflow protected */
         FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
-        FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "");
+        FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "FSE_buildCTable_wksp failed");
         return NCountSize;
     }
 }
@@ -310,19 +313,19 @@ ZSTD_encodeSequences_body(
     FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
     BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
     if (MEM_32bits()) BIT_flushBits(&blockStream);
-    BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
+    BIT_addBits(&blockStream, sequences[nbSeq-1].mlBase, ML_bits[mlCodeTable[nbSeq-1]]);
     if (MEM_32bits()) BIT_flushBits(&blockStream);
     if (longOffsets) {
         U32 const ofBits = ofCodeTable[nbSeq-1];
         unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
         if (extraBits) {
-            BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
+            BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, extraBits);
             BIT_flushBits(&blockStream);
         }
-        BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offBase >> extraBits,
                     ofBits - extraBits);
     } else {
-        BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, ofCodeTable[nbSeq-1]);
     }
     BIT_flushBits(&blockStream);
@@ -336,8 +339,8 @@ ZSTD_encodeSequences_body(
             U32 const mlBits = ML_bits[mlCode];
             DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
                         (unsigned)sequences[n].litLength,
-                        (unsigned)sequences[n].matchLength + MINMATCH,
-                        (unsigned)sequences[n].offset);
+                        (unsigned)sequences[n].mlBase + MINMATCH,
+                        (unsigned)sequences[n].offBase);
                                                                             /* 32b*/  /* 64b*/
                                                                             /* (7)*/  /* (7)*/
             FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
@@ -348,18 +351,18 @@ ZSTD_encodeSequences_body(
                 BIT_flushBits(&blockStream);                                /* (7)*/
             BIT_addBits(&blockStream, sequences[n].litLength, llBits);
             if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
-            BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
+            BIT_addBits(&blockStream, sequences[n].mlBase, mlBits);
             if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
             if (longOffsets) {
                 unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
                 if (extraBits) {
-                    BIT_addBits(&blockStream, sequences[n].offset, extraBits);
+                    BIT_addBits(&blockStream, sequences[n].offBase, extraBits);
                     BIT_flushBits(&blockStream);                            /* (7)*/
                 }
-                BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
+                BIT_addBits(&blockStream, sequences[n].offBase >> extraBits,
                             ofBits - extraBits);                            /* 31 */
             } else {
-                BIT_addBits(&blockStream, sequences[n].offset, ofBits);     /* 31 */
+                BIT_addBits(&blockStream, sequences[n].offBase, ofBits);    /* 31 */
             }
             BIT_flushBits(&blockStream);                                    /* (7)*/
             DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
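The renames above are part of a wider change in this import: the sequence store's seqDef fields offset and matchLength become offBase and mlBase, making the stored bias explicit at every call site. A minimal standalone sketch of the encodings these lines imply (the exact seqDef layout is assumed, not shown in this diff):

    #include <stdio.h>

    #define MINMATCH      3   /* zstd's minimum match length */
    #define ZSTD_REP_MOVE 2   /* ZSTD_REP_NUM - 1 */

    int main(void)
    {
        unsigned const matchLength = 18, offset = 1024;
        unsigned const mlBase  = matchLength - MINMATCH;  /* why DEBUGLOG prints mlBase + MINMATCH */
        unsigned const offBase = offset + ZSTD_REP_MOVE;  /* real offsets sit above the repcodes */
        printf("mlBase=%u offBase=%u\n", mlBase, offBase);
        return 0;
    }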
@@ -396,7 +399,7 @@ ZSTD_encodeSequences_default(
 #if DYNAMIC_BMI2
-static TARGET_ATTRIBUTE("bmi2") size_t
+static BMI2_TARGET_ATTRIBUTE size_t
 ZSTD_encodeSequences_bmi2(
     void* dst, size_t dstCapacity,
     FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
@@ -23,6 +23,7 @@
 #define ZSTD_LAZY_DDSS_BUCKET_LOG 2
 U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
+void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip);
 void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip);
@@ -40,6 +41,15 @@ size_t ZSTD_compressBlock_lazy(
 size_t ZSTD_compressBlock_greedy(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
 size_t ZSTD_compressBlock_btlazy2_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -53,6 +63,15 @@ size_t ZSTD_compressBlock_lazy_dictMatchState(
 size_t ZSTD_compressBlock_greedy_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_dictMatchState_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_dictMatchState_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
 size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -63,6 +82,15 @@ size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
 size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
 size_t ZSTD_compressBlock_greedy_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -73,9 +101,19 @@ size_t ZSTD_compressBlock_lazy_extDict(
 size_t ZSTD_compressBlock_lazy2_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_extDict_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_extDict_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_extDict_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
 size_t ZSTD_compressBlock_btlazy2_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
 #endif /* ZSTD_LAZY_H */
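The *_row declarations added above are entry points for the row-based match finder that ships with this zstd version: each greedy/lazy/lazy2 strategy gains a row variant per dictionary mode, selected through the new ZSTD_paramSwitch_e useRowMatchFinder plumbing seen later in this diff. In userspace builds the switch is reachable through the advanced API; a minimal sketch, assuming the experimental ZSTD_c_useRowMatchFinder parameter (illustrative usage, not taken from this diff):

    #define ZSTD_STATIC_LINKING_ONLY  /* ZSTD_c_useRowMatchFinder is experimental */
    #include <zstd.h>

    static ZSTD_CCtx* make_row_cctx(void)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        if (cctx != NULL) {
            ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 7);
            /* ZSTD_ps_auto lets the library decide; enable forces the row finder */
            ZSTD_CCtx_setParameter(cctx, ZSTD_c_useRowMatchFinder, ZSTD_ps_enable);
        }
        return cctx;
    }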
@@ -57,6 +57,33 @@ static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const*
     }
 }
+/* ZSTD_ldm_gear_reset()
+ * Feeds [data, data + minMatchLength) into the hash without registering any
+ * splits. This effectively resets the hash state. This is used when skipping
+ * over data, either at the beginning of a block, or skipping sections.
+ */
+static void ZSTD_ldm_gear_reset(ldmRollingHashState_t* state,
+                                BYTE const* data, size_t minMatchLength)
+{
+    U64 hash = state->rolling;
+    size_t n = 0;
+
+#define GEAR_ITER_ONCE() do {                                  \
+        hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
+        n += 1;                                                \
+    } while (0)
+    while (n + 3 < minMatchLength) {
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+    }
+    while (n < minMatchLength) {
+        GEAR_ITER_ONCE();
+    }
+#undef GEAR_ITER_ONCE
+}
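For context, the gear hash driven by GEAR_ITER_ONCE is a rolling hash: each step shifts the 64-bit state left one bit and adds a per-byte constant from ZSTD_ldm_gearTab, so a byte's influence ages out of the state after 64 steps. A minimal sketch of one step (tab stands in for the real table):

    #include <stdint.h>

    /* One gear step: the shift ages old bytes out of the state, the table
     * injects per-byte randomness. Split-point selection (matching the high
     * bits of h against a stop mask) is omitted here. */
    static uint64_t gear_step(uint64_t h, unsigned char byte, const uint64_t tab[256])
    {
        return (h << 1) + tab[byte];
    }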
 /* ZSTD_ldm_gear_feed():
  *
  * Registers in the splits array all the split points found in the first
@@ -132,12 +159,12 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
     size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
     size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
                            + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
-    return params.enableLdm ? totalSize : 0;
+    return params.enableLdm == ZSTD_ps_enable ? totalSize : 0;
 }
 size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
 {
-    return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
+    return params.enableLdm == ZSTD_ps_enable ? (maxChunkSize / params.minMatchLength) : 0;
 }
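The enableLdm comparisons change because the parameter is no longer a 0/1 flag: this release threads a tri-state ZSTD_paramSwitch_e through the parameter structs so that "auto" is distinguishable from an explicit on or off. A sketch of the enum as assumed from this release's zstd.h (not restated in this diff):

    /* assumed declaration, consistent with the ZSTD_ps_enable checks above */
    typedef enum {
        ZSTD_ps_auto    = 0,  /* let the library choose per level/content */
        ZSTD_ps_enable  = 1,  /* force the feature on  */
        ZSTD_ps_disable = 2   /* force the feature off */
    } ZSTD_paramSwitch_e;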
 /* ZSTD_ldm_getBucket() :
@@ -255,7 +282,7 @@ void ZSTD_ldm_fillHashTable(
     while (ip < iend) {
         size_t hashed;
         unsigned n;
         numSplits = 0;
         hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits);
@@ -327,16 +354,8 @@ static size_t ZSTD_ldm_generateSequences_internal(
     /* Initialize the rolling hash state with the first minMatchLength bytes */
     ZSTD_ldm_gear_init(&hashState, params);
-    {
-        size_t n = 0;
-        while (n < minMatchLength) {
-            numSplits = 0;
-            n += ZSTD_ldm_gear_feed(&hashState, ip + n, minMatchLength - n,
-                                    splits, &numSplits);
-        }
-        ip += minMatchLength;
-    }
+    ZSTD_ldm_gear_reset(&hashState, ip, minMatchLength);
+    ip += minMatchLength;
     while (ip < ilimit) {
         size_t hashed;
@@ -361,6 +380,7 @@ static size_t ZSTD_ldm_generateSequences_internal(
         for (n = 0; n < numSplits; n++) {
             size_t forwardMatchLength = 0, backwardMatchLength = 0,
                    bestMatchLength = 0, mLength;
+            U32 offset;
             BYTE const* const split = candidates[n].split;
             U32 const checksum = candidates[n].checksum;
             U32 const hash = candidates[n].hash;
@@ -428,9 +448,9 @@ static size_t ZSTD_ldm_generateSequences_internal(
             }
             /* Match found */
+            offset = (U32)(split - base) - bestEntry->offset;
             mLength = forwardMatchLength + backwardMatchLength;
             {
-                U32 const offset = (U32)(split - base) - bestEntry->offset;
                 rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
                 /* Out of sequence storage */
@@ -447,6 +467,21 @@ static size_t ZSTD_ldm_generateSequences_internal(
             ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
             anchor = split + forwardMatchLength;
+
+            /* If we find a match that ends after the data that we've hashed
+             * then we have a repeating, overlapping, pattern. E.g. all zeros.
+             * If one repetition of the pattern matches our `stopMask` then all
+             * repetitions will. We don't need to insert them all into our table,
+             * only the first one. So skip over overlapping matches.
+             * This is a major speed boost (20x) for compressing a single byte
+             * repeated, when that byte ends up in the table.
+             */
+            if (anchor > ip + hashed) {
+                ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength);
+                /* Continue the outer loop at anchor (ip + hashed == anchor). */
+                ip = anchor - hashed;
+                break;
+            }
         }
         ip += hashed;
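The early break above matters because, on a long run of a single repeated byte, every split candidate yields a match running to the end of the run, and re-hashing the whole run split by split is exactly the 20x slowdown the comment cites. The pointer arithmetic is the subtle part; a small self-checking sketch of the invariant (indices instead of pointers):

    #include <assert.h>
    #include <stddef.h>

    /* After `ip = anchor - hashed` and the break, the outer loop's
     * `ip += hashed` lands exactly on `anchor`, the end of the match. */
    static void skip_invariant(size_t ip, size_t hashed, size_t anchor)
    {
        assert(anchor > ip + hashed);  /* match ran past the hashed region */
        ip = anchor - hashed;          /* the skip */
        ip += hashed;                  /* outer-loop advance */
        assert(ip == anchor);
    }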
@@ -500,7 +535,7 @@ size_t ZSTD_ldm_generateSequences(
         assert(chunkStart < iend);
         /* 1. Perform overflow correction if necessary. */
-        if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
+        if (ZSTD_window_needOverflowCorrection(ldmState->window, 0, maxDist, ldmState->loadedDictEnd, chunkStart, chunkEnd)) {
             U32 const ldmHSize = 1U << params->hashLog;
             U32 const correction = ZSTD_window_correctOverflow(
                 &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
@@ -544,7 +579,9 @@ size_t ZSTD_ldm_generateSequences(
     return 0;
 }
-void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
+void
+ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch)
+{
     while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
         rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
         if (srcSize <= seq->litLength) {
@@ -622,12 +659,13 @@ void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
     ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+    ZSTD_paramSwitch_e useRowMatchFinder,
     void const* src, size_t srcSize)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
     unsigned const minMatch = cParams->minMatch;
     ZSTD_blockCompressor const blockCompressor =
-        ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
+        ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms));
     /* Input bounds */
     BYTE const* const istart = (BYTE const*)src;
     BYTE const* const iend = istart + srcSize;
@@ -673,8 +711,8 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
             rep[0] = sequence.offset;
             /* Store the sequence */
             ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
-                          sequence.offset + ZSTD_REP_MOVE,
-                          sequence.matchLength - MINMATCH);
+                          STORE_OFFSET(sequence.offset),
+                          sequence.matchLength);
             ip += sequence.matchLength;
         }
     }
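Same representation change as in zstd_compress_sequences.c above: ZSTD_storeSeq() now takes the raw match length, and the repcode bias on real offsets moves behind STORE_OFFSET(). Judging from the inline expression it replaces, the macro amounts to the following (the assert is an assumption; only the bias itself is visible in this diff):

    #include <assert.h>

    #define ZSTD_REP_MOVE 2  /* ZSTD_REP_NUM - 1 in zstd's headers */
    /* implied by the replaced expression `sequence.offset + ZSTD_REP_MOVE` */
    #define STORE_OFFSET(o) (assert((o) > 0), (o) + ZSTD_REP_MOVE)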
......
@@ -63,6 +63,7 @@ size_t ZSTD_ldm_generateSequences(
  */
 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
     ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+    ZSTD_paramSwitch_e useRowMatchFinder,
     void const* src, size_t srcSize);
 /*
......
@@ -11,7 +11,10 @@
 #ifndef ZSTD_LDM_GEARTAB_H
 #define ZSTD_LDM_GEARTAB_H
-static U64 ZSTD_ldm_gearTab[256] = {
+#include "../common/compiler.h" /* UNUSED_ATTR */
+#include "../common/mem.h"      /* U64 */
+
+static UNUSED_ATTR const U64 ZSTD_ldm_gearTab[256] = {
     0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc,
     0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05,
     0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e,
......
This diff is collapsed.
This diff is collapsed.
@@ -53,7 +53,6 @@
 * Dependencies
 *********************************************************/
 #include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
-#include "../common/cpu.h"         /* bmi2 */
 #include "../common/mem.h"         /* low level memory routines */
 #define FSE_STATIC_LINKING_ONLY
 #include "../common/fse.h"
@@ -252,11 +251,11 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
     dctx->inBuffSize = 0;
     dctx->outBuffSize = 0;
     dctx->streamStage = zdss_init;
-    dctx->legacyContext = NULL;
-    dctx->previousLegacyVersion = 0;
     dctx->noForwardProgress = 0;
     dctx->oversizedDuration = 0;
-    dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
+#if DYNAMIC_BMI2
+    dctx->bmi2 = ZSTD_cpuSupportsBmi2();
+#endif
     dctx->ddictSet = NULL;
     ZSTD_DCtx_resetParameters(dctx);
 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
@@ -277,8 +276,7 @@ ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize)
     return dctx;
 }
-ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
-{
+static ZSTD_DCtx* ZSTD_createDCtx_internal(ZSTD_customMem customMem) {
     if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
     {   ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_customMalloc(sizeof(*dctx), customMem);
@@ -289,10 +287,15 @@ ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
     }
 }
+ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
+{
+    return ZSTD_createDCtx_internal(customMem);
+}
+
 ZSTD_DCtx* ZSTD_createDCtx(void)
 {
     DEBUGLOG(3, "ZSTD_createDCtx");
-    return ZSTD_createDCtx_advanced(ZSTD_defaultCMem);
+    return ZSTD_createDCtx_internal(ZSTD_defaultCMem);
 }
 static void ZSTD_clearDict(ZSTD_DCtx* dctx)
@@ -370,6 +373,19 @@ unsigned ZSTD_isFrame(const void* buffer, size_t size)
     return 0;
 }
+
+/*! ZSTD_isSkippableFrame() :
+ *  Tells if the content of `buffer` starts with a valid Frame Identifier for a skippable frame.
+ *  Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
+ */
+unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size)
+{
+    if (size < ZSTD_FRAMEIDSIZE) return 0;
+    {   U32 const magic = MEM_readLE32(buffer);
+        if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1;
+    }
+    return 0;
+}
 /* ZSTD_frameHeaderSize_internal() :
  * srcSize must be large enough to reach header size fields.
  * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless.
@@ -497,7 +513,6 @@ size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t src
     return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1);
 }
 /* ZSTD_getFrameContentSize() :
  * compatible with legacy mode
  * @return : decompressed size of the single frame pointed to by `src` if known, otherwise
@@ -532,6 +547,37 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize)
     }
 }
+
+/*! ZSTD_readSkippableFrame() :
+ * Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer.
+ *
+ * The parameter magicVariant will receive the magicVariant that was supplied when the frame was written,
+ * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested
+ * in the magicVariant.
+ *
+ * Returns an error if destination buffer is not large enough, or if the frame is not skippable.
+ *
+ * @return : number of bytes written or a ZSTD error.
+ */
+ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant,
+                                           const void* src, size_t srcSize)
+{
+    U32 const magicNumber = MEM_readLE32(src);
+    size_t skippableFrameSize = readSkippableFrameSize(src, srcSize);
+    size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE;
+
+    /* check input validity */
+    RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, "");
+    RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, "");
+    RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, "");
+
+    /* deliver payload */
+    if (skippableContentSize > 0 && dst != NULL)
+        ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize);
+    if (magicVariant != NULL)
+        *magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START;
+    return skippableContentSize;
+}
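A skippable frame is simply a little-endian magic in the range 0x184D2A50..0x184D2A5F (low nibble = magicVariant), a little-endian 4-byte content size, then the payload. A minimal sketch of reading a hand-built frame with the new API (userspace zstd; ZSTD_readSkippableFrame sits behind ZSTD_STATIC_LINKING_ONLY in this release):

    #define ZSTD_STATIC_LINKING_ONLY
    #include <stdio.h>
    #include <string.h>
    #include <zstd.h>

    int main(void)
    {
        /* magic 0x184D2A53 (variant 3) + content size 5 + "hello" */
        unsigned char frame[13] = { 0x53, 0x2A, 0x4D, 0x18,
                                    0x05, 0x00, 0x00, 0x00 };
        char dst[16];
        unsigned variant = 0;
        size_t r;

        memcpy(frame + 8, "hello", 5);
        r = ZSTD_readSkippableFrame(dst, sizeof(dst), &variant, frame, sizeof(frame));
        if (ZSTD_isError(r)) { fprintf(stderr, "%s\n", ZSTD_getErrorName(r)); return 1; }
        printf("variant=%u payload=%.5s\n", variant, dst);  /* variant=3 payload=hello */
        return 0;
    }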
 /* ZSTD_findDecompressedSize() :
  * compatible with legacy mode
  * `srcSize` must be the exact length of some number of ZSTD compressed and/or
@@ -824,7 +870,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
         switch(blockProperties.blockType)
         {
         case bt_compressed:
-            decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1);
+            decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1, not_streaming);
             break;
         case bt_raw :
             decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize);
@@ -976,7 +1022,7 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr
 {
 #if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1)
     size_t regenSize;
-    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
+    ZSTD_DCtx* const dctx = ZSTD_createDCtx_internal(ZSTD_defaultCMem);
     RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!");
     regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
     ZSTD_freeDCtx(dctx);
@@ -996,7 +1042,7 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr
 size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; }
 /*
- * Similar to ZSTD_nextSrcSizeToDecompress(), but when when a block input can be streamed,
+ * Similar to ZSTD_nextSrcSizeToDecompress(), but when a block input can be streamed,
  * we allow taking a partial block as the input. Currently only raw uncompressed blocks can
  * be streamed.
  *
@@ -1010,7 +1056,7 @@ static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t
         return dctx->expected;
     if (dctx->bType != bt_raw)
         return dctx->expected;
-    return MIN(MAX(inputSize, 1), dctx->expected);
+    return BOUNDED(1, inputSize, dctx->expected);
 }
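BOUNDED clamps a value into a closed range. Given the MIN/MAX composition it replaces, the definition (assumed from zstd_internal.h, which this diff does not show) is equivalent to:

    #define MIN(a,b) ((a) < (b) ? (a) : (b))
    #define MAX(a,b) ((a) > (b) ? (a) : (b))
    /* assumed: same result as the replaced MIN(MAX(inputSize, 1), dctx->expected) */
    #define BOUNDED(min,val,max) (MAX((min), MIN((val), (max))))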
 ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) {
@@ -1116,7 +1162,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
     {
     case bt_compressed:
         DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
-        rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1);
+        rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1, is_streaming);
         dctx->expected = 0;   /* Streaming not supported */
         break;
     case bt_raw :
@@ -1438,7 +1484,7 @@ size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
 ZSTD_DStream* ZSTD_createDStream(void)
 {
     DEBUGLOG(3, "ZSTD_createDStream");
-    return ZSTD_createDStream_advanced(ZSTD_defaultCMem);
+    return ZSTD_createDCtx_internal(ZSTD_defaultCMem);
 }
 ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize)
@@ -1448,7 +1494,7 @@ ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize)
 ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem)
 {
-    return ZSTD_createDCtx_advanced(customMem);
+    return ZSTD_createDCtx_internal(customMem);
 }
 size_t ZSTD_freeDStream(ZSTD_DStream* zds)
@@ -1708,7 +1754,8 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx)
 size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
 {
     size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
-    unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2);
+    /* space is needed to store the litbuffer after the output of a given block without stomping the extDict of a previous run, as well as to cover both windows against wildcopy */
+    unsigned long long const neededRBSize = windowSize + blockSize + ZSTD_BLOCKSIZE_MAX + (WILDCOPY_OVERLENGTH * 2);
     unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
     size_t const minRBSize = (size_t) neededSize;
     RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize,
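Worked example of the new floor (assuming zstd's usual constants ZSTD_BLOCKSIZE_MAX = 128 KiB and WILDCOPY_OVERLENGTH = 32, which this diff does not restate): with an 8 MiB window and a large frame, blockSize = MIN(8 MiB, 128 KiB) = 128 KiB, so the minimum grows from 8 MiB + 128 KiB + 64 B to 8 MiB + 2 x 128 KiB + 64 B. The extra ZSTD_BLOCKSIZE_MAX is the room reserved for the in-buffer literal buffer described in the new comment.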
@@ -1842,7 +1889,6 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
         DEBUGLOG(5, "stage zdss_init => transparent reset ");
         zds->streamStage = zdss_loadHeader;
         zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
-        zds->legacyVersion = 0;
         zds->hostageByte = 0;
         zds->expectedOutBuffer = *output;
         ZSTD_FALLTHROUGH;
......
This diff is collapsed.
@@ -33,6 +33,12 @@
  */
+
+/* Streaming state is used to inform allocation of the literal buffer */
+typedef enum {
+    not_streaming = 0,
+    is_streaming = 1
+} streaming_operation;
 /* ZSTD_decompressBlock_internal() :
  * decompress block, starting at `src`,
  * into destination buffer `dst`.
@@ -41,7 +47,7 @@
  */
 size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
                                      void* dst, size_t dstCapacity,
-                                     const void* src, size_t srcSize, const int frame);
+                                     const void* src, size_t srcSize, const int frame, const streaming_operation streaming);
 /* ZSTD_buildFSETable() :
  * generate FSE decoding table for one symbol (ll, ml or off)
@@ -54,7 +60,7 @@ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
  */
 void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
             const short* normalizedCounter, unsigned maxSymbolValue,
-            const U32* baseValue, const U32* nbAdditionalBits,
+            const U32* baseValue, const U8* nbAdditionalBits,
             unsigned tableLog, void* wksp, size_t wkspSize,
             int bmi2);
......
@@ -16,6 +16,12 @@
  * decompression.
  */
+/*
+ * Disable the ASM Huffman implementation because we need to
+ * include all the sources.
+ */
+#define ZSTD_DISABLE_ASM 1
+
 #include "common/debug.c"
 #include "common/entropy_common.c"
 #include "common/error_private.c"
......
This diff is collapsed.
@@ -133,7 +133,11 @@ EXPORT_SYMBOL(zstd_init_cstream);
 size_t zstd_reset_cstream(zstd_cstream *cstream,
     unsigned long long pledged_src_size)
 {
-    return ZSTD_resetCStream(cstream, pledged_src_size);
+    if (pledged_src_size == 0)
+        pledged_src_size = ZSTD_CONTENTSIZE_UNKNOWN;
+    ZSTD_FORWARD_IF_ERR( ZSTD_CCtx_reset(cstream, ZSTD_reset_session_only) );
+    ZSTD_FORWARD_IF_ERR( ZSTD_CCtx_setPledgedSrcSize(cstream, pledged_src_size) );
+    return 0;
 }
 EXPORT_SYMBOL(zstd_reset_cstream);
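ZSTD_resetCStream() is deprecated in upstream zstd, so the kernel shim now expresses the reset through the stable advanced API instead; note the shim's convention that a pledged size of 0 means "unknown". The same two-call pattern works for any userspace caller migrating off the deprecated function; a minimal sketch against a v1.5.x libzstd:

    #include <zstd.h>

    /* replacement for the deprecated ZSTD_resetCStream(zcs, pledgedSrcSize):
     * reset the session, keep parameters, then re-declare the source size */
    static size_t reset_cstream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
    {
        size_t const ret = ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
        if (ZSTD_isError(ret)) return ret;
        if (pledgedSrcSize == 0)  /* mirror the kernel shim: 0 means unknown */
            pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
        return ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
    }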
......