[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH 2/3] lzo: update LZO compression to current upstream version
On Wed, 2014-07-02 at 15:26 +0100, Jan Beulich wrote: > From: Markus F.X.J. Oberhumer <markus@xxxxxxxxxxxxx> > > This commit updates the kernel LZO code to the current upstream version > which features a significant speed improvement - benchmarking the Calgary > and Silesia test corpora typically shows a doubled performance in > both compression and decompression on modern i386/x86_64/powerpc machines. > > Signed-off-by: Markus F.X.J. Oberhumer <markus@xxxxxxxxxxxxx> > > Original Linux commit: 8b975bd3f9089f8ee5d7bbfd798537b992bbc7e7. > > Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> I've not reviewed this, but on the principle that importing a new upstream file is OK: Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx> > > --- a/xen/common/lzo.c > +++ b/xen/common/lzo.c > @@ -1,7 +1,7 @@ > /* > - * lzo.c -- LZO1X Compressor from MiniLZO > + * lzo.c -- LZO1X Compressor from LZO > * > - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@xxxxxxxxxxxxx> > + * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@xxxxxxxxxxxxx> > * > * The full LZO package can be found at: > * http://www.oberhumer.com/opensource/lzo/ > @@ -14,19 +14,47 @@ > /* > * lzodefs.h -- architecture, OS and compiler specific defines > * > - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@xxxxxxxxxxxxx> > + * Copyright (C) 1996-2012 Markus F.X.J. 
Oberhumer <markus@xxxxxxxxxxxxx> > * > * The full LZO package can be found at: > * http://www.oberhumer.com/opensource/lzo/ > * > - * Changed for kernel use by: > + * Changed for Linux kernel use by: > * Nitin Gupta <nitingupta910@xxxxxxxxx> > * Richard Purdie <rpurdie@xxxxxxxxxxxxxx> > */ > > -#define LZO_VERSION 0x2020 > -#define LZO_VERSION_STRING "2.02" > -#define LZO_VERSION_DATE "Oct 17 2005" > + > +#define COPY4(dst, src) \ > + put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst)) > +#if defined(__x86_64__) > +#define COPY8(dst, src) \ > + put_unaligned(get_unaligned((const u64 *)(src)), (u64 *)(dst)) > +#else > +#define COPY8(dst, src) \ > + COPY4(dst, src); COPY4((dst) + 4, (src) + 4) > +#endif > + > +#ifdef __MINIOS__ > +# include <lib.h> > +# if __BYTE_ORDER == __LITTLE_ENDIAN > +# undef __BIG_ENDIAN > +# endif > +# if __BYTE_ORDER == __BIG_ENDIAN > +# undef __LITTLE_ENDIAN > +# endif > +#endif > + > +#if defined(__BIG_ENDIAN) && defined(__LITTLE_ENDIAN) > +#error "conflicting endian definitions" > +#elif defined(__x86_64__) > +#define LZO_USE_CTZ64 1 > +#define LZO_USE_CTZ32 1 > +#elif defined(__i386__) || defined(__powerpc__) > +#define LZO_USE_CTZ32 1 > +#elif defined(__arm__) && (__LINUX_ARM_ARCH__ >= 5) > +#define LZO_USE_CTZ32 1 > +#endif > > #define M1_MAX_OFFSET 0x0400 > #define M2_MAX_OFFSET 0x0800 > @@ -47,208 +75,257 @@ > #define M3_MARKER 32 > #define M4_MARKER 16 > > -#define D_BITS 14 > -#define D_MASK ((1u << D_BITS) - 1) > +#define lzo_dict_t unsigned short > +#define D_BITS 13 > +#define D_SIZE (1u << D_BITS) > +#define D_MASK (D_SIZE - 1) > #define D_HIGH ((D_MASK >> 1) + 1) > > -#define DX2(p, s1, s2) (((((size_t)((p)[2]) << (s2)) ^ (p)[1]) \ > - << (s1)) ^ (p)[0]) > -#define DX3(p, s1, s2, s3) ((DX2((p)+1, s2, s3) << (s1)) ^ (p)[0]) > - > /* > - * LZO1X Compressor from MiniLZO > + * LZO1X Compressor from LZO > * > - * Copyright (C) 1996-2005 Markus F.X.J. 
Oberhumer <markus@xxxxxxxxxxxxx> > + * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@xxxxxxxxxxxxx> > * > * The full LZO package can be found at: > * http://www.oberhumer.com/opensource/lzo/ > * > - * Changed for kernel use by: > + * Changed for Linux kernel use by: > * Nitin Gupta <nitingupta910@xxxxxxxxx> > * Richard Purdie <rpurdie@xxxxxxxxxxxxxx> > */ > > #ifdef __XEN__ > -#include <xen/types.h> > +#include <xen/lib.h> > +#include <asm/byteorder.h> > #endif > > #include <xen/lzo.h> > #define get_unaligned(_p) (*(_p)) > #define put_unaligned(_val,_p) (*(_p)=_val) > #define get_unaligned_le16(_p) (*(u16 *)(_p)) > +#define get_unaligned_le32(_p) (*(u32 *)(_p)) > > static noinline size_t > -_lzo1x_1_do_compress(const unsigned char *in, size_t in_len, > - unsigned char *out, size_t *out_len, void *wrkmem) > +lzo1x_1_do_compress(const unsigned char *in, size_t in_len, > + unsigned char *out, size_t *out_len, > + size_t ti, void *wrkmem) > { > + const unsigned char *ip; > + unsigned char *op; > const unsigned char * const in_end = in + in_len; > - const unsigned char * const ip_end = in + in_len - M2_MAX_LEN - 5; > - const unsigned char ** const dict = wrkmem; > - const unsigned char *ip = in, *ii = ip; > - const unsigned char *end, *m, *m_pos; > - size_t m_off, m_len, dindex; > - unsigned char *op = out; > + const unsigned char * const ip_end = in + in_len - 20; > + const unsigned char *ii; > + lzo_dict_t * const dict = (lzo_dict_t *) wrkmem; > > - ip += 4; > + op = out; > + ip = in; > + ii = ip; > + ip += ti < 4 ? 
4 - ti : 0; > > for (;;) { > - dindex = ((size_t)(0x21 * DX3(ip, 5, 5, 6)) >> 5) & D_MASK; > - m_pos = dict[dindex]; > - > - if (m_pos < in) > - goto literal; > - > - if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET)) > - goto literal; > - > - m_off = ip - m_pos; > - if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) > - goto try_match; > - > - dindex = (dindex & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f); > - m_pos = dict[dindex]; > - > - if (m_pos < in) > - goto literal; > - > - if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET)) > - goto literal; > - > - m_off = ip - m_pos; > - if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) > - goto try_match; > - > - goto literal; > - > - try_match: > - if (get_unaligned((const unsigned short *)m_pos) > - == get_unaligned((const unsigned short *)ip)) { > - if (likely(m_pos[2] == ip[2])) > - goto match; > - } > - > + const unsigned char *m_pos; > + size_t t, m_len, m_off; > + u32 dv; > literal: > - dict[dindex] = ip; > - ++ip; > + ip += 1 + ((ip - ii) >> 5); > + next: > if (unlikely(ip >= ip_end)) > break; > - continue; > - > - match: > - dict[dindex] = ip; > - if (ip != ii) { > - size_t t = ip - ii; > + dv = get_unaligned_le32(ip); > + t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK; > + m_pos = in + dict[t]; > + dict[t] = (lzo_dict_t) (ip - in); > + if (unlikely(dv != get_unaligned_le32(m_pos))) > + goto literal; > > + ii -= ti; > + ti = 0; > + t = ip - ii; > + if (t != 0) { > if (t <= 3) { > op[-2] |= t; > - } else if (t <= 18) { > + COPY4(op, ii); > + op += t; > + } else if (t <= 16) { > *op++ = (t - 3); > + COPY8(op, ii); > + COPY8(op + 8, ii + 8); > + op += t; > } else { > - size_t tt = t - 18; > - > - *op++ = 0; > - while (tt > 255) { > - tt -= 255; > + if (t <= 18) { > + *op++ = (t - 3); > + } else { > + size_t tt = t - 18; > *op++ = 0; > + while (unlikely(tt > 255)) { > + tt -= 255; > + *op++ = 0; > + } > + *op++ = tt; > } > - *op++ = tt; > + do { > + COPY8(op, ii); > + COPY8(op + 8, ii + 8); > + op += 16; > + 
ii += 16; > + t -= 16; > + } while (t >= 16); > + if (t > 0) do { > + *op++ = *ii++; > + } while (--t > 0); > } > + } > + > + m_len = 4; > + { > +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ64) > + u64 v; > + v = get_unaligned((const u64 *) (ip + m_len)) ^ > + get_unaligned((const u64 *) (m_pos + m_len)); > + if (unlikely(v == 0)) { > do { > - *op++ = *ii++; > - } while (--t > 0); > + m_len += 8; > + v = get_unaligned((const u64 *) (ip + m_len)) ^ > + get_unaligned((const u64 *) (m_pos + m_len)); > + if (unlikely(ip + m_len >= ip_end)) > + goto m_len_done; > + } while (v == 0); > } > +# if defined(__LITTLE_ENDIAN) > + m_len += (unsigned) __builtin_ctzll(v) / 8; > +# elif defined(__BIG_ENDIAN) > + m_len += (unsigned) __builtin_clzll(v) / 8; > +# else > +# error "missing endian definition" > +# endif > +#elif defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && > defined(LZO_USE_CTZ32) > + u32 v; > + v = get_unaligned((const u32 *) (ip + m_len)) ^ > + get_unaligned((const u32 *) (m_pos + m_len)); > + if (unlikely(v == 0)) { > + do { > + m_len += 4; > + v = get_unaligned((const u32 *) (ip + m_len)) ^ > + get_unaligned((const u32 *) (m_pos + m_len)); > + if (v != 0) > + break; > + m_len += 4; > + v = get_unaligned((const u32 *) (ip + m_len)) ^ > + get_unaligned((const u32 *) (m_pos + m_len)); > + if (unlikely(ip + m_len >= ip_end)) > + goto m_len_done; > + } while (v == 0); > + } > +# if defined(__LITTLE_ENDIAN) > + m_len += (unsigned) __builtin_ctz(v) / 8; > +# elif defined(__BIG_ENDIAN) > + m_len += (unsigned) __builtin_clz(v) / 8; > +# else > +# error "missing endian definition" > +# endif > +#else > + if (unlikely(ip[m_len] == m_pos[m_len])) { > + do { > + m_len += 1; > + if (ip[m_len] != m_pos[m_len]) > + break; > + m_len += 1; > + if (ip[m_len] != m_pos[m_len]) > + break; > + m_len += 1; > + if (ip[m_len] != m_pos[m_len]) > + break; > + m_len += 1; > + if (ip[m_len] != m_pos[m_len]) > + break; > + m_len += 1; > + if (ip[m_len] != 
m_pos[m_len]) > + break; > + m_len += 1; > + if (ip[m_len] != m_pos[m_len]) > + break; > + m_len += 1; > + if (ip[m_len] != m_pos[m_len]) > + break; > + m_len += 1; > + if (unlikely(ip + m_len >= ip_end)) > + goto m_len_done; > + } while (ip[m_len] == m_pos[m_len]); > + } > +#endif > + } > + m_len_done: > > - ip += 3; > - if (m_pos[3] != *ip++ || m_pos[4] != *ip++ > - || m_pos[5] != *ip++ || m_pos[6] != *ip++ > - || m_pos[7] != *ip++ || m_pos[8] != *ip++) { > - --ip; > - m_len = ip - ii; > - > - if (m_off <= M2_MAX_OFFSET) { > - m_off -= 1; > - *op++ = (((m_len - 1) << 5) > - | ((m_off & 7) << 2)); > - *op++ = (m_off >> 3); > - } else if (m_off <= M3_MAX_OFFSET) { > - m_off -= 1; > + m_off = ip - m_pos; > + ip += m_len; > + ii = ip; > + if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) { > + m_off -= 1; > + *op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2)); > + *op++ = (m_off >> 3); > + } else if (m_off <= M3_MAX_OFFSET) { > + m_off -= 1; > + if (m_len <= M3_MAX_LEN) > *op++ = (M3_MARKER | (m_len - 2)); > - goto m3_m4_offset; > - } else { > - m_off -= 0x4000; > - > - *op++ = (M4_MARKER | ((m_off & 0x4000) >> 11) > - | (m_len - 2)); > - goto m3_m4_offset; > + else { > + m_len -= M3_MAX_LEN; > + *op++ = M3_MARKER | 0; > + while (unlikely(m_len > 255)) { > + m_len -= 255; > + *op++ = 0; > + } > + *op++ = (m_len); > } > + *op++ = (m_off << 2); > + *op++ = (m_off >> 6); > } else { > - end = in_end; > - m = m_pos + M2_MAX_LEN + 1; > - > - while (ip < end && *m == *ip) { > - m++; > - ip++; > - } > - m_len = ip - ii; > - > - if (m_off <= M3_MAX_OFFSET) { > - m_off -= 1; > - if (m_len <= 33) { > - *op++ = (M3_MARKER | (m_len - 2)); > - } else { > - m_len -= 33; > - *op++ = M3_MARKER | 0; > - goto m3_m4_len; > - } > - } else { > - m_off -= 0x4000; > - if (m_len <= M4_MAX_LEN) { > - *op++ = (M4_MARKER > - | ((m_off & 0x4000) >> 11) > + m_off -= 0x4000; > + if (m_len <= M4_MAX_LEN) > + *op++ = (M4_MARKER | ((m_off >> 11) & 8) > | (m_len - 2)); > - } else { > - m_len -= 
M4_MAX_LEN; > - *op++ = (M4_MARKER > - | ((m_off & 0x4000) >> 11)); > - m3_m4_len: > - while (m_len > 255) { > - m_len -= 255; > - *op++ = 0; > - } > - > - *op++ = (m_len); > + else { > + m_len -= M4_MAX_LEN; > + *op++ = (M4_MARKER | ((m_off >> 11) & 8)); > + while (unlikely(m_len > 255)) { > + m_len -= 255; > + *op++ = 0; > } > + *op++ = (m_len); > } > - m3_m4_offset: > - *op++ = ((m_off & 63) << 2); > + *op++ = (m_off << 2); > *op++ = (m_off >> 6); > } > - > - ii = ip; > - if (unlikely(ip >= ip_end)) > - break; > + goto next; > } > - > *out_len = op - out; > - return in_end - ii; > + return in_end - (ii - ti); > } > > -int lzo1x_1_compress(const unsigned char *in, size_t in_len, unsigned char > *out, > - size_t *out_len, void *wrkmem) > +int lzo1x_1_compress(const unsigned char *in, size_t in_len, > + unsigned char *out, size_t *out_len, > + void *wrkmem) > { > - const unsigned char *ii; > + const unsigned char *ip = in; > unsigned char *op = out; > - size_t t; > + size_t l = in_len; > + size_t t = 0; > > - if (unlikely(in_len <= M2_MAX_LEN + 5)) { > - t = in_len; > - } else { > - t = _lzo1x_1_do_compress(in, in_len, op, out_len, wrkmem); > + while (l > 20) { > + size_t ll = l <= (M4_MAX_OFFSET + 1) ? 
l : (M4_MAX_OFFSET + 1); > + uintptr_t ll_end = (uintptr_t) ip + ll; > + if ((ll_end + ((t + ll) >> 5)) <= ll_end) > + break; > + BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS); > + memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t)); > + t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem); > + ip += ll; > op += *out_len; > + l -= ll; > } > + t += l; > > if (t > 0) { > - ii = in + in_len - t; > + const unsigned char *ii = in + in_len - t; > > if (op == out && t <= 238) { > *op++ = (17 + t); > @@ -258,16 +335,21 @@ int lzo1x_1_compress(const unsigned char > *op++ = (t - 3); > } else { > size_t tt = t - 18; > - > *op++ = 0; > while (tt > 255) { > tt -= 255; > *op++ = 0; > } > - > *op++ = tt; > } > - do { > + if (t >= 16) do { > + COPY8(op, ii); > + COPY8(op + 8, ii + 8); > + op += 16; > + ii += 16; > + t -= 16; > + } while (t >= 16); > + if (t > 0) do { > *op++ = *ii++; > } while (--t > 0); > } > @@ -281,232 +363,215 @@ int lzo1x_1_compress(const unsigned char > } > > /* > - * LZO1X Decompressor from MiniLZO > + * LZO1X Decompressor from LZO > * > - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@xxxxxxxxxxxxx> > + * Copyright (C) 1996-2012 Markus F.X.J. 
Oberhumer <markus@xxxxxxxxxxxxx> > * > * The full LZO package can be found at: > * http://www.oberhumer.com/opensource/lzo/ > * > - * Changed for kernel use by: > + * Changed for Linux kernel use by: > * Nitin Gupta <nitingupta910@xxxxxxxxx> > * Richard Purdie <rpurdie@xxxxxxxxxxxxxx> > */ > > -#define HAVE_IP(x, ip_end, ip) ((size_t)(ip_end - ip) < (x)) > -#define HAVE_OP(x, op_end, op) ((size_t)(op_end - op) < (x)) > -#define HAVE_LB(m_pos, out, op) (m_pos < out || m_pos >= op) > - > -#define COPY4(dst, src) \ > - put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst)) > +#define HAVE_IP(x) ((size_t)(ip_end - ip) >= (size_t)(x)) > +#define HAVE_OP(x) ((size_t)(op_end - op) >= (size_t)(x)) > +#define NEED_IP(x) if (!HAVE_IP(x)) goto input_overrun > +#define NEED_OP(x) if (!HAVE_OP(x)) goto output_overrun > +#define TEST_LB(m_pos) if ((m_pos) < out) goto lookbehind_overrun > > int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, > unsigned char *out, size_t *out_len) > { > + unsigned char *op; > + const unsigned char *ip; > + size_t t, next; > + size_t state = 0; > + const unsigned char *m_pos; > const unsigned char * const ip_end = in + in_len; > unsigned char * const op_end = out + *out_len; > - const unsigned char *ip = in, *m_pos; > - unsigned char *op = out; > - size_t t; > > - *out_len = 0; > + op = out; > + ip = in; > > + if (unlikely(in_len < 3)) > + goto input_overrun; > if (*ip > 17) { > t = *ip++ - 17; > - if (t < 4) > + if (t < 4) { > + next = t; > goto match_next; > - if (HAVE_OP(t, op_end, op)) > - goto output_overrun; > - if (HAVE_IP(t + 1, ip_end, ip)) > - goto input_overrun; > - do { > - *op++ = *ip++; > - } while (--t > 0); > - goto first_literal_run; > - } > - > - while ((ip < ip_end)) { > - t = *ip++; > - if (t >= 16) > - goto match; > - if (t == 0) { > - if (HAVE_IP(1, ip_end, ip)) > - goto input_overrun; > - while (*ip == 0) { > - t += 255; > - ip++; > - if (HAVE_IP(1, ip_end, ip)) > - goto input_overrun; > - } > - t += 15 
+ *ip++; > - } > - if (HAVE_OP(t + 3, op_end, op)) > - goto output_overrun; > - if (HAVE_IP(t + 4, ip_end, ip)) > - goto input_overrun; > - > - COPY4(op, ip); > - op += 4; > - ip += 4; > - if (--t > 0) { > - if (t >= 4) { > - do { > - COPY4(op, ip); > - op += 4; > - ip += 4; > - t -= 4; > - } while (t >= 4); > - if (t > 0) { > - do { > - *op++ = *ip++; > - } while (--t > 0); > - } > - } else { > - do { > - *op++ = *ip++; > - } while (--t > 0); > - } > } > + goto copy_literal_run; > + } > > - first_literal_run: > + for (;;) { > t = *ip++; > - if (t >= 16) > - goto match; > - m_pos = op - (1 + M2_MAX_OFFSET); > - m_pos -= t >> 2; > - m_pos -= *ip++ << 2; > - > - if (HAVE_LB(m_pos, out, op)) > - goto lookbehind_overrun; > - > - if (HAVE_OP(3, op_end, op)) > - goto output_overrun; > - *op++ = *m_pos++; > - *op++ = *m_pos++; > - *op++ = *m_pos; > - > - goto match_done; > - > - do { > - match: > - if (t >= 64) { > - m_pos = op - 1; > - m_pos -= (t >> 2) & 7; > - m_pos -= *ip++ << 3; > - t = (t >> 5) - 1; > - if (HAVE_LB(m_pos, out, op)) > - goto lookbehind_overrun; > - if (HAVE_OP(t + 3 - 1, op_end, op)) > - goto output_overrun; > - goto copy_match; > - } else if (t >= 32) { > - t &= 31; > - if (t == 0) { > - if (HAVE_IP(1, ip_end, ip)) > - goto input_overrun; > - while (*ip == 0) { > + if (t < 16) { > + if (likely(state == 0)) { > + if (unlikely(t == 0)) { > + while (unlikely(*ip == 0)) { > t += 255; > ip++; > - if (HAVE_IP(1, ip_end, ip)) > - goto input_overrun; > + NEED_IP(1); > } > - t += 31 + *ip++; > + t += 15 + *ip++; > } > - m_pos = op - 1; > - m_pos -= get_unaligned_le16(ip) >> 2; > - ip += 2; > - } else if (t >= 16) { > - m_pos = op; > - m_pos -= (t & 8) << 11; > - > - t &= 7; > - if (t == 0) { > - if (HAVE_IP(1, ip_end, ip)) > - goto input_overrun; > - while (*ip == 0) { > - t += 255; > - ip++; > - if (HAVE_IP(1, ip_end, ip)) > - goto input_overrun; > - } > - t += 7 + *ip++; > + t += 3; > + copy_literal_run: > +#if 
defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) > + if (likely(HAVE_IP(t + 15) && HAVE_OP(t + 15))) { > + const unsigned char *ie = ip + t; > + unsigned char *oe = op + t; > + do { > + COPY8(op, ip); > + op += 8; > + ip += 8; > + COPY8(op, ip); > + op += 8; > + ip += 8; > + } while (ip < ie); > + ip = ie; > + op = oe; > + } else > +#endif > + { > + NEED_OP(t); > + NEED_IP(t + 3); > + do { > + *op++ = *ip++; > + } while (--t > 0); > } > - m_pos -= get_unaligned_le16(ip) >> 2; > - ip += 2; > - if (m_pos == op) > - goto eof_found; > - m_pos -= 0x4000; > - } else { > + state = 4; > + continue; > + } else if (state != 4) { > + next = t & 3; > m_pos = op - 1; > m_pos -= t >> 2; > m_pos -= *ip++ << 2; > - > - if (HAVE_LB(m_pos, out, op)) > - goto lookbehind_overrun; > - if (HAVE_OP(2, op_end, op)) > - goto output_overrun; > - > - *op++ = *m_pos++; > - *op++ = *m_pos; > - goto match_done; > + TEST_LB(m_pos); > + NEED_OP(2); > + op[0] = m_pos[0]; > + op[1] = m_pos[1]; > + op += 2; > + goto match_next; > + } else { > + next = t & 3; > + m_pos = op - (1 + M2_MAX_OFFSET); > + m_pos -= t >> 2; > + m_pos -= *ip++ << 2; > + t = 3; > } > - > - if (HAVE_LB(m_pos, out, op)) > - goto lookbehind_overrun; > - if (HAVE_OP(t + 3 - 1, op_end, op)) > - goto output_overrun; > - > - if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) { > - COPY4(op, m_pos); > - op += 4; > - m_pos += 4; > - t -= 4 - (3 - 1); > + } else if (t >= 64) { > + next = t & 3; > + m_pos = op - 1; > + m_pos -= (t >> 2) & 7; > + m_pos -= *ip++ << 3; > + t = (t >> 5) - 1 + (3 - 1); > + } else if (t >= 32) { > + t = (t & 31) + (3 - 1); > + if (unlikely(t == 2)) { > + while (unlikely(*ip == 0)) { > + t += 255; > + ip++; > + NEED_IP(1); > + } > + t += 31 + *ip++; > + NEED_IP(2); > + } > + m_pos = op - 1; > + next = get_unaligned_le16(ip); > + ip += 2; > + m_pos -= next >> 2; > + next &= 3; > + } else { > + m_pos = op; > + m_pos -= (t & 8) << 11; > + t = (t & 7) + (3 - 1); > + if (unlikely(t == 2)) { > + while (unlikely(*ip == 
0)) { > + t += 255; > + ip++; > + NEED_IP(1); > + } > + t += 7 + *ip++; > + NEED_IP(2); > + } > + next = get_unaligned_le16(ip); > + ip += 2; > + m_pos -= next >> 2; > + next &= 3; > + if (m_pos == op) > + goto eof_found; > + m_pos -= 0x4000; > + } > + TEST_LB(m_pos); > +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) > + if (op - m_pos >= 8) { > + unsigned char *oe = op + t; > + if (likely(HAVE_OP(t + 15))) { > do { > - COPY4(op, m_pos); > - op += 4; > - m_pos += 4; > - t -= 4; > - } while (t >= 4); > - if (t > 0) > - do { > - *op++ = *m_pos++; > - } while (--t > 0); > + COPY8(op, m_pos); > + op += 8; > + m_pos += 8; > + COPY8(op, m_pos); > + op += 8; > + m_pos += 8; > + } while (op < oe); > + op = oe; > + if (HAVE_IP(6)) { > + state = next; > + COPY4(op, ip); > + op += next; > + ip += next; > + continue; > + } > } else { > - copy_match: > - *op++ = *m_pos++; > - *op++ = *m_pos++; > + NEED_OP(t); > do { > *op++ = *m_pos++; > - } while (--t > 0); > + } while (op < oe); > } > - match_done: > - t = ip[-2] & 3; > - if (t == 0) > - break; > + } else > +#endif > + { > + unsigned char *oe = op + t; > + NEED_OP(t); > + op[0] = m_pos[0]; > + op[1] = m_pos[1]; > + op += 2; > + m_pos += 2; > + do { > + *op++ = *m_pos++; > + } while (op < oe); > + } > match_next: > - if (HAVE_OP(t, op_end, op)) > - goto output_overrun; > - if (HAVE_IP(t + 1, ip_end, ip)) > - goto input_overrun; > - > - *op++ = *ip++; > - if (t > 1) { > + state = next; > + t = next; > +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) > + if (likely(HAVE_IP(6) && HAVE_OP(4))) { > + COPY4(op, ip); > + op += t; > + ip += t; > + } else > +#endif > + { > + NEED_IP(t + 3); > + NEED_OP(t); > + while (t > 0) { > *op++ = *ip++; > - if (t > 2) > - *op++ = *ip++; > + t--; > } > - > - t = *ip++; > - } while (ip < ip_end); > + } > } > > - *out_len = op - out; > - return LZO_E_EOF_NOT_FOUND; > - > eof_found: > *out_len = op - out; > - return (ip == ip_end ? LZO_E_OK : > - (ip < ip_end ? 
LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN)); > + return (t != 3 ? LZO_E_ERROR : > + ip == ip_end ? LZO_E_OK : > + ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN); > + > input_overrun: > *out_len = op - out; > return LZO_E_INPUT_OVERRUN; > --- a/xen/include/xen/lzo.h > +++ b/xen/include/xen/lzo.h > @@ -4,22 +4,22 @@ > * LZO Public Kernel Interface > * A mini subset of the LZO real-time data compression library > * > - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@xxxxxxxxxxxxx> > + * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@xxxxxxxxxxxxx> > * > * The full LZO package can be found at: > * http://www.oberhumer.com/opensource/lzo/ > * > - * Changed for kernel use by: > + * Changed for Linux kernel use by: > * Nitin Gupta <nitingupta910@xxxxxxxxx> > * Richard Purdie <rpurdie@xxxxxxxxxxxxxx> > */ > > -#define LZO1X_MEM_COMPRESS (16384 * sizeof(unsigned char *)) > -#define LZO1X_1_MEM_COMPRESS LZO1X_MEM_COMPRESS > +#define LZO1X_1_MEM_COMPRESS (8192 * sizeof(unsigned short)) > +#define LZO1X_MEM_COMPRESS LZO1X_1_MEM_COMPRESS > > #define lzo1x_worst_compress(x) ((x) + ((x) / 16) + 64 + 3) > > -/* This requires 'workmem' of size LZO1X_1_MEM_COMPRESS */ > +/* This requires 'wrkmem' of size LZO1X_1_MEM_COMPRESS */ > int lzo1x_1_compress(const unsigned char *src, size_t src_len, > unsigned char *dst, size_t *dst_len, void *wrkmem); > > @@ -40,5 +40,6 @@ int lzo1x_decompress_safe(const unsigned > #define LZO_E_EOF_NOT_FOUND (-7) > #define LZO_E_INPUT_NOT_CONSUMED (-8) > #define LZO_E_NOT_YET_IMPLEMENTED (-9) > +#define LZO_E_INVALID_ARGUMENT (-10) > > #endif > > _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |