Developer Documentation
snappy.cc
1 // Copyright 2005 Google Inc. All Rights Reserved.
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 // * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 // * Neither the name of Google Inc. nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
29 #include "snappy.h"
30 #include "snappy-internal.h"
31 #include "snappy-sinksource.h"
32 
33 #include <stdio.h>
34 
35 #include <algorithm>
36 #include <string>
37 #include <vector>
38 
39 
40 namespace snappy {
41 
42 
43 // Any hash function will produce a valid compressed bitstream, but a good
44 // hash function reduces the number of collisions and thus yields better
45 // compression for compressible input, and more speed for incompressible
46 // input. Of course, it doesn't hurt if the hash function is reasonably fast
47 // either, as it gets called a lot.
48 static inline uint32 HashBytes(uint32 bytes, int shift) {
49  uint32 kMul = 0x1e35a7bd;
50  return (bytes * kMul) >> shift;
51 }
52 static inline uint32 Hash(const char* p, int shift) {
53  return HashBytes(UNALIGNED_LOAD32(p), shift);
54 }
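The multiply-shift hash above only has to spread four-byte windows evenly across the hash table. A small standalone sketch (not part of snappy.cc; kTableBits and ExampleHash are hypothetical names) showing how the shift ties the result to the table size:

#include <cassert>
#include <cstdint>

// Same multiply-shift scheme as HashBytes above.
static inline uint32_t ExampleHash(uint32_t bytes, int shift) {
  return (bytes * 0x1e35a7bdu) >> shift;
}

int main() {
  const int kTableBits = 14;                     // hypothetical 16384-entry table
  const int shift = 32 - kTableBits;             // same derivation as the compressor uses
  uint32_t h = ExampleHash(0x64636261u, shift);  // the window "abcd", loaded little-endian
  assert(h < (1u << kTableBits));                // the index always fits in the table
  return 0;
}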
55 
56 size_t MaxCompressedLength(size_t source_len) {
57  // Compressed data can be defined as:
58  // compressed := item* literal*
59  // item := literal* copy
60  //
61  // The trailing literal sequence has a space blowup of at most 62/60
62  // since a literal of length 60 needs one tag byte + one extra byte
63  // for length information.
64  //
65  // Item blowup is trickier to measure. Suppose the "copy" op copies
66  // 4 bytes of data. Because of a special check in the encoding code,
67  // we produce a 4-byte copy only if the offset is < 65536. Therefore
68  // the copy op takes 3 bytes to encode, and this type of item leads
69  // to at most the 62/60 blowup for representing literals.
70  //
71  // Suppose the "copy" op copies 5 bytes of data. If the offset is big
72  // enough, it will take 5 bytes to encode the copy op. Therefore the
73  // worst case here is a one-byte literal followed by a five-byte copy.
74  // I.e., 6 bytes of input turn into 7 bytes of "compressed" data.
75  //
76  // This last factor dominates the blowup, so the final estimate is:
77  return 32 + source_len + source_len/6;
78 }
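A quick numeric check of the bound derived above (a worked example, not library code): for a 1 MiB input the estimate is 32 + 1048576 + 1048576/6 = 32 + 1048576 + 174762 = 1223370 bytes, i.e. roughly 16.7% expansion plus a small constant.

#include <cassert>
#include <cstddef>

int main() {
  size_t source_len = 1048576;                      // 1 MiB
  size_t bound = 32 + source_len + source_len / 6;  // same formula as above
  assert(bound == 1223370);
  return 0;
}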
79 
80 enum {
81  LITERAL = 0,
82  COPY_1_BYTE_OFFSET = 1, // 3 bit length + 3 bits of offset in opcode
83  COPY_2_BYTE_OFFSET = 2,
84  COPY_4_BYTE_OFFSET = 3
85 };
86 static const int kMaximumTagLength = 5; // COPY_4_BYTE_OFFSET plus the actual offset.
87 
88 // Copy "len" bytes from "src" to "op", one byte at a time. Used for
89 // handling COPY operations where the input and output regions may
90 // overlap. For example, suppose:
91 // src == "ab"
92 // op == src + 2
93 // len == 20
94 // After IncrementalCopy(src, op, len), the result will have
95 // eleven copies of "ab"
96 // ababababababababababab
97 // Note that this does not match the semantics of either memcpy()
98 // or memmove().
99 static inline void IncrementalCopy(const char* src, char* op, int64_t len) {
100  assert(len > 0);
101  do {
102  *op++ = *src++;
103  } while (--len > 0);
104 }
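The overlapping-copy semantics described in the comment can be checked with a standalone sketch; BytewiseCopy just repeats the byte-at-a-time loop so the snippet compiles on its own.

#include <cassert>
#include <cstring>

// Same loop as IncrementalCopy above.
static void BytewiseCopy(const char* src, char* op, int len) {
  do { *op++ = *src++; } while (--len > 0);
}

int main() {
  char buf[32];
  std::memcpy(buf, "ab", 2);
  BytewiseCopy(buf, buf + 2, 20);   // overlapping regions: op == src + 2
  assert(std::memcmp(buf, "ababababababababababab", 22) == 0);  // eleven copies of "ab"
  return 0;
}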
105 
106 // Equivalent to IncrementalCopy except that it can write up to ten extra
107 // bytes after the end of the copy, and that it is faster.
108 //
109 // The main part of this loop is a simple copy of eight bytes at a time until
110 // we've copied (at least) the requested amount of bytes. However, if op and
111 // src are less than eight bytes apart (indicating a repeating pattern of
112 // length < 8), we first need to expand the pattern in order to get the correct
113 // results. For instance, if the buffer looks like this, with the eight-byte
114 // <src> and <op> patterns marked as intervals:
115 //
116 // abxxxxxxxxxxxx
117 // [------] src
118 // [------] op
119 //
120 // a single eight-byte copy from <src> to <op> will repeat the pattern once,
121 // after which we can move <op> two bytes without moving <src>:
122 //
123 // ababxxxxxxxxxx
124 // [------] src
125 // [------] op
126 //
127 // and repeat the exercise until the two no longer overlap.
128 //
129 // This allows us to do very well in the special case of one single byte
130 // repeated many times, without taking a big hit for more general cases.
131 //
132 // The worst case of extra writing past the end of the match occurs when
133 // op - src == 1 and len == 1; the last copy will read from byte positions
134 // [0..7] and write to [4..11], whereas it was only supposed to write to
135 // position 1. Thus, ten excess bytes.
136 
137 namespace {
138 
139 const int kMaxIncrementCopyOverflow = 10;
140 
141 inline void IncrementalCopyFastPath(const char* src, char* op, int64_t len) {
142  while (PREDICT_FALSE(op - src < 8)) {
143  UnalignedCopy64(src, op);
144  len -= op - src;
145  op += op - src;
146  }
147  while (len > 0) {
148  UnalignedCopy64(src, op);
149  src += 8;
150  op += 8;
151  len -= 8;
152  }
153 }
154 
155 } // namespace
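To make the pattern-expansion loop concrete: with op - src == 2, the first eight-byte copy doubles the gap to 4, the next doubles it to 8, and from then on the plain eight-byte loop runs. A minimal trace of that gap sequence (a sketch, not library code):

#include <cassert>

int main() {
  long gap = 2;          // op - src at entry
  int expansions = 0;
  while (gap < 8) {      // mirrors the PREDICT_FALSE loop above
    gap += gap;          // op += (op - src) doubles the distance each time
    ++expansions;
  }
  assert(gap == 8 && expansions == 2);
  return 0;
}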
156 
157 static inline char* EmitLiteral(char* op,
158  const char* literal,
159  int len,
160  bool allow_fast_path) {
161  int n = len - 1; // Zero-length literals are disallowed
162  if (n < 60) {
163  // Fits in tag byte
164  *op++ = LITERAL | (n << 2);
165 
166  // The vast majority of copies are below 16 bytes, for which a
167  // call to memcpy is overkill. This fast path can sometimes
168  // copy up to 15 bytes too much, but that is okay in the
169  // main loop, since we have a bit to go on for both sides:
170  //
171  // - The input will always have kInputMarginBytes = 15 extra
172  // available bytes, as long as we're in the main loop, and
173  // if not, allow_fast_path = false.
174  // - The output will always have 32 spare bytes (see
175  // MaxCompressedLength).
176  if (allow_fast_path && len <= 16) {
177  UnalignedCopy64(literal, op);
178  UnalignedCopy64(literal + 8, op + 8);
179  return op + len;
180  }
181  } else {
182  // Encode in upcoming bytes
183  char* base = op;
184  int count = 0;
185  op++;
186  while (n > 0) {
187  *op++ = n & 0xff;
188  n >>= 8;
189  count++;
190  }
191  assert(count >= 1);
192  assert(count <= 4);
193  *base = LITERAL | ((59+count) << 2);
194  }
195  memcpy(op, literal, len);
196  return op + len;
197 }
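Worked tag-byte arithmetic for the two literal forms above (a standalone check; the constants are computed exactly the way EmitLiteral computes them):

#include <cassert>

int main() {
  const int kLiteralTag = 0;                        // LITERAL
  // 5-byte literal: length - 1 fits in the tag byte.
  assert((kLiteralTag | ((5 - 1) << 2)) == 0x10);
  // 100-byte literal: length - 1 == 99 needs one extra byte, so the tag
  // stores 59 + 1 and the byte 0x63 (99) follows before the data.
  assert((kLiteralTag | ((59 + 1) << 2)) == 0xf0);
  assert((100 - 1) == 0x63);
  return 0;
}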
198 
199 static inline char* EmitCopyLessThan64(char* op, size_t offset, int len) {
200  assert(len <= 64);
201  assert(len >= 4);
202  assert(offset < 65536);
203 
204  if ((len < 12) && (offset < 2048)) {
205  size_t len_minus_4 = len - 4;
206  assert(len_minus_4 < 8); // Must fit in 3 bits
207  *op++ = COPY_1_BYTE_OFFSET + ((len_minus_4) << 2) + ((offset >> 8) << 5);
208  *op++ = offset & 0xff;
209  } else {
210  *op++ = COPY_2_BYTE_OFFSET + ((len-1) << 2);
211  LittleEndian::Store16(op, offset);
212  op += 2;
213  }
214  return op;
215 }
216 
217 static inline char* EmitCopy(char* op, size_t offset, int len) {
218  // Emit 64 byte copies but make sure to keep at least four bytes reserved
219  while (PREDICT_FALSE(len >= 68)) {
220  op = EmitCopyLessThan64(op, offset, 64);
221  len -= 64;
222  }
223 
224  // Emit an extra 60 byte copy if we have too much data to fit in one copy
225  if (len > 64) {
226  op = EmitCopyLessThan64(op, offset, 60);
227  len -= 60;
228  }
229 
230  // Emit remainder
231  op = EmitCopyLessThan64(op, offset, len);
232  return op;
233 }
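Worked tag bytes for the two copy encodings emitted by EmitCopyLessThan64 (a standalone arithmetic check, not library code): offset 100 with length 8 uses the one-byte-offset form 0x11 0x64, while offset 3000 with length 8 needs the two-byte-offset form 0x1e 0xb8 0x0b.

#include <cassert>

int main() {
  // One-byte-offset copy: opcode 1, (len - 4) in bits 2..4, offset >> 8 in bits 5..7.
  assert((1 + ((8 - 4) << 2) + ((100 >> 8) << 5)) == 0x11);
  assert((100 & 0xff) == 0x64);
  // Two-byte-offset copy: opcode 2, (len - 1) in the top six bits, then a
  // little-endian 16-bit offset (3000 == 0x0bb8, stored as 0xb8 0x0b).
  assert((2 + ((8 - 1) << 2)) == 0x1e);
  assert((3000 & 0xff) == 0xb8 && (3000 >> 8) == 0x0b);
  return 0;
}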
234 
235 
236 bool GetUncompressedLength(const char* start, size_t n, size_t* result) {
237  uint32 v = 0;
238  const char* limit = start + n;
239  if (Varint::Parse32WithLimit(start, limit, &v) != NULL) {
240  *result = v;
241  return true;
242  } else {
243  return false;
244  }
245 }
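The length preamble parsed here is a little-endian base-128 varint; the sketch below (hypothetical input bytes, not library code) decodes one by hand the same way ReadUncompressedLength does further down.

#include <cassert>
#include <cstdint>

int main() {
  const unsigned char bytes[] = {0xa0, 0x8d, 0x06};  // varint encoding of 100000
  uint32_t result = 0;
  uint32_t shift = 0;
  for (unsigned char c : bytes) {
    result |= static_cast<uint32_t>(c & 0x7f) << shift;  // 7 payload bits per byte
    if (c < 128) break;                                  // clear high bit ends the varint
    shift += 7;
  }
  assert(result == 100000);  // 0x20 + (0x0d << 7) + (0x06 << 14)
  return 0;
}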
246 
247 namespace internal {
248 uint16* WorkingMemory::GetHashTable(size_t input_size, int* table_size) {
249  // Use smaller hash table when input.size() is smaller, since we
250  // fill the table, incurring O(hash table size) overhead for
251  // compression, and if the input is short, we won't need that
252  // many hash table entries anyway.
253  assert(kMaxHashTableSize >= 256);
254  size_t htsize = 256;
255  while (htsize < kMaxHashTableSize && htsize < input_size) {
256  htsize <<= 1;
257  }
258 
259  uint16* table;
260  if (htsize <= ARRAYSIZE(small_table_)) {
261  table = small_table_;
262  } else {
263  if (large_table_ == NULL) {
264  large_table_ = new uint16[kMaxHashTableSize];
265  }
266  table = large_table_;
267  }
268 
269  *table_size = htsize;
270  memset(table, 0, htsize * sizeof(*table));
271  return table;
272 }
273 } // end namespace internal
274 
275 // For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will
276 // equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have
277 // empirically found that overlapping loads such as
278 // UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2)
279 // are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32.
280 //
281 // We have different versions for 64- and 32-bit; ideally we would avoid the
282 // two functions and just inline the UNALIGNED_LOAD64 call into
283 // GetUint32AtOffset, but GCC (as of 4.6, at least) is seemingly not clever
284 // enough to avoid loading the value multiple times then. For 64-bit, the load
285 // is done when GetEightBytesAt() is called, whereas for 32-bit, the load is
286 // done at GetUint32AtOffset() time.
287 
288 #ifdef ARCH_K8
289 
290 typedef uint64 EightBytesReference;
291 
292 static inline EightBytesReference GetEightBytesAt(const char* ptr) {
293  return UNALIGNED_LOAD64(ptr);
294 }
295 
296 static inline uint32 GetUint32AtOffset(uint64 v, int offset) {
297  assert(offset >= 0);
298  assert(offset <= 4);
299  return v >> (LittleEndian::IsLittleEndian() ? 8 * offset : 32 - 8 * offset);
300 }
301 
302 #else
303 
304 typedef const char* EightBytesReference;
305 
306 static inline EightBytesReference GetEightBytesAt(const char* ptr) {
307  return ptr;
308 }
309 
310 static inline uint32 GetUint32AtOffset(const char* v, int offset) {
311  assert(offset >= 0);
312  assert(offset <= 4);
313  return UNALIGNED_LOAD32(v + offset);
314 }
315 
316 #endif
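The property stated above — one 64-bit load plus shifts reproduces the four overlapping 32-bit loads — can be checked portably with byte-assembled values (a sketch using plain shifts instead of UNALIGNED_LOAD64, so it is endianness-independent):

#include <cassert>
#include <cstdint>

int main() {
  const unsigned char p[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  uint64_t v = 0;
  for (int i = 7; i >= 0; --i) v = (v << 8) | p[i];   // little-endian 64-bit load of p
  for (int offset = 0; offset <= 4; ++offset) {
    // Little-endian 32-bit load of p + offset, assembled byte by byte.
    uint32_t direct = static_cast<uint32_t>(p[offset]) |
                      (static_cast<uint32_t>(p[offset + 1]) << 8) |
                      (static_cast<uint32_t>(p[offset + 2]) << 16) |
                      (static_cast<uint32_t>(p[offset + 3]) << 24);
    assert(static_cast<uint32_t>(v >> (8 * offset)) == direct);
  }
  return 0;
}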
317 
318 // Flat array compression that does not emit the "uncompressed length"
319 // prefix. Compresses "input" string to the "*op" buffer.
320 //
321 // REQUIRES: "input" is at most "kBlockSize" bytes long.
322 // REQUIRES: "op" points to an array of memory that is at least
323 // "MaxCompressedLength(input.size())" in size.
324 // REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
325 // REQUIRES: "table_size" is a power of two
326 //
327 // Returns an "end" pointer into "op" buffer.
328 // "end - op" is the compressed size of "input".
329 namespace internal {
330 char* CompressFragment(const char* input,
331  size_t input_size,
332  char* op,
333  uint16* table,
334  const int table_size) {
335  // "ip" is the input pointer, and "op" is the output pointer.
336  const char* ip = input;
337  assert(input_size <= kBlockSize);
338  assert((table_size & (table_size - 1)) == 0); // table must be power of two
339  const int shift = 32 - Bits::Log2Floor(table_size);
340  assert(static_cast<int>(kuint32max >> shift) == table_size - 1);
341  const char* ip_end = input + input_size;
342  const char* base_ip = ip;
343  // Bytes in [next_emit, ip) will be emitted as literal bytes. Or
344  // [next_emit, ip_end) after the main loop.
345  const char* next_emit = ip;
346 
347  const size_t kInputMarginBytes = 15;
348  if (PREDICT_TRUE(input_size >= kInputMarginBytes)) {
349  const char* ip_limit = input + input_size - kInputMarginBytes;
350 
351  for (uint32 next_hash = Hash(++ip, shift); ; ) {
352  assert(next_emit < ip);
353  // The body of this loop calls EmitLiteral once and then EmitCopy one or
354  // more times. (The exception is that when we're close to exhausting
355  // the input we goto emit_remainder.)
356  //
357  // In the first iteration of this loop we're just starting, so
358  // there's nothing to copy, so calling EmitLiteral once is
359  // necessary. And we only start a new iteration when the
360  // current iteration has determined that a call to EmitLiteral will
361  // precede the next call to EmitCopy (if any).
362  //
363  // Step 1: Scan forward in the input looking for a 4-byte-long match.
364  // If we get close to exhausting the input then goto emit_remainder.
365  //
366  // Heuristic match skipping: If 32 bytes are scanned with no matches
367  // found, start looking only at every other byte. If 32 more bytes are
368  // scanned, look at every third byte, etc.. When a match is found,
369  // immediately go back to looking at every byte. This is a small loss
370  // (~5% performance, ~0.1% density) for compressible data due to more
371  // bookkeeping, but for non-compressible data (such as JPEG) it's a huge
372  // win since the compressor quickly "realizes" the data is incompressible
373  // and doesn't bother looking for matches everywhere.
374  //
375  // The "skip" variable keeps track of how many bytes there are since the
376  // last match; dividing it by 32 (ie. right-shifting by five) gives the
377  // number of bytes to move ahead for each iteration.
378  uint32 skip = 32;
379 
380  const char* next_ip = ip;
381  const char* candidate;
382  do {
383  ip = next_ip;
384  uint32 hash = next_hash;
385  assert(hash == Hash(ip, shift));
386  uint32 bytes_between_hash_lookups = skip++ >> 5;
387  next_ip = ip + bytes_between_hash_lookups;
388  if (PREDICT_FALSE(next_ip > ip_limit)) {
389  goto emit_remainder;
390  }
391  next_hash = Hash(next_ip, shift);
392  candidate = base_ip + table[hash];
393  assert(candidate >= base_ip);
394  assert(candidate < ip);
395 
396  table[hash] = ip - base_ip;
397  } while (PREDICT_TRUE(UNALIGNED_LOAD32(ip) !=
398  UNALIGNED_LOAD32(candidate)));
399 
400  // Step 2: A 4-byte match has been found. We'll later see if more
401  // than 4 bytes match. But, prior to the match, input
402  // bytes [next_emit, ip) are unmatched. Emit them as "literal bytes."
403  assert(next_emit + 16 <= ip_end);
404  op = EmitLiteral(op, next_emit, ip - next_emit, true);
405 
406  // Step 3: Call EmitCopy, and then see if another EmitCopy could
407  // be our next move. Repeat until we find no match for the
408  // input immediately after what was consumed by the last EmitCopy call.
409  //
410  // If we exit this loop normally then we need to call EmitLiteral next,
411  // though we don't yet know how big the literal will be. We handle that
412  // by proceeding to the next iteration of the main loop. We also can exit
413  // this loop via goto if we get close to exhausting the input.
414  EightBytesReference input_bytes;
415  uint32 candidate_bytes = 0;
416 
417  do {
418  // We have a 4-byte match at ip, and no need to emit any
419  // "literal bytes" prior to ip.
420  const char* base = ip;
421  int matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end);
422  ip += matched;
423  size_t offset = base - candidate;
424  assert(0 == memcmp(base, candidate, matched));
425  op = EmitCopy(op, offset, matched);
426  // We could immediately start working at ip now, but to improve
427  // compression we first update table[Hash(ip - 1, ...)].
428  const char* insert_tail = ip - 1;
429  next_emit = ip;
430  if (PREDICT_FALSE(ip >= ip_limit)) {
431  goto emit_remainder;
432  }
433  input_bytes = GetEightBytesAt(insert_tail);
434  uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
435  table[prev_hash] = ip - base_ip - 1;
436  uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
437  candidate = base_ip + table[cur_hash];
438  candidate_bytes = UNALIGNED_LOAD32(candidate);
439  table[cur_hash] = ip - base_ip;
440  } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes);
441 
442  next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift);
443  ++ip;
444  }
445  }
446 
447  emit_remainder:
448  // Emit the remaining bytes as a literal
449  if (next_emit < ip_end) {
450  op = EmitLiteral(op, next_emit, ip_end - next_emit, false);
451  }
452 
453  return op;
454 }
455 } // end namespace internal
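The match-skipping heuristic inside CompressFragment advances by (skip++ >> 5) bytes per probe, so the stride stays at 1 for the first 32 probes, then 2 for the next 32, and so on. A standalone check of that schedule (not library code):

#include <cassert>

int main() {
  unsigned skip = 32;                    // same starting value as the compressor
  unsigned long advanced = 0;
  for (int probes = 0; probes < 96; ++probes) {
    advanced += (skip++ >> 5);           // same update as in the matching loop
  }
  assert(advanced == 32 * (1 + 2 + 3));  // 32 probes each at strides 1, 2, 3
  return 0;
}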
456 
457 // Signature of output types needed by decompression code.
458 // The decompression code is templatized on a type that obeys this
459 // signature so that we do not pay virtual function call overhead in
460 // the middle of a tight decompression loop.
461 //
462 // class DecompressionWriter {
463 // public:
464 // // Called before decompression
465 // void SetExpectedLength(size_t length);
466 //
467 // // Called after decompression
468 // bool CheckLength() const;
469 //
470 // // Called repeatedly during decompression
471 // bool Append(const char* ip, size_t length);
472 // bool AppendFromSelf(uint32 offset, size_t length);
473 //
474 // // The rules for how TryFastAppend differs from Append are somewhat
475 // // convoluted:
476 // //
477 // // - TryFastAppend is allowed to decline (return false) at any
478 // // time, for any reason -- just "return false" would be
479 // // a perfectly legal implementation of TryFastAppend.
480 // // The intention is for TryFastAppend to allow a fast path
481 // // in the common case of a small append.
482 // // - TryFastAppend is allowed to read up to <available> bytes
483 // // from the input buffer, whereas Append is allowed to read
484 // // <length>. However, if it returns true, it must leave
485 // // at least five (kMaximumTagLength) bytes in the input buffer
486 // // afterwards, so that there is always enough space to read the
487 // // next tag without checking for a refill.
488 // // - TryFastAppend must always return decline (return false)
489 // // if <length> is 61 or more, as in this case the literal length is not
490 // // decoded fully. In practice, this should not be a big problem,
491 // // as it is unlikely that one would implement a fast path accepting
492 // // this much data.
493 // //
494 // bool TryFastAppend(const char* ip, size_t available, size_t length);
495 // };
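For illustration, a minimal writer satisfying this signature could look like the hypothetical CountingWriter below: it discards the data, declines every TryFastAppend (which the rules above explicitly allow), and only tracks lengths. SnappyDecompressionValidator later in this file is the real in-tree example of the same pattern.

#include <cstddef>

// Sketch only: a conforming Writer that validates lengths and nothing else.
class CountingWriter {
 public:
  CountingWriter() : expected_(0), produced_(0) {}
  void SetExpectedLength(size_t length) { expected_ = length; }
  bool CheckLength() const { return produced_ == expected_; }
  bool Append(const char* /*ip*/, size_t length) {
    produced_ += length;
    return produced_ <= expected_;
  }
  bool AppendFromSelf(size_t offset, size_t length) {
    if (offset == 0 || offset > produced_) return false;  // invalid back-reference
    produced_ += length;
    return produced_ <= expected_;
  }
  bool TryFastAppend(const char*, size_t, size_t) { return false; }  // always decline
  void Flush() {}

 private:
  size_t expected_;
  size_t produced_;
};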
496 
497 // -----------------------------------------------------------------------
498 // Lookup table for decompression code. Generated by ComputeTable() below.
499 // -----------------------------------------------------------------------
500 
501 // Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
502 static const uint32 wordmask[] = {
503  0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
504 };
505 
506 // Data stored per entry in lookup table:
507 // Range Bits-used Description
508 // ------------------------------------
509 // 1..64 0..7 Literal/copy length encoded in opcode byte
510 // 0..7 8..10 Copy offset encoded in opcode byte / 256
511 // 0..4 11..13 Extra bytes after opcode
512 //
513 // We use eight bits for the length even though 7 would have sufficed
514 // for efficiency reasons:
515 // (1) Extracting a byte is faster than a bit-field
516 // (2) It properly aligns copy offset so we do not need a <<8
517 static const uint16 char_table[256] = {
518  0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
519  0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
520  0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
521  0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
522  0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
523  0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
524  0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
525  0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
526  0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
527  0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
528  0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
529  0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
530  0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
531  0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
532  0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
533  0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
534  0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
535  0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
536  0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
537  0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
538  0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
539  0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
540  0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
541  0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
542  0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
543  0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
544  0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
545  0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
546  0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
547  0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
548  0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
549  0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
550 };
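Decoding two representative entries by hand, against the bit layout documented above (an arithmetic check, not library code): opcode 0x01 maps to 0x0804, a one-byte-offset copy of length 4 with one trailing byte; opcode 0xf0 maps to 0x0801, a long literal whose real length is carried in one following byte.

#include <cassert>

int main() {
  const unsigned entry_copy = 0x0804;     // char_table[0x01]
  const unsigned entry_literal = 0x0801;  // char_table[0xf0]
  assert((entry_copy >> 11) == 1);        // one extra byte after the opcode
  assert(((entry_copy >> 8) & 7) == 0);   // copy_offset / 256
  assert((entry_copy & 0xff) == 4);       // length
  assert((entry_literal >> 11) == 1 && (entry_literal & 0xff) == 1);
  return 0;
}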
551 
552 // In debug mode, allow optional computation of the table at startup.
553 // Also, check that the decompression table is correct.
554 #ifndef NDEBUG
555 DEFINE_bool(snappy_dump_decompression_table, false,
556  "If true, we print the decompression table at startup.");
557 
558 static uint16 MakeEntry(unsigned int extra,
559  unsigned int len,
560  unsigned int copy_offset) {
561  // Check that all of the fields fit within the allocated space
562  assert(extra == (extra & 0x7)); // At most 3 bits
563  assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits
564  assert(len == (len & 0x7f)); // At most 7 bits
565  return len | (copy_offset << 8) | (extra << 11);
566 }
567 
568 static void ComputeTable() {
569  uint16 dst[256];
570 
571  // Place invalid entries in all places to detect missing initialization
572  int assigned = 0;
573  for (int i = 0; i < 256; i++) {
574  dst[i] = 0xffff;
575  }
576 
577  // Small LITERAL entries. We store (len-1) in the top 6 bits.
578  for (unsigned int len = 1; len <= 60; len++) {
579  dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
580  assigned++;
581  }
582 
583  // Large LITERAL entries. We use 60..63 in the high 6 bits to
584  // encode the number of bytes of length info that follow the opcode.
585  for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
586  // We set the length field in the lookup table to 1 because extra
587  // bytes encode len-1.
588  dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
589  assigned++;
590  }
591 
592  // COPY_1_BYTE_OFFSET.
593  //
594  // The tag byte in the compressed data stores len-4 in 3 bits, and
595  // offset/256 in 5 bits. offset%256 is stored in the next byte.
596  //
597  // This format is used for length in range [4..11] and offset in
598  // range [0..2047]
599  for (unsigned int len = 4; len < 12; len++) {
600  for (unsigned int offset = 0; offset < 2048; offset += 256) {
601  dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
602  MakeEntry(1, len, offset>>8);
603  assigned++;
604  }
605  }
606 
607  // COPY_2_BYTE_OFFSET.
608  // Tag contains len-1 in top 6 bits, and offset in next two bytes.
609  for (unsigned int len = 1; len <= 64; len++) {
610  dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
611  assigned++;
612  }
613 
614  // COPY_4_BYTE_OFFSET.
615  // Tag contains len-1 in top 6 bits, and offset in next four bytes.
616  for (unsigned int len = 1; len <= 64; len++) {
617  dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
618  assigned++;
619  }
620 
621  // Check that each entry was initialized exactly once.
622  if (assigned != 256) {
623  fprintf(stderr, "ComputeTable: assigned only %d of 256\n", assigned);
624  abort();
625  }
626  for (int i = 0; i < 256; i++) {
627  if (dst[i] == 0xffff) {
628  fprintf(stderr, "ComputeTable: did not assign byte %d\n", i);
629  abort();
630  }
631  }
632 
633  if (FLAGS_snappy_dump_decompression_table) {
634  printf("static const uint16 char_table[256] = {\n ");
635  for (int i = 0; i < 256; i++) {
636  printf("0x%04x%s",
637  dst[i],
638  ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n " : ", ")));
639  }
640  printf("};\n");
641  }
642 
643  // Check that computed table matched recorded table
644  for (int i = 0; i < 256; i++) {
645  if (dst[i] != char_table[i]) {
646  fprintf(stderr, "ComputeTable: byte %d: computed (%x), expect (%x)\n",
647  i, static_cast<int>(dst[i]), static_cast<int>(char_table[i]));
648  abort();
649  }
650  }
651 }
652 #endif /* !NDEBUG */
653 
654 // Helper class for decompression
655 class SnappyDecompressor {
656  private:
657  Source* reader_; // Underlying source of bytes to decompress
658  const char* ip_; // Points to next buffered byte
659  const char* ip_limit_; // Points just past buffered bytes
660  uint32 peeked_; // Bytes peeked from reader (need to skip)
661  bool eof_; // Hit end of input without an error?
662  char scratch_[kMaximumTagLength]; // See RefillTag().
663 
664  // Ensure that all of the tag metadata for the next tag is available
665  // in [ip_..ip_limit_-1]. Also ensures that [ip,ip+4] is readable even
666  // if (ip_limit_ - ip_ < 5).
667  //
668  // Returns true on success, false on error or end of input.
669  bool RefillTag();
670 
671  public:
672  explicit SnappyDecompressor(Source* reader)
673  : reader_(reader),
674  ip_(NULL),
675  ip_limit_(NULL),
676  peeked_(0),
677  eof_(false)
678  {
679  }
680 
681  ~SnappyDecompressor() {
682  // Advance past any bytes we peeked at from the reader
683  reader_->Skip(peeked_);
684  }
685 
686  // Returns true iff we have hit the end of the input without an error.
687  bool eof() const {
688  return eof_;
689  }
690 
691  // Read the uncompressed length stored at the start of the compressed data.
692  // On success, stores the length in *result and returns true.
693  // On failure, returns false.
694  bool ReadUncompressedLength(uint32* result) {
695  assert(ip_ == NULL); // Must not have read anything yet
696  // Length is encoded in 1..5 bytes
697  *result = 0;
698  uint32 shift = 0;
699  while (true) {
700  if (shift >= 32) return false;
701  size_t n;
702  const char* ip = reader_->Peek(&n);
703  if (n == 0) return false;
704  const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
705  reader_->Skip(1);
706  *result |= static_cast<uint32>(c & 0x7f) << shift;
707  if (c < 128) {
708  break;
709  }
710  shift += 7;
711  }
712  return true;
713  }
714 
715  // Process all tags found in the input. Stops on error or end of input;
716  // callers check eof() and the writer's CheckLength() afterwards.
717  template <class Writer>
718  void DecompressAllTags(Writer* writer) {
719  const char* ip = ip_;
720 
721  // We could have put this refill fragment only at the beginning of the loop.
722  // However, duplicating it at the end of each branch gives the compiler more
723  // scope to optimize the <ip_limit_ - ip> expression based on the local
724  // context, which overall increases speed.
725  #define MAYBE_REFILL() \
726  if (ip_limit_ - ip < kMaximumTagLength) { \
727  ip_ = ip; \
728  if (!RefillTag()) return; \
729  ip = ip_; \
730  }
731 
732  MAYBE_REFILL();
733  for ( ;; ) {
734  const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));
735 
736  if ((c & 0x3) == LITERAL) {
737  size_t literal_length = (c >> 2) + 1u;
738  if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
739  assert(literal_length < 61);
740  ip += literal_length;
741  // NOTE(user): There is no MAYBE_REFILL() here, as TryFastAppend()
742  // will not return true unless there's already at least five spare
743  // bytes in addition to the literal.
744  continue;
745  }
746  if (PREDICT_FALSE(literal_length >= 61)) {
747  // Long literal.
748  const size_t literal_length_length = literal_length - 60;
749  literal_length =
750  (LittleEndian::Load32(ip) & wordmask[literal_length_length]) + 1;
751  ip += literal_length_length;
752  }
753 
754  size_t avail = ip_limit_ - ip;
755  while (avail < literal_length) {
756  if (!writer->Append(ip, avail)) return;
757  literal_length -= avail;
758  reader_->Skip(peeked_);
759  size_t n;
760  ip = reader_->Peek(&n);
761  avail = n;
762  peeked_ = avail;
763  if (avail == 0) return; // Premature end of input
764  ip_limit_ = ip + avail;
765  }
766  if (!writer->Append(ip, literal_length)) {
767  return;
768  }
769  ip += literal_length;
770  MAYBE_REFILL();
771  } else {
772  const uint32 entry = char_table[c];
773  const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
774  const uint32 length = entry & 0xff;
775  ip += entry >> 11;
776 
777  // copy_offset/256 is encoded in bits 8..10. By just fetching
778  // those bits, we get copy_offset (since the bit-field starts at
779  // bit 8).
780  const uint32 copy_offset = entry & 0x700;
781  if (!writer->AppendFromSelf(copy_offset + trailer, length)) {
782  return;
783  }
784  MAYBE_REFILL();
785  }
786  }
787 
788 #undef MAYBE_REFILL
789  }
790 };
791 
792 bool SnappyDecompressor::RefillTag() {
793  const char* ip = ip_;
794  if (ip == ip_limit_) {
795  // Fetch a new fragment from the reader
796  reader_->Skip(peeked_); // All peeked bytes are used up
797  size_t n;
798  ip = reader_->Peek(&n);
799  peeked_ = n;
800  if (n == 0) {
801  eof_ = true;
802  return false;
803  }
804  ip_limit_ = ip + n;
805  }
806 
807  // Read the tag character
808  assert(ip < ip_limit_);
809  const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
810  const uint32 entry = char_table[c];
811  const uint32 needed = (entry >> 11) + 1; // +1 byte for 'c'
812  assert(needed <= sizeof(scratch_));
813 
814  // Read more bytes from reader if needed
815  uint32 nbuf = ip_limit_ - ip;
816  if (nbuf < needed) {
817  // Stitch together bytes from ip and reader to form the word
818  // contents. We store the needed bytes in "scratch_". They
819  // will be consumed immediately by the caller since we do not
820  // read more than we need.
821  memmove(scratch_, ip, nbuf);
822  reader_->Skip(peeked_); // All peeked bytes are used up
823  peeked_ = 0;
824  while (nbuf < needed) {
825  size_t length;
826  const char* src = reader_->Peek(&length);
827  if (length == 0) return false;
828  uint32 to_add = min<uint32>(needed - nbuf, length);
829  memcpy(scratch_ + nbuf, src, to_add);
830  nbuf += to_add;
831  reader_->Skip(to_add);
832  }
833  assert(nbuf == needed);
834  ip_ = scratch_;
835  ip_limit_ = scratch_ + needed;
836  } else if (nbuf < kMaximumTagLength) {
837  // Have enough bytes, but move into scratch_ so that we do not
838  // read past end of input
839  memmove(scratch_, ip, nbuf);
840  reader_->Skip(peeked_); // All peeked bytes are used up
841  peeked_ = 0;
842  ip_ = scratch_;
843  ip_limit_ = scratch_ + nbuf;
844  } else {
845  // Pass pointer to buffer returned by reader_.
846  ip_ = ip;
847  }
848  return true;
849 }
850 
851 template <typename Writer>
852 static bool InternalUncompress(Source* r, Writer* writer) {
853  // Read the uncompressed length from the front of the compressed input
854  SnappyDecompressor decompressor(r);
855  uint32 uncompressed_len = 0;
856  if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
857  return InternalUncompressAllTags(&decompressor, writer, uncompressed_len);
858 }
859 
860 template <typename Writer>
861 static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
862  Writer* writer,
863  uint32 uncompressed_len) {
864  writer->SetExpectedLength(uncompressed_len);
865 
866  // Process the entire input
867  decompressor->DecompressAllTags(writer);
868  writer->Flush();
869  return (decompressor->eof() && writer->CheckLength());
870 }
871 
872 bool GetUncompressedLength(Source* source, uint32* result) {
873  SnappyDecompressor decompressor(source);
874  return decompressor.ReadUncompressedLength(result);
875 }
876 
877 size_t Compress(Source* reader, Sink* writer) {
878  size_t written = 0;
879  size_t N = reader->Available();
880  char ulength[Varint::kMax32];
881  char* p = Varint::Encode32(ulength, N);
882  writer->Append(ulength, p-ulength);
883  written += (p - ulength);
884 
885  internal::WorkingMemory wmem;
886  char* scratch = NULL;
887  char* scratch_output = NULL;
888 
889  while (N > 0) {
890  // Get next block to compress (without copying if possible)
891  size_t fragment_size;
892  const char* fragment = reader->Peek(&fragment_size);
893  assert(fragment_size != 0); // premature end of input
894  const size_t num_to_read = min(N, kBlockSize);
895  size_t bytes_read = fragment_size;
896 
897  size_t pending_advance = 0;
898  if (bytes_read >= num_to_read) {
899  // Buffer returned by reader is large enough
900  pending_advance = num_to_read;
901  fragment_size = num_to_read;
902  } else {
903  // Read into scratch buffer
904  if (scratch == NULL) {
905  // If this is the last iteration, we want to allocate N bytes
906  // of space, otherwise the max possible kBlockSize space.
907  // num_to_read contains exactly the correct value
908  scratch = new char[num_to_read];
909  }
910  memcpy(scratch, fragment, bytes_read);
911  reader->Skip(bytes_read);
912 
913  while (bytes_read < num_to_read) {
914  fragment = reader->Peek(&fragment_size);
915  size_t n = min<size_t>(fragment_size, num_to_read - bytes_read);
916  memcpy(scratch + bytes_read, fragment, n);
917  bytes_read += n;
918  reader->Skip(n);
919  }
920  assert(bytes_read == num_to_read);
921  fragment = scratch;
922  fragment_size = num_to_read;
923  }
924  assert(fragment_size == num_to_read);
925 
926  // Get encoding table for compression
927  int table_size;
928  uint16* table = wmem.GetHashTable(num_to_read, &table_size);
929 
930  // Compress input_fragment and append to dest
931  const int max_output = MaxCompressedLength(num_to_read);
932 
933  // Need a scratch buffer for the output, in case the byte sink doesn't
934  // have room for us directly.
935  if (scratch_output == NULL) {
936  scratch_output = new char[max_output];
937  } else {
938  // Since we encode kBlockSize regions followed by a region
939  // which is <= kBlockSize in length, a previously allocated
940  // scratch_output[] region is big enough for this iteration.
941  }
942  char* dest = writer->GetAppendBuffer(max_output, scratch_output);
943  char* end = internal::CompressFragment(fragment, fragment_size,
944  dest, table, table_size);
945  writer->Append(dest, end - dest);
946  written += (end - dest);
947 
948  N -= num_to_read;
949  reader->Skip(pending_advance);
950  }
951 
952  delete[] scratch;
953  delete[] scratch_output;
954 
955  return written;
956 }
957 
958 // -----------------------------------------------------------------------
959 // IOVec interfaces
960 // -----------------------------------------------------------------------
961 
962 // A type that writes to an iovec.
963 // Note that this is not a "ByteSink", but a type that matches the
964 // Writer template argument to SnappyDecompressor::DecompressAllTags().
965 class SnappyIOVecWriter {
966  private:
967  const struct iovec* output_iov_;
968  const size_t output_iov_count_;
969 
970  // We are currently writing into output_iov_[curr_iov_index_].
971  int curr_iov_index_;
972 
973  // Bytes written to output_iov_[curr_iov_index_] so far.
974  size_t curr_iov_written_;
975 
976  // Total bytes decompressed into output_iov_ so far.
977  size_t total_written_;
978 
979  // Maximum number of bytes that will be decompressed into output_iov_.
980  size_t output_limit_;
981 
982  inline char* GetIOVecPointer(int index, size_t offset) {
983  return reinterpret_cast<char*>(output_iov_[index].iov_base) +
984  offset;
985  }
986 
987  public:
988  // Does not take ownership of iov. iov must be valid during the
989  // entire lifetime of the SnappyIOVecWriter.
990  inline SnappyIOVecWriter(const struct iovec* iov, size_t iov_count)
991  : output_iov_(iov),
992  output_iov_count_(iov_count),
993  curr_iov_index_(0),
994  curr_iov_written_(0),
995  total_written_(0),
996  output_limit_(-1) {
997  }
998 
999  inline void SetExpectedLength(size_t len) {
1000  output_limit_ = len;
1001  }
1002 
1003  inline bool CheckLength() const {
1004  return total_written_ == output_limit_;
1005  }
1006 
1007  inline bool Append(const char* ip, size_t len) {
1008  if (total_written_ + len > output_limit_) {
1009  return false;
1010  }
1011 
1012  while (len > 0) {
1013  assert(curr_iov_written_ <= output_iov_[curr_iov_index_].iov_len);
1014  if (curr_iov_written_ >= output_iov_[curr_iov_index_].iov_len) {
1015  // This iovec is full. Go to the next one.
1016  if (size_t(curr_iov_index_) + 1 >= output_iov_count_) {
1017  return false;
1018  }
1019  curr_iov_written_ = 0;
1020  ++curr_iov_index_;
1021  }
1022 
1023  const size_t to_write = std::min(
1024  len, output_iov_[curr_iov_index_].iov_len - curr_iov_written_);
1025  memcpy(GetIOVecPointer(curr_iov_index_, curr_iov_written_),
1026  ip,
1027  to_write);
1028  curr_iov_written_ += to_write;
1029  total_written_ += to_write;
1030  ip += to_write;
1031  len -= to_write;
1032  }
1033 
1034  return true;
1035  }
1036 
1037  inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
1038  const size_t space_left = output_limit_ - total_written_;
1039  if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16 &&
1040  output_iov_[curr_iov_index_].iov_len - curr_iov_written_ >= 16) {
1041  // Fast path, used for the majority (about 95%) of invocations.
1042  char* ptr = GetIOVecPointer(curr_iov_index_, curr_iov_written_);
1043  UnalignedCopy64(ip, ptr);
1044  UnalignedCopy64(ip + 8, ptr + 8);
1045  curr_iov_written_ += len;
1046  total_written_ += len;
1047  return true;
1048  }
1049 
1050  return false;
1051  }
1052 
1053  inline bool AppendFromSelf(size_t offset, size_t len) {
1054  if (offset > total_written_ || offset == 0) {
1055  return false;
1056  }
1057  const size_t space_left = output_limit_ - total_written_;
1058  if (len > space_left) {
1059  return false;
1060  }
1061 
1062  // Locate the iovec from which we need to start the copy.
1063  int from_iov_index = curr_iov_index_;
1064  size_t from_iov_offset = curr_iov_written_;
1065  while (offset > 0) {
1066  if (from_iov_offset >= offset) {
1067  from_iov_offset -= offset;
1068  break;
1069  }
1070 
1071  offset -= from_iov_offset;
1072  --from_iov_index;
1073  assert(from_iov_index >= 0);
1074  from_iov_offset = output_iov_[from_iov_index].iov_len;
1075  }
1076 
1077  // Copy <len> bytes starting from the iovec pointed to by from_iov_index to
1078  // the current iovec.
1079  while (len > 0) {
1080  assert(from_iov_index <= curr_iov_index_);
1081  if (from_iov_index != curr_iov_index_) {
1082  const size_t to_copy = std::min(
1083  output_iov_[from_iov_index].iov_len - from_iov_offset,
1084  len);
1085  Append(GetIOVecPointer(from_iov_index, from_iov_offset), to_copy);
1086  len -= to_copy;
1087  if (len > 0) {
1088  ++from_iov_index;
1089  from_iov_offset = 0;
1090  }
1091  } else {
1092  assert(curr_iov_written_ <= output_iov_[curr_iov_index_].iov_len);
1093  size_t to_copy = std::min(output_iov_[curr_iov_index_].iov_len -
1094  curr_iov_written_,
1095  len);
1096  if (to_copy == 0) {
1097  // This iovec is full. Go to the next one.
1098  if ( size_t(curr_iov_index_) + 1 >= output_iov_count_) {
1099  return false;
1100  }
1101  ++curr_iov_index_;
1102  curr_iov_written_ = 0;
1103  continue;
1104  }
1105  if (to_copy > len) {
1106  to_copy = len;
1107  }
1108  IncrementalCopy(GetIOVecPointer(from_iov_index, from_iov_offset),
1109  GetIOVecPointer(curr_iov_index_, curr_iov_written_),
1110  to_copy);
1111  curr_iov_written_ += to_copy;
1112  from_iov_offset += to_copy;
1113  total_written_ += to_copy;
1114  len -= to_copy;
1115  }
1116  }
1117 
1118  return true;
1119  }
1120 
1121  inline void Flush() {}
1122 };
1123 
1124 bool RawUncompressToIOVec(const char* compressed, size_t compressed_length,
1125  const struct iovec* iov, size_t iov_cnt) {
1126  ByteArraySource reader(compressed, compressed_length);
1127  return RawUncompressToIOVec(&reader, iov, iov_cnt);
1128 }
1129 
1130 bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov,
1131  size_t iov_cnt) {
1132  SnappyIOVecWriter output(iov, iov_cnt);
1133  return InternalUncompress(compressed, &output);
1134 }
1135 
1136 // -----------------------------------------------------------------------
1137 // Flat array interfaces
1138 // -----------------------------------------------------------------------
1139 
1140 // A type that writes to a flat array.
1141 // Note that this is not a "ByteSink", but a type that matches the
1142 // Writer template argument to SnappyDecompressor::DecompressAllTags().
1143 class SnappyArrayWriter {
1144  private:
1145  char* base_;
1146  char* op_;
1147  char* op_limit_;
1148 
1149  public:
1150  inline explicit SnappyArrayWriter(char* dst)
1151  : base_(dst),
1152  op_(dst),
1153  op_limit_(dst) {
1154  }
1155 
1156  inline void SetExpectedLength(size_t len) {
1157  op_limit_ = op_ + len;
1158  }
1159 
1160  inline bool CheckLength() const {
1161  return op_ == op_limit_;
1162  }
1163 
1164  inline bool Append(const char* ip, size_t len) {
1165  char* op = op_;
1166  const size_t space_left = op_limit_ - op;
1167  if (space_left < len) {
1168  return false;
1169  }
1170  memcpy(op, ip, len);
1171  op_ = op + len;
1172  return true;
1173  }
1174 
1175  inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
1176  char* op = op_;
1177  const size_t space_left = op_limit_ - op;
1178  if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16) {
1179  // Fast path, used for the majority (about 95%) of invocations.
1180  UnalignedCopy64(ip, op);
1181  UnalignedCopy64(ip + 8, op + 8);
1182  op_ = op + len;
1183  return true;
1184  } else {
1185  return false;
1186  }
1187  }
1188 
1189  inline bool AppendFromSelf(size_t offset, size_t len) {
1190  char* op = op_;
1191  const size_t space_left = op_limit_ - op;
1192 
1193  // Check if we try to append from before the start of the buffer.
1194  // Normally this would just be a check for "produced < offset",
1195  // but "produced <= offset - 1u" is equivalent for every case
1196  // except the one where offset==0, where the right side will wrap around
1197  // to a very big number. This is convenient, as offset==0 is another
1198  // invalid case that we also want to catch, so that we do not go
1199  // into an infinite loop.
1200  assert(op >= base_);
1201  size_t produced = op - base_;
1202  if (produced <= offset - 1u) {
1203  return false;
1204  }
1205  if (len <= 16 && offset >= 8 && space_left >= 16) {
1206  // Fast path, used for the majority (70-80%) of dynamic invocations.
1207  UnalignedCopy64(op - offset, op);
1208  UnalignedCopy64(op - offset + 8, op + 8);
1209  } else {
1210  if (space_left >= len + kMaxIncrementCopyOverflow) {
1211  IncrementalCopyFastPath(op - offset, op, len);
1212  } else {
1213  if (space_left < len) {
1214  return false;
1215  }
1216  IncrementalCopy(op - offset, op, len);
1217  }
1218  }
1219 
1220  op_ = op + len;
1221  return true;
1222  }
1223  inline size_t Produced() const {
1224  return op_ - base_;
1225  }
1226  inline void Flush() {}
1227 };
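The unsigned-wraparound check used in AppendFromSelf can be exercised in isolation (a sketch with hypothetical values): because offset - 1u wraps to SIZE_MAX when offset is zero, one comparison rejects both offset == 0 and back-references that reach before the produced output.

#include <cassert>
#include <cstddef>

static bool RejectsBackReference(size_t produced, size_t offset) {
  return produced <= offset - 1u;   // same condition as in AppendFromSelf
}

int main() {
  assert(RejectsBackReference(5, 0));    // offset 0 is always invalid
  assert(RejectsBackReference(5, 7));    // reaches before the start of the output
  assert(!RejectsBackReference(5, 3));   // a valid back-reference is accepted
  return 0;
}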
1228 
1229 bool RawUncompress(const char* compressed, size_t n, char* uncompressed) {
1230  ByteArraySource reader(compressed, n);
1231  return RawUncompress(&reader, uncompressed);
1232 }
1233 
1234 bool RawUncompress(Source* compressed, char* uncompressed) {
1235  SnappyArrayWriter output(uncompressed);
1236  return InternalUncompress(compressed, &output);
1237 }
1238 
1239 bool Uncompress(const char* compressed, size_t n, string* uncompressed) {
1240  size_t ulength;
1241  if (!GetUncompressedLength(compressed, n, &ulength)) {
1242  return false;
1243  }
1244  // On 32-bit builds: max_size() < kuint32max. Check for that instead
1245  // of crashing (e.g., consider externally specified compressed data).
1246  if (ulength > uncompressed->max_size()) {
1247  return false;
1248  }
1249  STLStringResizeUninitialized(uncompressed, ulength);
1250  return RawUncompress(compressed, n, string_as_array(uncompressed));
1251 }
1252 
1253 // A Writer that drops everything on the floor and just does validation
1254 class SnappyDecompressionValidator {
1255  private:
1256  size_t expected_;
1257  size_t produced_;
1258 
1259  public:
1260  inline SnappyDecompressionValidator() : expected_(0), produced_(0) { }
1261  inline void SetExpectedLength(size_t len) {
1262  expected_ = len;
1263  }
1264  inline bool CheckLength() const {
1265  return expected_ == produced_;
1266  }
1267  inline bool Append(const char* ip, size_t len) {
1268  produced_ += len;
1269  return produced_ <= expected_;
1270  }
1271  inline bool TryFastAppend(const char* ip, size_t available, size_t length) {
1272  return false;
1273  }
1274  inline bool AppendFromSelf(size_t offset, size_t len) {
1275  // See SnappyArrayWriter::AppendFromSelf for an explanation of
1276  // the "offset - 1u" trick.
1277  if (produced_ <= offset - 1u) return false;
1278  produced_ += len;
1279  return produced_ <= expected_;
1280  }
1281  inline void Flush() {}
1282 };
1283 
1284 bool IsValidCompressedBuffer(const char* compressed, size_t n) {
1285  ByteArraySource reader(compressed, n);
1286  SnappyDecompressionValidator writer;
1287  return InternalUncompress(&reader, &writer);
1288 }
1289 
1290 bool IsValidCompressed(Source* compressed) {
1291  SnappyDecompressionValidator writer;
1292  return InternalUncompress(compressed, &writer);
1293 }
1294 
1295 void RawCompress(const char* input,
1296  size_t input_length,
1297  char* compressed,
1298  size_t* compressed_length) {
1299  ByteArraySource reader(input, input_length);
1300  UncheckedByteArraySink writer(compressed);
1301  Compress(&reader, &writer);
1302 
1303  // Compute how many bytes were added
1304  *compressed_length = (writer.CurrentDestination() - compressed);
1305 }
1306 
1307 size_t Compress(const char* input, size_t input_length, string* compressed) {
1308  // Pre-grow the buffer to the max length of the compressed output
1309  compressed->resize(MaxCompressedLength(input_length));
1310 
1311  size_t compressed_length;
1312  RawCompress(input, input_length, string_as_array(compressed),
1313  &compressed_length);
1314  compressed->resize(compressed_length);
1315  return compressed_length;
1316 }
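A typical round trip through the flat-array/string entry points defined in this file looks like the following usage sketch (error handling kept minimal):

#include <cassert>
#include <string>
#include "snappy.h"

int main() {
  std::string input(1000, 'a'), compressed, output;
  snappy::Compress(input.data(), input.size(), &compressed);
  assert(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
  bool ok = snappy::Uncompress(compressed.data(), compressed.size(), &output);
  assert(ok && output == input);
  return 0;
}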
1317 
1318 // -----------------------------------------------------------------------
1319 // Sink interface
1320 // -----------------------------------------------------------------------
1321 
1322 // A type that decompresses into a Sink. The template parameter
1323 // Allocator must export one method "char* Allocate(int size);", which
1324 // allocates a buffer of "size" and appends that to the destination.
1325 template <typename Allocator>
1326 class SnappyScatteredWriter {
1327  Allocator allocator_;
1328 
1329  // We need random access into the data generated so far. Therefore
1330  // we keep track of all of the generated data as an array of blocks.
1331  // All of the blocks except the last have length kBlockSize.
1332  vector<char*> blocks_;
1333  size_t expected_;
1334 
1335  // Total size of all fully generated blocks so far
1336  size_t full_size_;
1337 
1338  // Pointer into current output block
1339  char* op_base_; // Base of output block
1340  char* op_ptr_; // Pointer to next unfilled byte in block
1341  char* op_limit_; // Pointer just past block
1342 
1343  inline size_t Size() const {
1344  return full_size_ + (op_ptr_ - op_base_);
1345  }
1346 
1347  bool SlowAppend(const char* ip, size_t len);
1348  bool SlowAppendFromSelf(size_t offset, size_t len);
1349 
1350  public:
1351  inline explicit SnappyScatteredWriter(const Allocator& allocator)
1352  : allocator_(allocator),
1353  expected_(0),
1354  full_size_(0),
1355  op_base_(NULL),
1356  op_ptr_(NULL),
1357  op_limit_(NULL) {
1358  }
1359 
1360  inline void SetExpectedLength(size_t len) {
1361  assert(blocks_.empty());
1362  expected_ = len;
1363  }
1364 
1365  inline bool CheckLength() const {
1366  return Size() == expected_;
1367  }
1368 
1369  // Return the number of bytes actually uncompressed so far
1370  inline size_t Produced() const {
1371  return Size();
1372  }
1373 
1374  inline bool Append(const char* ip, size_t len) {
1375  size_t avail = op_limit_ - op_ptr_;
1376  if (len <= avail) {
1377  // Fast path
1378  memcpy(op_ptr_, ip, len);
1379  op_ptr_ += len;
1380  return true;
1381  } else {
1382  return SlowAppend(ip, len);
1383  }
1384  }
1385 
1386  inline bool TryFastAppend(const char* ip, size_t available, size_t length) {
1387  char* op = op_ptr_;
1388  const int space_left = op_limit_ - op;
1389  if (length <= 16 && available >= 16 + kMaximumTagLength &&
1390  space_left >= 16) {
1391  // Fast path, used for the majority (about 95%) of invocations.
1392  UNALIGNED_STORE64(op, UNALIGNED_LOAD64(ip));
1393  UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(ip + 8));
1394  op_ptr_ = op + length;
1395  return true;
1396  } else {
1397  return false;
1398  }
1399  }
1400 
1401  inline bool AppendFromSelf(size_t offset, size_t len) {
1402  // See SnappyArrayWriter::AppendFromSelf for an explanation of
1403  // the "offset - 1u" trick.
1404 
1405 //  if (offset - 1u < op_ptr_ - op_base_) {  // original check; triggered a signed/unsigned comparison warning
1406  if (offset != 0 && long(offset) <= op_ptr_ - op_base_) {  // equivalent check without the warning
1407  const size_t space_left = op_limit_ - op_ptr_;
1408  if (space_left >= len + kMaxIncrementCopyOverflow) {
1409  // Fast path: src and dst in current block.
1410  IncrementalCopyFastPath(op_ptr_ - offset, op_ptr_, len);
1411  op_ptr_ += len;
1412  return true;
1413  }
1414  }
1415  return SlowAppendFromSelf(offset, len);
1416  }
1417 
1418  // Called at the end of the decompression. We ask the allocator to
1419  // write all blocks to the sink.
1420  inline void Flush() { allocator_.Flush(Produced()); }
1421 };
1422 
1423 template<typename Allocator>
1424 bool SnappyScatteredWriter<Allocator>::SlowAppend(const char* ip, size_t len) {
1425  size_t avail = op_limit_ - op_ptr_;
1426  while (len > avail) {
1427  // Completely fill this block
1428  memcpy(op_ptr_, ip, avail);
1429  op_ptr_ += avail;
1430  assert(op_limit_ - op_ptr_ == 0);
1431  full_size_ += (op_ptr_ - op_base_);
1432  len -= avail;
1433  ip += avail;
1434 
1435  // Bounds check
1436  if (full_size_ + len > expected_) {
1437  return false;
1438  }
1439 
1440  // Make new block
1441  size_t bsize = min<size_t>(kBlockSize, expected_ - full_size_);
1442  op_base_ = allocator_.Allocate(bsize);
1443  op_ptr_ = op_base_;
1444  op_limit_ = op_base_ + bsize;
1445  blocks_.push_back(op_base_);
1446  avail = bsize;
1447  }
1448 
1449  memcpy(op_ptr_, ip, len);
1450  op_ptr_ += len;
1451  return true;
1452 }
1453 
1454 template<typename Allocator>
1455 bool SnappyScatteredWriter<Allocator>::SlowAppendFromSelf(size_t offset,
1456  size_t len) {
1457  // Overflow check
1458  // See SnappyArrayWriter::AppendFromSelf for an explanation of
1459  // the "offset - 1u" trick.
1460  const size_t cur = Size();
1461  if (offset - 1u >= cur) return false;
1462  if (expected_ - cur < len) return false;
1463 
1464  // Currently we shouldn't ever hit this path because Compress() chops the
1465  // input into blocks and does not create cross-block copies. However, it is
1466  // nice if we do not rely on that, since we can get better compression if we
1467  // allow cross-block copies and thus might want to change the compressor in
1468  // the future.
1469  size_t src = cur - offset;
1470  while (len-- > 0) {
1471  char c = blocks_[src >> kBlockLog][src & (kBlockSize-1)];
1472  Append(&c, 1);
1473  src++;
1474  }
1475  return true;
1476 }
1477 
1478 class SnappySinkAllocator {
1479  public:
1480  explicit SnappySinkAllocator(Sink* dest): dest_(dest) {}
1481  ~SnappySinkAllocator() {}
1482 
1483  char* Allocate(int size) {
1484  Datablock block(new char[size], size);
1485  blocks_.push_back(block);
1486  return block.data;
1487  }
1488 
1489  // We flush only at the end, because the writer wants
1490  // random access to the blocks and once we hand the
1491  // block over to the sink, we can't access it anymore.
1492  // Also we don't write more than has been actually written
1493  // to the blocks.
1494  void Flush(size_t size) {
1495  size_t size_written = 0;
1496  for (size_t i = 0; i < blocks_.size(); ++i) {
1497  size_t block_size = min<size_t>(blocks_[i].size, size - size_written);
1498  dest_->AppendAndTakeOwnership(blocks_[i].data, block_size,
1499  &SnappySinkAllocator::Deleter, NULL);
1500  size_written += block_size;
1501  }
1502  blocks_.clear();
1503  }
1504 
1505  private:
1506  struct Datablock {
1507  char* data;
1508  size_t size;
1509  Datablock(char* p, size_t s) : data(p), size(s) {}
1510  };
1511 
1512  static void Deleter(void* arg, const char* bytes, size_t size) {
1513  delete[] bytes;
1514  }
1515 
1516  Sink* dest_;
1517  vector<Datablock> blocks_;
1518 
1519  // Note: copying this object is allowed
1520 };
1521 
1522 size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed) {
1523  SnappySinkAllocator allocator(uncompressed);
1524  SnappyScatteredWriter<SnappySinkAllocator> writer(allocator);
1525  InternalUncompress(compressed, &writer);
1526  return writer.Produced();
1527 }
1528 
1529 bool Uncompress(Source* compressed, Sink* uncompressed) {
1530  // Read the uncompressed length from the front of the compressed input
1531  SnappyDecompressor decompressor(compressed);
1532  uint32 uncompressed_len = 0;
1533  if (!decompressor.ReadUncompressedLength(&uncompressed_len)) {
1534  return false;
1535  }
1536 
1537  char c;
1538  size_t allocated_size;
1539  char* buf = uncompressed->GetAppendBufferVariable(
1540  1, uncompressed_len, &c, 1, &allocated_size);
1541 
1542  // If we can get a flat buffer, then use it, otherwise do block by block
1543  // uncompression
1544  if (allocated_size >= uncompressed_len) {
1545  SnappyArrayWriter writer(buf);
1546  bool result = InternalUncompressAllTags(
1547  &decompressor, &writer, uncompressed_len);
1548  uncompressed->Append(buf, writer.Produced());
1549  return result;
1550  } else {
1551  SnappySinkAllocator allocator(uncompressed);
1552  SnappyScatteredWriter<SnappySinkAllocator> writer(allocator);
1553  return InternalUncompressAllTags(&decompressor, &writer, uncompressed_len);
1554  }
1555 }
1556 
1557 } // end namespace snappy