Turi Create  4.0
integer_pack.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_SFRAME_INTEGER_PACK_HPP
7 #define TURI_SFRAME_INTEGER_PACK_HPP
8 #include <cstdint>
9 #include <cstddef>
10 #include <algorithm>
11 #include <core/logging/logger.hpp>
12 #include <core/logging/assertions.hpp>
13 #include <core/storage/sframe_data/integer_pack_impl.hpp>
14 #include <core/util/bitops.hpp>
15 
16 namespace turi {
17 
18 
19 /**
20  * \ingroup sframe_physical
21  * \addtogroup Compression Integer Compression Routines
22  * \{
23  */
24 
25 /**
26  * \internal
27  * Integer Packing Routines
28  */
29 namespace integer_pack {
30 /**
31  * Performs a byte aligned variable length encode of 8 byte wide integers.
32  *
33  * It is important to remember that x86(-64) is little endian. i.e. the
34  * least significant byte is at the lowest memory address.
35  * This makes the presentation a little different from how most other
36  * integer compression blogs which generally "prefix" the binary string
37  * with stuff, thus in the most significant bits. This
38  * makes it very complicated to decode. Here we are instead going to
39  * attach stuff to the lowest significant bits which will then appear as a
40  * prefix when writing to a stream.
41  *
42  * The format is relatively straightforward.
43  * [... binary representation of number .. ] [0] [as many "1" bits as (length of
44  * number in bytes, rounded up. - 1)]
45  *
46  * Essentially this leaves 8 possible encodings
47  * - [ 7 bit number ] [0] --> 1 byte
48  * - [ 14 bit number ] [0] [1] --> 2 bytes
49  * - [ 21 bit number ] [0] [11] --> 3 bytes
50  * - [ 28 bit number ] [0] [111] --> 4 bytes
51  * - [ 35 bit number ] [0] [1111] --> 5 bytes
52  * - [ 42 bit number ] [0] [11111] --> 6 bytes
53  * - [ 49 bit number ] [0] [111111] --> 7 bytes
54  * - [ 64 bit number ] [0] [1111111] --> 9 bytes
55  *
56  * Note that there is no code of total length 8 bytes. (since the suffix has
57  * 7 "1"'s, the suffix is 1 byte long, and the remaining 8 bytes can hold all
58  * 64-bit numbers.
59  *
60  * The basic encode algorithm is then as such:
61  * if s >> [...] == 0 (test if s can be encoded with just that many bits)
62  * Then to put in the suffix, (s << ...) | (...)
63  *
64  * Example codes:
65  * [0001100] [0] -- decodes to --> [00001100] = 12
66  * [0001100] [0] -- decodes to --> [00001100] = 12
67  * [00000010] [001100] [0] [1] -- decodes to --> [00000010,00001100] = 524
68  */
69 template <typename OutArcType>
70 static inline void variable_encode(OutArcType& oarc, uint64_t s) {
71  if ((s >> (8-1)) == 0) {
72  // 0b00000000 - 0b01111111
73  unsigned char trunc_s = s << 1;
74  oarc.direct_assign(trunc_s);
75  }
76  else if ((s >> (16-2)) == 0) {
77  // 0b00000000 00000000 -
78  // 0b00111111 11111111
79  uint16_t trunc_s = s;
80  trunc_s = (trunc_s << 2) | 1;
81  oarc.direct_assign(trunc_s);
82  }
83  else if ((s >> (24-3)) == 0) {
84  // 0b00000000 00000000 00000000 -
85  // 0b00011111 11111111 11111111
86  uint32_t trunc_s = static_cast<uint32_t>(s);
87  trunc_s = (trunc_s << 3) | 3;
88  oarc.write((char*)(&trunc_s), 3);
89  }
90  else if ((s >> (32-4)) == 0) {
91  // 0b00000000 00000000 00000000 00000000 -
92  // 0b00001111 11111111 11111111 11111111
93  uint32_t trunc_s = static_cast<uint32_t>(s);
94  trunc_s = (trunc_s << 4) | 7;
95  oarc.direct_assign(trunc_s);
96  }
97  else if ((s >> (40-5)) == 0) {
98  // 0b00000000 00000000 00000000 00000000 00000000 -
99  // 0b00000111 11111111 11111111 11111111 11111111
100  uint64_t trunc_s = s;
101  trunc_s = (trunc_s << 5) | 15;
102  oarc.write((char*)(&trunc_s), 5);
103  }
104  else if ((s >> (48-6)) == 0) {
105  // 0b00000000 00000000 00000000 00000000 00000000 00000000 -
106  // 0b00000011 11111111 11111111 11111111 11111111 11111111
107  uint64_t trunc_s = s;
108  trunc_s = (trunc_s << 6) | 31;
109  oarc.write((char*)(&trunc_s), 6);
110  }
111  else if ((s >> (56-7)) == 0) {
112  // 0b00000000 00000000 00000000 00000000 00000000 00000000 00000000 -
113  // 0b00000001 11111111 11111111 11111111 11111111 11111111 11111111
114  uint64_t trunc_s = s;
115  trunc_s = (trunc_s << 7) | 63;
116  oarc.write((char*)(&trunc_s), 7);
117  }
118  else {
119  // all 8 bytes
120  unsigned char c = 127;
121  oarc.direct_assign(c);
122  oarc.direct_assign(s);
123  }
124 }
125 
126 /**
127  * Performs a byte aligned variable length decode of 8 byte wide integers.
128  * See the documentation for \ref variable_encode() for details on the encoding.
129  */
130 template <typename InArcType>
131 static inline void variable_decode(InArcType& iarc, uint64_t & s) {
132  union {
133  uint64_t intval;
134  char chars[8];
135  } c;
136  c.intval = 0;
137  iarc.read(c.chars, 1);
138  if ((c.intval & 1) == 0) {
139  s = c.intval >> 1;
140  } else if ((c.intval & 3) == 1) {
141  // read a second byte
142  iarc.read(c.chars + 1, 1);
143  s = c.intval >> 2;
144  } else if ((c.intval & 7) == 3) {
145  // read 3 byte
146  iarc.read(c.chars + 1, 2);
147  s = c.intval >> 3;
148  } else if ((c.intval & 15) == 7) {
149  iarc.read(c.chars + 1, 3);
150  s = c.intval >> 4;
151  } else if ((c.intval & 31) == 15) {
152  iarc.read(c.chars + 1, 4);
153  s = c.intval >> 5;
154  } else if ((c.intval & 63) == 31) {
155  iarc.read(c.chars + 1, 5);
156  s = c.intval >> 6;
157  } else if ((c.intval & 127) == 63) {
158  iarc.read(c.chars + 1, 6);
159  s = c.intval >> 7;
160  } else {
161  // read 8 more bytes
162  iarc.read(c.chars, 8);
163  s = c.intval;
164  }
165 }
166 
167 /**
168  * Maps values [0,-1,1,-2,2,-3,3,-4,4...] to [0,1,2,3,4,5,6,...]
169  * return 2*|val| - sign.
170  *
171  * (This is equivalent Google Protobuf's ZigZag encoding
172  * (see https://developers.google.com/protocol-buffers/docs/encoding#types)
173  * and can be more compactly written as (n << 1) ^ (n >> 63)
174  * (requiring only 3 ops instead of 5 ops here)
175  *
176  */
177 inline uint64_t shifted_integer_encode(int64_t val) {
178  // if val < 0 sign == -1, else sign = 0;
179  int64_t sign = (val >> 63);
180  // turning a negative value into a positive value is ~(t - 1)
181  uint64_t absval = (val + sign) ^ sign; // compute the absolute value
182  return (absval << 1) + sign;
183 }
184 
185 /**
186  * Reverse of shifted_integer_encode.
187  * Maps values [0,1,2,3,4,5,6,...] to [0,-1,1,-2,2,-3,3,-4,4...]
188  */
189 inline int64_t shifted_integer_decode(uint64_t val) {
190  // if val < 0 sign == -1, else sign = 0;
191  int64_t sign = -(int64_t)(val & 1);
192  uint64_t absval = (val >> 1) - sign;
193  // turning a positive value into a negative value is also ~(t - 1)
194  return (absval + sign) ^ sign;
195 }
196 
197 /**
198  * The codec number for "frame of reference" coding.
199  */
200 static constexpr unsigned char FRAME_OF_REFERENCE = 0;
201 
202 /**
203  * The codec number for "frame of reference" delta-coding.
204  */
205 static constexpr unsigned char FRAME_OF_REFERENCE_DELTA = 1;
206 /**
207  * The codec number for "frame of reference" delta-coding with negative numbers.
208  */
209 static constexpr unsigned char FRAME_OF_REFERENCE_DELTA_NEGATIVE = 2;
210 
211 /**
212  * Number of bits used by the header to store the codec number
213  */
214 static constexpr unsigned char FRAME_OF_REFERENCE_HEADER_NUM_BITS = 2;
215 
216 /**
217  * The mask you can apply to the codec header to extract the codec number
218  */
219 static constexpr unsigned char FRAME_OF_REFERENCE_HEADER_MASK = 3;
220 
221 /**
222  * Performs a group encode of a collection of up to 128 64-bit numbers.
223  *
224  * There are 3 basic strategies for computing the code.
225  * - Frame of Reference Coding: (code difference to a minimum value)
226  * Use \ref variable_encode() to code the smallest value. Then compute an
227  * array which is the difference of every number against the smallest value,
228  * and pack it using as few bits as possible. See below for the details
229  * on the packing.
230  * - Frame of Reference Delta Coding: (code incremental gaps)
231  * Use \ref variable_encode() to code the first value. Then compute a delta
232  * array (which is the the difference between consecutive values), and
233  * pack that using as few bits as possible. See below for the details on
234  * the packing.
235  * - Frame of Reference Delta Negative Coding: (Like delta, but supports negative gaps)
236  * Use \ref variable_encode() to code the first value. Then compute a delta
237  * array (which is the the difference between consecutive values), and
238  * apply the \ref shifted_integer_encode() to the delta array, and
239  * pack that using as few bits as possible. See below for the details on
240  * the packing.
241  *
242  * Packing
243  * -------
244  * After the values to be coded are generated (see above), the remaining numbers
245  * are packed by finding the maximum number of bits required to represent
246  * any value.
247  * i.e.
248  *
249  * [0,0,0,0,0,0] --> 0 bit maximum
250  * [1,0,1,0,1,0] --> 1 bit maximum
251  * [1,0,1,0,1,7] --> 3 bits maximum
252  * [1,0,16,0,1,7] --> 5 bits maximum
253  *
254  * This maximum is then rounded up to the nearest power of two (thus ensuring
255  * the coding/decoding is simple and is generally word-aligned.)
256  * i.e.
257  *
258  * [0,0,0,0,0,0] --> 0 bit code
259  * [1,0,1,0,1,0] --> 1 bit code
260  * [1,0,1,0,1,7] --> 4 bit code
261  * [1,0,16,0,1,7] --> 8 bit code
262  *
263  * Then one of the pack_... functions are used to code the values.
264  *
265  * Coding
266  * ------
267  * First there is a 1 byte header:
268  * [6 bit: 1 + log2 code length] [2 bits codec type]
269  * first 6 bits: If code length == 0 , we write 0 here. Otherwise, 1 + log2 code length;
270  * The codec type is one of:
271  * - FRAME_OF_REFERENCE
272  * - FRAME_OF_REFERENCE_DELTA
273  * - FRAME_OF_REFERENCE_DELTA_NEGATIVE
274  *
275  *
276  * \note The coding does not store the number of values stored. The decoder
277  * \ref frame_of_reference_decode_128() requires the number of values to
278  * decode correctly.
279  */
280 template <typename OutArcType>
281 GL_HOT
282 void frame_of_reference_encode_128(const uint64_t* input,
283  size_t len,
284  OutArcType& oarc) {
285  if (len == 0) return;
286  DASSERT_LE(len, 128);
287  // 3 possible encodings
288  // Frame of Reference (encode delta to the minimum value)
289  // Frame of Reference Delta Incremental (encode a minimum value then encode the deltas)
290  // Frame of Reference Delta (encode a minimum value then encode the deltas, supporting negative values)
291  //
292  // In Frame of Reference Delta, negative values are supported by mapping
293  // this sequence (0, -1,1,-2,2,-3,3,-4,4...) to the [0,1,2,3,4,5,6,...]
294  // (Forward mapping is -> if i positive, return 2i,
295  // else if i negative, return -2i - 1)
296  // To do this in bits interestingly, simply involves making the
297  // least significant bit the sign it. i.e.
298  // return (abs(t) << 1) + sgn(t)
299  // Note that the conversion is 1-1.
300  uint64_t minvalue = input[0];
301  uint64_t frame[128];
302  uint64_t delta[128];
303  uint64_t delta_negative[128];
304  unsigned char nbits = 0;
305  unsigned char nbits_frame = 0, nbits_delta = 255, nbits_delta_negative = 255;
306  bool is_incremental = true;
307  for (size_t i = 0;i < len; ++i) {
308  minvalue = std::min(minvalue, input[i]);
309  if (i > 0 && input[i] < input[i-1]) {is_incremental = false; break;}
310  }
311 
312  // Calculate the frame in the same loop as the rest
313  frame[0] = input[0] - minvalue;
314  uint64_t all_or_frame = frame[0];
315 
316  if (is_incremental) {
317  nbits_delta = 0;
318  delta[0] = input[0];
319 
320  uint64_t all_or = 0;
321  for (size_t i = 1; i < len; ++i) {
322  delta[i] = input[i] - input[i-1];
323  all_or |= delta[i];
324  frame[i] = input[i] - minvalue;
325  all_or_frame |= frame[i];
326  }
327 
328  nbits_delta = 64 - n_leading_zeros(all_or);
329  } else {
330  nbits_delta_negative = 0;
331  delta_negative[0] = input[0];
332 
333  uint64_t all_or = 0;
334  for (size_t i = 1;i < len; ++i) {
335  delta_negative[i] = shifted_integer_encode((int64_t)input[i] - (int64_t)input[i-1]);
336  all_or |= delta_negative[i];
337  frame[i] = input[i] - minvalue;
338  all_or_frame |= frame[i];
339  }
340 
341  nbits_delta_negative = 64 - n_leading_zeros(all_or);
342  }
343  nbits_frame = 64 - n_leading_zeros(all_or_frame);
344 
345  // whats the most efficient encoding?
346  unsigned char coding_technique = 0;
347  if (nbits_frame <= nbits_delta && nbits_frame <= nbits_delta_negative) {
348  nbits = nbits_frame;
349  coding_technique = FRAME_OF_REFERENCE;
350  input = frame;
351  } else if (nbits_delta <= nbits_frame && nbits_delta <= nbits_delta_negative) {
352  nbits = nbits_delta;
353  coding_technique = FRAME_OF_REFERENCE_DELTA;
354  input = delta;
355  } else { //if (nbits_delta_negative <= nbits_frame && nbits_delta_negative <= nbits_delta)
356  nbits = nbits_delta_negative;
357  coding_technique = FRAME_OF_REFERENCE_DELTA_NEGATIVE;
358  input = delta_negative;
359  }
360  // encode the header
361  // round nbits to next power of 2.
362  --nbits;
363  nbits = nbits| (nbits >> 1);
364  nbits = nbits| (nbits >> 2);
365  nbits = nbits| (nbits >> 4);
366  ++nbits;
367  unsigned char header = coding_technique;
368  if (nbits > 0) {
369  unsigned char shiftpos = 64 - n_leading_zeros((uint64_t)nbits);
370  header = header + (shiftpos << 2);
371  }
372  oarc.direct_assign(header);
373 // logstream(LOG_INFO) << "Encoding header " << (int)(header) << ": " << len << std::endl;
374  if (coding_technique == FRAME_OF_REFERENCE) {
375  variable_encode(oarc, minvalue);
376  } else if (coding_technique == FRAME_OF_REFERENCE_DELTA ||
377  coding_technique == FRAME_OF_REFERENCE_DELTA_NEGATIVE) {
378  variable_encode(oarc, input[0]);
379  ++input;
380  --len;
381  }
382  if (nbits == 0) return;
383 // logstream(LOG_INFO) << "Encoding at bitrate: " << (int)nbits << std::endl;
384  uint8_t pack[128*8];
385  size_t bytes_used = 0;
386  switch(nbits) {
387  case 1:
388  bytes_used = pack_1(input, len, pack);
389  oarc.write((char*)pack, bytes_used);
390  break;
391  case 2:
392  bytes_used = pack_2(input, len, pack);
393  oarc.write((char*)pack, bytes_used);
394  break;
395  case 4:
396  bytes_used = pack_4(input, len, pack);
397  oarc.write((char*)pack, bytes_used);
398  break;
399  case 8:
400  bytes_used = pack_8(input, len, pack);
401  oarc.write((char*)pack, bytes_used);
402  break;
403  case 16:
404  bytes_used = pack_16(input, len, (uint16_t*)pack);
405  oarc.write((char*)pack, bytes_used);
406  break;
407  case 32:
408  bytes_used = pack_32(input, len, (uint32_t*)pack);
409  oarc.write((char*)pack, bytes_used);
410  break;
411  case 64:
412  oarc.write((char*)input, sizeof(uint64_t)*len);
413  break;
414  default:
415  ASSERT_TRUE(false);
416  __builtin_unreachable();
417  }
418 }
419 
420 
421 
422 /**
423  * Performs a group decode of a collection of up to 128 64-bit numbers.
424  * See \ref frame_of_reference_encode_128() for the encoding details.
425  */
426 template <typename InArcType>
427 void frame_of_reference_decode_128(InArcType& iarc,
428  size_t len,
429  uint64_t* output) {
430  if (len == 0) return;
431  DASSERT_LE(len, 128);
432  unsigned char header;
433  iarc.read_into(header);
434  unsigned char nbits = 0;
435  unsigned char shiftpos = header >> FRAME_OF_REFERENCE_HEADER_NUM_BITS;
436  unsigned char coding_technique = header & FRAME_OF_REFERENCE_HEADER_MASK;
437  uint64_t minvalue;
438  if (shiftpos > 0) nbits = 1 << (shiftpos - 1);
439  if (nbits == 0) {
440  // if nbits is 0 it really doesn't matter what the coding technique is.
441  // All will produce the same output
442  variable_decode(iarc, minvalue);
443  for (size_t i = 0;i < len; ++i) output[i] = minvalue;
444  return;
445  }
446 // logstream(LOG_INFO) << "Decoding header " << (int)(header) << ": " << len << std::endl;
447  if (coding_technique == FRAME_OF_REFERENCE) {
448  variable_decode(iarc, minvalue);
449  } else if (coding_technique == FRAME_OF_REFERENCE_DELTA ||
450  coding_technique == FRAME_OF_REFERENCE_DELTA_NEGATIVE) {
451  variable_decode(iarc, output[0]);
452  ++output;
453  --len;
454  }
455 
456  uint8_t pack[128*8];
457  size_t nbits_to_read = (size_t)(nbits) * len;
458  size_t nbytes_to_read = (nbits_to_read + 7) / 8;
459  switch(nbits) {
460  case 1:
461  iarc.read((char*)pack, nbytes_to_read);
462  unpack_1(pack, len, output);
463  break;
464  case 2:
465  iarc.read((char*)pack, nbytes_to_read);
466  unpack_2(pack, len, output);
467  break;
468  case 4:
469  iarc.read((char*)pack, nbytes_to_read);
470  unpack_4(pack, len, output);
471  break;
472  case 8:
473  iarc.read((char*)pack, nbytes_to_read);
474  unpack_8(pack, len, output);
475  break;
476  case 16:
477  iarc.read((char*)pack, nbytes_to_read);
478  unpack_16((uint16_t*)pack, len, output);
479  break;
480  case 32:
481  iarc.read((char*)pack, nbytes_to_read);
482  unpack_32((uint32_t*)pack, len, output);
483  break;
484  case 64:
485  iarc.read((char*)output, sizeof(uint64_t)*len);
486  break;
487  default:
488  ASSERT_TRUE(false);
489  __builtin_unreachable();
490  }
491 
492 
493  if (coding_technique == FRAME_OF_REFERENCE) {
494  for (size_t i = 0;i < len; ++i) {
495  output[i] += minvalue;
496  }
497  } else if (coding_technique == FRAME_OF_REFERENCE_DELTA) {
498  for (int i = 0;i < (int)len; ++i) {
499  // yes this will will go below 0. yes this is intentional
500  output[i] += output[i-1];
501  }
502  } else if (coding_technique == FRAME_OF_REFERENCE_DELTA_NEGATIVE) {
503  for (int i = 0;i < (int)len; ++i) {
504  output[i] = shifted_integer_decode(output[i]);
505  // yes this will will go below 0. yes this is intentional
506  output[i] += output[i-1];
507  }
508  }
509 }
510 
511 } // namespace integer_pack
512 
513 /// \}
514 } // namespace turi
515 #endif
uint64_t shifted_integer_encode(int64_t val)
static void variable_decode(InArcType &iarc, uint64_t &s)
static void unpack_16(const uint16_t *src, size_t nout_values, uint64_t *out)
static size_t pack_32(const uint64_t *src, size_t srclen, uint32_t *out)
static unsigned int n_leading_zeros(T v, _ENABLE_IF_UINT(T))
Definition: bitops.hpp:287
static constexpr unsigned char FRAME_OF_REFERENCE_HEADER_NUM_BITS
static size_t pack_4(const uint64_t *src, size_t srclen, uint8_t *out)
static size_t pack_8(const uint64_t *src, size_t srclen, uint8_t *out)
void frame_of_reference_decode_128(InArcType &iarc, size_t len, uint64_t *output)
static constexpr unsigned char FRAME_OF_REFERENCE_HEADER_MASK
static void variable_encode(OutArcType &oarc, uint64_t s)
static size_t pack_16(const uint64_t *src, size_t srclen, uint16_t *out)
static size_t pack_2(const uint64_t *src, size_t srclen, uint8_t *out)
static void unpack_1(const uint8_t *src, size_t nout_values, uint64_t *out)
static void unpack_2(const uint8_t *src, size_t nout_values, uint64_t *out)
static void unpack_32(const uint32_t *src, size_t nout_values, uint64_t *out)
static void unpack_8(const uint8_t *src, size_t nout_values, uint64_t *out)
static size_t pack_1(const uint64_t *src, size_t srclen, uint8_t *out)
#define ASSERT_TRUE(cond)
Definition: assertions.hpp:309
GL_HOT void frame_of_reference_encode_128(const uint64_t *input, size_t len, OutArcType &oarc)
static constexpr unsigned char FRAME_OF_REFERENCE_DELTA
static void unpack_4(const uint8_t *src, size_t nout_values, uint64_t *out)
static constexpr unsigned char FRAME_OF_REFERENCE_DELTA_NEGATIVE
int64_t shifted_integer_decode(uint64_t val)
static constexpr unsigned char FRAME_OF_REFERENCE