decompress.go 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164
  1. package huff0
  2. import (
  3. "errors"
  4. "fmt"
  5. "io"
  6. "github.com/klauspost/compress/fse"
  7. )
  8. type dTable struct {
  9. single []dEntrySingle
  10. double []dEntryDouble
  11. }
  12. // single-symbols decoding
  13. type dEntrySingle struct {
  14. entry uint16
  15. }
  16. // double-symbols decoding
  17. type dEntryDouble struct {
  18. seq uint16
  19. nBits uint8
  20. len uint8
  21. }
  22. // Uses special code for all tables that are < 8 bits.
  23. const use8BitTables = true
  24. // ReadTable will read a table from the input.
  25. // The size of the input may be larger than the table definition.
  26. // Any content remaining after the table definition will be returned.
  27. // If no Scratch is provided a new one is allocated.
  28. // The returned Scratch can be used for encoding or decoding input using this table.
  29. func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) {
  30. s, err = s.prepare(in)
  31. if err != nil {
  32. return s, nil, err
  33. }
  34. if len(in) <= 1 {
  35. return s, nil, errors.New("input too small for table")
  36. }
  37. iSize := in[0]
  38. in = in[1:]
  39. if iSize >= 128 {
  40. // Uncompressed
  41. oSize := iSize - 127
  42. iSize = (oSize + 1) / 2
  43. if int(iSize) > len(in) {
  44. return s, nil, errors.New("input too small for table")
  45. }
  46. for n := uint8(0); n < oSize; n += 2 {
  47. v := in[n/2]
  48. s.huffWeight[n] = v >> 4
  49. s.huffWeight[n+1] = v & 15
  50. }
  51. s.symbolLen = uint16(oSize)
  52. in = in[iSize:]
  53. } else {
  54. if len(in) < int(iSize) {
  55. return s, nil, fmt.Errorf("input too small for table, want %d bytes, have %d", iSize, len(in))
  56. }
  57. // FSE compressed weights
  58. s.fse.DecompressLimit = 255
  59. hw := s.huffWeight[:]
  60. s.fse.Out = hw
  61. b, err := fse.Decompress(in[:iSize], s.fse)
  62. s.fse.Out = nil
  63. if err != nil {
  64. return s, nil, err
  65. }
  66. if len(b) > 255 {
  67. return s, nil, errors.New("corrupt input: output table too large")
  68. }
  69. s.symbolLen = uint16(len(b))
  70. in = in[iSize:]
  71. }
  72. // collect weight stats
  73. var rankStats [16]uint32
  74. weightTotal := uint32(0)
  75. for _, v := range s.huffWeight[:s.symbolLen] {
  76. if v > tableLogMax {
  77. return s, nil, errors.New("corrupt input: weight too large")
  78. }
  79. v2 := v & 15
  80. rankStats[v2]++
  81. // (1 << (v2-1)) is slower since the compiler cannot prove that v2 isn't 0.
  82. weightTotal += (1 << v2) >> 1
  83. }
  84. if weightTotal == 0 {
  85. return s, nil, errors.New("corrupt input: weights zero")
  86. }
  87. // get last non-null symbol weight (implied, total must be 2^n)
  88. {
  89. tableLog := highBit32(weightTotal) + 1
  90. if tableLog > tableLogMax {
  91. return s, nil, errors.New("corrupt input: tableLog too big")
  92. }
  93. s.actualTableLog = uint8(tableLog)
  94. // determine last weight
  95. {
  96. total := uint32(1) << tableLog
  97. rest := total - weightTotal
  98. verif := uint32(1) << highBit32(rest)
  99. lastWeight := highBit32(rest) + 1
  100. if verif != rest {
  101. // last value must be a clean power of 2
  102. return s, nil, errors.New("corrupt input: last value not power of two")
  103. }
  104. s.huffWeight[s.symbolLen] = uint8(lastWeight)
  105. s.symbolLen++
  106. rankStats[lastWeight]++
  107. }
  108. }
  109. if (rankStats[1] < 2) || (rankStats[1]&1 != 0) {
  110. // by construction : at least 2 elts of rank 1, must be even
  111. return s, nil, errors.New("corrupt input: min elt size, even check failed ")
  112. }
  113. // TODO: Choose between single/double symbol decoding
  114. // Calculate starting value for each rank
  115. {
  116. var nextRankStart uint32
  117. for n := uint8(1); n < s.actualTableLog+1; n++ {
  118. current := nextRankStart
  119. nextRankStart += rankStats[n] << (n - 1)
  120. rankStats[n] = current
  121. }
  122. }
  123. // fill DTable (always full size)
  124. tSize := 1 << tableLogMax
  125. if len(s.dt.single) != tSize {
  126. s.dt.single = make([]dEntrySingle, tSize)
  127. }
  128. cTable := s.prevTable
  129. if cap(cTable) < maxSymbolValue+1 {
  130. cTable = make([]cTableEntry, 0, maxSymbolValue+1)
  131. }
  132. cTable = cTable[:maxSymbolValue+1]
  133. s.prevTable = cTable[:s.symbolLen]
  134. s.prevTableLog = s.actualTableLog
  135. for n, w := range s.huffWeight[:s.symbolLen] {
  136. if w == 0 {
  137. cTable[n] = cTableEntry{
  138. val: 0,
  139. nBits: 0,
  140. }
  141. continue
  142. }
  143. length := (uint32(1) << w) >> 1
  144. d := dEntrySingle{
  145. entry: uint16(s.actualTableLog+1-w) | (uint16(n) << 8),
  146. }
  147. rank := &rankStats[w]
  148. cTable[n] = cTableEntry{
  149. val: uint16(*rank >> (w - 1)),
  150. nBits: uint8(d.entry),
  151. }
  152. single := s.dt.single[*rank : *rank+length]
  153. for i := range single {
  154. single[i] = d
  155. }
  156. *rank += length
  157. }
  158. return s, in, nil
  159. }
  160. // Decompress1X will decompress a 1X encoded stream.
  161. // The length of the supplied input must match the end of a block exactly.
  162. // Before this is called, the table must be initialized with ReadTable unless
  163. // the encoder re-used the table.
  164. // deprecated: Use the stateless Decoder() to get a concurrent version.
  165. func (s *Scratch) Decompress1X(in []byte) (out []byte, err error) {
  166. if cap(s.Out) < s.MaxDecodedSize {
  167. s.Out = make([]byte, s.MaxDecodedSize)
  168. }
  169. s.Out = s.Out[:0:s.MaxDecodedSize]
  170. s.Out, err = s.Decoder().Decompress1X(s.Out, in)
  171. return s.Out, err
  172. }
  173. // Decompress4X will decompress a 4X encoded stream.
  174. // Before this is called, the table must be initialized with ReadTable unless
  175. // the encoder re-used the table.
  176. // The length of the supplied input must match the end of a block exactly.
  177. // The destination size of the uncompressed data must be known and provided.
  178. // deprecated: Use the stateless Decoder() to get a concurrent version.
  179. func (s *Scratch) Decompress4X(in []byte, dstSize int) (out []byte, err error) {
  180. if dstSize > s.MaxDecodedSize {
  181. return nil, ErrMaxDecodedSizeExceeded
  182. }
  183. if cap(s.Out) < dstSize {
  184. s.Out = make([]byte, s.MaxDecodedSize)
  185. }
  186. s.Out = s.Out[:0:dstSize]
  187. s.Out, err = s.Decoder().Decompress4X(s.Out, in)
  188. return s.Out, err
  189. }
  190. // Decoder will return a stateless decoder that can be used by multiple
  191. // decompressors concurrently.
  192. // Before this is called, the table must be initialized with ReadTable.
  193. // The Decoder is still linked to the scratch buffer so that cannot be reused.
  194. // However, it is safe to discard the scratch.
  195. func (s *Scratch) Decoder() *Decoder {
  196. return &Decoder{
  197. dt: s.dt,
  198. actualTableLog: s.actualTableLog,
  199. }
  200. }
  201. // Decoder provides stateless decoding.
  202. type Decoder struct {
  203. dt dTable
  204. actualTableLog uint8
  205. }
  206. // Decompress1X will decompress a 1X encoded stream.
  207. // The cap of the output buffer will be the maximum decompressed size.
  208. // The length of the supplied input must match the end of a block exactly.
  209. func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
  210. if len(d.dt.single) == 0 {
  211. return nil, errors.New("no table loaded")
  212. }
  213. if use8BitTables && d.actualTableLog <= 8 {
  214. return d.decompress1X8Bit(dst, src)
  215. }
  216. var br bitReaderShifted
  217. err := br.init(src)
  218. if err != nil {
  219. return dst, err
  220. }
  221. maxDecodedSize := cap(dst)
  222. dst = dst[:0]
  223. // Avoid bounds check by always having full sized table.
  224. const tlSize = 1 << tableLogMax
  225. const tlMask = tlSize - 1
  226. dt := d.dt.single[:tlSize]
  227. // Use temp table to avoid bound checks/append penalty.
  228. var buf [256]byte
  229. var off uint8
  230. for br.off >= 8 {
  231. br.fillFast()
  232. v := dt[br.peekBitsFast(d.actualTableLog)&tlMask]
  233. br.advance(uint8(v.entry))
  234. buf[off+0] = uint8(v.entry >> 8)
  235. v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
  236. br.advance(uint8(v.entry))
  237. buf[off+1] = uint8(v.entry >> 8)
  238. // Refill
  239. br.fillFast()
  240. v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
  241. br.advance(uint8(v.entry))
  242. buf[off+2] = uint8(v.entry >> 8)
  243. v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
  244. br.advance(uint8(v.entry))
  245. buf[off+3] = uint8(v.entry >> 8)
  246. off += 4
  247. if off == 0 {
  248. if len(dst)+256 > maxDecodedSize {
  249. br.close()
  250. return nil, ErrMaxDecodedSizeExceeded
  251. }
  252. dst = append(dst, buf[:]...)
  253. }
  254. }
  255. if len(dst)+int(off) > maxDecodedSize {
  256. br.close()
  257. return nil, ErrMaxDecodedSizeExceeded
  258. }
  259. dst = append(dst, buf[:off]...)
  260. // br < 8, so uint8 is fine
  261. bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead
  262. for bitsLeft > 0 {
  263. br.fill()
  264. if false && br.bitsRead >= 32 {
  265. if br.off >= 4 {
  266. v := br.in[br.off-4:]
  267. v = v[:4]
  268. low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
  269. br.value = (br.value << 32) | uint64(low)
  270. br.bitsRead -= 32
  271. br.off -= 4
  272. } else {
  273. for br.off > 0 {
  274. br.value = (br.value << 8) | uint64(br.in[br.off-1])
  275. br.bitsRead -= 8
  276. br.off--
  277. }
  278. }
  279. }
  280. if len(dst) >= maxDecodedSize {
  281. br.close()
  282. return nil, ErrMaxDecodedSizeExceeded
  283. }
  284. v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask]
  285. nBits := uint8(v.entry)
  286. br.advance(nBits)
  287. bitsLeft -= nBits
  288. dst = append(dst, uint8(v.entry>>8))
  289. }
  290. return dst, br.close()
  291. }
  292. // decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8.
  293. // The cap of the output buffer will be the maximum decompressed size.
  294. // The length of the supplied input must match the end of a block exactly.
  295. func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
  296. if d.actualTableLog == 8 {
  297. return d.decompress1X8BitExactly(dst, src)
  298. }
  299. var br bitReaderBytes
  300. err := br.init(src)
  301. if err != nil {
  302. return dst, err
  303. }
  304. maxDecodedSize := cap(dst)
  305. dst = dst[:0]
  306. // Avoid bounds check by always having full sized table.
  307. dt := d.dt.single[:256]
  308. // Use temp table to avoid bound checks/append penalty.
  309. var buf [256]byte
  310. var off uint8
  311. shift := (8 - d.actualTableLog) & 7
  312. //fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog)
  313. for br.off >= 4 {
  314. br.fillFast()
  315. v := dt[br.peekByteFast()>>shift]
  316. br.advance(uint8(v.entry))
  317. buf[off+0] = uint8(v.entry >> 8)
  318. v = dt[br.peekByteFast()>>shift]
  319. br.advance(uint8(v.entry))
  320. buf[off+1] = uint8(v.entry >> 8)
  321. v = dt[br.peekByteFast()>>shift]
  322. br.advance(uint8(v.entry))
  323. buf[off+2] = uint8(v.entry >> 8)
  324. v = dt[br.peekByteFast()>>shift]
  325. br.advance(uint8(v.entry))
  326. buf[off+3] = uint8(v.entry >> 8)
  327. off += 4
  328. if off == 0 {
  329. if len(dst)+256 > maxDecodedSize {
  330. br.close()
  331. return nil, ErrMaxDecodedSizeExceeded
  332. }
  333. dst = append(dst, buf[:]...)
  334. }
  335. }
  336. if len(dst)+int(off) > maxDecodedSize {
  337. br.close()
  338. return nil, ErrMaxDecodedSizeExceeded
  339. }
  340. dst = append(dst, buf[:off]...)
  341. // br < 4, so uint8 is fine
  342. bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead))
  343. for bitsLeft > 0 {
  344. if br.bitsRead >= 64-8 {
  345. for br.off > 0 {
  346. br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
  347. br.bitsRead -= 8
  348. br.off--
  349. }
  350. }
  351. if len(dst) >= maxDecodedSize {
  352. br.close()
  353. return nil, ErrMaxDecodedSizeExceeded
  354. }
  355. v := dt[br.peekByteFast()>>shift]
  356. nBits := uint8(v.entry)
  357. br.advance(nBits)
  358. bitsLeft -= int8(nBits)
  359. dst = append(dst, uint8(v.entry>>8))
  360. }
  361. return dst, br.close()
  362. }
  363. // decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8.
  364. // The cap of the output buffer will be the maximum decompressed size.
  365. // The length of the supplied input must match the end of a block exactly.
  366. func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) {
  367. var br bitReaderBytes
  368. err := br.init(src)
  369. if err != nil {
  370. return dst, err
  371. }
  372. maxDecodedSize := cap(dst)
  373. dst = dst[:0]
  374. // Avoid bounds check by always having full sized table.
  375. dt := d.dt.single[:256]
  376. // Use temp table to avoid bound checks/append penalty.
  377. var buf [256]byte
  378. var off uint8
  379. const shift = 0
  380. //fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog)
  381. for br.off >= 4 {
  382. br.fillFast()
  383. v := dt[br.peekByteFast()>>shift]
  384. br.advance(uint8(v.entry))
  385. buf[off+0] = uint8(v.entry >> 8)
  386. v = dt[br.peekByteFast()>>shift]
  387. br.advance(uint8(v.entry))
  388. buf[off+1] = uint8(v.entry >> 8)
  389. v = dt[br.peekByteFast()>>shift]
  390. br.advance(uint8(v.entry))
  391. buf[off+2] = uint8(v.entry >> 8)
  392. v = dt[br.peekByteFast()>>shift]
  393. br.advance(uint8(v.entry))
  394. buf[off+3] = uint8(v.entry >> 8)
  395. off += 4
  396. if off == 0 {
  397. if len(dst)+256 > maxDecodedSize {
  398. br.close()
  399. return nil, ErrMaxDecodedSizeExceeded
  400. }
  401. dst = append(dst, buf[:]...)
  402. }
  403. }
  404. if len(dst)+int(off) > maxDecodedSize {
  405. br.close()
  406. return nil, ErrMaxDecodedSizeExceeded
  407. }
  408. dst = append(dst, buf[:off]...)
  409. // br < 4, so uint8 is fine
  410. bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead))
  411. for bitsLeft > 0 {
  412. if br.bitsRead >= 64-8 {
  413. for br.off > 0 {
  414. br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
  415. br.bitsRead -= 8
  416. br.off--
  417. }
  418. }
  419. if len(dst) >= maxDecodedSize {
  420. br.close()
  421. return nil, ErrMaxDecodedSizeExceeded
  422. }
  423. v := dt[br.peekByteFast()>>shift]
  424. nBits := uint8(v.entry)
  425. br.advance(nBits)
  426. bitsLeft -= int8(nBits)
  427. dst = append(dst, uint8(v.entry>>8))
  428. }
  429. return dst, br.close()
  430. }
  431. // Decompress4X will decompress a 4X encoded stream.
  432. // The length of the supplied input must match the end of a block exactly.
  433. // The *capacity* of the dst slice must match the destination size of
  434. // the uncompressed data exactly.
  435. func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
  436. if len(d.dt.single) == 0 {
  437. return nil, errors.New("no table loaded")
  438. }
  439. if len(src) < 6+(4*1) {
  440. return nil, errors.New("input too small")
  441. }
  442. if use8BitTables && d.actualTableLog <= 8 {
  443. return d.decompress4X8bit(dst, src)
  444. }
  445. var br [4]bitReaderShifted
  446. start := 6
  447. for i := 0; i < 3; i++ {
  448. length := int(src[i*2]) | (int(src[i*2+1]) << 8)
  449. if start+length >= len(src) {
  450. return nil, errors.New("truncated input (or invalid offset)")
  451. }
  452. err := br[i].init(src[start : start+length])
  453. if err != nil {
  454. return nil, err
  455. }
  456. start += length
  457. }
  458. err := br[3].init(src[start:])
  459. if err != nil {
  460. return nil, err
  461. }
  462. // destination, offset to match first output
  463. dstSize := cap(dst)
  464. dst = dst[:dstSize]
  465. out := dst
  466. dstEvery := (dstSize + 3) / 4
  467. const tlSize = 1 << tableLogMax
  468. const tlMask = tlSize - 1
  469. single := d.dt.single[:tlSize]
  470. // Use temp table to avoid bound checks/append penalty.
  471. var buf [256]byte
  472. var off uint8
  473. var decoded int
  474. // Decode 2 values from each decoder/loop.
  475. const bufoff = 256 / 4
  476. for {
  477. if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
  478. break
  479. }
  480. {
  481. const stream = 0
  482. const stream2 = 1
  483. br[stream].fillFast()
  484. br[stream2].fillFast()
  485. val := br[stream].peekBitsFast(d.actualTableLog)
  486. v := single[val&tlMask]
  487. br[stream].advance(uint8(v.entry))
  488. buf[off+bufoff*stream] = uint8(v.entry >> 8)
  489. val2 := br[stream2].peekBitsFast(d.actualTableLog)
  490. v2 := single[val2&tlMask]
  491. br[stream2].advance(uint8(v2.entry))
  492. buf[off+bufoff*stream2] = uint8(v2.entry >> 8)
  493. val = br[stream].peekBitsFast(d.actualTableLog)
  494. v = single[val&tlMask]
  495. br[stream].advance(uint8(v.entry))
  496. buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
  497. val2 = br[stream2].peekBitsFast(d.actualTableLog)
  498. v2 = single[val2&tlMask]
  499. br[stream2].advance(uint8(v2.entry))
  500. buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8)
  501. }
  502. {
  503. const stream = 2
  504. const stream2 = 3
  505. br[stream].fillFast()
  506. br[stream2].fillFast()
  507. val := br[stream].peekBitsFast(d.actualTableLog)
  508. v := single[val&tlMask]
  509. br[stream].advance(uint8(v.entry))
  510. buf[off+bufoff*stream] = uint8(v.entry >> 8)
  511. val2 := br[stream2].peekBitsFast(d.actualTableLog)
  512. v2 := single[val2&tlMask]
  513. br[stream2].advance(uint8(v2.entry))
  514. buf[off+bufoff*stream2] = uint8(v2.entry >> 8)
  515. val = br[stream].peekBitsFast(d.actualTableLog)
  516. v = single[val&tlMask]
  517. br[stream].advance(uint8(v.entry))
  518. buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
  519. val2 = br[stream2].peekBitsFast(d.actualTableLog)
  520. v2 = single[val2&tlMask]
  521. br[stream2].advance(uint8(v2.entry))
  522. buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8)
  523. }
  524. off += 2
  525. if off == bufoff {
  526. if bufoff > dstEvery {
  527. return nil, errors.New("corruption detected: stream overrun 1")
  528. }
  529. copy(out, buf[:bufoff])
  530. copy(out[dstEvery:], buf[bufoff:bufoff*2])
  531. copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
  532. copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
  533. off = 0
  534. out = out[bufoff:]
  535. decoded += 256
  536. // There must at least be 3 buffers left.
  537. if len(out) < dstEvery*3 {
  538. return nil, errors.New("corruption detected: stream overrun 2")
  539. }
  540. }
  541. }
  542. if off > 0 {
  543. ioff := int(off)
  544. if len(out) < dstEvery*3+ioff {
  545. return nil, errors.New("corruption detected: stream overrun 3")
  546. }
  547. copy(out, buf[:off])
  548. copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
  549. copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
  550. copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
  551. decoded += int(off) * 4
  552. out = out[off:]
  553. }
  554. // Decode remaining.
  555. for i := range br {
  556. offset := dstEvery * i
  557. br := &br[i]
  558. bitsLeft := br.off*8 + uint(64-br.bitsRead)
  559. for bitsLeft > 0 {
  560. br.fill()
  561. if false && br.bitsRead >= 32 {
  562. if br.off >= 4 {
  563. v := br.in[br.off-4:]
  564. v = v[:4]
  565. low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
  566. br.value = (br.value << 32) | uint64(low)
  567. br.bitsRead -= 32
  568. br.off -= 4
  569. } else {
  570. for br.off > 0 {
  571. br.value = (br.value << 8) | uint64(br.in[br.off-1])
  572. br.bitsRead -= 8
  573. br.off--
  574. }
  575. }
  576. }
  577. // end inline...
  578. if offset >= len(out) {
  579. return nil, errors.New("corruption detected: stream overrun 4")
  580. }
  581. // Read value and increment offset.
  582. val := br.peekBitsFast(d.actualTableLog)
  583. v := single[val&tlMask].entry
  584. nBits := uint8(v)
  585. br.advance(nBits)
  586. bitsLeft -= uint(nBits)
  587. out[offset] = uint8(v >> 8)
  588. offset++
  589. }
  590. decoded += offset - dstEvery*i
  591. err = br.close()
  592. if err != nil {
  593. return nil, err
  594. }
  595. }
  596. if dstSize != decoded {
  597. return nil, errors.New("corruption detected: short output block")
  598. }
  599. return dst, nil
  600. }
  601. // Decompress4X will decompress a 4X encoded stream.
  602. // The length of the supplied input must match the end of a block exactly.
  603. // The *capacity* of the dst slice must match the destination size of
  604. // the uncompressed data exactly.
  605. func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
  606. if d.actualTableLog == 8 {
  607. return d.decompress4X8bitExactly(dst, src)
  608. }
  609. var br [4]bitReaderBytes
  610. start := 6
  611. for i := 0; i < 3; i++ {
  612. length := int(src[i*2]) | (int(src[i*2+1]) << 8)
  613. if start+length >= len(src) {
  614. return nil, errors.New("truncated input (or invalid offset)")
  615. }
  616. err := br[i].init(src[start : start+length])
  617. if err != nil {
  618. return nil, err
  619. }
  620. start += length
  621. }
  622. err := br[3].init(src[start:])
  623. if err != nil {
  624. return nil, err
  625. }
  626. // destination, offset to match first output
  627. dstSize := cap(dst)
  628. dst = dst[:dstSize]
  629. out := dst
  630. dstEvery := (dstSize + 3) / 4
  631. shift := (8 - d.actualTableLog) & 7
  632. const tlSize = 1 << 8
  633. const tlMask = tlSize - 1
  634. single := d.dt.single[:tlSize]
  635. // Use temp table to avoid bound checks/append penalty.
  636. var buf [256]byte
  637. var off uint8
  638. var decoded int
  639. // Decode 4 values from each decoder/loop.
  640. const bufoff = 256 / 4
  641. for {
  642. if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
  643. break
  644. }
  645. {
  646. // Interleave 2 decodes.
  647. const stream = 0
  648. const stream2 = 1
  649. br[stream].fillFast()
  650. br[stream2].fillFast()
  651. v := single[br[stream].peekByteFast()>>shift].entry
  652. buf[off+bufoff*stream] = uint8(v >> 8)
  653. br[stream].advance(uint8(v))
  654. v2 := single[br[stream2].peekByteFast()>>shift].entry
  655. buf[off+bufoff*stream2] = uint8(v2 >> 8)
  656. br[stream2].advance(uint8(v2))
  657. v = single[br[stream].peekByteFast()>>shift].entry
  658. buf[off+bufoff*stream+1] = uint8(v >> 8)
  659. br[stream].advance(uint8(v))
  660. v2 = single[br[stream2].peekByteFast()>>shift].entry
  661. buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
  662. br[stream2].advance(uint8(v2))
  663. v = single[br[stream].peekByteFast()>>shift].entry
  664. buf[off+bufoff*stream+2] = uint8(v >> 8)
  665. br[stream].advance(uint8(v))
  666. v2 = single[br[stream2].peekByteFast()>>shift].entry
  667. buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
  668. br[stream2].advance(uint8(v2))
  669. v = single[br[stream].peekByteFast()>>shift].entry
  670. buf[off+bufoff*stream+3] = uint8(v >> 8)
  671. br[stream].advance(uint8(v))
  672. v2 = single[br[stream2].peekByteFast()>>shift].entry
  673. buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
  674. br[stream2].advance(uint8(v2))
  675. }
  676. {
  677. const stream = 2
  678. const stream2 = 3
  679. br[stream].fillFast()
  680. br[stream2].fillFast()
  681. v := single[br[stream].peekByteFast()>>shift].entry
  682. buf[off+bufoff*stream] = uint8(v >> 8)
  683. br[stream].advance(uint8(v))
  684. v2 := single[br[stream2].peekByteFast()>>shift].entry
  685. buf[off+bufoff*stream2] = uint8(v2 >> 8)
  686. br[stream2].advance(uint8(v2))
  687. v = single[br[stream].peekByteFast()>>shift].entry
  688. buf[off+bufoff*stream+1] = uint8(v >> 8)
  689. br[stream].advance(uint8(v))
  690. v2 = single[br[stream2].peekByteFast()>>shift].entry
  691. buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
  692. br[stream2].advance(uint8(v2))
  693. v = single[br[stream].peekByteFast()>>shift].entry
  694. buf[off+bufoff*stream+2] = uint8(v >> 8)
  695. br[stream].advance(uint8(v))
  696. v2 = single[br[stream2].peekByteFast()>>shift].entry
  697. buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
  698. br[stream2].advance(uint8(v2))
  699. v = single[br[stream].peekByteFast()>>shift].entry
  700. buf[off+bufoff*stream+3] = uint8(v >> 8)
  701. br[stream].advance(uint8(v))
  702. v2 = single[br[stream2].peekByteFast()>>shift].entry
  703. buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
  704. br[stream2].advance(uint8(v2))
  705. }
  706. off += 4
  707. if off == bufoff {
  708. if bufoff > dstEvery {
  709. return nil, errors.New("corruption detected: stream overrun 1")
  710. }
  711. copy(out, buf[:bufoff])
  712. copy(out[dstEvery:], buf[bufoff:bufoff*2])
  713. copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
  714. copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
  715. off = 0
  716. out = out[bufoff:]
  717. decoded += 256
  718. // There must at least be 3 buffers left.
  719. if len(out) < dstEvery*3 {
  720. return nil, errors.New("corruption detected: stream overrun 2")
  721. }
  722. }
  723. }
  724. if off > 0 {
  725. ioff := int(off)
  726. if len(out) < dstEvery*3+ioff {
  727. return nil, errors.New("corruption detected: stream overrun 3")
  728. }
  729. copy(out, buf[:off])
  730. copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
  731. copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
  732. copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
  733. decoded += int(off) * 4
  734. out = out[off:]
  735. }
  736. // Decode remaining.
  737. for i := range br {
  738. offset := dstEvery * i
  739. br := &br[i]
  740. bitsLeft := int(br.off*8) + int(64-br.bitsRead)
  741. for bitsLeft > 0 {
  742. if br.finished() {
  743. return nil, io.ErrUnexpectedEOF
  744. }
  745. if br.bitsRead >= 56 {
  746. if br.off >= 4 {
  747. v := br.in[br.off-4:]
  748. v = v[:4]
  749. low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
  750. br.value |= uint64(low) << (br.bitsRead - 32)
  751. br.bitsRead -= 32
  752. br.off -= 4
  753. } else {
  754. for br.off > 0 {
  755. br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
  756. br.bitsRead -= 8
  757. br.off--
  758. }
  759. }
  760. }
  761. // end inline...
  762. if offset >= len(out) {
  763. return nil, errors.New("corruption detected: stream overrun 4")
  764. }
  765. // Read value and increment offset.
  766. v := single[br.peekByteFast()>>shift].entry
  767. nBits := uint8(v)
  768. br.advance(nBits)
  769. bitsLeft -= int(nBits)
  770. out[offset] = uint8(v >> 8)
  771. offset++
  772. }
  773. decoded += offset - dstEvery*i
  774. err = br.close()
  775. if err != nil {
  776. return nil, err
  777. }
  778. }
  779. if dstSize != decoded {
  780. return nil, errors.New("corruption detected: short output block")
  781. }
  782. return dst, nil
  783. }
  784. // Decompress4X will decompress a 4X encoded stream.
  785. // The length of the supplied input must match the end of a block exactly.
  786. // The *capacity* of the dst slice must match the destination size of
  787. // the uncompressed data exactly.
  788. func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
  789. var br [4]bitReaderBytes
  790. start := 6
  791. for i := 0; i < 3; i++ {
  792. length := int(src[i*2]) | (int(src[i*2+1]) << 8)
  793. if start+length >= len(src) {
  794. return nil, errors.New("truncated input (or invalid offset)")
  795. }
  796. err := br[i].init(src[start : start+length])
  797. if err != nil {
  798. return nil, err
  799. }
  800. start += length
  801. }
  802. err := br[3].init(src[start:])
  803. if err != nil {
  804. return nil, err
  805. }
  806. // destination, offset to match first output
  807. dstSize := cap(dst)
  808. dst = dst[:dstSize]
  809. out := dst
  810. dstEvery := (dstSize + 3) / 4
  811. const shift = 0
  812. const tlSize = 1 << 8
  813. const tlMask = tlSize - 1
  814. single := d.dt.single[:tlSize]
  815. // Use temp table to avoid bound checks/append penalty.
  816. var buf [256]byte
  817. var off uint8
  818. var decoded int
  819. // Decode 4 values from each decoder/loop.
  820. const bufoff = 256 / 4
  821. for {
  822. if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
  823. break
  824. }
  825. {
  826. // Interleave 2 decodes.
  827. const stream = 0
  828. const stream2 = 1
  829. br[stream].fillFast()
  830. br[stream2].fillFast()
  831. v := single[br[stream].peekByteFast()>>shift].entry
  832. buf[off+bufoff*stream] = uint8(v >> 8)
  833. br[stream].advance(uint8(v))
  834. v2 := single[br[stream2].peekByteFast()>>shift].entry
  835. buf[off+bufoff*stream2] = uint8(v2 >> 8)
  836. br[stream2].advance(uint8(v2))
  837. v = single[br[stream].peekByteFast()>>shift].entry
  838. buf[off+bufoff*stream+1] = uint8(v >> 8)
  839. br[stream].advance(uint8(v))
  840. v2 = single[br[stream2].peekByteFast()>>shift].entry
  841. buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
  842. br[stream2].advance(uint8(v2))
  843. v = single[br[stream].peekByteFast()>>shift].entry
  844. buf[off+bufoff*stream+2] = uint8(v >> 8)
  845. br[stream].advance(uint8(v))
  846. v2 = single[br[stream2].peekByteFast()>>shift].entry
  847. buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
  848. br[stream2].advance(uint8(v2))
  849. v = single[br[stream].peekByteFast()>>shift].entry
  850. buf[off+bufoff*stream+3] = uint8(v >> 8)
  851. br[stream].advance(uint8(v))
  852. v2 = single[br[stream2].peekByteFast()>>shift].entry
  853. buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
  854. br[stream2].advance(uint8(v2))
  855. }
  856. {
  857. const stream = 2
  858. const stream2 = 3
  859. br[stream].fillFast()
  860. br[stream2].fillFast()
  861. v := single[br[stream].peekByteFast()>>shift].entry
  862. buf[off+bufoff*stream] = uint8(v >> 8)
  863. br[stream].advance(uint8(v))
  864. v2 := single[br[stream2].peekByteFast()>>shift].entry
  865. buf[off+bufoff*stream2] = uint8(v2 >> 8)
  866. br[stream2].advance(uint8(v2))
  867. v = single[br[stream].peekByteFast()>>shift].entry
  868. buf[off+bufoff*stream+1] = uint8(v >> 8)
  869. br[stream].advance(uint8(v))
  870. v2 = single[br[stream2].peekByteFast()>>shift].entry
  871. buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
  872. br[stream2].advance(uint8(v2))
  873. v = single[br[stream].peekByteFast()>>shift].entry
  874. buf[off+bufoff*stream+2] = uint8(v >> 8)
  875. br[stream].advance(uint8(v))
  876. v2 = single[br[stream2].peekByteFast()>>shift].entry
  877. buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
  878. br[stream2].advance(uint8(v2))
  879. v = single[br[stream].peekByteFast()>>shift].entry
  880. buf[off+bufoff*stream+3] = uint8(v >> 8)
  881. br[stream].advance(uint8(v))
  882. v2 = single[br[stream2].peekByteFast()>>shift].entry
  883. buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
  884. br[stream2].advance(uint8(v2))
  885. }
  886. off += 4
  887. if off == bufoff {
  888. if bufoff > dstEvery {
  889. return nil, errors.New("corruption detected: stream overrun 1")
  890. }
  891. copy(out, buf[:bufoff])
  892. copy(out[dstEvery:], buf[bufoff:bufoff*2])
  893. copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
  894. copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
  895. off = 0
  896. out = out[bufoff:]
  897. decoded += 256
  898. // There must at least be 3 buffers left.
  899. if len(out) < dstEvery*3 {
  900. return nil, errors.New("corruption detected: stream overrun 2")
  901. }
  902. }
  903. }
  904. if off > 0 {
  905. ioff := int(off)
  906. if len(out) < dstEvery*3+ioff {
  907. return nil, errors.New("corruption detected: stream overrun 3")
  908. }
  909. copy(out, buf[:off])
  910. copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
  911. copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
  912. copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
  913. decoded += int(off) * 4
  914. out = out[off:]
  915. }
  916. // Decode remaining.
  917. for i := range br {
  918. offset := dstEvery * i
  919. br := &br[i]
  920. bitsLeft := int(br.off*8) + int(64-br.bitsRead)
  921. for bitsLeft > 0 {
  922. if br.finished() {
  923. return nil, io.ErrUnexpectedEOF
  924. }
  925. if br.bitsRead >= 56 {
  926. if br.off >= 4 {
  927. v := br.in[br.off-4:]
  928. v = v[:4]
  929. low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
  930. br.value |= uint64(low) << (br.bitsRead - 32)
  931. br.bitsRead -= 32
  932. br.off -= 4
  933. } else {
  934. for br.off > 0 {
  935. br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
  936. br.bitsRead -= 8
  937. br.off--
  938. }
  939. }
  940. }
  941. // end inline...
  942. if offset >= len(out) {
  943. return nil, errors.New("corruption detected: stream overrun 4")
  944. }
  945. // Read value and increment offset.
  946. v := single[br.peekByteFast()>>shift].entry
  947. nBits := uint8(v)
  948. br.advance(nBits)
  949. bitsLeft -= int(nBits)
  950. out[offset] = uint8(v >> 8)
  951. offset++
  952. }
  953. decoded += offset - dstEvery*i
  954. err = br.close()
  955. if err != nil {
  956. return nil, err
  957. }
  958. }
  959. if dstSize != decoded {
  960. return nil, errors.New("corruption detected: short output block")
  961. }
  962. return dst, nil
  963. }
  964. // matches will compare a decoding table to a coding table.
  965. // Errors are written to the writer.
  966. // Nothing will be written if table is ok.
  967. func (s *Scratch) matches(ct cTable, w io.Writer) {
  968. if s == nil || len(s.dt.single) == 0 {
  969. return
  970. }
  971. dt := s.dt.single[:1<<s.actualTableLog]
  972. tablelog := s.actualTableLog
  973. ok := 0
  974. broken := 0
  975. for sym, enc := range ct {
  976. errs := 0
  977. broken++
  978. if enc.nBits == 0 {
  979. for _, dec := range dt {
  980. if uint8(dec.entry>>8) == byte(sym) {
  981. fmt.Fprintf(w, "symbol %x has decoder, but no encoder\n", sym)
  982. errs++
  983. break
  984. }
  985. }
  986. if errs == 0 {
  987. broken--
  988. }
  989. continue
  990. }
  991. // Unused bits in input
  992. ub := tablelog - enc.nBits
  993. top := enc.val << ub
  994. // decoder looks at top bits.
  995. dec := dt[top]
  996. if uint8(dec.entry) != enc.nBits {
  997. fmt.Fprintf(w, "symbol 0x%x bit size mismatch (enc: %d, dec:%d).\n", sym, enc.nBits, uint8(dec.entry))
  998. errs++
  999. }
  1000. if uint8(dec.entry>>8) != uint8(sym) {
  1001. fmt.Fprintf(w, "symbol 0x%x decoder output mismatch (enc: %d, dec:%d).\n", sym, sym, uint8(dec.entry>>8))
  1002. errs++
  1003. }
  1004. if errs > 0 {
  1005. fmt.Fprintf(w, "%d errros in base, stopping\n", errs)
  1006. continue
  1007. }
  1008. // Ensure that all combinations are covered.
  1009. for i := uint16(0); i < (1 << ub); i++ {
  1010. vval := top | i
  1011. dec := dt[vval]
  1012. if uint8(dec.entry) != enc.nBits {
  1013. fmt.Fprintf(w, "symbol 0x%x bit size mismatch (enc: %d, dec:%d).\n", vval, enc.nBits, uint8(dec.entry))
  1014. errs++
  1015. }
  1016. if uint8(dec.entry>>8) != uint8(sym) {
  1017. fmt.Fprintf(w, "symbol 0x%x decoder output mismatch (enc: %d, dec:%d).\n", vval, sym, uint8(dec.entry>>8))
  1018. errs++
  1019. }
  1020. if errs > 20 {
  1021. fmt.Fprintf(w, "%d errros, stopping\n", errs)
  1022. break
  1023. }
  1024. }
  1025. if errs == 0 {
  1026. ok++
  1027. broken--
  1028. }
  1029. }
  1030. if broken > 0 {
  1031. fmt.Fprintf(w, "%d broken, %d ok\n", broken, ok)
  1032. }
  1033. }