encode_best.go 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796
  1. // Copyright 2016 The Snappy-Go Authors. All rights reserved.
  2. // Copyright (c) 2019 Klaus Post. All rights reserved.
  3. // Use of this source code is governed by a BSD-style
  4. // license that can be found in the LICENSE file.
  5. package s2
  6. import (
  7. "fmt"
  8. "math"
  9. "math/bits"
  10. )
// encodeBlockBest encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBest(dst, src []byte, dict *Dict) (d int) {
	// Initialize the hash tables.
	const (
		// Long hash matches.
		lTableBits    = 19
		maxLTableSize = 1 << lTableBits

		// Short hash matches.
		sTableBits    = 16
		maxSTableSize = 1 << sTableBits

		inputMargin = 8 + 2

		debug = false
	)

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin
	if len(src) < minNonLiteralBlockSize {
		return 0
	}
	// sLimitDict bounds how far into src dictionary matches may start.
	sLimitDict := len(src) - inputMargin
	if sLimitDict > MaxDictSrcOffset-inputMargin {
		sLimitDict = MaxDictSrcOffset - inputMargin
	}

	// Each table entry packs two candidate positions: the current one in the
	// low 32 bits and the previous one in the high 32 bits (see getCur/getPrev).
	var lTable [maxLTableSize]uint64
	var sTable [maxSTableSize]uint64

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - 5

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	repeat := 1
	if dict != nil {
		dict.initBest()
		s = 0
		repeat = len(dict.dict) - dict.repeat
	}
	cv := load64(src, s)

	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
	const lowbitMask = 0xffffffff
	// getCur extracts the most recent position stored in a table entry.
	getCur := func(x uint64) int {
		return int(x & lowbitMask)
	}
	// getPrev extracts the previous (older) position stored in a table entry.
	getPrev := func(x uint64) int {
		return int(x >> 32)
	}
	const maxSkip = 64

	for {
		type match struct {
			offset    int
			s         int
			length    int
			score     int
			rep, dict bool
		}
		var best match
		for {
			// Next src position to check
			nextS := (s-nextEmit)>>8 + 1
			if nextS > maxSkip {
				nextS = s + maxSkip
			} else {
				nextS += s
			}
			if nextS > sLimit {
				goto emitRemainder
			}
			// Once past MaxDictSrcOffset the dictionary can no longer be
			// referenced; drop it and invalidate a dict-relative repeat.
			if dict != nil && s >= MaxDictSrcOffset {
				dict = nil
				if repeat > s {
					repeat = math.MinInt32
				}
			}
			hashL := hash8(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL := lTable[hashL]
			candidateS := sTable[hashS]

			// score estimates net bytes saved by emitting match m:
			// matched length minus literal cost minus encoding cost.
			score := func(m match) int {
				// Matches that are longer forward are penalized since we must emit it as a literal.
				score := m.length - m.s
				if nextEmit == m.s {
					// If we do not have to emit literals, we save 1 byte
					score++
				}
				offset := m.s - m.offset
				if m.rep {
					return score - emitRepeatSize(offset, m.length)
				}
				return score - emitCopySize(offset, m.length)
			}

			// matchAt extends a candidate match within src and scores it.
			// m.length temporarily tracks the absolute end position until the
			// final `m.length -= offset`.
			matchAt := func(offset, s int, first uint32, rep bool) match {
				if best.length != 0 && best.s-best.offset == s-offset {
					// Don't retest if we have the same offset.
					return match{offset: offset, s: s}
				}
				if load32(src, offset) != first {
					return match{offset: offset, s: s}
				}
				m := match{offset: offset, s: s, length: 4 + offset, rep: rep}
				s += 4
				for s < len(src) {
					if len(src)-s < 8 {
						// Tail: compare byte-by-byte.
						if src[s] == src[m.length] {
							m.length++
							s++
							continue
						}
						break
					}
					if diff := load64(src, s) ^ load64(src, m.length); diff != 0 {
						m.length += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					m.length += 8
				}
				m.length -= offset
				m.score = score(m)
				if m.score <= -m.s {
					// Eliminate if no savings, we might find a better one.
					m.length = 0
				}
				return m
			}

			// matchDict extends a candidate match that starts inside the
			// dictionary. `candidate` is a position in dict.dict; the offset
			// is expressed as if dict and src form one continuous buffer.
			matchDict := func(candidate, s int, first uint32, rep bool) match {
				if s >= MaxDictSrcOffset {
					return match{offset: candidate, s: s}
				}
				// Calculate offset as if in continuous array with s
				offset := -len(dict.dict) + candidate
				if best.length != 0 && best.s-best.offset == s-offset && !rep {
					// Don't retest if we have the same offset.
					return match{offset: offset, s: s}
				}

				if load32(dict.dict, candidate) != first {
					return match{offset: offset, s: s}
				}

				m := match{offset: offset, s: s, length: 4 + candidate, rep: rep, dict: true}
				s += 4
				if !rep {
					for s < sLimitDict && m.length < len(dict.dict) {
						if len(src)-s < 8 || len(dict.dict)-m.length < 8 {
							if src[s] == dict.dict[m.length] {
								m.length++
								s++
								continue
							}
							break
						}
						if diff := load64(src, s) ^ load64(dict.dict, m.length); diff != 0 {
							m.length += bits.TrailingZeros64(diff) >> 3
							break
						}
						s += 8
						m.length += 8
					}
				} else {
					// Repeat matches may run to the end of src.
					for s < len(src) && m.length < len(dict.dict) {
						if len(src)-s < 8 || len(dict.dict)-m.length < 8 {
							if src[s] == dict.dict[m.length] {
								m.length++
								s++
								continue
							}
							break
						}
						if diff := load64(src, s) ^ load64(dict.dict, m.length); diff != 0 {
							m.length += bits.TrailingZeros64(diff) >> 3
							break
						}
						s += 8
						m.length += 8
					}
				}
				m.length -= candidate
				m.score = score(m)
				if m.score <= -m.s {
					// Eliminate if no savings, we might find a better one.
					m.length = 0
				}
				return m
			}

			// bestOf picks the match with the better score; ties are resolved
			// against a shared baseline (each score is offset by the other's s).
			bestOf := func(a, b match) match {
				if b.length == 0 {
					return a
				}
				if a.length == 0 {
					return b
				}
				as := a.score + b.s
				bs := b.score + a.s
				if as >= bs {
					return a
				}
				return b
			}

			if s > 0 {
				best = bestOf(matchAt(getCur(candidateL), s, uint32(cv), false), matchAt(getPrev(candidateL), s, uint32(cv), false))
				best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv), false))
				best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv), false))
			}
			if dict != nil {
				candidateL := dict.bestTableLong[hashL]
				candidateS := dict.bestTableShort[hashS]
				best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
				best = bestOf(best, matchDict(int(candidateL>>16), s, uint32(cv), false))
				best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
				best = bestOf(best, matchDict(int(candidateS>>16), s, uint32(cv), false))
			}
			{
				// Check repeat at s+1 (or the dict-relative repeat position).
				if (dict == nil || repeat <= s) && repeat > 0 {
					best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8), true))
				} else if s-repeat < -4 && dict != nil {
					candidate := len(dict.dict) - (repeat - s)
					best = bestOf(best, matchDict(candidate, s, uint32(cv), true))
					candidate++
					best = bestOf(best, matchDict(candidate, s+1, uint32(cv>>8), true))
				}

				// If we found a match, also probe positions s+1 and s+2 for
				// potentially better matches before committing.
				if best.length > 0 {
					hashS := hash4(cv>>8, sTableBits)
					// s+1
					nextShort := sTable[hashS]
					s := s + 1
					cv := load64(src, s)
					hashL := hash8(cv, lTableBits)
					nextLong := lTable[hashL]
					best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false))
					best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false))
					best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false))
					best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false))

					// Dict at + 1
					if dict != nil {
						candidateL := dict.bestTableLong[hashL]
						candidateS := dict.bestTableShort[hashS]

						best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
						best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
					}

					// s+2
					if true {
						hashS := hash4(cv>>8, sTableBits)

						nextShort = sTable[hashS]
						s++
						cv = load64(src, s)
						hashL := hash8(cv, lTableBits)
						nextLong = lTable[hashL]

						if (dict == nil || repeat <= s) && repeat > 0 {
							// Repeat at + 2
							best = bestOf(best, matchAt(s-repeat, s, uint32(cv), true))
						} else if repeat-s > 4 && dict != nil {
							candidate := len(dict.dict) - (repeat - s)
							best = bestOf(best, matchDict(candidate, s, uint32(cv), true))
						}
						best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false))
						best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false))
						best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false))
						best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false))

						// Dict at +2
						// Very small gain
						if dict != nil {
							candidateL := dict.bestTableLong[hashL]
							candidateS := dict.bestTableShort[hashS]

							best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
							best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
						}
					}
					// Search for a match at best match end, see if that is better.
					// Allow some bytes at the beginning to mismatch.
					// Sweet spot is around 1-2 bytes, but depends on input.
					// The skipped bytes are tested in Extend backwards,
					// and still picked up as part of the match if they do.
					const skipBeginning = 2
					const skipEnd = 1
					if sAt := best.s + best.length - skipEnd; sAt < sLimit {
						sBack := best.s + skipBeginning - skipEnd
						backL := best.length - skipBeginning
						// Load initial values
						cv = load64(src, sBack)

						// Grab candidates...
						next := lTable[hash8(load64(src, sAt), lTableBits)]

						if checkAt := getCur(next) - backL; checkAt > 0 {
							best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
						}
						if checkAt := getPrev(next) - backL; checkAt > 0 {
							best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
						}
						// Disabled: Extremely small gain
						if false {
							next = sTable[hash4(load64(src, sAt), sTableBits)]
							if checkAt := getCur(next) - backL; checkAt > 0 {
								best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
							}
							if checkAt := getPrev(next) - backL; checkAt > 0 {
								best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
							}
						}
					}
				}
			}

			// Update table: push the current position, demoting the old
			// "current" candidate to the high (previous) slot.
			lTable[hashL] = uint64(s) | candidateL<<32
			sTable[hashS] = uint64(s) | candidateS<<32

			if best.length > 0 {
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards, not needed for repeats...
		s = best.s
		if !best.rep && !best.dict {
			for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] {
				best.offset--
				best.length++
				s--
			}
		}
		if false && best.offset >= s {
			panic(fmt.Errorf("t %d >= s %d", best.offset, s))
		}
		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := s - best.offset
		s += best.length

		if offset > 65535 && s-base <= 5 && !best.rep {
			// Bail if the match is equal or worse to the encoding.
			s = best.s + 1
			if s >= sLimit {
				goto emitRemainder
			}
			cv = load64(src, s)
			continue
		}
		if debug && nextEmit != base {
			fmt.Println("EMIT", base-nextEmit, "literals. base-after:", base)
		}
		d += emitLiteral(dst[d:], src[nextEmit:base])
		if best.rep {
			if nextEmit > 0 || best.dict {
				if debug {
					fmt.Println("REPEAT, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
				}
				// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
				d += emitRepeat(dst[d:], offset, best.length)
			} else {
				// First match without dict cannot be a repeat.
				if debug {
					fmt.Println("COPY, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
				}
				d += emitCopy(dst[d:], offset, best.length)
			}
		} else {
			if debug {
				fmt.Println("COPY, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
			}
			d += emitCopy(dst[d:], offset, best.length)
		}
		repeat = offset

		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Do we have space for more, if not bail.
			return 0
		}
		// Fill tables for every position the match skipped over.
		for i := best.s + 1; i < s; i++ {
			cv0 := load64(src, i)
			long0 := hash8(cv0, lTableBits)
			short0 := hash4(cv0, sTableBits)
			lTable[long0] = uint64(i) | lTable[long0]<<32
			sTable[short0] = uint64(i) | sTable[short0]<<32
		}
		cv = load64(src, s)
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		if debug && nextEmit != s {
			fmt.Println("emitted ", len(src)-nextEmit, "literals")
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
}
// encodeBlockBestSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBestSnappy(dst, src []byte) (d int) {
	// Initialize the hash tables.
	const (
		// Long hash matches.
		lTableBits    = 19
		maxLTableSize = 1 << lTableBits

		// Short hash matches.
		sTableBits    = 16
		maxSTableSize = 1 << sTableBits

		inputMargin = 8 + 2
	)

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin
	if len(src) < minNonLiteralBlockSize {
		return 0
	}

	// Each table entry packs two candidate positions: the current one in the
	// low 32 bits and the previous one in the high 32 bits (see getCur/getPrev).
	var lTable [maxLTableSize]uint64
	var sTable [maxSTableSize]uint64

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - 5

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	cv := load64(src, s)

	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
	repeat := 1
	const lowbitMask = 0xffffffff
	// getCur extracts the most recent position stored in a table entry.
	getCur := func(x uint64) int {
		return int(x & lowbitMask)
	}
	// getPrev extracts the previous (older) position stored in a table entry.
	getPrev := func(x uint64) int {
		return int(x >> 32)
	}
	const maxSkip = 64

	for {
		type match struct {
			offset int
			s      int
			length int
			score  int
		}
		var best match
		for {
			// Next src position to check
			nextS := (s-nextEmit)>>8 + 1
			if nextS > maxSkip {
				nextS = s + maxSkip
			} else {
				nextS += s
			}
			if nextS > sLimit {
				goto emitRemainder
			}
			hashL := hash8(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL := lTable[hashL]
			candidateS := sTable[hashS]

			// score estimates net bytes saved by emitting match m: matched
			// length minus literal cost minus the Snappy copy encoding cost.
			score := func(m match) int {
				// Matches that are longer forward are penalized since we must emit it as a literal.
				score := m.length - m.s
				if nextEmit == m.s {
					// If we do not have to emit literals, we save 1 byte
					score++
				}
				offset := m.s - m.offset

				return score - emitCopyNoRepeatSize(offset, m.length)
			}

			// matchAt extends a candidate match within src and scores it.
			// m.length temporarily tracks the absolute end position until the
			// final `m.length -= offset`.
			matchAt := func(offset, s int, first uint32) match {
				if best.length != 0 && best.s-best.offset == s-offset {
					// Don't retest if we have the same offset.
					return match{offset: offset, s: s}
				}
				if load32(src, offset) != first {
					return match{offset: offset, s: s}
				}
				m := match{offset: offset, s: s, length: 4 + offset}
				s += 4
				for s <= sLimit {
					if diff := load64(src, s) ^ load64(src, m.length); diff != 0 {
						m.length += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					m.length += 8
				}
				m.length -= offset
				m.score = score(m)
				if m.score <= -m.s {
					// Eliminate if no savings, we might find a better one.
					m.length = 0
				}
				return m
			}

			// bestOf picks the match with the better score; ties are resolved
			// against a shared baseline (each score is offset by the other's s).
			bestOf := func(a, b match) match {
				if b.length == 0 {
					return a
				}
				if a.length == 0 {
					return b
				}
				as := a.score + b.s
				bs := b.score + a.s
				if as >= bs {
					return a
				}
				return b
			}

			best = bestOf(matchAt(getCur(candidateL), s, uint32(cv)), matchAt(getPrev(candidateL), s, uint32(cv)))
			best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv)))
			best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv)))

			{
				// Check repeat at s+1, then (if anything matched) also probe
				// positions s+1 and s+2 for potentially better matches.
				best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8)))
				if best.length > 0 {
					// s+1
					nextShort := sTable[hash4(cv>>8, sTableBits)]
					s := s + 1
					cv := load64(src, s)
					nextLong := lTable[hash8(cv, lTableBits)]
					best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv)))
					best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv)))
					best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv)))
					best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv)))
					// Repeat at + 2
					best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8)))
					// s+2
					if true {
						nextShort = sTable[hash4(cv>>8, sTableBits)]
						s++
						cv = load64(src, s)
						nextLong = lTable[hash8(cv, lTableBits)]
						best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv)))
						best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv)))
						best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv)))
						best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv)))
					}
					// Search for a match at best match end, see if that is better.
					if sAt := best.s + best.length; sAt < sLimit {
						sBack := best.s
						backL := best.length
						// Load initial values
						cv = load64(src, sBack)
						// Search for mismatch
						next := lTable[hash8(load64(src, sAt), lTableBits)]
						//next := sTable[hash4(load64(src, sAt), sTableBits)]

						if checkAt := getCur(next) - backL; checkAt > 0 {
							best = bestOf(best, matchAt(checkAt, sBack, uint32(cv)))
						}
						if checkAt := getPrev(next) - backL; checkAt > 0 {
							best = bestOf(best, matchAt(checkAt, sBack, uint32(cv)))
						}
					}
				}
			}

			// Update table: push the current position, demoting the old
			// "current" candidate to the high (previous) slot.
			lTable[hashL] = uint64(s) | candidateL<<32
			sTable[hashS] = uint64(s) | candidateS<<32

			if best.length > 0 {
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards, not needed for repeats...
		s = best.s
		if true {
			for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] {
				best.offset--
				best.length++
				s--
			}
		}
		if false && best.offset >= s {
			panic(fmt.Errorf("t %d >= s %d", best.offset, s))
		}
		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := s - best.offset

		s += best.length

		if offset > 65535 && s-base <= 5 {
			// Bail if the match is equal or worse to the encoding.
			s = best.s + 1
			if s >= sLimit {
				goto emitRemainder
			}
			cv = load64(src, s)
			continue
		}

		d += emitLiteral(dst[d:], src[nextEmit:base])
		d += emitCopyNoRepeat(dst[d:], offset, best.length)
		repeat = offset

		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Do we have space for more, if not bail.
			return 0
		}
		// Fill tables for every position the match skipped over.
		for i := best.s + 1; i < s; i++ {
			cv0 := load64(src, i)
			long0 := hash8(cv0, lTableBits)
			short0 := hash4(cv0, sTableBits)
			lTable[long0] = uint64(i) | lTable[long0]<<32
			sTable[short0] = uint64(i) | sTable[short0]<<32
		}
		cv = load64(src, s)
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
}
  644. // emitCopySize returns the size to encode the offset+length
  645. //
  646. // It assumes that:
  647. //
  648. // 1 <= offset && offset <= math.MaxUint32
  649. // 4 <= length && length <= 1 << 24
  650. func emitCopySize(offset, length int) int {
  651. if offset >= 65536 {
  652. i := 0
  653. if length > 64 {
  654. length -= 64
  655. if length >= 4 {
  656. // Emit remaining as repeats
  657. return 5 + emitRepeatSize(offset, length)
  658. }
  659. i = 5
  660. }
  661. if length == 0 {
  662. return i
  663. }
  664. return i + 5
  665. }
  666. // Offset no more than 2 bytes.
  667. if length > 64 {
  668. if offset < 2048 {
  669. // Emit 8 bytes, then rest as repeats...
  670. return 2 + emitRepeatSize(offset, length-8)
  671. }
  672. // Emit remaining as repeats, at least 4 bytes remain.
  673. return 3 + emitRepeatSize(offset, length-60)
  674. }
  675. if length >= 12 || offset >= 2048 {
  676. return 3
  677. }
  678. // Emit the remaining copy, encoded as 2 bytes.
  679. return 2
  680. }
  681. // emitCopyNoRepeatSize returns the size to encode the offset+length
  682. //
  683. // It assumes that:
  684. //
  685. // 1 <= offset && offset <= math.MaxUint32
  686. // 4 <= length && length <= 1 << 24
  687. func emitCopyNoRepeatSize(offset, length int) int {
  688. if offset >= 65536 {
  689. return 5 + 5*(length/64)
  690. }
  691. // Offset no more than 2 bytes.
  692. if length > 64 {
  693. // Emit remaining as repeats, at least 4 bytes remain.
  694. return 3 + 3*(length/60)
  695. }
  696. if length >= 12 || offset >= 2048 {
  697. return 3
  698. }
  699. // Emit the remaining copy, encoded as 2 bytes.
  700. return 2
  701. }
  702. // emitRepeatSize returns the number of bytes required to encode a repeat.
  703. // Length must be at least 4 and < 1<<24
  704. func emitRepeatSize(offset, length int) int {
  705. // Repeat offset, make length cheaper
  706. if length <= 4+4 || (length < 8+4 && offset < 2048) {
  707. return 2
  708. }
  709. if length < (1<<8)+4+4 {
  710. return 3
  711. }
  712. if length < (1<<16)+(1<<8)+4 {
  713. return 4
  714. }
  715. const maxRepeat = (1 << 24) - 1
  716. length -= (1 << 16) - 4
  717. left := 0
  718. if length > maxRepeat {
  719. left = length - maxRepeat + 4
  720. }
  721. if left > 0 {
  722. return 5 + emitRepeatSize(offset, left)
  723. }
  724. return 5
  725. }