// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.

package zstd

import "fmt"

const (
	betterLongTableBits = 19                       // Bits used in the long match table
	betterLongTableSize = 1 << betterLongTableBits // Size of the table
	betterLongLen       = 8                        // Bytes used for table hash

	// Note: Increasing the short table bits or making the hash shorter
	// can actually lead to compression degradation since it will 'steal' more from the
	// long match table and match offsets are quite big.
	// This greatly depends on the type of input.
	betterShortTableBits = 13                        // Bits used in the short match table
	betterShortTableSize = 1 << betterShortTableBits // Size of the table
	betterShortLen       = 5                         // Bytes used for table hash

	betterLongTableShardCnt  = 1 << (betterLongTableBits - dictShardBits)    // Number of shards in the table
	betterLongTableShardSize = betterLongTableSize / betterLongTableShardCnt // Size of an individual shard

	betterShortTableShardCnt  = 1 << (betterShortTableBits - dictShardBits)     // Number of shards in the table
	betterShortTableShardSize = betterShortTableSize / betterShortTableShardCnt // Size of an individual shard
)
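
// prevEntry holds the newest offset stored in a long-table slot together with
// the offset it replaced, giving each slot an implicit chain of length 2.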
type prevEntry struct {
	offset int32
	prev   int32
}

// betterFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches.
// The long match table contains the previous entry with the same hash,
// effectively making it a "chain" of length 2.
// When we find a long match we choose between the two values and select the longest.
// When we find a short match, after checking the long, we check if we can find a long at n+1
// and that it is longer (lazy matching).
type betterFastEncoder struct {
	fastBase
	table     [betterShortTableSize]tableEntry
	longTable [betterLongTableSize]prevEntry
}
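
// betterFastEncoderDict adds dictionary support on top of betterFastEncoder.
// It keeps copies of the tables seeded from the dictionary plus per-shard
// dirty flags, so Reset only has to restore the shards a block actually wrote.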
type betterFastEncoderDict struct {
	betterFastEncoder
	dictTable            []tableEntry
	dictLongTable        []prevEntry
	shortTableShardDirty [betterShortTableShardCnt]bool
	longTableShardDirty  [betterLongTableShardCnt]bool
	allDirty             bool
}

// Encode improves compression...
func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	for e.cur >= e.bufferReset-int32(len(e.hist)) {
		if len(e.hist) == 0 {
			e.table = [betterShortTableSize]tableEntry{}
			e.longTable = [betterLongTableSize]prevEntry{}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			v2 := e.longTable[i].prev
			if v < minOff {
				v = 0
				v2 = 0
			} else {
				v = v - e.cur + e.maxMatchOff
				if v2 < minOff {
					v2 = 0
				} else {
					v2 = v2 - e.cur + e.maxMatchOff
				}
			}
			e.longTable[i] = prevEntry{
				offset: v,
				prev:   v2,
			}
		}
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 9

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}
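
	// Each encodeLoop iteration finds one match (a repeat match, a long match,
	// or a short match lazily upgraded to a long match at s+1), emits the
	// pending literals and the sequence, and then indexes the skipped positions.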
encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks
		canRepeat := len(blk.sequences) > 2
		var matched, index0 int32

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			off := s + e.cur
			e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
			index0 = s + 1

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					// Index match start+1 (long) -> s - 1
					index0 := s + repOff
					s += length + repOff

					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}
					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
						index0 += 2
					}
					cv = load6432(src, s)
					continue
				}
				const repOff2 = 1

				// We deviate from the reference encoder and also check offset 2.
				// Still slower and not much better, so disabled.
				// repIndex = s - offset2 + repOff2
				if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
					// Consider history as well.
					var seq seq
					length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff2
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 2
					seq.offset = 2
					if debugSequences {
						println("repeat sequence 2", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					s += length + repOff2
					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}

					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
						index0 += 2
					}
					cv = load6432(src, s)
					// Swap offsets
					offset1, offset2 = offset2, offset1
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := candidateL.offset - e.cur
			coffsetLP := candidateL.prev - e.cur

			// Check if we have a long match.
			if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetL+8, src) + 8
				t = coffsetL
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}

				if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
					// Found a long match, at least 8 bytes.
					prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
					if prevMatch > matched {
						matched = prevMatch
						t = coffsetLP
					}
					if debugAsserts && s <= t {
						panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
					}
					if debugAsserts && s-t > e.maxMatchOff {
						panic("s - t >e.maxMatchOff")
					}
					if debugMatches {
						println("long match")
					}
				}
				break
			}

			// Check if we have a long match on prev.
			if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetLP+8, src) + 8
				t = coffsetLP
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			coffsetS := candidateS.offset - e.cur

			// Check if we have a short match.
			if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				matched = e.matchlen(s+4, coffsetS+4, src) + 4

				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hashLen(cv, betterLongTableBits, betterLongLen)
				candidateL = e.longTable[nextHashL]
				coffsetL = candidateL.offset - e.cur

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("long match (after short)")
						}
						break
					}
				}

				// Check prev long...
				coffsetL = candidateL.prev - e.cur
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("prev long match (after short)")
						}
						break
					}
				}

				t = coffsetS
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// Try to find a better match by searching for a long match at the end of the current best match
		if s+matched < sLimit {
			// Allow some bytes at the beginning to mismatch.
			// Sweet spot is around 3 bytes, but depends on input.
			// The skipped bytes are tested in Extend backwards,
			// and still picked up as part of the match if they do.
			const skipBeginning = 3

			nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
			s2 := s + skipBeginning
			cv := load3232(src, s2)
			candidateL := e.longTable[nextHashL]
			coffsetL := candidateL.offset - e.cur - matched + skipBeginning
			if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
				// Found a long match, at least 4 bytes.
				matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4
				if matchedNext > matched {
					t = coffsetL
					s = s2
					matched = matchedNext
					if debugMatches {
						println("long match at end-of-match")
					}
				}
			}

			// Check prev long...
			if true {
				coffsetL = candidateL.prev - e.cur - matched + skipBeginning
				if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
					// Found a long match, at least 4 bytes.
					matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4
					if matchedNext > matched {
						t = coffsetL
						s = s2
						matched = matchedNext
						if debugMatches {
							println("prev long match at end-of-match")
						}
					}
				}
			}
		}

		// A match has been found. Update recent offsets.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the n-byte match as long as possible.
		l := matched

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) -> s - 1
		off := index0 + e.cur
		for index0 < s-1 {
			cv0 := load6432(src, index0)
			cv1 := cv0 >> 8
			h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
			e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
			index0 += 2
			off += 2
		}

		cv = load6432(src, s)
		if !canRepeat {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)

			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match
			l := 4 + e.matchlen(s+4, o2+4, src)

			e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
			e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// EncodeNoHist will encode a block with no history and no following blocks.
// Most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
	e.ensureHist(len(src))
	e.Encode(blk, src)
}

// Encode improves compression...
func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	for e.cur >= e.bufferReset-int32(len(e.hist)) {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			for i := range e.longTable[:] {
				e.longTable[i] = prevEntry{}
			}
			e.cur = e.maxMatchOff
			e.allDirty = true
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			v2 := e.longTable[i].prev
			if v < minOff {
				v = 0
				v2 = 0
			} else {
				v = v - e.cur + e.maxMatchOff
				if v2 < minOff {
					v2 = 0
				} else {
					v2 = v2 - e.cur + e.maxMatchOff
				}
			}
			e.longTable[i] = prevEntry{
				offset: v,
				prev:   v2,
			}
		}
		e.allDirty = true
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 9

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}
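
	// The loop below mirrors betterFastEncoder.Encode. The only difference is
	// that every write to e.table and e.longTable is followed by a
	// markShortShardDirty / markLongShardDirty call, so Reset knows which
	// shards must be restored from the dictionary tables.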
encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks
		canRepeat := len(blk.sequences) > 2
		var matched, index0 int32

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			off := s + e.cur
			e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
			e.markShortShardDirty(nextHashS)
			index0 = s + 1

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					// Index match start+1 (long) -> s - 1
					s += length + repOff

					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}
					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.markLongShardDirty(h0)
						h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
						e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
						e.markShortShardDirty(h1)
						index0 += 2
					}
					cv = load6432(src, s)
					continue
				}
				const repOff2 = 1

				// We deviate from the reference encoder and also check offset 2.
				// Still slower and not much better, so disabled.
				// repIndex = s - offset2 + repOff2
				if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
					// Consider history as well.
					var seq seq
					length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff2
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 2
					seq.offset = 2
					if debugSequences {
						println("repeat sequence 2", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					s += length + repOff2
					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}

					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.markLongShardDirty(h0)
						h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
						e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
						e.markShortShardDirty(h1)
						index0 += 2
					}
					cv = load6432(src, s)
					// Swap offsets
					offset1, offset2 = offset2, offset1
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := candidateL.offset - e.cur
			coffsetLP := candidateL.prev - e.cur

			// Check if we have a long match.
			if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetL+8, src) + 8
				t = coffsetL
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}

				if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
					// Found a long match, at least 8 bytes.
					prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
					if prevMatch > matched {
						matched = prevMatch
						t = coffsetLP
					}
					if debugAsserts && s <= t {
						panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
					}
					if debugAsserts && s-t > e.maxMatchOff {
						panic("s - t >e.maxMatchOff")
					}
					if debugMatches {
						println("long match")
					}
				}
				break
			}

			// Check if we have a long match on prev.
			if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetLP+8, src) + 8
				t = coffsetLP
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			coffsetS := candidateS.offset - e.cur

			// Check if we have a short match.
			if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				matched = e.matchlen(s+4, coffsetS+4, src) + 4

				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hashLen(cv, betterLongTableBits, betterLongLen)
				candidateL = e.longTable[nextHashL]
				coffsetL = candidateL.offset - e.cur

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
				e.markLongShardDirty(nextHashL)
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("long match (after short)")
						}
						break
					}
				}

				// Check prev long...
				coffsetL = candidateL.prev - e.cur
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("prev long match (after short)")
						}
						break
					}
				}

				t = coffsetS
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// Try to find a better match by searching for a long match at the end of the current best match
		if s+matched < sLimit {
			nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
			cv := load3232(src, s)
			candidateL := e.longTable[nextHashL]
			coffsetL := candidateL.offset - e.cur - matched
			if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
				// Found a long match, at least 4 bytes.
				matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
				if matchedNext > matched {
					t = coffsetL
					matched = matchedNext
					if debugMatches {
						println("long match at end-of-match")
					}
				}
			}

			// Check prev long...
			if true {
				coffsetL = candidateL.prev - e.cur - matched
				if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
					// Found a long match, at least 4 bytes.
					matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
					if matchedNext > matched {
						t = coffsetL
						matched = matchedNext
						if debugMatches {
							println("prev long match at end-of-match")
						}
					}
				}
			}
		}

		// A match has been found. Update recent offsets.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the n-byte match as long as possible.
		l := matched

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) -> s - 1
		off := index0 + e.cur
		for index0 < s-1 {
			cv0 := load6432(src, index0)
			cv1 := cv0 >> 8
			h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
			e.markLongShardDirty(h0)
			h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
			e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
			e.markShortShardDirty(h1)
			index0 += 2
			off += 2
		}

		cv = load6432(src, s)
		if !canRepeat {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)

			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match
			l := 4 + e.matchlen(s+4, o2+4, src)

			e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.markShortShardDirty(nextHashS)
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// Reset will reset and set a dictionary if not nil
func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d != nil {
		panic("betterFastEncoder: Reset with dict")
	}
}

// Reset will reset and set a dictionary if not nil
func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d == nil {
		return
	}
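
	// The dictionary content is hashed into shadow tables (dictTable and
	// dictLongTable) once per dictionary ID. Later Resets only copy the
	// dirty shards from these shadow tables back into the live tables.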

	// Init or copy dict table
	if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
		if len(e.dictTable) != len(e.table) {
			e.dictTable = make([]tableEntry, len(e.table))
		}
		end := int32(len(d.content)) - 8 + e.maxMatchOff
		for i := e.maxMatchOff; i < end; i += 4 {
			const hashLog = betterShortTableBits

			cv := load6432(d.content, i-e.maxMatchOff)
			nextHash := hashLen(cv, hashLog, betterShortLen)      // 0 -> 4
			nextHash1 := hashLen(cv>>8, hashLog, betterShortLen)  // 1 -> 5
			nextHash2 := hashLen(cv>>16, hashLog, betterShortLen) // 2 -> 6
			nextHash3 := hashLen(cv>>24, hashLog, betterShortLen) // 3 -> 7
			e.dictTable[nextHash] = tableEntry{
				val:    uint32(cv),
				offset: i,
			}
			e.dictTable[nextHash1] = tableEntry{
				val:    uint32(cv >> 8),
				offset: i + 1,
			}
			e.dictTable[nextHash2] = tableEntry{
				val:    uint32(cv >> 16),
				offset: i + 2,
			}
			e.dictTable[nextHash3] = tableEntry{
				val:    uint32(cv >> 24),
				offset: i + 3,
			}
		}
		e.lastDictID = d.id
		e.allDirty = true
	}

	// Init or copy dict long table
	if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
		if len(e.dictLongTable) != len(e.longTable) {
			e.dictLongTable = make([]prevEntry, len(e.longTable))
		}
		if len(d.content) >= 8 {
			cv := load6432(d.content, 0)
			h := hashLen(cv, betterLongTableBits, betterLongLen)
			e.dictLongTable[h] = prevEntry{
				offset: e.maxMatchOff,
				prev:   e.dictLongTable[h].offset,
			}

			end := int32(len(d.content)) - 8 + e.maxMatchOff
			off := 8 // First to read
			for i := e.maxMatchOff + 1; i < end; i++ {
				cv = cv>>8 | (uint64(d.content[off]) << 56)
				h := hashLen(cv, betterLongTableBits, betterLongLen)
				e.dictLongTable[h] = prevEntry{
					offset: i,
					prev:   e.dictLongTable[h].offset,
				}
				off++
			}
		}
		e.lastDictID = d.id
		e.allDirty = true
	}

	// Reset table to initial state
	{
		dirtyShardCnt := 0
		if !e.allDirty {
			for i := range e.shortTableShardDirty {
				if e.shortTableShardDirty[i] {
					dirtyShardCnt++
				}
			}
		}
		const shardCnt = betterShortTableShardCnt
		const shardSize = betterShortTableShardSize
		if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
			copy(e.table[:], e.dictTable)
			for i := range e.shortTableShardDirty {
				e.shortTableShardDirty[i] = false
			}
		} else {
			for i := range e.shortTableShardDirty {
				if !e.shortTableShardDirty[i] {
					continue
				}

				copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
				e.shortTableShardDirty[i] = false
			}
		}
	}

	// Reset the long table the same way, using its own dirty shard flags.
	{
		dirtyShardCnt := 0
		if !e.allDirty {
			for i := range e.longTableShardDirty {
				if e.longTableShardDirty[i] {
					dirtyShardCnt++
				}
			}
		}
		const shardCnt = betterLongTableShardCnt
		const shardSize = betterLongTableShardSize
		if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
			copy(e.longTable[:], e.dictLongTable)
			for i := range e.longTableShardDirty {
				e.longTableShardDirty[i] = false
			}
		} else {
			for i := range e.longTableShardDirty {
				if !e.longTableShardDirty[i] {
					continue
				}

				copy(e.longTable[i*shardSize:(i+1)*shardSize], e.dictLongTable[i*shardSize:(i+1)*shardSize])
				e.longTableShardDirty[i] = false
			}
		}
	}
	e.cur = e.maxMatchOff
	e.allDirty = false
}
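
// markLongShardDirty and markShortShardDirty record which shard of the
// corresponding table contains the given slot, so Reset can limit restores
// to the shards that were actually modified.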
func (e *betterFastEncoderDict) markLongShardDirty(entryNum uint32) {
	e.longTableShardDirty[entryNum/betterLongTableShardSize] = true
}

func (e *betterFastEncoderDict) markShortShardDirty(entryNum uint32) {
	e.shortTableShardDirty[entryNum/betterShortTableShardSize] = true
}
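
// Usage sketch (not part of this file's API): callers never use these encoders
// directly; they are selected through the package's exported options. A minimal
// external example, assuming only the public API (zstd.NewWriter,
// zstd.WithEncoderLevel, zstd.SpeedBetterCompression):
//
//	w, err := zstd.NewWriter(dst, zstd.WithEncoderLevel(zstd.SpeedBetterCompression))
//	if err != nil {
//		return err
//	}
//	defer w.Close()
//	_, err = w.Write(data)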