123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230 |
- // +build gc
- // +build !noasm
- // This implementation assumes that strict alignment checking is turned off.
- // The Go compiler makes the same assumption.
- #include "go_asm.h"
- #include "textflag.h"
- // Register allocation. Each name below aliases one general-purpose register used by decodeBlock.
- #define dst R0 // Write cursor in the output buffer.
- #define dstorig R1 // Start of the output buffer; used for the return value and dictionary checks.
- #define src R2 // Read cursor in the input buffer.
- #define dstend R3 // dst base + len(dst), i.e. one past the last writable byte.
- #define dstend16 R4 // max(dstend - 16, 0); guards the 16-byte over-copy fast path.
- #define srcend R5 // src base + len(src), i.e. one past the last readable byte.
- #define srcend16 R6 // max(srcend - 16, 0); guards the 16-byte over-copy fast path.
- #define match R7 // Match address.
- #define dict R8 // Base address of the dictionary.
- #define dictlen R9 // len(dict).
- #define dictend R10 // dict + dictlen.
- #define token R11 // Current token byte: high nibble = literal length, low nibble = match length.
- #define len R12 // Literal and match lengths.
- #define lenRem R13 // len & 7, kept for the tail of the 8-bytes-at-a-time match copy.
- #define offset R14 // Match offset.
- #define tmp1 R15 // Scratch.
- #define tmp2 R16 // Scratch.
- #define tmp3 R17 // Scratch.
- #define tmp4 R19 // Scratch. R18 is skipped; presumably because it is platform-reserved (TODO confirm).
- // func decodeBlock(dst, src, dict []byte) int -- decodes the token-based compressed block in src into dst (matches may also start in dict) and returns the number of bytes written to dst, or -1 on any error. NOTE(review): the token/literal/offset/match layout looks like the LZ4 block format; confirm against the Go caller.
- TEXT ·decodeBlock(SB), NOFRAME+NOSPLIT, $0-80 // No stack frame; 80 bytes of arguments plus result.
- LDP dst_base+0(FP), (dst, dstend) // dst = base pointer, dstend = len(dst) for now.
- ADD dst, dstend // dstend = dst + len(dst).
- MOVD dst, dstorig // Remember where the output starts.
- LDP src_base+24(FP), (src, srcend) // src = base pointer, srcend = len(src) for now.
- CBZ srcend, shortSrc // Empty input is an error.
- ADD src, srcend // srcend = src + len(src).
- // dstend16 = max(dstend-16, 0) and similarly for srcend16.
- SUBS $16, dstend, dstend16 // Sets LO if dstend < 16 (subtraction borrowed).
- CSEL LO, ZR, dstend16, dstend16 // Clamp to zero on borrow.
- SUBS $16, srcend, srcend16 // Sets LO if srcend < 16 (subtraction borrowed).
- CSEL LO, ZR, srcend16, srcend16 // Clamp to zero on borrow.
- LDP dict_base+48(FP), (dict, dictlen) // dict = base pointer, dictlen = len(dict).
- ADD dict, dictlen, dictend // dictend = dict + dictlen.
- loop: // One iteration per sequence: token, literals, then (unless at end of input) offset and match.
- // Read token. Extract literal length.
- MOVBU.P 1(src), token // token = *src++.
- LSR $4, token, len // len = high nibble of token.
- CMP $15, len
- BNE readLitlenDone // A nibble below 15 is the complete literal length.
- readLitlenLoop: // Nibble was 15: add extension bytes until one is not 255.
- CMP src, srcend
- BEQ shortSrc // Input ended in the middle of a length.
- MOVBU.P 1(src), tmp1
- ADDS tmp1, len
- BVS shortDst // Accumulated length overflowed (signed).
- CMP $255, tmp1
- BEQ readLitlenLoop // 255 means another extension byte follows.
- readLitlenDone:
- CBZ len, copyLiteralDone // No literals in this sequence.
- // Bounds check dst+len and src+len.
- ADDS dst, len, tmp1 // tmp1 = dst + len.
- BCS shortSrc // Address computation wrapped around.
- ADDS src, len, tmp2 // tmp2 = src + len.
- BCS shortSrc // Address computation wrapped around.
- CMP dstend, tmp1
- BHI shortDst // dst + len > dstend.
- CMP srcend, tmp2
- BHI shortSrc // src + len > srcend.
- // Copy literal.
- SUBS $16, len // Bias len by -16 so its sign tells whether a full 16-byte chunk remains.
- BLO copyLiteralShort // Fewer than 16 literal bytes in total.
- copyLiteralLoop: // Copy 16 bytes per iteration while at least 16 remain.
- LDP.P 16(src), (tmp1, tmp2)
- STP.P (tmp1, tmp2), 16(dst)
- SUBS $16, len
- BPL copyLiteralLoop // Loop while the biased len is still >= 0.
- // Copy (final part of) literal of length 0-15. The remaining count is len & 15 (len itself is negative here).
- // If we have >=16 bytes left in src and dst, just copy 16 bytes.
- copyLiteralShort:
- CMP dstend16, dst // LO if dst < dstend16.
- CCMP LO, src, srcend16, $0b0010 // 0010 = preserve carry (LO). If dst < dstend16, compare src with srcend16; otherwise force carry set so BHS is taken.
- BHS copyLiteralShortEnd // Too close to the end of dst or src for a 16-byte access.
- AND $15, len // len = number of literal bytes still to copy (0-15).
- LDP (src), (tmp1, tmp2) // Read 16 bytes; only the first len of them are literal data.
- ADD len, src
- STP (tmp1, tmp2), (dst) // Write 16 bytes; bytes past len are overwritten by later output.
- ADD len, dst
- B copyLiteralDone
- // Safe but slow copy near the end of src, dst. Copies exactly len & 15 bytes by testing bits 3..0 of len.
- copyLiteralShortEnd:
- TBZ $3, len, 3(PC) // Bit 3 clear: skip the 8-byte copy (next two instructions).
- MOVD.P 8(src), tmp1
- MOVD.P tmp1, 8(dst)
- TBZ $2, len, 3(PC) // Bit 2 clear: skip the 4-byte copy.
- MOVW.P 4(src), tmp2
- MOVW.P tmp2, 4(dst)
- TBZ $1, len, 3(PC) // Bit 1 clear: skip the 2-byte copy.
- MOVH.P 2(src), tmp3
- MOVH.P tmp3, 2(dst)
- TBZ $0, len, 3(PC) // Bit 0 clear: skip the 1-byte copy and fall through to copyLiteralDone.
- MOVBU.P 1(src), tmp4
- MOVB.P tmp4, 1(dst)
- copyLiteralDone:
- // Initial part of match length.
- AND $15, token, len // len = low nibble of token.
- CMP src, srcend
- BEQ end // Input exhausted after the literals: this was the last sequence.
- // Read offset.
- ADDS $2, src // Advance past the 2-byte offset first, then validate.
- BCS shortSrc // Address computation wrapped around.
- CMP srcend, src
- BHI shortSrc // Offset field runs past the end of the input.
- MOVHU -2(src), offset // Load the 16-bit offset just skipped, zero-extended.
- CBZ offset, corrupt // A zero offset is rejected.
- // Read rest of match length.
- CMP $15, len
- BNE readMatchlenDone // A nibble below 15 needs no extension bytes.
- readMatchlenLoop: // Nibble was 15: add extension bytes until one is not 255.
- CMP src, srcend
- BEQ shortSrc // Input ended in the middle of a length.
- MOVBU.P 1(src), tmp1
- ADDS tmp1, len
- BVS shortDst // Accumulated length overflowed (signed).
- CMP $255, tmp1
- BEQ readMatchlenLoop // 255 means another extension byte follows.
- readMatchlenDone:
- ADD $const_minMatch, len // Add the minimum match length; const_minMatch is not defined in this file (presumably generated into go_asm.h from the Go constant minMatch -- confirm).
- // Bounds check dst+len.
- ADDS dst, len, tmp2 // tmp2 = dst + len.
- BCS shortDst // Address computation wrapped around.
- CMP dstend, tmp2
- BHI shortDst // dst + len > dstend.
- SUB offset, dst, match // match = dst - offset.
- CMP dstorig, match
- BHS copyMatchTry8 // match >= dstorig: the match lies entirely within the output written so far.
- // match < dstorig means the match starts in the dictionary,
- // at len(dict) - offset + (dst - dstorig).
- SUB dstorig, dst, tmp1 // tmp1 = dst - dstorig (bytes written so far).
- SUB offset, dictlen, tmp2 // tmp2 = dictlen - offset.
- ADDS tmp2, tmp1 // tmp1 = index into dict where the match starts.
- BMI shortDict // Negative index: the match starts before the dictionary.
- ADD dict, tmp1, match // match = dict + index.
- copyDict: // Byte-at-a-time copy out of the dictionary.
- MOVBU.P 1(match), tmp3
- MOVB.P tmp3, 1(dst)
- SUBS $1, len
- CCMP NE, dictend, match, $0b0100 // 0100 sets the Z (EQ) flag. If len != 0, compare dictend with match; otherwise force EQ so the loop exits.
- BNE copyDict // Continue while len != 0 and match != dictend.
- CBZ len, copyMatchDone // Whole match came from the dictionary.
- // If the match extends beyond the dictionary, the rest is at dstorig.
- // Recompute the offset for the next check.
- MOVD dstorig, match
- SUB dstorig, dst, offset // offset = dst - match, the distance between copy source and destination.
- copyMatchTry8:
- // Copy doublewords if both len and offset are at least eight.
- // A 16-at-a-time loop doesn't provide a further speedup.
- CMP $8, len // HS if len >= 8.
- CCMP HS, offset, $8, $0 // If len >= 8, compare offset with 8; otherwise clear all flags so BLO is taken.
- BLO copyMatchLoop1 // len < 8 or offset < 8: fall back to the byte loop.
- AND $7, len, lenRem // lenRem = len mod 8, the tail left after whole doublewords.
- SUB $8, len // Bias len by -8 so its sign tells whether a full doubleword remains.
- copyMatchLoop8:
- MOVD.P 8(match), tmp1
- MOVD.P tmp1, 8(dst)
- SUBS $8, len
- BPL copyMatchLoop8 // Loop while the biased len is still >= 0.
- MOVD (match)(len), tmp2 // match+len == match+lenRem-8. Loads the final 8 bytes of the match, overlapping bytes already copied.
- ADD lenRem, dst // dst now points at the end of the match.
- MOVD $0, len // Match fully consumed; end: requires len == 0.
- MOVD tmp2, -8(dst) // Store the final 8 bytes so that they end exactly at dst.
- B copyMatchDone
- copyMatchLoop1:
- // Byte-at-a-time copy for small offsets.
- MOVBU.P 1(match), tmp2
- MOVB.P tmp2, 1(dst)
- SUBS $1, len
- BNE copyMatchLoop1 // Loop until len reaches zero.
- copyMatchDone:
- CMP src, srcend
- BNE loop // More input: decode the next sequence. Otherwise fall through to end with len == 0.
- end: // Success exit: return the number of bytes written.
- CBNZ len, corrupt // Reached from copyLiteralDone with a non-zero match-length nibble but no input left for a match.
- SUB dstorig, dst, tmp1 // tmp1 = dst - dstorig.
- MOVD tmp1, ret+72(FP)
- RET
- // The error cases have distinct labels so we can put different
- // return codes here when debugging, or if the error returns need to
- // be changed. As written, every error label shares the code below and returns -1.
- shortDict:
- shortDst:
- shortSrc:
- corrupt:
- MOVD $-1, tmp1
- MOVD tmp1, ret+72(FP)
- RET
|