123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231 |
- // +build gc
- // +build !noasm
- #include "go_asm.h"
- #include "textflag.h"
- // Register allocation. Symbolic names for the registers used by decodeBlock; R10 is not used (Go's ARM assembler reserves it for the g pointer).
- #define dst R0 // Current write position in the output; advanced by the post-increment stores.
- #define dstorig R1 // Start of the output, saved once before the main loop.
- #define src R2 // Current read position in the input.
- #define dstend R3 // Loaded with dst_len, then turned into the address one past the output.
- #define srcend R4 // Loaded with src_len, then turned into the address one past the input.
- #define match R5 // Match address: where the bytes of the current match are read from.
- #define dictend R6 // Loaded with dict_len, then turned into the address one past the dictionary.
- #define token R7 // Token byte of the current sequence (high four bits: literal length, low four bits: match length).
- #define len R8 // Literal and match lengths; used as the remaining-byte counter in the copy loops.
- #define offset R7 // Match offset; overlaps with token, so token must be consumed before offset is loaded.
- #define tmp1 R9 // Scratch.
- #define tmp2 R11 // Scratch. NOTE(review): R11 is also the Go linker's temporary register; confirm no instruction in this file needs linker synthesis.
- #define tmp3 R12 // Scratch.
- // func decodeBlock(dst, src, dict []byte) int -- decodes the sequences in src (token, literals, 2-byte offset, match; the layout matches the LZ4 block format) into dst, reading matches that start before dst from dict; returns the number of bytes written to dst, or -1 on any error.
- TEXT ·decodeBlock(SB), NOFRAME+NOSPLIT, $-4-40 // 40 bytes of arguments+result; offsets used below: dst at 0, src at 12, dict at 24, result at 36.
- MOVW dst_base +0(FP), dst
- MOVW dst_len +4(FP), dstend // dstend = len(dst) for now.
- MOVW src_base +12(FP), src
- MOVW src_len +16(FP), srcend // srcend = len(src) for now.
- CMP $0, srcend
- BEQ shortSrc // Empty input is reported as an error.
- ADD dst, dstend // dstend = dst + len(dst): one past the last output byte.
- ADD src, srcend // srcend = src + len(src): one past the last input byte.
- MOVW dst, dstorig // Keep the output start for the match bounds check and the result.
- loop: // One iteration per sequence: token, literals, offset, match.
- // Read token. Extract literal length (the token's high four bits); src advances by one.
- MOVBU.P 1(src), token
- MOVW token >> 4, len
- CMP $15, len
- BNE readLitlenDone // A value below 15 is the complete literal length.
- readLitlenLoop: // A length of 15 is extended: each following byte is added, and a byte of 255 means another follows.
- CMP src, srcend
- BEQ shortSrc // Input ended inside the length encoding.
- MOVBU.P 1(src), tmp1
- ADD.S tmp1, len
- BVS shortDst // Signed overflow of the accumulated length.
- CMP $255, tmp1
- BEQ readLitlenLoop
- readLitlenDone:
- CMP $0, len
- BEQ copyLiteralDone // No literals in this sequence.
- // Bounds check dst+len and src+len. The conditional instructions chain the two checks into single branches.
- ADD.S dst, len, tmp1 // tmp1 = dst+len; carry set if the address wrapped around.
- ADD.CC.S src, len, tmp2 // Only if no wrap so far: tmp2 = src+len; otherwise the carry stays set.
- BCS shortSrc // Either sum wrapped around.
- CMP dstend, tmp1
- //BHI shortDst // Uncomment for distinct error codes.
- CMP.LS srcend, tmp2 // Executed only when dst+len <= dstend; otherwise the HI result of the previous CMP survives.
- BHI shortSrc // dst+len > dstend or src+len > srcend.
- // Copy literal.
- CMP $4, len
- BLO copyLiteralFinish // Fewer than four bytes: skip the alignment step and the word loop.
- // Copy 0-3 bytes until src is aligned. The conditional moves do not set flags, so each TST governs its whole group.
- TST $1, src
- MOVBU.NE.P 1(src), tmp1
- MOVB.NE.P tmp1, 1(dst)
- SUB.NE $1, len
- TST $2, src
- MOVHU.NE.P 2(src), tmp2 // src is 2-byte aligned at this point, so a halfword load is used.
- MOVB.NE.P tmp2, 1(dst) // Low byte first,
- MOVW.NE tmp2 >> 8, tmp1
- MOVB.NE.P tmp1, 1(dst) // then the high byte.
- SUB.NE $2, len
- B copyLiteralLoopCond // Enter the loop at its condition: len may already be below 4.
- copyLiteralLoop:
- // Aligned load, unaligned write: the word is stored one byte at a time, lowest byte at the lowest address.
- MOVW.P 4(src), tmp1
- MOVW tmp1 >> 8, tmp2
- MOVB tmp2, 1(dst)
- MOVW tmp1 >> 16, tmp3
- MOVB tmp3, 2(dst)
- MOVW tmp1 >> 24, tmp2
- MOVB tmp2, 3(dst)
- MOVB.P tmp1, 4(dst) // Store the low byte at dst last, then advance dst by 4.
- copyLiteralLoopCond:
- // Loop until len-4 < 0.
- SUB.S $4, len
- BPL copyLiteralLoop
- copyLiteralFinish:
- // Copy remaining 0-3 bytes.
- // At this point, len may be < 0, but len&3 is still accurate (only multiples of 4 were subtracted in the loop).
- TST $1, len
- MOVB.NE.P 1(src), tmp3
- MOVB.NE.P tmp3, 1(dst)
- TST $2, len
- MOVB.NE.P 2(src), tmp1 // First of two bytes; src and dst each advance by 2 at once,
- MOVB.NE.P tmp1, 2(dst)
- MOVB.NE -1(src), tmp2 // so the second byte is addressed at offset -1.
- MOVB.NE tmp2, -1(dst)
- copyLiteralDone:
- // Initial part of match length.
- // This frees up the token register for reuse as offset.
- AND $15, token, len // len = low four bits of the token.
- CMP src, srcend
- BEQ end // Input exhausted after the literals: this was the last sequence.
- // Read offset: two bytes, low byte first. src is advanced before the bounds check, hence the negative displacements.
- ADD.S $2, src
- BCS shortSrc // src+2 wrapped around.
- CMP srcend, src
- BHI shortSrc // Fewer than two input bytes were left.
- MOVBU -2(src), offset // Overwrites token (same register).
- MOVBU -1(src), tmp1
- ORR.S tmp1 << 8, offset
- BEQ corrupt // An offset of zero is rejected.
- // Read rest of match length. Same extension scheme as for the literal length.
- CMP $15, len
- BNE readMatchlenDone
- readMatchlenLoop:
- CMP src, srcend
- BEQ shortSrc // Input ended inside the length encoding.
- MOVBU.P 1(src), tmp1
- ADD.S tmp1, len
- BVS shortDst // Signed overflow of the accumulated length.
- CMP $255, tmp1
- BEQ readMatchlenLoop
- readMatchlenDone:
- // Bounds check dst+len+minMatch.
- ADD.S dst, len, tmp1
- ADD.CC.S $const_minMatch, tmp1 // Skipped if the first add wrapped, which leaves the carry set.
- BCS shortDst
- CMP dstend, tmp1
- BHI shortDst // The match would run past the end of dst.
- RSB dst, offset, match // match = dst - offset.
- CMP dstorig, match
- BGE copyMatch4 // Signed compare. NOTE(review): presumably so that a dst-offset that wrapped below zero counts as below dstorig; assumes the addresses do not straddle 0x80000000 -- confirm.
- // match < dstorig means the match starts in the dictionary,
- // at len(dict) - offset + (dst - dstorig).
- MOVW dict_base+24(FP), match
- MOVW dict_len +28(FP), dictend // dictend = len(dict) for now.
- ADD $const_minMatch, len // From here on len is the full number of bytes to copy.
- RSB dst, dstorig, tmp1 // tmp1 = dst - dstorig.
- RSB dictend, offset, tmp2 // tmp2 = len(dict) - offset.
- ADD.S tmp2, tmp1 // tmp1 = index into dict where the match starts.
- BMI shortDict // Negative index: the match starts before the dictionary.
- ADD match, dictend // dictend = dict base + len(dict).
- ADD tmp1, match // match = dict base + index.
- copyDict: // Byte copy until len reaches zero or the end of the dictionary is reached.
- MOVBU.P 1(match), tmp1
- MOVB.P tmp1, 1(dst)
- SUB.S $1, len
- CMP.NE match, dictend // Compared only while len != 0, so a zero result from the SUB.S also ends the loop.
- BNE copyDict
- // If the match extends beyond the dictionary, the rest is at dstorig.
- CMP $0, len
- BEQ copyMatchDone
- MOVW dstorig, match
- B copyMatch // len already includes minMatch here, so the byte loop copies exactly the remainder.
- // Copy a regular match.
- // Since len+minMatch is at least four, we can do a 4× unrolled
- // byte copy loop. Using MOVW instead of four byte loads is faster,
- // but to remain portable we'd have to align match first, which is
- // too expensive. By alternating loads and stores, we also handle
- // the case offset < 4.
- copyMatch4: // len excludes minMatch on this path; the first pass copies those four bytes.
- SUB.S $4, len // These flags are tested by the BPL below; the moves in between do not set flags.
- MOVBU.P 4(match), tmp1
- MOVB.P tmp1, 4(dst)
- MOVBU -3(match), tmp2
- MOVB tmp2, -3(dst)
- MOVBU -2(match), tmp3
- MOVB tmp3, -2(dst)
- MOVBU -1(match), tmp1
- MOVB tmp1, -1(dst)
- BPL copyMatch4
- // Restore len, which is now negative.
- ADD.S $4, len // len = 0..3 bytes still to copy.
- BEQ copyMatchDone
- copyMatch:
- // Finish with a byte-at-a-time copy.
- SUB.S $1, len
- MOVBU.P 1(match), tmp2
- MOVB.P tmp2, 1(dst)
- BNE copyMatch // Tests the SUB.S result: repeat until len is zero.
- copyMatchDone:
- CMP src, srcend
- BNE loop // More input: decode the next sequence.
- end:
- CMP $0, len
- BNE corrupt // Via copyLiteralDone len is token&15: nonzero means the input ended where an offset was expected. After a completed match copy len is 0.
- SUB dstorig, dst, tmp1 // tmp1 = dst - dstorig: the number of bytes written.
- MOVW tmp1, ret+36(FP)
- RET
- // The error cases have distinct labels so we can put different
- // return codes here when debugging, or if the error returns need to
- // be changed.
- shortDict:
- shortDst:
- shortSrc:
- corrupt:
- MOVW $-1, tmp1
- MOVW tmp1, ret+36(FP) // Every error case returns -1.
- RET
|