// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build gc && !purego

#include "textflag.h"

// This was ported from the amd64 implementation.

  7. #define POLY1305_ADD(msg, h0, h1, h2, t0, t1, t2) \
  8. MOVD (msg), t0; \
  9. MOVD 8(msg), t1; \
  10. MOVD $1, t2; \
  11. ADDC t0, h0, h0; \
  12. ADDE t1, h1, h1; \
  13. ADDE t2, h2; \
  14. ADD $16, msg
  15. #define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3, t4, t5) \
  16. MULLD r0, h0, t0; \
  17. MULLD r0, h1, t4; \
  18. MULHDU r0, h0, t1; \
  19. MULHDU r0, h1, t5; \
  20. ADDC t4, t1, t1; \
  21. MULLD r0, h2, t2; \
  22. ADDZE t5; \
  23. MULHDU r1, h0, t4; \
  24. MULLD r1, h0, h0; \
  25. ADD t5, t2, t2; \
  26. ADDC h0, t1, t1; \
  27. MULLD h2, r1, t3; \
  28. ADDZE t4, h0; \
  29. MULHDU r1, h1, t5; \
  30. MULLD r1, h1, t4; \
  31. ADDC t4, t2, t2; \
  32. ADDE t5, t3, t3; \
  33. ADDC h0, t2, t2; \
  34. MOVD $-4, t4; \
  35. MOVD t0, h0; \
  36. MOVD t1, h1; \
  37. ADDZE t3; \
  38. ANDCC $3, t2, h2; \
  39. AND t2, t4, t0; \
  40. ADDC t0, h0, h0; \
  41. ADDE t3, h1, h1; \
  42. SLD $62, t3, t4; \
  43. SRD $2, t2; \
  44. ADDZE h2; \
  45. OR t4, t2, t2; \
  46. SRD $2, t3; \
  47. ADDC t2, h0, h0; \
  48. ADDE t3, h1, h1; \
  49. ADDZE h2
  50. DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
  51. DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
  52. GLOBL ·poly1305Mask<>(SB), RODATA, $16
  53. // func update(state *[7]uint64, msg []byte)
  54. TEXT ·update(SB), $0-32
  55. MOVD state+0(FP), R3
  56. MOVD msg_base+8(FP), R4
  57. MOVD msg_len+16(FP), R5
  58. MOVD 0(R3), R8 // h0
  59. MOVD 8(R3), R9 // h1
  60. MOVD 16(R3), R10 // h2
  61. MOVD 24(R3), R11 // r0
  62. MOVD 32(R3), R12 // r1
  63. CMP R5, $16
  64. BLT bytes_between_0_and_15
  65. loop:
  66. POLY1305_ADD(R4, R8, R9, R10, R20, R21, R22)
  67. multiply:
  68. POLY1305_MUL(R8, R9, R10, R11, R12, R16, R17, R18, R14, R20, R21)
  69. ADD $-16, R5
  70. CMP R5, $16
  71. BGE loop
  72. bytes_between_0_and_15:
  73. CMP R5, $0
  74. BEQ done
  75. MOVD $0, R16 // h0
  76. MOVD $0, R17 // h1
  77. flush_buffer:
  78. CMP R5, $8
  79. BLE just1
  80. MOVD $8, R21
  81. SUB R21, R5, R21
  82. // Greater than 8 -- load the rightmost remaining bytes in msg
  83. // and put into R17 (h1)
  84. MOVD (R4)(R21), R17
  85. MOVD $16, R22
  86. // Find the offset to those bytes
  87. SUB R5, R22, R22
  88. SLD $3, R22
  89. // Shift to get only the bytes in msg
  90. SRD R22, R17, R17
  91. // Put 1 at high end
  92. MOVD $1, R23
  93. SLD $3, R21
  94. SLD R21, R23, R23
  95. OR R23, R17, R17
  96. // Remainder is 8
  97. MOVD $8, R5
  98. just1:
  99. CMP R5, $8
  100. BLT less8
  101. // Exactly 8
  102. MOVD (R4), R16
  103. CMP R17, $0
  104. // Check if we've already set R17; if not
  105. // set 1 to indicate end of msg.
  106. BNE carry
  107. MOVD $1, R17
  108. BR carry
  109. less8:
  110. MOVD $0, R16 // h0
  111. MOVD $0, R22 // shift count
  112. CMP R5, $4
  113. BLT less4
  114. MOVWZ (R4), R16
  115. ADD $4, R4
  116. ADD $-4, R5
  117. MOVD $32, R22
  118. less4:
  119. CMP R5, $2
  120. BLT less2
  121. MOVHZ (R4), R21
  122. SLD R22, R21, R21
  123. OR R16, R21, R16
  124. ADD $16, R22
  125. ADD $-2, R5
  126. ADD $2, R4
  127. less2:
  128. CMP R5, $0
  129. BEQ insert1
  130. MOVBZ (R4), R21
  131. SLD R22, R21, R21
  132. OR R16, R21, R16
  133. ADD $8, R22
  134. insert1:
  135. // Insert 1 at end of msg
  136. MOVD $1, R21
  137. SLD R22, R21, R21
  138. OR R16, R21, R16
  139. carry:
  140. // Add new values to h0, h1, h2
  141. ADDC R16, R8
  142. ADDE R17, R9
  143. ADDZE R10, R10
  144. MOVD $16, R5
  145. ADD R5, R4
  146. BR multiply
  147. done:
  148. // Save h0, h1, h2 in state
  149. MOVD R8, 0(R3)
  150. MOVD R9, 8(R3)
  151. MOVD R10, 16(R3)
  152. RET