// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build gc && !purego

#include "textflag.h"

  // POLY1305_ADD(msg, h0, h1, h2)
  //
  // Adds one full 16-byte message block to the accumulator h2:h1:h0.
  //
  //   msg        register holding the address of the block; advanced by 16
  //   h0, h1     low and middle 64-bit limbs of the accumulator
  //   h2         top limb of the accumulator
  //
  // The two 64-bit words at 0(msg) and 8(msg) are added into h0 and h1 as a
  // single carry chain. The final ADCQ adds the carry out of h1 plus the
  // constant 1 into h2, i.e. it also adds 2^128 to the accumulator (the extra
  // high bit appended to every full block). LEAQ is used to step msg because
  // it does not touch the flags.
  //
  // Clobbers: flags. The instruction order is a carry chain; do not reorder.
  #define POLY1305_ADD(msg, h0, h1, h2) \
      ADDQ 0(msg), h0; \
      ADCQ 8(msg), h1; \
      ADCQ $1, h2; \
      LEAQ 16(msg), msg
  // POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3)
  //
  // Multiplies the accumulator h2:h1:h0 by the two-limb value r1:r0 and folds
  // the result back into h2:h1:h0.
  //
  //   h0, h1, h2   accumulator limbs (input and output)
  //   r0, r1       multiplier limbs (read only)
  //   t0..t3       scratch registers; on exit their contents are garbage
  //
  // The body has three sections, separated by the empty continuation lines:
  //
  //   1. t2:t1:t0 = r0 * (h2:h1:h0).
  //      r0*h0 and r0*h1 are full 64x64->128 MULQ products; r0*h2 uses IMULQ
  //      and keeps only the low 64 bits.
  //      NOTE(review): this presumably relies on h2 being only a few bits
  //      wide so that the truncated product is exact - confirm against the
  //      caller's invariants.
  //
  //   2. Adds r1 * (h2:h1:h0), weighted by 2^64, giving t3:t2:t1:t0.
  //      The high half of r1*h0 is parked in h0 (whose old value is no longer
  //      needed after the MULQ) and added into t2 at the end of the section.
  //      r1*h2 again uses the truncating IMULQ.
  //
  //   3. Folds the bits at and above 2^130 back into the low 130 bits.
  //      h2:h1:h0 is first set to the low 130 bits (h2 = t2 & 3). With c
  //      denoting the product shifted right by 130, the pair (t3, t2 with its
  //      low two bits cleared) equals 4*c and is added once; the pair is then
  //      shifted right by two to give c and added again. The net effect is
  //      h = low130 + 5*c, which is congruent to the product modulo
  //      2^130 - 5. The result is only partially reduced: the carries from
  //      the two additions can leave h2 larger than 3.
  //
  // Clobbers: AX, DX (implicit MULQ operands), t0..t3, flags.
  // Every ADDQ/ADCQ pair is a carry chain; do not reorder instructions.
  #define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \
      MOVQ  r0, AX; \
      MULQ  h0; \
      MOVQ  AX, t0; \
      MOVQ  DX, t1; \
      MOVQ  r0, AX; \
      MULQ  h1; \
      ADDQ  AX, t1; \
      ADCQ  $0, DX; \
      MOVQ  r0, t2; \
      IMULQ h2, t2; \
      ADDQ  DX, t2; \
      \
      MOVQ  r1, AX; \
      MULQ  h0; \
      ADDQ  AX, t1; \
      ADCQ  $0, DX; \
      MOVQ  DX, h0; \
      MOVQ  r1, t3; \
      IMULQ h2, t3; \
      MOVQ  r1, AX; \
      MULQ  h1; \
      ADDQ  AX, t2; \
      ADCQ  DX, t3; \
      ADDQ  h0, t2; \
      ADCQ  $0, t3; \
      \
      MOVQ  t0, h0; \
      MOVQ  t1, h1; \
      MOVQ  t2, h2; \
      ANDQ  $3, h2; \
      MOVQ  t2, t0; \
      ANDQ  $0xFFFFFFFFFFFFFFFC, t0; \
      ADDQ  t0, h0; \
      ADCQ  t3, h1; \
      ADCQ  $0, h2; \
      SHRQ  $2, t3, t2; \
      SHRQ  $2, t3; \
      ADDQ  t2, h0; \
      ADCQ  t3, h1; \
      ADCQ  $0, h2
  // func update(state *[7]uint64, msg []byte)
  //
  // Absorbs msg into the Poly1305 accumulator stored in state.
  //
  // Frame: $0-32 - no local stack frame, 32 bytes of arguments: the state
  // pointer at +0(FP) followed by the msg slice header, of which the base
  // (+8) and length (+16) are read.
  //
  // State words used here (8 bytes each):
  //   state[0..2]  h0, h1, h2 - accumulator, loaded on entry, stored on exit
  //   state[3..4]  r0, r1     - multiplier, loaded on entry, never written
  // The remaining words of state are not touched by this routine.
  //
  // Register roles:
  //   DI           state pointer
  //   SI           message cursor
  //   R15          bytes remaining
  //   R8, R9, R10  h0, h1, h2
  //   R11, R12     r0, r1
  //   BX, CX, R13, R14  scratch for POLY1305_MUL; BX, CX and R13 are also
  //                     used to assemble the final partial block
  //   AX, DX       clobbered by the MULQ instructions inside POLY1305_MUL
  //
  // An empty msg falls straight through to done and stores h back unchanged.
  TEXT ·update(SB), $0-32
      MOVQ state+0(FP), DI
      MOVQ msg_base+8(FP), SI
      MOVQ msg_len+16(FP), R15

      MOVQ 0(DI), R8   // h0
      MOVQ 8(DI), R9   // h1
      MOVQ 16(DI), R10 // h2
      MOVQ 24(DI), R11 // r0
      MOVQ 32(DI), R12 // r1

      // Fewer than 16 bytes in total: skip the full-block loop.
      CMPQ R15, $16
      JB   bytes_between_0_and_15

  loop:
      // h += block + 2^128; SI advances by 16.
      POLY1305_ADD(SI, R8, R9, R10)

  multiply:
      // h = h * r, folded back into three limbs. Also entered from the tail
      // path below, which has already added its padded block into h.
      POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14)
      SUBQ $16, R15
      CMPQ R15, $16
      JAE  loop            // unsigned compare: at least one more full block

  bytes_between_0_and_15:
      TESTQ R15, R15
      JZ    done

      // Build the final partial block in CX:BX (CX = high word). Starting
      // from the value 1 and walking the tail from its last byte to its
      // first, each iteration shifts the 128-bit value left by one byte and
      // inserts the next message byte at the bottom. The result is the tail
      // bytes in little-endian order with a single 0x01 byte directly above
      // the last message byte. Unlike the full-block path, no 2^128 term is
      // added for this block.
      MOVQ  $1, BX
      XORQ  CX, CX
      XORQ  R13, R13       // MOVB below writes only the low byte of R13
      ADDQ  R15, SI        // SI = one past the last message byte

  flush_buffer:
      SHLQ $8, BX, CX      // double-word shift: CX takes the top byte of BX
      SHLQ $8, BX
      MOVB -1(SI), R13
      XORQ R13, BX         // low byte of BX is zero after the shift
      DECQ SI
      DECQ R15
      JNZ  flush_buffer

      ADDQ BX, R8
      ADCQ CX, R9
      ADCQ $0, R10
      // Pretend exactly one block remains so that the SUBQ/CMPQ after the
      // multiply leaves R15 == 0 and control falls through to done.
      MOVQ $16, R15
      JMP  multiply

  done:
      // Write the accumulator back; r0/r1 were never modified.
      MOVQ R8, 0(DI)
      MOVQ R9, 8(DI)
      MOVQ R10, 16(DI)
      RET