sum_s390x.go 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576
  1. // Copyright 2018 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. //go:build gc && !purego
  5. package poly1305
  6. import (
  7. "golang.org/x/sys/cpu"
  8. )
// updateVX is an assembly implementation of Poly1305 that uses vector
// instructions. It must only be called if the vector facility (vx) is
// available.
//
// The function has no Go body; callers in this file guard every call with
// cpu.S390X.HasVX and pass either a whole number of buffer-sized chunks
// (Write) or the buffered remainder of more than 2*TagSize bytes (Sum).
// NOTE(review): the accepted message lengths are defined by the assembly,
// which is not visible here — confirm before adding new call sites.
//
//go:noescape
func updateVX(state *macState, msg []byte)
// mac is a replacement for macGeneric that uses a larger buffer and redirects
// calls that would have gone to updateGeneric to updateVX if the vector
// facility is installed.
//
// A larger buffer is required for good performance because the vector
// implementation has a higher fixed cost per call than the generic
// implementation.
type mac struct {
	// macState is the running state handed to updateVX/updateGeneric and,
	// through its h and s fields, to finalize.
	macState

	// buffer holds input that has been written but not yet processed.
	buffer [16 * TagSize]byte // size must be a multiple of block size (16)

	// offset is the number of bytes currently held at the start of buffer.
	offset int
}
  27. func (h *mac) Write(p []byte) (int, error) {
  28. nn := len(p)
  29. if h.offset > 0 {
  30. n := copy(h.buffer[h.offset:], p)
  31. if h.offset+n < len(h.buffer) {
  32. h.offset += n
  33. return nn, nil
  34. }
  35. p = p[n:]
  36. h.offset = 0
  37. if cpu.S390X.HasVX {
  38. updateVX(&h.macState, h.buffer[:])
  39. } else {
  40. updateGeneric(&h.macState, h.buffer[:])
  41. }
  42. }
  43. tail := len(p) % len(h.buffer) // number of bytes to copy into buffer
  44. body := len(p) - tail // number of bytes to process now
  45. if body > 0 {
  46. if cpu.S390X.HasVX {
  47. updateVX(&h.macState, p[:body])
  48. } else {
  49. updateGeneric(&h.macState, p[:body])
  50. }
  51. }
  52. h.offset = copy(h.buffer[:], p[body:]) // copy tail bytes - can be 0
  53. return nn, nil
  54. }
  55. func (h *mac) Sum(out *[TagSize]byte) {
  56. state := h.macState
  57. remainder := h.buffer[:h.offset]
  58. // Use the generic implementation if we have 2 or fewer blocks left
  59. // to sum. The vector implementation has a higher startup time.
  60. if cpu.S390X.HasVX && len(remainder) > 2*TagSize {
  61. updateVX(&state, remainder)
  62. } else if len(remainder) > 0 {
  63. updateGeneric(&state, remainder)
  64. }
  65. finalize(out, &state.h, &state.s)
  66. }