salsa20_ref.go 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233
  1. // Copyright 2012 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package salsa
  5. import "math/bits"
  // rounds is the total number of Salsa20 rounds run by core. The loop in core
  // consumes them in pairs, so one iteration accounts for two rounds.
  const rounds = 20

  // core computes the Salsa20 core function over the 16-byte input in, the
  // 32-byte key k and the 16-byte constant c, writing the 64-byte result to out.
  //
  // The sixteen little-endian 32-bit state words are laid out as follows:
  // words 0, 5, 10 and 15 come from c, words 1-4 from k[0:16], words 6-9 from
  // in, and words 11-14 from k[16:32].
  func core(out *[64]byte, in *[16]byte, k *[32]byte, c *[16]byte) {
  	// le32 decodes the first four bytes of b as a little-endian word.
  	le32 := func(b []byte) uint32 {
  		return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
  	}

  	// mix is the Salsa20 quarter-round: each word is updated in turn from the
  	// sum of the two words updated before it, rotated by 7, 9, 13 and 18 bits.
  	mix := func(w0, w1, w2, w3 uint32) (uint32, uint32, uint32, uint32) {
  		w1 ^= bits.RotateLeft32(w0+w3, 7)
  		w2 ^= bits.RotateLeft32(w1+w0, 9)
  		w3 ^= bits.RotateLeft32(w2+w1, 13)
  		w0 ^= bits.RotateLeft32(w3+w2, 18)
  		return w0, w1, w2, w3
  	}

  	// Assemble the initial state; it is kept so it can be added back at the end.
  	var initial [16]uint32
  	for w := 0; w < 4; w++ {
  		initial[5*w] = le32(c[4*w:])
  		initial[1+w] = le32(k[4*w:])
  		initial[6+w] = le32(in[4*w:])
  		initial[11+w] = le32(k[16+4*w:])
  	}

  	x := initial
  	for r := rounds; r > 0; r -= 2 {
  		// First round of the pair: words grouped with a stride of four.
  		x[0], x[4], x[8], x[12] = mix(x[0], x[4], x[8], x[12])
  		x[5], x[9], x[13], x[1] = mix(x[5], x[9], x[13], x[1])
  		x[10], x[14], x[2], x[6] = mix(x[10], x[14], x[2], x[6])
  		x[15], x[3], x[7], x[11] = mix(x[15], x[3], x[7], x[11])

  		// Second round of the pair: words grouped in consecutive runs of four.
  		x[0], x[1], x[2], x[3] = mix(x[0], x[1], x[2], x[3])
  		x[5], x[6], x[7], x[4] = mix(x[5], x[6], x[7], x[4])
  		x[10], x[11], x[8], x[9] = mix(x[10], x[11], x[8], x[9])
  		x[15], x[12], x[13], x[14] = mix(x[15], x[12], x[13], x[14])
  	}

  	// Feed-forward: add the initial state to the mixed state word by word and
  	// serialize each sum in little-endian byte order.
  	for i, v := range x {
  		v += initial[i]
  		out[4*i] = byte(v)
  		out[4*i+1] = byte(v >> 8)
  		out[4*i+2] = byte(v >> 16)
  		out[4*i+3] = byte(v >> 24)
  	}
  }
  175. // genericXORKeyStream is the generic implementation of XORKeyStream to be used
  176. // when no assembly implementation is available.
  177. func genericXORKeyStream(out, in []byte, counter *[16]byte, key *[32]byte) {
  178. var block [64]byte
  179. var counterCopy [16]byte
  180. copy(counterCopy[:], counter[:])
  181. for len(in) >= 64 {
  182. core(&block, &counterCopy, key, &Sigma)
  183. for i, x := range block {
  184. out[i] = in[i] ^ x
  185. }
  186. u := uint32(1)
  187. for i := 8; i < 16; i++ {
  188. u += uint32(counterCopy[i])
  189. counterCopy[i] = byte(u)
  190. u >>= 8
  191. }
  192. in = in[64:]
  193. out = out[64:]
  194. }
  195. if len(in) > 0 {
  196. core(&block, &counterCopy, key, &Sigma)
  197. for i, v := range in {
  198. out[i] = v ^ block[i]
  199. }
  200. }
  201. }