1//go:build !noasm && gc && arm64 && !amd64
2
3#include "textflag.h"
4
// License information for the original SHA1 arm64 implementation:
6// Copyright 2017 The Go Authors. All rights reserved.
7// Use of this source code is governed by a BSD-style
8// license that can be found at:
9// - https://github.com/golang/go/blob/master/LICENSE
10//
11// Reference implementations:
12// - https://github.com/noloader/SHA-Intrinsics/blob/master/sha1-arm.c
13// - https://github.com/golang/go/blob/master/src/crypto/sha1/sha1block_arm64.s
14
// HASHUPDATE(ROUNDOP) advances the working state by four SHA-1 rounds.
//   in:  V16 = four schedule words with the round constant already added,
//        V1  = E, V2 = ABCD, V3 = copy of ABCD as it is on entry
//   out: V2 = new ABCD, V1 = new E (SHA1H of the entry-time A held in V3),
//        V3 = copy of the new ABCD
// ROUNDOP selects the round function: SHA1C (choose), SHA1P (parity) or
// SHA1M (majority).
#define HASHUPDATE(ROUNDOP) \
	ROUNDOP	V16.S4, V1, V2 \
	SHA1H	V3, V1 \
	VMOV	V2.B16, V3.B16

#define HASHUPDATECHOOSE HASHUPDATE(SHA1C)
#define HASHUPDATEPARITY HASHUPDATE(SHA1P)
#define HASHUPDATEMAJ HASHUPDATE(SHA1M)
29
// func blockARM64(h []uint32, p []byte, m1 []uint32, cs [][5]uint32)
//
// blockARM64 runs the SHA-1 compression function over one 64-byte block of p
// using the ARMv8 SHA-1 instructions, updates h in place, and records
// intermediate data for the caller:
//   - m1 receives the 80 message-schedule words W[0..79] (320 bytes);
//   - cs receives three 5-word (A, B, C, D, E) snapshots of the working
//     state, taken before rounds 0, 56 and 64 (60 bytes).
//
// Only the first block of p is consumed; there is no loop over further
// blocks. When len(p) < 64 nothing is hashed, m1 and cs are not written and
// h keeps its value. No slice lengths other than len(p) are checked: the
// code reads 5 words from h and writes the amounts listed above.
//
// Register use:
//   R0  = &h[0]            R1 = read cursor in p     R2 = len(p)
//   R3  = write cursor in m1                         R4 = write cursor in cs
//   R22 = &sha1Ks
//   V0  = h[0..3]          V20.S[0] = h[4]
//   V1  = E                V2 = ABCD                 V3 = copy of ABCD
//   V4-V7 = sliding window of 16 schedule words
//   V16 = W + K for the next four rounds
//   V17, V18 = round constant broadcast to all lanes, V19 = K0..K3
//
// NOTE(review): the 80-byte frame declared below is not referenced by any
// instruction in this routine.
TEXT ·blockARM64(SB), NOSPLIT, $80-96
	MOVD	h_base+0(FP), R0
	MOVD	p_base+24(FP), R1
	MOVD	p_len+32(FP), R2
	MOVD	m1_base+48(FP), R3
	MOVD	cs_base+72(FP), R4

	// V0 = h[0..3], V20.S[0] = h[4]; R0 is moved back to &h[0] afterwards.
	VLD1.P	16(R0), [V0.S4]
	FMOVS	(R0), F20
	SUB	$16, R0, R0

	// Bail out unless p holds a full 64-byte block. The length is tested
	// directly so the decision depends only on len(p) and not on the
	// contents of any scratch register.
	CMP	$64, R2
	BLO	end

	// Load the 64-byte block into four 16-byte vectors.
	VLD1.P	16(R1), [V4.B16]
	VLD1.P	16(R1), [V5.B16]
	VLD1.P	16(R1), [V6.B16]
	VLD1.P	16(R1), [V7.B16]

	// V19 = {K0, K1, K2, K3}
	MOVD	$·sha1Ks(SB), R22
	VLD1	(R22), [V19.S4]

	// Working state: V2 = V3 = ABCD, V1 = E; V17 = K0 in every lane.
	VMOV	V0.B16, V2.B16
	VMOV	V20.S[0], V1
	VMOV	V2.B16, V3.B16
	VDUP	V19.S[0], V17.S4

	// SHA-1 message words are big-endian: byte-swap every 32-bit lane.
	VREV32	V4.B16, V4.B16
	VREV32	V5.B16, V5.B16
	VREV32	V6.B16, V6.B16
	VREV32	V7.B16, V7.B16

	// Store W[0..15] to m1.
	VST1.P	[V4.S4], 16(R3)
	VST1.P	[V5.S4], 16(R3)
	VST1.P	[V6.S4], 16(R3)
	VST1.P	[V7.S4], 16(R3)

	// Store cs[0]: state before round 0.
	VST1.P	[V0.S4], 16(R4)	// ABCD pre-round 0
	VST1.P	V1.S[0], 4(R4)	// E pre-round 0

	// Rounds 0-3 (V18 = K1 is prepared here for rounds 20-39)
	VDUP	V19.S[1], V18.S4
	VADD	V17.S4, V4.S4, V16.S4
	SHA1SU0	V6.S4, V5.S4, V4.S4
	HASHUPDATECHOOSE
	SHA1SU1	V7.S4, V4.S4

	// Rounds 4-7
	VADD	V17.S4, V5.S4, V16.S4
	SHA1SU0	V7.S4, V6.S4, V5.S4
	HASHUPDATECHOOSE
	SHA1SU1	V4.S4, V5.S4
	// Store W[16..19] to m1.
	VST1.P	[V4.S4], 16(R3)

	// Rounds 8-11
	VADD	V17.S4, V6.S4, V16.S4
	SHA1SU0	V4.S4, V7.S4, V6.S4
	HASHUPDATECHOOSE
	SHA1SU1	V5.S4, V6.S4
	// Store W[20..23] to m1.
	VST1.P	[V5.S4], 16(R3)

	// Rounds 12-15
	VADD	V17.S4, V7.S4, V16.S4
	SHA1SU0	V5.S4, V4.S4, V7.S4
	HASHUPDATECHOOSE
	SHA1SU1	V6.S4, V7.S4
	// Store W[24..27] to m1.
	VST1.P	[V6.S4], 16(R3)

	// Rounds 16-19
	VADD	V17.S4, V4.S4, V16.S4
	SHA1SU0	V6.S4, V5.S4, V4.S4
	HASHUPDATECHOOSE
	SHA1SU1	V7.S4, V4.S4
	// Store W[28..31] to m1.
	VST1.P	[V7.S4], 16(R3)

	// Rounds 20-23 (V17 = K2 is prepared here for rounds 40-59)
	VDUP	V19.S[2], V17.S4
	VADD	V18.S4, V5.S4, V16.S4
	SHA1SU0	V7.S4, V6.S4, V5.S4
	HASHUPDATEPARITY
	SHA1SU1	V4.S4, V5.S4
	// Store W[32..35] to m1.
	VST1.P	[V4.S4], 16(R3)

	// Rounds 24-27
	VADD	V18.S4, V6.S4, V16.S4
	SHA1SU0	V4.S4, V7.S4, V6.S4
	HASHUPDATEPARITY
	SHA1SU1	V5.S4, V6.S4
	// Store W[36..39] to m1.
	VST1.P	[V5.S4], 16(R3)

	// Rounds 28-31
	VADD	V18.S4, V7.S4, V16.S4
	SHA1SU0	V5.S4, V4.S4, V7.S4
	HASHUPDATEPARITY
	SHA1SU1	V6.S4, V7.S4
	// Store W[40..43] to m1.
	VST1.P	[V6.S4], 16(R3)

	// Rounds 32-35
	VADD	V18.S4, V4.S4, V16.S4
	SHA1SU0	V6.S4, V5.S4, V4.S4
	HASHUPDATEPARITY
	SHA1SU1	V7.S4, V4.S4
	// Store W[44..47] to m1.
	VST1.P	[V7.S4], 16(R3)

	// Rounds 36-39
	VADD	V18.S4, V5.S4, V16.S4
	SHA1SU0	V7.S4, V6.S4, V5.S4
	HASHUPDATEPARITY
	SHA1SU1	V4.S4, V5.S4
	// Store W[48..51] to m1.
	VST1.P	[V4.S4], 16(R3)

	// Rounds 40-43 (V18 = K3 is prepared here for rounds 60-79)
	VDUP	V19.S[3], V18.S4
	VADD	V17.S4, V6.S4, V16.S4
	SHA1SU0	V4.S4, V7.S4, V6.S4
	HASHUPDATEMAJ
	SHA1SU1	V5.S4, V6.S4
	// Store W[52..55] to m1.
	VST1.P	[V5.S4], 16(R3)

	// Rounds 44-47
	VADD	V17.S4, V7.S4, V16.S4
	SHA1SU0	V5.S4, V4.S4, V7.S4
	HASHUPDATEMAJ
	SHA1SU1	V6.S4, V7.S4
	// Store W[56..59] to m1.
	VST1.P	[V6.S4], 16(R3)

	// Rounds 48-51
	VADD	V17.S4, V4.S4, V16.S4
	SHA1SU0	V6.S4, V5.S4, V4.S4
	HASHUPDATEMAJ
	SHA1SU1	V7.S4, V4.S4
	// Store W[60..63] to m1.
	VST1.P	[V7.S4], 16(R3)

	// Rounds 52-55
	VADD	V17.S4, V5.S4, V16.S4
	SHA1SU0	V7.S4, V6.S4, V5.S4
	HASHUPDATEMAJ
	SHA1SU1	V4.S4, V5.S4

	// Store cs[1]: state before round 56.
	// NOTE(review): this slot was previously labelled "CS 58"; what is
	// written is the pre-round-56 state, so the caller presumably derives
	// the step-58 state from it. Confirm against the Go side.
	VST1.P	[V3.S4], 16(R4)	// ABCD pre-round 56
	VST1.P	V1.S[0], 4(R4)	// E pre-round 56

	// Rounds 56-59
	VADD	V17.S4, V6.S4, V16.S4
	SHA1SU0	V4.S4, V7.S4, V6.S4
	HASHUPDATEMAJ
	SHA1SU1	V5.S4, V6.S4

	// Rounds 60-63
	VADD	V18.S4, V7.S4, V16.S4
	SHA1SU0	V5.S4, V4.S4, V7.S4
	HASHUPDATEPARITY
	SHA1SU1	V6.S4, V7.S4

	// Store cs[2]: state before round 64.
	// NOTE(review): previously labelled "CS 65"; the stored state is the
	// pre-round-64 one. Confirm how the caller uses it.
	VST1.P	[V3.S4], 16(R4)	// ABCD pre-round 64
	VST1.P	V1.S[0], 4(R4)	// E pre-round 64

	// Rounds 64-67
	VADD	V18.S4, V4.S4, V16.S4
	HASHUPDATEPARITY

	// Store W[64..79] to m1. V4-V7 are no longer modified from here on.
	VST1.P	[V4.S4], 16(R3)
	VST1.P	[V5.S4], 16(R3)
	VST1.P	[V6.S4], 16(R3)
	VST1.P	[V7.S4], 16(R3)

	// Rounds 68-71
	VADD	V18.S4, V5.S4, V16.S4
	HASHUPDATEPARITY

	// Rounds 72-75
	VADD	V18.S4, V6.S4, V16.S4
	HASHUPDATEPARITY

	// Rounds 76-79
	VADD	V18.S4, V7.S4, V16.S4
	HASHUPDATEPARITY

	// Add the working state to the incoming hash state.
	VADD	V2.S4, V0.S4, V0.S4
	VADD	V1.S4, V20.S4, V20.S4

end:
	// Write h[0..3] and h[4] back. On the short-input path this rewrites
	// the values that were loaded above.
	VST1.P	[V0.S4], 16(R0)
	FMOVS	F20, (R0)

	RET
244
// SHA-1 round constants K0..K3, one 32-bit word each, laid out back to back
// so that all four can be fetched with a single 16-byte vector load.
DATA ·sha1Ks+0x00(SB)/4, $0x5a827999 // K0: rounds  0-19
DATA ·sha1Ks+0x04(SB)/4, $0x6ed9eba1 // K1: rounds 20-39
DATA ·sha1Ks+0x08(SB)/4, $0x8f1bbcdc // K2: rounds 40-59
DATA ·sha1Ks+0x0c(SB)/4, $0xca62c1d6 // K3: rounds 60-79
GLOBL ·sha1Ks(SB), RODATA, $16