1//go:build !noasm && gc && arm64 && !amd64
  2
  3#include "textflag.h"
  4
// License information for the original SHA1 arm64 implementation:
  6// Copyright 2017 The Go Authors. All rights reserved.
  7// Use of this source code is governed by a BSD-style
  8// license that can be found at:
  9// 	- https://github.com/golang/go/blob/master/LICENSE
 10//
 11// Reference implementations:
 12// 	- https://github.com/noloader/SHA-Intrinsics/blob/master/sha1-arm.c
 13// 	- https://github.com/golang/go/blob/master/src/crypto/sha1/sha1block_arm64.s
 14
// HASHUPDATECHOOSE advances the working state by one SHA1C ("choose")
// hash-update step.
//
// Register roles, as set up by the code that expands this macro:
//   V16 = four schedule words with the round constant already added
//   V2  = working state A,B,C,D (updated in place by SHA1C)
//   V1  = working value E (consumed by SHA1C, then replaced by SHA1H)
//   V3  = copy of V2 taken before this step
//
// The instruction order matters: SHA1C must read the old E in V1 before
// SHA1H overwrites it, and SHA1H must read the pre-update copy in V3 before
// VMOV refreshes V3 from the updated V2.
#define HASHUPDATECHOOSE \
	SHA1C	V16.S4, V1, V2 \
	SHA1H	V3, V1 \
	VMOV	V2.B16, V3.B16
 19
// HASHUPDATEPARITY advances the working state by one SHA1P ("parity")
// hash-update step.
//
// Register roles, as set up by the code that expands this macro:
//   V16 = four schedule words with the round constant already added
//   V2  = working state A,B,C,D (updated in place by SHA1P)
//   V1  = working value E (consumed by SHA1P, then replaced by SHA1H)
//   V3  = copy of V2 taken before this step
//
// The instruction order matters: SHA1P must read the old E in V1 before
// SHA1H overwrites it, and SHA1H must read the pre-update copy in V3 before
// VMOV refreshes V3 from the updated V2.
#define HASHUPDATEPARITY \
	SHA1P	V16.S4, V1, V2 \
	SHA1H	V3, V1 \
	VMOV	V2.B16, V3.B16
 24
// HASHUPDATEMAJ advances the working state by one SHA1M ("majority")
// hash-update step.
//
// Register roles, as set up by the code that expands this macro:
//   V16 = four schedule words with the round constant already added
//   V2  = working state A,B,C,D (updated in place by SHA1M)
//   V1  = working value E (consumed by SHA1M, then replaced by SHA1H)
//   V3  = copy of V2 taken before this step
//
// The instruction order matters: SHA1M must read the old E in V1 before
// SHA1H overwrites it, and SHA1H must read the pre-update copy in V3 before
// VMOV refreshes V3 from the updated V2.
#define HASHUPDATEMAJ \
	SHA1M	V16.S4, V1, V2 \
	SHA1H	V3, V1 \
	VMOV	V2.B16, V3.B16
 29
 30// func blockARM64(h []uint32, p []byte, m1 []uint32, cs [][5]uint32)
 31TEXT ·blockARM64(SB), NOSPLIT, $80-96
 32	MOVD	h_base+0(FP), R0
 33	MOVD	p_base+24(FP), R1
 34	MOVD	p_len+32(FP), R2
 35	MOVD	m1_base+48(FP), R3
 36	MOVD	cs_base+72(FP), R4
 37
 38    LSR     $6, R2, R2
 39    LSL     $6, R2, R2
 40    ADD     R16, R2, R21
 41
 42	VLD1.P	16(R0), [V0.S4]
 43	FMOVS	(R0), F20
 44	SUB	$16, R0, R0
 45
 46loop:
 47	CMP     R16, R21
 48	BLS     end
 49
 50	// Load block (p) into 16-bytes vectors.
 51	VLD1.P	16(R1), [V4.B16]
 52	VLD1.P	16(R1), [V5.B16]
 53	VLD1.P	16(R1), [V6.B16]
 54	VLD1.P	16(R1), [V7.B16]
 55	
 56	// Load K constants to V19
 57	MOVD  $·sha1Ks(SB), R22
 58	VLD1  (R22), [V19.S4]
 59                              
 60	VMOV	V0.B16, V2.B16
 61	VMOV	V20.S[0], V1
 62	VMOV	V2.B16, V3.B16
 63	VDUP	V19.S[0], V17.S4
 64	
 65	// Little Endian
 66	VREV32	V4.B16, V4.B16
 67	VREV32	V5.B16, V5.B16
 68	VREV32	V6.B16, V6.B16
 69	VREV32	V7.B16, V7.B16
 70	
 71	// LOAD M1 rounds 0-15
 72	VST1.P    [V4.S4], (R3)
 73	VST1.P    [V5.S4], (R3)
 74	VST1.P    [V6.S4], (R3)
 75	VST1.P    [V7.S4], (R3)
 76
 77	// LOAD CS 0
 78    VST1.P    [V0.S4], (R4)  // ABCD pre-round 0
 79	VST1.P    V1.S[0], 4(R4) // E pre-round 0
 80
 81	// Rounds 0-3
 82	VDUP	V19.S[1], V18.S4
 83	VADD	V17.S4, V4.S4, V16.S4
 84	SHA1SU0	V6.S4, V5.S4, V4.S4
 85	HASHUPDATECHOOSE
 86	SHA1SU1	V7.S4, V4.S4
 87
 88	// Rounds 4-7
 89	VADD	V17.S4, V5.S4, V16.S4
 90	SHA1SU0	V7.S4, V6.S4, V5.S4
 91	HASHUPDATECHOOSE
 92	SHA1SU1	V4.S4, V5.S4
 93	// LOAD M1 rounds 16-19
 94	VST1.P    [V4.S4], (R3)
 95
 96	// Rounds 8-11
 97	VADD	V17.S4, V6.S4, V16.S4
 98	SHA1SU0	V4.S4, V7.S4, V6.S4
 99	HASHUPDATECHOOSE
100	SHA1SU1	V5.S4, V6.S4
101	// LOAD M1 rounds 20-23
102	VST1.P    [V5.S4], (R3)
103
104	// Rounds 12-15
105	VADD	V17.S4, V7.S4, V16.S4
106	SHA1SU0	V5.S4, V4.S4, V7.S4
107	HASHUPDATECHOOSE
108	SHA1SU1	V6.S4, V7.S4
109	// LOAD M1 rounds 24-27
110	VST1.P    [V6.S4], (R3)
111
112	// Rounds 16-19
113	VADD	V17.S4, V4.S4, V16.S4
114	SHA1SU0	V6.S4, V5.S4, V4.S4
115	HASHUPDATECHOOSE
116	SHA1SU1	V7.S4, V4.S4
117	// LOAD M1 rounds 28-31
118	VST1.P    [V7.S4], (R3)
119
120	// Rounds 20-23
121	VDUP	V19.S[2], V17.S4
122	VADD	V18.S4, V5.S4, V16.S4
123	SHA1SU0	V7.S4, V6.S4, V5.S4
124	HASHUPDATEPARITY
125	SHA1SU1	V4.S4, V5.S4
126	// LOAD M1 rounds 32-35
127	VST1.P    [V4.S4], (R3)
128
129	// Rounds 24-27
130	VADD	V18.S4, V6.S4, V16.S4
131	SHA1SU0	V4.S4, V7.S4, V6.S4
132	HASHUPDATEPARITY
133	SHA1SU1	V5.S4, V6.S4
134	// LOAD M1 rounds 36-39
135	VST1.P    [V5.S4], (R3)
136
137	// Rounds 28-31
138	VADD	V18.S4, V7.S4, V16.S4
139	SHA1SU0	V5.S4, V4.S4, V7.S4
140	HASHUPDATEPARITY
141	SHA1SU1	V6.S4, V7.S4
142	// LOAD M1 rounds 40-43
143	VST1.P    [V6.S4], (R3)
144
145	// Rounds 32-35
146	VADD	V18.S4, V4.S4, V16.S4
147	SHA1SU0	V6.S4, V5.S4, V4.S4
148	HASHUPDATEPARITY
149	SHA1SU1	V7.S4, V4.S4
150	// LOAD M1 rounds 44-47
151	VST1.P    [V7.S4], (R3)
152
153	// Rounds 36-39
154	VADD	V18.S4, V5.S4, V16.S4
155	SHA1SU0	V7.S4, V6.S4, V5.S4
156	HASHUPDATEPARITY
157	SHA1SU1	V4.S4, V5.S4
158	// LOAD M1 rounds 48-51
159	VST1.P    [V4.S4], (R3)
160
161	// Rounds 44-47
162	VDUP	V19.S[3], V18.S4
163	VADD	V17.S4, V6.S4, V16.S4
164	SHA1SU0	V4.S4, V7.S4, V6.S4
165	HASHUPDATEMAJ
166	SHA1SU1	V5.S4, V6.S4
167	// LOAD M1 rounds 52-55
168	VST1.P    [V5.S4], (R3)
169
170	// Rounds 44-47
171	VADD	V17.S4, V7.S4, V16.S4
172	SHA1SU0	V5.S4, V4.S4, V7.S4
173	HASHUPDATEMAJ
174	SHA1SU1	V6.S4, V7.S4
175	// LOAD M1 rounds 56-59
176	VST1.P    [V6.S4], (R3)
177
178	// Rounds 48-51
179	VADD	V17.S4, V4.S4, V16.S4
180	SHA1SU0	V6.S4, V5.S4, V4.S4
181	HASHUPDATEMAJ
182	SHA1SU1	V7.S4, V4.S4
183	// LOAD M1 rounds 60-63
184	VST1.P    [V7.S4], (R3)
185	
186	// Rounds 52-55
187	VADD	V17.S4, V5.S4, V16.S4
188	SHA1SU0	V7.S4, V6.S4, V5.S4
189	HASHUPDATEMAJ
190	SHA1SU1	V4.S4, V5.S4
191
192	// LOAD CS 58
193    VST1.P    [V3.S4], (R4)  // ABCD pre-round 56
194	VST1.P    V1.S[0], 4(R4) // E pre-round 56
195
196	// Rounds 56-59
197	VADD	V17.S4, V6.S4, V16.S4
198	SHA1SU0	V4.S4, V7.S4, V6.S4
199	HASHUPDATEMAJ
200	SHA1SU1	V5.S4, V6.S4
201
202	// Rounds 60-63
203	VADD	V18.S4, V7.S4, V16.S4
204	SHA1SU0	V5.S4, V4.S4, V7.S4
205	HASHUPDATEPARITY
206	SHA1SU1	V6.S4, V7.S4
207
208	// LOAD CS 65
209    VST1.P    [V3.S4], (R4)  // ABCD pre-round 64
210	VST1.P    V1.S[0], 4(R4) // E pre-round 64
211
212	// Rounds 64-67
213	VADD	V18.S4, V4.S4, V16.S4
214	HASHUPDATEPARITY
215
216	// LOAD M1 rounds 68-79
217	VST1.P    [V4.S4], (R3)
218	VST1.P    [V5.S4], (R3)
219	VST1.P    [V6.S4], (R3)
220	VST1.P    [V7.S4], (R3)
221
222	// Rounds 68-71
223	VADD	V18.S4, V5.S4, V16.S4
224	HASHUPDATEPARITY
225
226	// Rounds 72-75
227	VADD	V18.S4, V6.S4, V16.S4
228	HASHUPDATEPARITY
229
230	// Rounds 76-79
231	VADD	V18.S4, V7.S4, V16.S4
232	HASHUPDATEPARITY
233
234	// Add working registers to hash state.
235	VADD	V2.S4, V0.S4, V0.S4
236	VADD	V1.S4, V20.S4, V20.S4
237
238end:
239	// Update h with final hash values.
240	VST1.P	[V0.S4], (R0)
241	FMOVS	F20, (R0)
242	
243	RET
244
// sha1Ks packs the four SHA-1 round constants into 16 contiguous read-only
// bytes, one 32-bit word each, so a single vector load fetches all of them.
DATA	·sha1Ks+0x0(SB)/4, $0x5a827999	// K0: rounds  0-19
DATA	·sha1Ks+0x4(SB)/4, $0x6ed9eba1	// K1: rounds 20-39
DATA	·sha1Ks+0x8(SB)/4, $0x8f1bbcdc	// K2: rounds 40-59
DATA	·sha1Ks+0xc(SB)/4, $0xca62c1d6	// K3: rounds 60-79
GLOBL	·sha1Ks(SB), RODATA, $16