1// Copyright 2017 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package argon2 implements the key derivation function Argon2.
6// Argon2 was selected as the winner of the Password Hashing Competition and can
7// be used to derive cryptographic keys from passwords.
8//
9// For a detailed specification of Argon2 see [argon2-specs.pdf].
10//
11// If you aren't sure which function you need, use Argon2id (IDKey) and
12// the parameter recommendations for your scenario.
13//
14// # Argon2i
15//
16// Argon2i (implemented by Key) is the side-channel resistant version of Argon2.
17// It uses data-independent memory access, which is preferred for password
18// hashing and password-based key derivation. Argon2i requires more passes over
19// memory than Argon2id to protect from trade-off attacks. The recommended
20// parameters (taken from [RFC 9106 Section 7.3]) for non-interactive operations are time=3 and to
21// use the maximum available memory.
22//
23// # Argon2id
24//
25// Argon2id (implemented by IDKey) is a hybrid version of Argon2 combining
26// Argon2i and Argon2d. It uses data-independent memory access for the first
27// half of the first iteration over the memory and data-dependent memory access
28// for the rest. Argon2id is side-channel resistant and provides better brute-
29// force cost savings due to time-memory tradeoffs than Argon2i. The recommended
30// parameters for non-interactive operations (taken from [RFC 9106 Section 7.3]) are time=1 and to
31// use the maximum available memory.
32//
33// [argon2-specs.pdf]: https://github.com/P-H-C/phc-winner-argon2/blob/master/argon2-specs.pdf
34// [RFC 9106 Section 7.3]: https://www.rfc-editor.org/rfc/rfc9106.html#section-7.3
35package argon2
36
37import (
38 "encoding/binary"
39 "sync"
40
41 "golang.org/x/crypto/blake2b"
42)
43
44// The Argon2 version implemented by this package.
45const Version = 0x13
46
47const (
48 argon2d = iota
49 argon2i
50 argon2id
51)
52
53// Key derives a key from the password, salt, and cost parameters using Argon2i
54// returning a byte slice of length keyLen that can be used as cryptographic
55// key. The CPU cost and parallelism degree must be greater than zero.
56//
57// For example, you can get a derived key for e.g. AES-256 (which needs a
58// 32-byte key) by doing:
59//
60// key := argon2.Key([]byte("some password"), salt, 3, 32*1024, 4, 32)
61//
62// [RFC 9106 Section 7.3] recommends time=3, and memory=32*1024 as a sensible number.
63// If using that amount of memory (32 MB) is not possible in some contexts then
64// the time parameter can be increased to compensate.
65//
66// The time parameter specifies the number of passes over the memory and the
67// memory parameter specifies the size of the memory in KiB. For example
68// memory=32*1024 sets the memory cost to ~32 MB. The number of threads can be
69// adjusted to the number of available CPUs. The cost parameters should be
70// increased as memory latency and CPU parallelism increases. Remember to get a
71// good random salt.
72//
73// [RFC 9106 Section 7.3]: https://www.rfc-editor.org/rfc/rfc9106.html#section-7.3
74func Key(password, salt []byte, time, memory uint32, threads uint8, keyLen uint32) []byte {
75 return deriveKey(argon2i, password, salt, nil, nil, time, memory, threads, keyLen)
76}
77
78// IDKey derives a key from the password, salt, and cost parameters using
79// Argon2id returning a byte slice of length keyLen that can be used as
80// cryptographic key. The CPU cost and parallelism degree must be greater than
81// zero.
82//
83// For example, you can get a derived key for e.g. AES-256 (which needs a
84// 32-byte key) by doing:
85//
86// key := argon2.IDKey([]byte("some password"), salt, 1, 64*1024, 4, 32)
87//
88// [RFC 9106 Section 7.3] recommends time=1, and memory=64*1024 as a sensible number.
89// If using that amount of memory (64 MB) is not possible in some contexts then
90// the time parameter can be increased to compensate.
91//
92// The time parameter specifies the number of passes over the memory and the
93// memory parameter specifies the size of the memory in KiB. For example
94// memory=64*1024 sets the memory cost to ~64 MB. The number of threads can be
95// adjusted to the numbers of available CPUs. The cost parameters should be
96// increased as memory latency and CPU parallelism increases. Remember to get a
97// good random salt.
98//
99// [RFC 9106 Section 7.3]: https://www.rfc-editor.org/rfc/rfc9106.html#section-7.3
100func IDKey(password, salt []byte, time, memory uint32, threads uint8, keyLen uint32) []byte {
101 return deriveKey(argon2id, password, salt, nil, nil, time, memory, threads, keyLen)
102}
103
104func deriveKey(mode int, password, salt, secret, data []byte, time, memory uint32, threads uint8, keyLen uint32) []byte {
105 if time < 1 {
106 panic("argon2: number of rounds too small")
107 }
108 if threads < 1 {
109 panic("argon2: parallelism degree too low")
110 }
111 h0 := initHash(password, salt, secret, data, time, memory, uint32(threads), keyLen, mode)
112
113 memory = memory / (syncPoints * uint32(threads)) * (syncPoints * uint32(threads))
114 if memory < 2*syncPoints*uint32(threads) {
115 memory = 2 * syncPoints * uint32(threads)
116 }
117 B := initBlocks(&h0, memory, uint32(threads))
118 processBlocks(B, time, memory, uint32(threads), mode)
119 return extractKey(B, memory, uint32(threads), keyLen)
120}
121
122const (
123 blockLength = 128
124 syncPoints = 4
125)
126
127type block [blockLength]uint64
128
129func initHash(password, salt, key, data []byte, time, memory, threads, keyLen uint32, mode int) [blake2b.Size + 8]byte {
130 var (
131 h0 [blake2b.Size + 8]byte
132 params [24]byte
133 tmp [4]byte
134 )
135
136 b2, _ := blake2b.New512(nil)
137 binary.LittleEndian.PutUint32(params[0:4], threads)
138 binary.LittleEndian.PutUint32(params[4:8], keyLen)
139 binary.LittleEndian.PutUint32(params[8:12], memory)
140 binary.LittleEndian.PutUint32(params[12:16], time)
141 binary.LittleEndian.PutUint32(params[16:20], uint32(Version))
142 binary.LittleEndian.PutUint32(params[20:24], uint32(mode))
143 b2.Write(params[:])
144 binary.LittleEndian.PutUint32(tmp[:], uint32(len(password)))
145 b2.Write(tmp[:])
146 b2.Write(password)
147 binary.LittleEndian.PutUint32(tmp[:], uint32(len(salt)))
148 b2.Write(tmp[:])
149 b2.Write(salt)
150 binary.LittleEndian.PutUint32(tmp[:], uint32(len(key)))
151 b2.Write(tmp[:])
152 b2.Write(key)
153 binary.LittleEndian.PutUint32(tmp[:], uint32(len(data)))
154 b2.Write(tmp[:])
155 b2.Write(data)
156 b2.Sum(h0[:0])
157 return h0
158}
159
160func initBlocks(h0 *[blake2b.Size + 8]byte, memory, threads uint32) []block {
161 var block0 [1024]byte
162 B := make([]block, memory)
163 for lane := uint32(0); lane < threads; lane++ {
164 j := lane * (memory / threads)
165 binary.LittleEndian.PutUint32(h0[blake2b.Size+4:], lane)
166
167 binary.LittleEndian.PutUint32(h0[blake2b.Size:], 0)
168 blake2bHash(block0[:], h0[:])
169 for i := range B[j+0] {
170 B[j+0][i] = binary.LittleEndian.Uint64(block0[i*8:])
171 }
172
173 binary.LittleEndian.PutUint32(h0[blake2b.Size:], 1)
174 blake2bHash(block0[:], h0[:])
175 for i := range B[j+1] {
176 B[j+1][i] = binary.LittleEndian.Uint64(block0[i*8:])
177 }
178 }
179 return B
180}
181
182func processBlocks(B []block, time, memory, threads uint32, mode int) {
183 lanes := memory / threads
184 segments := lanes / syncPoints
185
186 processSegment := func(n, slice, lane uint32, wg *sync.WaitGroup) {
187 var addresses, in, zero block
188 if mode == argon2i || (mode == argon2id && n == 0 && slice < syncPoints/2) {
189 in[0] = uint64(n)
190 in[1] = uint64(lane)
191 in[2] = uint64(slice)
192 in[3] = uint64(memory)
193 in[4] = uint64(time)
194 in[5] = uint64(mode)
195 }
196
197 index := uint32(0)
198 if n == 0 && slice == 0 {
199 index = 2 // we have already generated the first two blocks
200 if mode == argon2i || mode == argon2id {
201 in[6]++
202 processBlock(&addresses, &in, &zero)
203 processBlock(&addresses, &addresses, &zero)
204 }
205 }
206
207 offset := lane*lanes + slice*segments + index
208 var random uint64
209 for index < segments {
210 prev := offset - 1
211 if index == 0 && slice == 0 {
212 prev += lanes // last block in lane
213 }
214 if mode == argon2i || (mode == argon2id && n == 0 && slice < syncPoints/2) {
215 if index%blockLength == 0 {
216 in[6]++
217 processBlock(&addresses, &in, &zero)
218 processBlock(&addresses, &addresses, &zero)
219 }
220 random = addresses[index%blockLength]
221 } else {
222 random = B[prev][0]
223 }
224 newOffset := indexAlpha(random, lanes, segments, threads, n, slice, lane, index)
225 processBlockXOR(&B[offset], &B[prev], &B[newOffset])
226 index, offset = index+1, offset+1
227 }
228 wg.Done()
229 }
230
231 for n := uint32(0); n < time; n++ {
232 for slice := uint32(0); slice < syncPoints; slice++ {
233 var wg sync.WaitGroup
234 for lane := uint32(0); lane < threads; lane++ {
235 wg.Add(1)
236 go processSegment(n, slice, lane, &wg)
237 }
238 wg.Wait()
239 }
240 }
241
242}
243
244func extractKey(B []block, memory, threads, keyLen uint32) []byte {
245 lanes := memory / threads
246 for lane := uint32(0); lane < threads-1; lane++ {
247 for i, v := range B[(lane*lanes)+lanes-1] {
248 B[memory-1][i] ^= v
249 }
250 }
251
252 var block [1024]byte
253 for i, v := range B[memory-1] {
254 binary.LittleEndian.PutUint64(block[i*8:], v)
255 }
256 key := make([]byte, keyLen)
257 blake2bHash(key, block[:])
258 return key
259}
260
261func indexAlpha(rand uint64, lanes, segments, threads, n, slice, lane, index uint32) uint32 {
262 refLane := uint32(rand>>32) % threads
263 if n == 0 && slice == 0 {
264 refLane = lane
265 }
266 m, s := 3*segments, ((slice+1)%syncPoints)*segments
267 if lane == refLane {
268 m += index
269 }
270 if n == 0 {
271 m, s = slice*segments, 0
272 if slice == 0 || lane == refLane {
273 m += index
274 }
275 }
276 if index == 0 || lane == refLane {
277 m--
278 }
279 return phi(rand, uint64(m), uint64(s), refLane, lanes)
280}
281
282func phi(rand, m, s uint64, lane, lanes uint32) uint32 {
283 p := rand & 0xFFFFFFFF
284 p = (p * p) >> 32
285 p = (p * m) >> 32
286 return lane*lanes + uint32((s+m-(p+1))%uint64(lanes))
287}