1// Copyright 2017 The Go Authors. All rights reserved.
  2// Use of this source code is governed by a BSD-style
  3// license that can be found in the LICENSE file.
  4
  5// Package argon2 implements the key derivation function Argon2.
  6// Argon2 was selected as the winner of the Password Hashing Competition and can
  7// be used to derive cryptographic keys from passwords.
  8//
  9// For a detailed specification of Argon2 see [argon2-specs.pdf].
 10//
 11// If you aren't sure which function you need, use Argon2id (IDKey) and
 12// the parameter recommendations for your scenario.
 13//
 14// # Argon2i
 15//
 16// Argon2i (implemented by Key) is the side-channel resistant version of Argon2.
 17// It uses data-independent memory access, which is preferred for password
 18// hashing and password-based key derivation. Argon2i requires more passes over
 19// memory than Argon2id to protect from trade-off attacks. The recommended
 20// parameters (taken from [RFC 9106 Section 7.3]) for non-interactive operations are time=3 and to
 21// use the maximum available memory.
 22//
 23// # Argon2id
 24//
 25// Argon2id (implemented by IDKey) is a hybrid version of Argon2 combining
 26// Argon2i and Argon2d. It uses data-independent memory access for the first
 27// half of the first iteration over the memory and data-dependent memory access
 28// for the rest. Argon2id is side-channel resistant and provides better brute-
 29// force cost savings due to time-memory tradeoffs than Argon2i. The recommended
 30// parameters for non-interactive operations (taken from [RFC 9106 Section 7.3]) are time=1 and to
 31// use the maximum available memory.
 32//
 33// [argon2-specs.pdf]: https://github.com/P-H-C/phc-winner-argon2/blob/master/argon2-specs.pdf
 34// [RFC 9106 Section 7.3]: https://www.rfc-editor.org/rfc/rfc9106.html#section-7.3
 35package argon2
 36
 37import (
 38	"encoding/binary"
 39	"sync"
 40
 41	"golang.org/x/crypto/blake2b"
 42)
 43
 44// The Argon2 version implemented by this package.
 45const Version = 0x13
 46
 47const (
 48	argon2d = iota
 49	argon2i
 50	argon2id
 51)
 52
 53// Key derives a key from the password, salt, and cost parameters using Argon2i
 54// returning a byte slice of length keyLen that can be used as cryptographic
 55// key. The CPU cost and parallelism degree must be greater than zero.
 56//
 57// For example, you can get a derived key for e.g. AES-256 (which needs a
 58// 32-byte key) by doing:
 59//
 60//	key := argon2.Key([]byte("some password"), salt, 3, 32*1024, 4, 32)
 61//
 62// [RFC 9106 Section 7.3] recommends time=3, and memory=32*1024 as a sensible number.
 63// If using that amount of memory (32 MB) is not possible in some contexts then
 64// the time parameter can be increased to compensate.
 65//
 66// The time parameter specifies the number of passes over the memory and the
 67// memory parameter specifies the size of the memory in KiB. For example
 68// memory=32*1024 sets the memory cost to ~32 MB. The number of threads can be
 69// adjusted to the number of available CPUs. The cost parameters should be
 70// increased as memory latency and CPU parallelism increases. Remember to get a
 71// good random salt.
 72//
 73// [RFC 9106 Section 7.3]: https://www.rfc-editor.org/rfc/rfc9106.html#section-7.3
 74func Key(password, salt []byte, time, memory uint32, threads uint8, keyLen uint32) []byte {
 75	return deriveKey(argon2i, password, salt, nil, nil, time, memory, threads, keyLen)
 76}
 77
 78// IDKey derives a key from the password, salt, and cost parameters using
 79// Argon2id returning a byte slice of length keyLen that can be used as
 80// cryptographic key. The CPU cost and parallelism degree must be greater than
 81// zero.
 82//
 83// For example, you can get a derived key for e.g. AES-256 (which needs a
 84// 32-byte key) by doing:
 85//
 86//	key := argon2.IDKey([]byte("some password"), salt, 1, 64*1024, 4, 32)
 87//
 88// [RFC 9106 Section 7.3] recommends time=1, and memory=64*1024 as a sensible number.
 89// If using that amount of memory (64 MB) is not possible in some contexts then
 90// the time parameter can be increased to compensate.
 91//
 92// The time parameter specifies the number of passes over the memory and the
 93// memory parameter specifies the size of the memory in KiB. For example
 94// memory=64*1024 sets the memory cost to ~64 MB. The number of threads can be
 95// adjusted to the numbers of available CPUs. The cost parameters should be
 96// increased as memory latency and CPU parallelism increases. Remember to get a
 97// good random salt.
 98//
 99// [RFC 9106 Section 7.3]: https://www.rfc-editor.org/rfc/rfc9106.html#section-7.3
100func IDKey(password, salt []byte, time, memory uint32, threads uint8, keyLen uint32) []byte {
101	return deriveKey(argon2id, password, salt, nil, nil, time, memory, threads, keyLen)
102}
103
104func deriveKey(mode int, password, salt, secret, data []byte, time, memory uint32, threads uint8, keyLen uint32) []byte {
105	if time < 1 {
106		panic("argon2: number of rounds too small")
107	}
108	if threads < 1 {
109		panic("argon2: parallelism degree too low")
110	}
111	h0 := initHash(password, salt, secret, data, time, memory, uint32(threads), keyLen, mode)
112
113	memory = memory / (syncPoints * uint32(threads)) * (syncPoints * uint32(threads))
114	if memory < 2*syncPoints*uint32(threads) {
115		memory = 2 * syncPoints * uint32(threads)
116	}
117	B := initBlocks(&h0, memory, uint32(threads))
118	processBlocks(B, time, memory, uint32(threads), mode)
119	return extractKey(B, memory, uint32(threads), keyLen)
120}
121
122const (
123	blockLength = 128
124	syncPoints  = 4
125)
126
127type block [blockLength]uint64
128
129func initHash(password, salt, key, data []byte, time, memory, threads, keyLen uint32, mode int) [blake2b.Size + 8]byte {
130	var (
131		h0     [blake2b.Size + 8]byte
132		params [24]byte
133		tmp    [4]byte
134	)
135
136	b2, _ := blake2b.New512(nil)
137	binary.LittleEndian.PutUint32(params[0:4], threads)
138	binary.LittleEndian.PutUint32(params[4:8], keyLen)
139	binary.LittleEndian.PutUint32(params[8:12], memory)
140	binary.LittleEndian.PutUint32(params[12:16], time)
141	binary.LittleEndian.PutUint32(params[16:20], uint32(Version))
142	binary.LittleEndian.PutUint32(params[20:24], uint32(mode))
143	b2.Write(params[:])
144	binary.LittleEndian.PutUint32(tmp[:], uint32(len(password)))
145	b2.Write(tmp[:])
146	b2.Write(password)
147	binary.LittleEndian.PutUint32(tmp[:], uint32(len(salt)))
148	b2.Write(tmp[:])
149	b2.Write(salt)
150	binary.LittleEndian.PutUint32(tmp[:], uint32(len(key)))
151	b2.Write(tmp[:])
152	b2.Write(key)
153	binary.LittleEndian.PutUint32(tmp[:], uint32(len(data)))
154	b2.Write(tmp[:])
155	b2.Write(data)
156	b2.Sum(h0[:0])
157	return h0
158}
159
160func initBlocks(h0 *[blake2b.Size + 8]byte, memory, threads uint32) []block {
161	var block0 [1024]byte
162	B := make([]block, memory)
163	for lane := uint32(0); lane < threads; lane++ {
164		j := lane * (memory / threads)
165		binary.LittleEndian.PutUint32(h0[blake2b.Size+4:], lane)
166
167		binary.LittleEndian.PutUint32(h0[blake2b.Size:], 0)
168		blake2bHash(block0[:], h0[:])
169		for i := range B[j+0] {
170			B[j+0][i] = binary.LittleEndian.Uint64(block0[i*8:])
171		}
172
173		binary.LittleEndian.PutUint32(h0[blake2b.Size:], 1)
174		blake2bHash(block0[:], h0[:])
175		for i := range B[j+1] {
176			B[j+1][i] = binary.LittleEndian.Uint64(block0[i*8:])
177		}
178	}
179	return B
180}
181
182func processBlocks(B []block, time, memory, threads uint32, mode int) {
183	lanes := memory / threads
184	segments := lanes / syncPoints
185
186	processSegment := func(n, slice, lane uint32, wg *sync.WaitGroup) {
187		var addresses, in, zero block
188		if mode == argon2i || (mode == argon2id && n == 0 && slice < syncPoints/2) {
189			in[0] = uint64(n)
190			in[1] = uint64(lane)
191			in[2] = uint64(slice)
192			in[3] = uint64(memory)
193			in[4] = uint64(time)
194			in[5] = uint64(mode)
195		}
196
197		index := uint32(0)
198		if n == 0 && slice == 0 {
199			index = 2 // we have already generated the first two blocks
200			if mode == argon2i || mode == argon2id {
201				in[6]++
202				processBlock(&addresses, &in, &zero)
203				processBlock(&addresses, &addresses, &zero)
204			}
205		}
206
207		offset := lane*lanes + slice*segments + index
208		var random uint64
209		for index < segments {
210			prev := offset - 1
211			if index == 0 && slice == 0 {
212				prev += lanes // last block in lane
213			}
214			if mode == argon2i || (mode == argon2id && n == 0 && slice < syncPoints/2) {
215				if index%blockLength == 0 {
216					in[6]++
217					processBlock(&addresses, &in, &zero)
218					processBlock(&addresses, &addresses, &zero)
219				}
220				random = addresses[index%blockLength]
221			} else {
222				random = B[prev][0]
223			}
224			newOffset := indexAlpha(random, lanes, segments, threads, n, slice, lane, index)
225			processBlockXOR(&B[offset], &B[prev], &B[newOffset])
226			index, offset = index+1, offset+1
227		}
228		wg.Done()
229	}
230
231	for n := uint32(0); n < time; n++ {
232		for slice := uint32(0); slice < syncPoints; slice++ {
233			var wg sync.WaitGroup
234			for lane := uint32(0); lane < threads; lane++ {
235				wg.Add(1)
236				go processSegment(n, slice, lane, &wg)
237			}
238			wg.Wait()
239		}
240	}
241
242}
243
244func extractKey(B []block, memory, threads, keyLen uint32) []byte {
245	lanes := memory / threads
246	for lane := uint32(0); lane < threads-1; lane++ {
247		for i, v := range B[(lane*lanes)+lanes-1] {
248			B[memory-1][i] ^= v
249		}
250	}
251
252	var block [1024]byte
253	for i, v := range B[memory-1] {
254		binary.LittleEndian.PutUint64(block[i*8:], v)
255	}
256	key := make([]byte, keyLen)
257	blake2bHash(key, block[:])
258	return key
259}
260
261func indexAlpha(rand uint64, lanes, segments, threads, n, slice, lane, index uint32) uint32 {
262	refLane := uint32(rand>>32) % threads
263	if n == 0 && slice == 0 {
264		refLane = lane
265	}
266	m, s := 3*segments, ((slice+1)%syncPoints)*segments
267	if lane == refLane {
268		m += index
269	}
270	if n == 0 {
271		m, s = slice*segments, 0
272		if slice == 0 || lane == refLane {
273			m += index
274		}
275	}
276	if index == 0 || lane == refLane {
277		m--
278	}
279	return phi(rand, uint64(m), uint64(s), refLane, lanes)
280}
281
282func phi(rand, m, s uint64, lane, lanes uint32) uint32 {
283	p := rand & 0xFFFFFFFF
284	p = (p * p) >> 32
285	p = (p * m) >> 32
286	return lane*lanes + uint32((s+m-(p+1))%uint64(lanes))
287}