1/*
  2This is free and unencumbered software released into the public domain.
  3
  4Anyone is free to copy, modify, publish, use, compile, sell, or
  5distribute this software, either in source code form or as a compiled
  6binary, for any purpose, commercial or non-commercial, and by any
  7means.
  8
  9In jurisdictions that recognize copyright laws, the author or authors
 10of this software dedicate any and all copyright interest in the
 11software to the public domain. We make this dedication for the benefit
 12of the public at large and to the detriment of our heirs and
 13successors. We intend this dedication to be an overt act of
 14relinquishment in perpetuity of all present and future rights to this
 15software under copyright law.
 16
 17THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 18EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 19MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 20IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 21OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 22ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 23OTHER DEALINGS IN THE SOFTWARE.
 24
 25For more information, please refer to <http://unlicense.org>
 26*/
 27
 28#ifndef PUBLIC_DOMAIN_BASE64_HPP_
 29#define PUBLIC_DOMAIN_BASE64_HPP_
 30
 31#include <cstdint>
 32#include <iterator>
 33#include <stdexcept>
 34#include <string>
 35
 36class base64_error : public std::runtime_error
 37{
 38public:
 39    using std::runtime_error::runtime_error;
 40};
 41
 42class base64
 43{
 44public:
 45    enum class alphabet
 46    {
 47        /** the alphabet is detected automatically */
 48        auto_,
 49        /** the standard base64 alphabet is used */
 50        standard,
 51        /** like `standard` except that the characters `+` and `/` are replaced by `-` and `_` respectively*/
 52        url_filename_safe
 53    };
 54
 55    enum class decoding_behavior
 56    {
 57        /** if the input is not padded, the remaining bits are ignored */
 58        moderate,
 59        /** if a padding character is encounter decoding is finished */
 60        loose
 61    };
 62
 63    /**
 64     Encodes all the elements from `in_begin` to `in_end` to `out`.
 65
 66     @warning The source and destination cannot overlap. The destination must be able to hold at least
 67     `required_encode_size(std::distance(in_begin, in_end))`, otherwise the behavior depends on the output iterator.
 68
 69     @tparam Input_iterator the source; the returned elements are cast to `std::uint8_t` and should not be greater than
 70     8 bits
 71     @tparam Output_iterator the destination; the elements written to it are from the type `char`
 72     @param in_begin the beginning of the source
 73     @param in_end the ending of the source
 74     @param out the destination iterator
 75     @param alphabet which alphabet should be used
 76     @returns the iterator to the next element past the last element copied
 77     @throws see `Input_iterator` and `Output_iterator`
 78    */
 79    template<typename Input_iterator, typename Output_iterator>
 80    static Output_iterator encode(Input_iterator in_begin, Input_iterator in_end, Output_iterator out,
 81                                  alphabet alphabet = alphabet::standard)
 82    {
 83        constexpr auto pad = '=';
 84        const char* alpha  = alphabet == alphabet::url_filename_safe
 85                                ? "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
 86                                : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 87
 88        while (in_begin != in_end) {
 89            std::uint8_t i0 = 0, i1 = 0, i2 = 0;
 90
 91            // first character
 92            i0 = static_cast<std::uint8_t>(*in_begin);
 93            ++in_begin;
 94
 95            *out = alpha[i0 >> 2 & 0x3f];
 96            ++out;
 97
 98            // part of first character and second
 99            if (in_begin != in_end) {
100                i1 = static_cast<std::uint8_t>(*in_begin);
101                ++in_begin;
102
103                *out = alpha[((i0 & 0x3) << 4) | (i1 >> 4 & 0x0f)];
104                ++out;
105            } else {
106                *out = alpha[(i0 & 0x3) << 4];
107                ++out;
108
109                // last padding
110                *out = pad;
111                ++out;
112
113                // last padding
114                *out = pad;
115                ++out;
116
117                break;
118            }
119
120            // part of second character and third
121            if (in_begin != in_end) {
122                i2 = static_cast<std::uint8_t>(*in_begin);
123                ++in_begin;
124
125                *out = alpha[((i1 & 0xf) << 2) | (i2 >> 6 & 0x03)];
126                ++out;
127            } else {
128                *out = alpha[(i1 & 0xf) << 2];
129                ++out;
130
131                // last padding
132                *out = pad;
133                ++out;
134
135                break;
136            }
137
138            // rest of third
139            *out = alpha[i2 & 0x3f];
140            ++out;
141        }
142
143        return out;
144    }
145    /**
146     Encodes a string.
147
148     @param str the string that should be encoded
149     @param alphabet which alphabet should be used
150     @returns the encoded base64 string
151     @throws see base64::encode()
152    */
153    static std::string encode(const std::string& str, alphabet alphabet = alphabet::standard)
154    {
155        std::string result;
156
157        result.reserve(required_encode_size(str.length()) + 1);
158
159        encode(str.begin(), str.end(), std::back_inserter(result), alphabet);
160
161        return result;
162    }
163    /**
164     Encodes a char array.
165
166     @param buffer the char array
167     @param size the size of the array
168     @param alphabet which alphabet should be used
169     @returns the encoded string
170    */
171    static std::string encode(const char* buffer, std::size_t size, alphabet alphabet = alphabet::standard)
172    {
173        std::string result;
174
175        result.reserve(required_encode_size(size) + 1);
176
177        encode(buffer, buffer + size, std::back_inserter(result), alphabet);
178
179        return result;
180    }
181    /**
182     Decodes all the elements from `in_begin` to `in_end` to `out`. `in_begin` may point to the same location as `out`,
183     in other words: inplace decoding is possible.
184
185     @warning The destination must be able to hold at least `required_decode_size(std::distance(in_begin, in_end))`,
186     otherwise the behavior depends on the output iterator.
187
188     @tparam Input_iterator the source; the returned elements are cast to `char`
189     @tparam Output_iterator the destination; the elements written to it are from the type `std::uint8_t`
190     @param in_begin the beginning of the source
191     @param in_end the ending of the source
192     @param out the destination iterator
193     @param alphabet which alphabet should be used
194     @param behavior the behavior when an error was detected
195     @returns the iterator to the next element past the last element copied
196     @throws base64_error depending on the set behavior
197     @throws see `Input_iterator` and `Output_iterator`
198    */
199    template<typename Input_iterator, typename Output_iterator>
200    static Output_iterator decode(Input_iterator in_begin, Input_iterator in_end, Output_iterator out,
201                                  alphabet alphabet          = alphabet::auto_,
202                                  decoding_behavior behavior = decoding_behavior::moderate)
203    {
204        //constexpr auto pad = '=';
205        std::uint8_t last  = 0;
206        auto bits          = 0;
207
208        while (in_begin != in_end) {
209            auto c = *in_begin;
210            ++in_begin;
211
212            if (c == '=') {
213                break;
214            }
215
216            auto part = _base64_value(alphabet, c);
217
218            // enough bits for one byte
219            if (bits + 6 >= 8) {
220                *out = (last << (8 - bits)) | (part >> (bits - 2));
221                ++out;
222
223                bits -= 2;
224            } else {
225                bits += 6;
226            }
227
228            last = part;
229        }
230
231        // check padding
232        if (behavior != decoding_behavior::loose) {
233            while (in_begin != in_end) {
234                auto c = *in_begin;
235                ++in_begin;
236
237                if (c != '=') {
238                    throw base64_error("invalid base64 character.");
239                }
240            }
241        }
242
243        return out;
244    }
245    /**
246     Decodes a string.
247
248     @param str the base64 encoded string
249     @param alphabet which alphabet should be used
250     @param behavior the behavior when an error was detected
251     @returns the decoded string
252     @throws see base64::decode()
253    */
254    static std::string decode(const std::string& str, alphabet alphabet = alphabet::auto_,
255                              decoding_behavior behavior = decoding_behavior::moderate)
256    {
257        std::string result;
258
259        result.reserve(max_decode_size(str.length()));
260
261        decode(str.begin(), str.end(), std::back_inserter(result), alphabet, behavior);
262
263        return result;
264    }
265    /**
266     Decodes a string.
267
268     @param buffer the base64 encoded buffer
269     @param size the size of the buffer
270     @param alphabet which alphabet should be used
271     @param behavior the behavior when an error was detected
272     @returns the decoded string
273     @throws see base64::decode()
274    */
275    static std::string decode(const char* buffer, std::size_t size, alphabet alphabet = alphabet::auto_,
276                              decoding_behavior behavior = decoding_behavior::moderate)
277    {
278        std::string result;
279
280        result.reserve(max_decode_size(size));
281
282        decode(buffer, buffer + size, std::back_inserter(result), alphabet, behavior);
283
284        return result;
285    }
286    /**
287     Decodes a string inplace.
288
289     @param[in,out] str the base64 encoded string
290     @param alphabet which alphabet should be used
291     @param behavior the behavior when an error was detected
292     @throws base64::decode_inplace()
293    */
294    static void decode_inplace(std::string& str, alphabet alphabet = alphabet::auto_,
295                               decoding_behavior behavior = decoding_behavior::moderate)
296    {
297        str.resize(decode(str.begin(), str.end(), str.begin(), alphabet, behavior) - str.begin());
298    }
299    /**
300     Decodes a char array inplace.
301
302     @param[in,out] str the string array
303     @param size the length of the array
304     @param alphabet which alphabet should be used
305     @param behavior the behavior when an error was detected
306     @returns the pointer to the next element past the last element decoded
307     @throws base64::decode_inplace()
308    */
309    static char* decode_inplace(char* str, std::size_t size, alphabet alphabet = alphabet::auto_,
310                                decoding_behavior behavior = decoding_behavior::moderate)
311    {
312        return decode(str, str + size, str, alphabet, behavior);
313    }
314    /**
315     Returns the required decoding size for a given size. The value is calculated with the following formula:
316
317     $$
318     \lceil \frac{size}{4} \rceil \cdot 3
319     $$
320
321     @param size the size of the encoded input
322     @returns the size of the resulting decoded buffer; this the absolute maximum
323    */
324    static std::size_t max_decode_size(std::size_t size) noexcept
325    {
326        return (size / 4 + (size % 4 ? 1 : 0)) * 3;
327    }
328    /**
329     Returns the required encoding size for a given size. The value is calculated with the following formula:
330
331     $$
332     \lceil \frac{size}{3} \rceil \cdot 4
333     $$
334
335     @param size the size of the decoded input
336     @returns the size of the resulting encoded buffer
337    */
338    static std::size_t required_encode_size(std::size_t size) noexcept
339    {
340        return (size / 3 + (size % 3 ? 1 : 0)) * 4;
341    }
342
343private:
344    static std::uint8_t _base64_value(alphabet& alphabet, char c)
345    {
346        if (c >= 'A' && c <= 'Z') {
347            return c - 'A';
348        } else if (c >= 'a' && c <= 'z') {
349            return c - 'a' + 26;
350        } else if (c >= '0' && c <= '9') {
351            return c - '0' + 52;
352        }
353
354        // comes down to alphabet
355        if (alphabet == alphabet::standard) {
356            if (c == '+') {
357                return 62;
358            } else if (c == '/') {
359                return 63;
360            }
361        } else if (alphabet == alphabet::url_filename_safe) {
362            if (c == '-') {
363                return 62;
364            } else if (c == '_') {
365                return 63;
366            }
367        } // auto detect
368        else {
369            if (c == '+') {
370                alphabet = alphabet::standard;
371
372                return 62;
373            } else if (c == '/') {
374                alphabet = alphabet::standard;
375
376                return 63;
377            } else if (c == '-') {
378                alphabet = alphabet::url_filename_safe;
379
380                return 62;
381            } else if (c == '_') {
382                alphabet = alphabet::url_filename_safe;
383
384                return 63;
385            }
386        }
387
388        throw base64_error("invalid base64 character.");
389    }
390};
391
392#endif // !PUBLIC_DOMAIN_BASE64_HPP_