diff --git a/libsession-util/.gitignore b/libsession-util/.gitignore new file mode 100644 index 0000000000..606666622e --- /dev/null +++ b/libsession-util/.gitignore @@ -0,0 +1,2 @@ +/build +/.cxx/ diff --git a/libsession-util/build.gradle b/libsession-util/build.gradle new file mode 100644 index 0000000000..85d3a58a11 --- /dev/null +++ b/libsession-util/build.gradle @@ -0,0 +1,49 @@ +plugins { + id 'com.android.library' + id 'org.jetbrains.kotlin.android' +} + +android { + namespace 'network.loki.messenger.libsession_util' + compileSdkVersion androidCompileSdkVersion + + defaultConfig { + minSdkVersion androidMinimumSdkVersion + targetSdkVersion androidCompileSdkVersion + + testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner" + externalNativeBuild { + cmake { + cppFlags "--std=c++17" + } + } + ndk { + abiFilters 'x86_64', 'arm64-v8a' // ,'x86', 'armeabi-v7a' TODO: remove after the native library works properly with targets + } + } + + buildTypes { + release { + minifyEnabled false + } + } + externalNativeBuild { + cmake { + path "src/main/cpp/CMakeLists.txt" + version "3.18.1" + } + } + compileOptions { + sourceCompatibility JavaVersion.VERSION_1_8 + targetCompatibility JavaVersion.VERSION_1_8 + } + kotlinOptions { + jvmTarget = '1.8' + } +} + +dependencies { + testImplementation 'junit:junit:4.13.2' + androidTestImplementation 'androidx.test.ext:junit:1.1.4' + androidTestImplementation 'androidx.test.espresso:espresso-core:3.5.0' +} \ No newline at end of file diff --git a/libsession-util/distribution/libsession-util-android/include/oxenc/base32z.h b/libsession-util/distribution/libsession-util-android/include/oxenc/base32z.h new file mode 100644 index 0000000000..36fa63b2d3 --- /dev/null +++ b/libsession-util/distribution/libsession-util-android/include/oxenc/base32z.h @@ -0,0 +1,284 @@ +#pragma once +#include +#include +#include +#include +#include +#include "byte_type.h" + +namespace oxenc { + +namespace detail { + +/// Compile-time generated 
lookup tables for base32z conversion. This is case insensitive (though +/// for byte -> b32z conversion we always produce lower case). +struct b32z_table { + // Store the 0-31 decoded value of every possible char; all the chars that aren't valid are set + // to 0. (If you don't trust your data, check it with is_base32z first, which uses these 0's + // to detect invalid characters -- which is why we want a full 256 element array). + char from_b32z_lut[256]; + // Store the encoded character of every 0-31 (5 bit) value. + char to_b32z_lut[32]; + + // constexpr constructor that fills out the above (and should do it at compile time for any half + // decent compiler). + constexpr b32z_table() noexcept : from_b32z_lut{}, + to_b32z_lut{ + 'y', 'b', 'n', 'd', 'r', 'f', 'g', '8', 'e', 'j', 'k', 'm', 'c', 'p', 'q', 'x', + 'o', 't', '1', 'u', 'w', 'i', 's', 'z', 'a', '3', '4', '5', 'h', '7', '6', '9' + } + { + for (unsigned char c = 0; c < 32; c++) { + unsigned char x = to_b32z_lut[c]; + from_b32z_lut[x] = c; + if (x >= 'a' && x <= 'z') + from_b32z_lut[x - 'a' + 'A'] = c; + } + } + // Convert a b32z encoded character into a 0-31 value + constexpr char from_b32z(unsigned char c) const noexcept { return from_b32z_lut[c]; } + // Convert a 0-31 value into a b32z encoded character + constexpr char to_b32z(unsigned char b) const noexcept { return to_b32z_lut[b]; } +} constexpr b32z_lut; + +// This main point of this static assert is to force the compiler to compile-time build the constexpr tables. +static_assert(b32z_lut.from_b32z('w') == 20 && b32z_lut.from_b32z('T') == 17 && b32z_lut.to_b32z(5) == 'f', ""); + +} // namespace detail + +/// Returns the number of characters required to encode a base32z string from the given number of bytes. +inline constexpr size_t to_base32z_size(size_t byte_size) { return (byte_size*8 + 4) / 5; } // ⌈bits/5⌉ because 5 bits per byte +/// Returns the (maximum) number of bytes required to decode a base32z string of the given size. 
+inline constexpr size_t from_base32z_size(size_t b32z_size) { return b32z_size*5 / 8; } // ⌊bits/8⌋ + +/// Iterable object for on-the-fly base32z encoding. Used internally, but also particularly useful +/// when converting from one encoding to another. +template +struct base32z_encoder final { +private: + InputIt _it, _end; + static_assert(sizeof(decltype(*_it)) == 1, "base32z_encoder requires chars/bytes input iterator"); + // Number of bits held in r; will always be >= 5 until we are at the end. + int bits{_it != _end ? 8 : 0}; + // Holds bits of data we've already read, which might belong to current or next chars + uint_fast16_t r{bits ? static_cast(*_it) : (unsigned char)0}; +public: + using iterator_category = std::input_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = char; + using reference = value_type; + using pointer = void; + base32z_encoder(InputIt begin, InputIt end) : _it{std::move(begin)}, _end{std::move(end)} {} + + base32z_encoder end() { return {_end, _end}; } + + bool operator==(const base32z_encoder& i) { return _it == i._it && bits == i.bits; } + bool operator!=(const base32z_encoder& i) { return !(*this == i); } + + base32z_encoder& operator++() { + assert(bits >= 5); + // Discard the most significant 5 bits + bits -= 5; + r &= (1 << bits) - 1; + // If we end up with less than 5 significant bits then try to pull another 8 bits: + if (bits < 5 && _it != _end) { + if (++_it != _end) { + r = (r << 8) | static_cast(*_it); + bits += 8; + } else if (bits > 0) { + // No more input bytes, so shift `r` to put the bits we have into the most + // significant bit position for the final character. E.g. if we have "11" we want + // the last character to be encoded "11000". 
+ r <<= (5 - bits); + bits = 5; + } + } + return *this; + } + base32z_encoder operator++(int) { base32z_encoder copy{*this}; ++*this; return copy; } + + char operator*() { + // Right-shift off the excess bits we aren't accessing yet + return detail::b32z_lut.to_b32z(r >> (bits - 5)); + } +}; + +/// Converts bytes into a base32z encoded character sequence, writing them starting at `out`. +/// Returns the final value of out (i.e. the iterator positioned just after the last written base32z +/// character). +template +OutputIt to_base32z(InputIt begin, InputIt end, OutputIt out) { + static_assert(sizeof(decltype(*begin)) == 1, "to_base32z requires chars/bytes"); + base32z_encoder it{begin, end}; + return std::copy(it, it.end(), out); +} + +/// Creates a base32z string from an iterator pair of a byte sequence. +template +std::string to_base32z(It begin, It end) { + std::string base32z; + if constexpr (std::is_base_of_v::iterator_category>) { + using std::distance; + base32z.reserve(to_base32z_size(distance(begin, end))); + } + to_base32z(begin, end, std::back_inserter(base32z)); + return base32z; +} + +/// Creates a base32z string from an iterable, std::string-like object +template +std::string to_base32z(std::basic_string_view s) { return to_base32z(s.begin(), s.end()); } +inline std::string to_base32z(std::string_view s) { return to_base32z<>(s); } + +/// Returns true if the given [begin, end) range is an acceptable base32z string: specifically every +/// character must be in the base32z alphabet, and the string must be a valid encoding length that +/// could have been produced by to_base32z (i.e. some lengths are impossible). 
+template +constexpr bool is_base32z(It begin, It end) { + static_assert(sizeof(decltype(*begin)) == 1, "is_base32z requires chars/bytes"); + size_t count = 0; + constexpr bool random = std::is_base_of_v::iterator_category>; + if constexpr (random) { + using std::distance; + count = distance(begin, end) % 8; + if (count == 1 || count == 3 || count == 6) // see below + return false; + } + for (; begin != end; ++begin) { + auto c = static_cast(*begin); + if (detail::b32z_lut.from_b32z(c) == 0 && !(c == 'y' || c == 'Y')) + return false; + if constexpr (!random) + count++; + } + // Check for a valid length. + // - 5n + 0 bytes encodes to 8n chars (no padding bits) + // - 5n + 1 bytes encodes to 8n+2 chars (last 2 bits are padding) + // - 5n + 2 bytes encodes to 8n+4 chars (last 4 bits are padding) + // - 5n + 3 bytes encodes to 8n+5 chars (last 1 bit is padding) + // - 5n + 4 bytes encodes to 8n+7 chars (last 3 bits are padding) + if constexpr (!random) + if (count %= 8; count == 1 || count == 3 || count == 6) + return false; + return true; +} + +/// Returns true if all elements in the string-like value are base32z characters +template +constexpr bool is_base32z(std::basic_string_view s) { return is_base32z(s.begin(), s.end()); } +constexpr bool is_base32z(std::string_view s) { return is_base32z<>(s); } + +/// Iterable object for on-the-fly base32z decoding. Used internally, but also particularly useful +/// when converting from one encoding to another. The input range must be a valid base32z +/// encoded string. +/// +/// Note that we ignore "padding" bits without requiring that they actually be 0. For instance, the +/// bytes "\ff\ff" are ideally encoded as "999o" (16 bits of 1s + 4 padding 0 bits), but we don't +/// require that the padding bits be 0. That is, "9999", "9993", etc. will all decode to the same +/// \ff\ff output string. 
+template +struct base32z_decoder final { +private: + InputIt _it, _end; + static_assert(sizeof(decltype(*_it)) == 1, "base32z_decoder requires chars/bytes input iterator"); + uint_fast16_t in = 0; + int bits = 0; // number of bits loaded into `in`; will be in [8, 12] until we hit the end +public: + using iterator_category = std::input_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = char; + using reference = value_type; + using pointer = void; + base32z_decoder(InputIt begin, InputIt end) : _it{std::move(begin)}, _end{std::move(end)} { + if (_it != _end) + load_byte(); + } + + base32z_decoder end() { return {_end, _end}; } + + bool operator==(const base32z_decoder& i) { return _it == i._it; } + bool operator!=(const base32z_decoder& i) { return _it != i._it; } + + base32z_decoder& operator++() { + // Discard 8 most significant bits + bits -= 8; + in &= (1 << bits) - 1; + if (++_it != _end) + load_byte(); + return *this; + } + base32z_decoder operator++(int) { base32z_decoder copy{*this}; ++*this; return copy; } + + char operator*() { + return in >> (bits - 8); + } + +private: + void load_in() { + in = in << 5 + | detail::b32z_lut.from_b32z(static_cast(*_it)); + bits += 5; + } + + void load_byte() { + load_in(); + if (bits < 8 && ++_it != _end) + load_in(); + + // If we hit the _end iterator above then we hit the end of the input with fewer than 8 bits + // accumulated to make a full byte. For a properly encoded base32z string this should only + // be possible with 0-4 bits of all 0s; these are essentially "padding" bits (e.g. encoding + // 2 byte (16 bits) requires 4 b32z chars (20 bits), where only the first 16 bits are + // significant). Ideally any padding bits should be 0, but we don't check that and rather + // just ignore them. + // + // It also isn't possible to get here with 5-7 bits if the string passes `is_base32z` + // because the length checks we do there disallow such a length as valid. 
(If you were to + // pass such a string to us anyway then we are technically UB, but the current + // implementation just ignore the extra bits as if they are extra padding). + } +}; + +/// Converts a sequence of base32z digits to bytes. Undefined behaviour if any characters are not +/// valid base32z alphabet characters. It is permitted for the input and output ranges to overlap +/// as long as `out` is no later than `begin`. +/// +template +OutputIt from_base32z(InputIt begin, InputIt end, OutputIt out) { + static_assert(sizeof(decltype(*begin)) == 1, "from_base32z requires chars/bytes"); + base32z_decoder it{begin, end}; + auto bend = it.end(); + while (it != bend) + *out++ = static_cast>(*it++); + return out; +} + +/// Convert a base32z sequence into a std::string of bytes. Undefined behaviour if any characters +/// are not valid (case-insensitive) base32z characters. +template +std::string from_base32z(It begin, It end) { + std::string bytes; + if constexpr (std::is_base_of_v::iterator_category>) { + using std::distance; + bytes.reserve(from_base32z_size(distance(begin, end))); + } + from_base32z(begin, end, std::back_inserter(bytes)); + return bytes; +} + +/// Converts base32z digits from a std::string-like object into a std::string of bytes. Undefined +/// behaviour if any characters are not valid (case-insensitive) base32z characters. 
+template +std::string from_base32z(std::basic_string_view s) { return from_base32z(s.begin(), s.end()); } +inline std::string from_base32z(std::string_view s) { return from_base32z<>(s); } + +inline namespace literals { + inline std::string operator""_b32z(const char* x, size_t n) { + std::string_view in{x, n}; + if (!is_base32z(in)) + throw std::invalid_argument{"base32z literal is not base32z"}; + return from_base32z(in); + } +} + +} diff --git a/libsession-util/distribution/libsession-util-android/include/oxenc/base64.h b/libsession-util/distribution/libsession-util-android/include/oxenc/base64.h new file mode 100644 index 0000000000..bcd30aa1bf --- /dev/null +++ b/libsession-util/distribution/libsession-util-android/include/oxenc/base64.h @@ -0,0 +1,354 @@ +#pragma once +#include +#include +#include +#include +#include +#include "byte_type.h" + +namespace oxenc { + +namespace detail { + +/// Compile-time generated lookup tables for base64 conversion. +struct b64_table { + // Store the 0-63 decoded value of every possible char; all the chars that aren't valid are set + // to 0. (If you don't trust your data, check it with is_base64 first, which uses these 0's + // to detect invalid characters -- which is why we want a full 256 element array). + char from_b64_lut[256]; + // Store the encoded character of every 0-63 (6 bit) value. + char to_b64_lut[64]; + + // constexpr constructor that fills out the above (and should do it at compile time for any half + // decent compiler). 
+ constexpr b64_table() noexcept : from_b64_lut{}, to_b64_lut{} { + for (unsigned char c = 0; c < 26; c++) { + from_b64_lut[(unsigned char)('A' + c)] = 0 + c; + to_b64_lut[ (unsigned char)( 0 + c)] = 'A' + c; + } + for (unsigned char c = 0; c < 26; c++) { + from_b64_lut[(unsigned char)('a' + c)] = 26 + c; + to_b64_lut[ (unsigned char)(26 + c)] = 'a' + c; + } + for (unsigned char c = 0; c < 10; c++) { + from_b64_lut[(unsigned char)('0' + c)] = 52 + c; + to_b64_lut[ (unsigned char)(52 + c)] = '0' + c; + } + to_b64_lut[62] = '+'; from_b64_lut[(unsigned char) '+'] = 62; + to_b64_lut[63] = '/'; from_b64_lut[(unsigned char) '/'] = 63; + } + // Convert a b64 encoded character into a 0-63 value + constexpr char from_b64(unsigned char c) const noexcept { return from_b64_lut[c]; } + // Convert a 0-31 value into a b64 encoded character + constexpr char to_b64(unsigned char b) const noexcept { return to_b64_lut[b]; } +} constexpr b64_lut; + +// This main point of this static assert is to force the compiler to compile-time build the constexpr tables. +static_assert(b64_lut.from_b64('/') == 63 && b64_lut.from_b64('7') == 59 && b64_lut.to_b64(38) == 'm', ""); + +} // namespace detail + +/// Returns the number of characters required to encode a base64 string from the given number of bytes. +inline constexpr size_t to_base64_size(size_t byte_size, bool padded = true) { + return padded + ? (byte_size + 2) / 3 * 4 // bytes*4/3, rounded up to the next multiple of 4 + : (byte_size * 4 + 2) / 3; // ⌈bytes*4/3⌉ +} +/// Returns the (maximum) number of bytes required to decode a base64 string of the given size. +/// Note that this may overallocate by 1-2 bytes if the size includes 1-2 padding chars. +inline constexpr size_t from_base64_size(size_t b64_size) { + return b64_size * 3 / 4; // == ⌊bits/8⌋; floor because we ignore trailing "impossible" bits (see below) +} + +/// Iterable object for on-the-fly base64 encoding. 
Used internally, but also particularly useful +/// when converting from one encoding to another. +template +struct base64_encoder final { +private: + InputIt _it, _end; + static_assert(sizeof(decltype(*_it)) == 1, "base64_encoder requires chars/bytes input iterator"); + // How much padding (at most) we can add at the end + int padding; + // Number of bits held in r; will always be >= 6 until we are at the end. + int bits{_it != _end ? 8 : 0}; + // Holds bits of data we've already read, which might belong to current or next chars + uint_fast16_t r{bits ? static_cast(*_it) : (unsigned char)0}; +public: + using iterator_category = std::input_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = char; + using reference = value_type; + using pointer = void; + base64_encoder(InputIt begin, InputIt end, bool padded = true) + : _it{std::move(begin)}, _end{std::move(end)}, padding{padded} {} + + base64_encoder end() { return {_end, _end, false}; } + + bool operator==(const base64_encoder& i) { return _it == i._it && bits == i.bits && padding == i.padding; } + bool operator!=(const base64_encoder& i) { return !(*this == i); } + + base64_encoder& operator++() { + if (bits == 0) { + padding--; + return *this; + } + assert(bits >= 6); + // Discard the most significant 6 bits + bits -= 6; + r &= (1 << bits) - 1; + // If we end up with less than 6 significant bits then try to pull another 8 bits: + if (bits < 6 && _it != _end) { + if (++_it != _end) { + r = (r << 8) | static_cast(*_it); + bits += 8; + } else if (bits > 0) { + // No more input bytes, so shift `r` to put the bits we have into the most + // significant bit position for the final character, and figure out how many padding + // bytes we want to append. E.g. if we have "11" we want + // the last character to be encoded "110000". 
+ if (padding) { + // padding should be: + // 3n+0 input => 4n output, no padding, handled below + // 3n+1 input => 4n+2 output + 2 padding; we'll land here with 2 trailing bits + // 3n+2 input => 4n+3 output + 1 padding; we'll land here with 4 trailing bits + padding = 3 - bits / 2; + } + r <<= (6 - bits); + bits = 6; + } else { + padding = 0; // No excess bits, so input was a multiple of 3 and thus no padding + } + } + return *this; + } + base64_encoder operator++(int) { base64_encoder copy{*this}; ++*this; return copy; } + + char operator*() { + if (bits == 0 && padding) + return '='; + // Right-shift off the excess bits we aren't accessing yet + return detail::b64_lut.to_b64(r >> (bits - 6)); + } +}; + +/// Converts bytes into a base64 encoded character sequence, writing them starting at `out`. +/// Returns the final value of out (i.e. the iterator positioned just after the last written base64 +/// character). +template +OutputIt to_base64(InputIt begin, InputIt end, OutputIt out, bool padded = true) { + static_assert(sizeof(decltype(*begin)) == 1, "to_base64 requires chars/bytes"); + auto it = base64_encoder{begin, end, padded}; + return std::copy(it, it.end(), out); +} + +/// Creates and returns a base64 string from an iterator pair of a character sequence. The +/// resulting string will have '=' padding, if appropriate. +template +std::string to_base64(It begin, It end) { + std::string base64; + if constexpr (std::is_base_of_v::iterator_category>) { + using std::distance; + base64.reserve(to_base64_size(distance(begin, end))); + } + to_base64(begin, end, std::back_inserter(base64)); + return base64; +} + +/// Creates and returns a base64 string from an iterator pair of a character sequence. The +/// resulting string will not be padded. 
+template +std::string to_base64_unpadded(It begin, It end) { + std::string base64; + if constexpr (std::is_base_of_v::iterator_category>) { + using std::distance; + base64.reserve(to_base64_size(distance(begin, end), false)); + } + to_base64(begin, end, std::back_inserter(base64), false); + return base64; +} + +/// Creates a base64 string from an iterable, std::string-like object. The string will have '=' +/// padding, if appropriate. +template +std::string to_base64(std::basic_string_view s) { return to_base64(s.begin(), s.end()); } +inline std::string to_base64(std::string_view s) { return to_base64<>(s); } + +/// Creates a base64 string from an iterable, std::string-like object. The string will not be +/// padded. +template +std::string to_base64_unpadded(std::basic_string_view s) { return to_base64_unpadded(s.begin(), s.end()); } +inline std::string to_base64_unpadded(std::string_view s) { return to_base64_unpadded<>(s); } + +/// Returns true if the range is a base64 encoded value; we allow (but do not require) '=' padding, +/// but only at the end, only 1 or 2, and only if it pads out the total to a multiple of 4. +/// Otherwise the string must contain only valid base64 characters, and must not have a length of +/// 4n+1 (because that cannot be produced by base64 encoding). +template +constexpr bool is_base64(It begin, It end) { + static_assert(sizeof(decltype(*begin)) == 1, "is_base64 requires chars/bytes"); + using std::distance; + using std::prev; + size_t count = 0; + constexpr bool random = std::is_base_of_v::iterator_category>; + if constexpr (random) { + count = distance(begin, end) % 4; + if (count == 1) + return false; + } + + // Allow 1 or 2 padding chars *if* they pad it to a multiple of 4. 
+ if (begin != end && distance(begin, end) % 4 == 0) { + auto last = prev(end); + if (static_cast(*last) == '=') + end = last--; + if (static_cast(*last) == '=') + end = last; + } + + for (; begin != end; ++begin) { + auto c = static_cast(*begin); + if (detail::b64_lut.from_b64(c) == 0 && c != 'A') + return false; + if constexpr (!random) + count++; + } + + if constexpr (!random) + if (count % 4 == 1) // base64 encoding will produce 4n, 4n+2, 4n+3, but never 4n+1 + return false; + + return true; +} + +/// Returns true if the string-like value is a base64 encoded value +template +constexpr bool is_base64(std::basic_string_view s) { return is_base64(s.begin(), s.end()); } +constexpr bool is_base64(std::string_view s) { return is_base64(s.begin(), s.end()); } + +/// Iterable object for on-the-fly base64 decoding. Used internally, but also particularly useful +/// when converting from one encoding to another. The input range must be a valid base64 encoded +/// string (with or without padding). +/// +/// Note that we ignore "padding" bits without requiring that they actually be 0. For instance, the +/// bytes "\ff\ff" are ideally encoded as "//8=" (16 bits of 1s + 2 padding 0 bits, then a full +/// 6-bit padding char). We don't, however, require that the padding bits be 0. That is, "///=", +/// "//9=", "//+=", etc. will all decode to the same \ff\ff output string. 
+template +struct base64_decoder final { +private: + InputIt _it, _end; + static_assert(sizeof(decltype(*_it)) == 1, "base64_decoder requires chars/bytes input iterator"); + uint_fast16_t in = 0; + int bits = 0; // number of bits loaded into `in`; will be in [8, 12] until we hit the end +public: + using iterator_category = std::input_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = char; + using reference = value_type; + using pointer = void; + base64_decoder(InputIt begin, InputIt end) : _it{std::move(begin)}, _end{std::move(end)} { + if (_it != _end) + load_byte(); + } + + base64_decoder end() { return {_end, _end}; } + + bool operator==(const base64_decoder& i) { return _it == i._it; } + bool operator!=(const base64_decoder& i) { return _it != i._it; } + + base64_decoder& operator++() { + // Discard 8 most significant bits + bits -= 8; + in &= (1 << bits) - 1; + if (++_it != _end) + load_byte(); + return *this; + } + base64_decoder operator++(int) { base64_decoder copy{*this}; ++*this; return copy; } + + char operator*() { + return in >> (bits - 8); + } + +private: + void load_in() { + // We hit padding trying to read enough for a full byte, so we're done. (And since you were + // already supposed to have checked validity with is_base64, the padding can only be at the + // end). + auto c = static_cast(*_it); + if (c == '=') { + _it = _end; + bits = 0; + return; + } + + in = in << 6 + | detail::b64_lut.from_b64(c); + bits += 6; + } + + void load_byte() { + load_in(); + if (bits && bits < 8 && ++_it != _end) + load_in(); + + // If we hit the _end iterator above then we hit the end of the input (or hit padding) with + // fewer than 8 bits accumulated to make a full byte. For a properly encoded base64 string + // this should only be possible with 0, 2, or 4 bits of all 0s; these are essentially + // "padding" bits (e.g. encoding 2 byte (16 bits) requires 3 b64 chars (18 bits), where + // only the first 16 bits are significant). 
Ideally any padding bits should be 0, but we + // don't check that and rather just ignore them. + } +}; + +/// Converts a sequence of base64 digits to bytes. Undefined behaviour if any characters are not +/// valid base64 alphabet characters. It is permitted for the input and output ranges to overlap as +/// long as `out` is no later than `begin`. Trailing padding characters are permitted but not +/// required. Returns the final value of out (that is, the iterator positioned just after the +/// last written character). +/// +/// It is possible to provide "impossible" base64 encoded values; for example "YWJja" which has 30 +/// bits of data even though a base64 encoded byte string should have 24 (4 chars) or 36 (6 chars) +/// bits for a 3- and 4-byte input, respectively. We ignore any such "impossible" bits, and +/// similarly ignore impossible bits in the bit "overhang"; that means "YWJjZA==" (the proper +/// encoding of "abcd") and "YWJjZB", "YWJjZC", ..., "YWJjZP" all decode to the same "abcd" value: +/// the last 4 bits of the last character are essentially considered padding. +template +OutputIt from_base64(InputIt begin, InputIt end, OutputIt out) { + static_assert(sizeof(decltype(*begin)) == 1, "from_base64 requires chars/bytes"); + base64_decoder it{begin, end}; + auto bend = it.end(); + while (it != bend) + *out++ = static_cast>(*it++); + return out; +} + +/// Converts base64 digits from a iterator pair of characters into a std::string of bytes. +/// Undefined behaviour if any characters are not valid base64 characters. +template +std::string from_base64(It begin, It end) { + std::string bytes; + if constexpr (std::is_base_of_v::iterator_category>) { + using std::distance; + bytes.reserve(from_base64_size(distance(begin, end))); + } + from_base64(begin, end, std::back_inserter(bytes)); + return bytes; +} + +/// Converts base64 digits from a std::string-like object into a std::string of bytes. 
Undefined +/// behaviour if any characters are not valid base64 characters. +template +std::string from_base64(std::basic_string_view s) { return from_base64(s.begin(), s.end()); } +inline std::string from_base64(std::string_view s) { return from_base64<>(s); } + +inline namespace literals { + inline std::string operator""_b64(const char* x, size_t n) { + std::string_view in{x, n}; + if (!is_base64(in)) + throw std::invalid_argument{"base64 literal is not base64"}; + return from_base64(in); + } +} + +} diff --git a/libsession-util/distribution/libsession-util-android/include/oxenc/bt.h b/libsession-util/distribution/libsession-util-android/include/oxenc/bt.h new file mode 100644 index 0000000000..23220d1a18 --- /dev/null +++ b/libsession-util/distribution/libsession-util-android/include/oxenc/bt.h @@ -0,0 +1,5 @@ +#pragma once +#include "bt_value.h" +#include "bt_serialize.h" +#include "bt_producer.h" +#include "bt_value_producer.h" diff --git a/libsession-util/distribution/libsession-util-android/include/oxenc/bt_producer.h b/libsession-util/distribution/libsession-util-android/include/oxenc/bt_producer.h new file mode 100644 index 0000000000..d506902304 --- /dev/null +++ b/libsession-util/distribution/libsession-util-android/include/oxenc/bt_producer.h @@ -0,0 +1,445 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include "variant.h" + +namespace oxenc { + + using namespace std::literals; + + class bt_dict_producer; + +#if defined(__APPLE__) && defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && __MAC_OS_X_VERSION_MIN_REQUIRED < 101500 +#define OXENC_APPLE_TO_CHARS_WORKAROUND +/// Really simplistic version of std::to_chars on Apple, because Apple doesn't allow `std::to_chars` +/// to be used if targetting anything before macOS 10.15. The buffer must have at least 20 chars of +/// space (for int types up to 64-bit); we return a pointer one past the last char written. 
+ template + char* apple_to_chars10(char* buf, IntType val) { + static_assert(std::is_integral_v && sizeof(IntType) <= 8); + if constexpr (std::is_signed_v) { + if (val < 0) { + buf[0] = '-'; + return apple_to_chars10(buf+1, static_cast>(-val)); + } + } + + // write it to the buffer in reverse (because we don't know how many chars we'll need yet, but + // writing in reverse will figure that out). + char* pos = buf; + do { + *pos++ = '0' + static_cast(val % 10); + val /= 10; + } while (val > 0); + + // Reverse the digits into the right order + int swaps = (pos - buf) / 2; + for (int i = 0; i < swaps; i++) + std::swap(buf[i], pos[-1 - i]); + + return pos; + } +#endif + + +/// Class that allows you to build a bt-encoded list manually, without copying or allocating memory. +/// This is essentially the reverse of bt_list_consumer: where it lets you stream-parse a buffer, +/// this class lets you build directly into a buffer that you own. +/// +/// Out-of-buffer-space errors throw + class bt_list_producer { + friend class bt_dict_producer; + + // Our pointers to the next write position and the past-the-end pointer of the buffer. + using buf_span = std::pair; + // Our data is a begin/end pointer pair for the root list, or a pointer to our parent if a + // sublist. + std::variant data; + // Reference to the write buffer; this is simply a reference to the value inside `data` for the + // root element, and a pointer to the root's value for sublists/subdicts. + buf_span& buffer; + // True indicates we have an open child list/dict + bool has_child = false; + // The range that contains this currently serialized value; `from` equals wherever the `l` was + // written that started this list and `to` is one past the `e` that ends it. Note that `to` + // will always be ahead of `buf_span.first` because we always write the `e`s to close open lists + // but these `e`s don't advance the write position (they will be overwritten if we append). 
+ const char* const from; + const char* to; + + // Sublist constructors + bt_list_producer(bt_list_producer* parent, std::string_view prefix = "l"sv); + bt_list_producer(bt_dict_producer* parent, std::string_view prefix = "l"sv); + + // Common constructor for both list and dict producer + bt_list_producer(char* begin, char* end, std::string_view prefix); + + // Does the actual appending to the buffer, and throwing if we'd overrun. If advance is false + // then we append without moving the buffer pointer (primarily when we append intermediate `e`s + // that we will overwrite if more data is added). This means that the next write will overwrite + // whatever was previously written by an `advance=false` call. + void buffer_append(std::string_view d, bool advance = true); + + // Appends the 'e's into the buffer to close off open sublists/dicts *without* advancing the + // buffer position; we do this after each append so that the buffer always contains valid + // encoded data, even while we are still appending to it, and so that appending something raises + // a length_error if appending it would not leave enough space for the required e's to close the + // open list(s)/dict(s). + void append_intermediate_ends(size_t count = 1); + + // Writes an integer to the given buffer; returns the one-past-the-data pointer. Up to 20 bytes + // will be written and must be available in buf. Used for both string and integer + // serialization. + template + char* write_integer(IntType val, char* buf) { + static_assert(sizeof(IntType) <= 64); + +#ifndef OXENC_APPLE_TO_CHARS_WORKAROUND + auto [ptr, ec] = std::to_chars(buf, buf+20, val); + assert(ec == std::errc()); + return ptr; +#else + // Hate apple. + return apple_to_chars10(buf, val); +#endif + } + + // Serializes an integer value and appends it to the output buffer. Does not call + // append_intermediate_ends(). 
+ template , int> = 0> + void append_impl(IntType val) { + char buf[22]; // 'i' + base10 representation + 'e' + buf[0] = 'i'; + auto* ptr = write_integer(val, buf+1); + *ptr++ = 'e'; + buffer_append({buf, static_cast(ptr-buf)}); + } + + // Appends a string value, but does not call append_intermediate_ends() + void append_impl(std::string_view s) { + char buf[21]{}; // length + ':' + auto *ptr = write_integer(s.size(), buf); + *ptr++ = ':'; + buffer_append({buf, static_cast(ptr - buf)}); + buffer_append(s); + } + + public: + bt_list_producer() = delete; + bt_list_producer(const bt_list_producer&) = delete; + bt_list_producer& operator=(const bt_list_producer&) = delete; + bt_list_producer& operator=(bt_list_producer&&) = delete; + bt_list_producer(bt_list_producer&& other); + + /// Constructs a list producer that writes into the range [begin, end). If a write would go + /// beyond the end of the buffer an exception is raised. Note that this will happen during + /// construction if the given buffer is not large enough to contain the `le` encoding of an + /// empty list. + bt_list_producer(char* begin, char* end) : bt_list_producer{begin, end, "l"sv} {} + + /// Constructs a list producer that writes into the range [begin, begin+size). If a write would + /// go beyond the end of the buffer an exception is raised. + bt_list_producer(char* begin, size_t len) : bt_list_producer{begin, begin + len, "l"sv} {} + + ~bt_list_producer(); + + /// Returns a string_view into the currently serialized data buffer. Note that the returned + /// view includes the `e` list end serialization markers which will be overwritten if the list + /// (or an active sublist/subdict) is appended to. + std::string_view view() const { + return {from, static_cast(to-from)}; + } + + /// Returns the end position in the buffer. 
+ const char* end() const { return to; } + + /// Appends an element containing binary string data + void append(std::string_view data) + { + if (has_child) throw std::logic_error{"Cannot append to list when a sublist is active"}; + append_impl(data); + append_intermediate_ends(); + } + + bt_list_producer& operator+=(std::string_view data) { append(data); return *this; } + + /// Appends an integer + template , int> = 0> + void append(IntType i) { + if (has_child) throw std::logic_error{"Cannot append to list when a sublist is active"}; + append_impl(i); + append_intermediate_ends(); + } + + template , int> = 0> + bt_list_producer& operator+=(IntType i) { append(i); return *this; } + + /// Appends elements from the range [from, to) to the list. This does *not* append the elements + /// as a sublist: for that you should use something like: `l.append_list().append(from, to);` + template + void append(ForwardIt from, ForwardIt to) { + if (has_child) throw std::logic_error{"Cannot append to list when a sublist is active"}; + while (from != to) + append_impl(*from++); + append_intermediate_ends(); + } + + /// Appends a sublist to this list. Returns a new bt_list_producer that references the parent + /// list. The parent cannot be added to until the sublist is destroyed. This is meant to be + /// used via RAII: + /// + /// buf data[16]; + /// bt_list_producer list{data, sizeof(data)}; + /// { + /// auto sublist = list.append_list(); + /// sublist.append(42); + /// } + /// list.append(1); + /// // `data` now contains: `lli42eei1ee` + /// + /// If doing more complex lifetime management, take care not to allow the child instance to + /// outlive the parent. + bt_list_producer append_list(); + + /// Appends a dict to this list. Returns a new bt_dict_producer that references the parent + /// list. The parent cannot be added to until the subdict is destroyed. This is meant to be + /// used via RAII (see append_list() for details). 
+ /// + /// If doing more complex lifetime management, take care not to allow the child instance to + /// outlive the parent. + bt_dict_producer append_dict(); + + /// Appends a bt_value, bt_dict, or bt_list to this bt_list. You must include the + /// bt_value_producer.h header (either directly or via bt.h) to use this method. + template + void append_bt(const T& bt); + }; + + +/// Class that allows you to build a bt-encoded dict manually, without copying or allocating memory. +/// This is essentially the reverse of bt_dict_consumer: where it lets you stream-parse a buffer, +/// this class lets you build directly into a buffer that you own. +/// +/// Note that bt-encoded dicts *must* be produced in (ASCII) ascending key order, but that this is +/// only tracked/enforced for non-release builds (i.e. without -DNDEBUG). + class bt_dict_producer : bt_list_producer { + friend class bt_list_producer; + + // Subdict constructors + + bt_dict_producer(bt_list_producer* parent) : bt_list_producer{parent, "d"sv} {} + bt_dict_producer(bt_dict_producer* parent) : bt_list_producer{parent, "d"sv} {} + + // Checks a just-written key string to make sure it is monotonically increasing from the last + // key. Does nothing in a release build. +#ifdef NDEBUG + constexpr void check_incrementing_key(size_t) const {} +#else + // String view into the buffer where we wrote the previous key. + std::string_view last_key; + void check_incrementing_key(size_t size) { + std::string_view this_key{buffer.first - size, size}; + assert(!last_key.data() || this_key > last_key); + last_key = this_key; + } +#endif + + public: + /// Constructs a dict producer that writes into the range [begin, end). If a write would go + /// beyond the end of the buffer an exception is raised. Note that this will happen during + /// construction if the given buffer is not large enough to contain the `de` encoding of an + /// empty list. 
+ bt_dict_producer(char* begin, char* end) : bt_list_producer{begin, end, "d"sv} {} + + /// Constructs a list producer that writes into the range [begin, begin+size). If a write would + /// go beyond the end of the buffer an exception is raised. + bt_dict_producer(char* begin, size_t len) : bt_list_producer{begin, begin + len, "d"sv} {} + + /// Returns a string_view into the currently serialized data buffer. Note that the returned + /// view includes the `e` dict end serialization markers which will be overwritten if the dict + /// (or an active sublist/subdict) is appended to. + std::string_view view() const { return bt_list_producer::view(); } + + /// Returns the end position in the buffer. + const char* end() const { return bt_list_producer::end(); } + + /// Appends a key-value pair with a string or integer value. The key must be > the last key + /// added, but this is only enforced (with an assertion) in debug builds. + template || std::is_integral_v, int> = 0> + void append(std::string_view key, const T& value) { + if (has_child) throw std::logic_error{"Cannot append to list when a sublist is active"}; + append_impl(key); + check_incrementing_key(key.size()); + append_impl(value); + append_intermediate_ends(); + } + + /// Appends pairs from the range [from, to) to the dict. Elements must have a .first + /// convertible to a string_view, and a .second that is either string view convertible or an + /// integer. This does *not* append the elements as a subdict: for that you should use + /// something like: `l.append_dict().append(key, from, to);` + /// + /// Also note that the range *must* be sorted by keys, which means either using an ordered + /// container (e.g. std::map) or a manually ordered container (such as a vector or list of + /// pairs). unordered_map, however, is not acceptable. 
+ template , int> = 0> + void append(ForwardIt from, ForwardIt to) { + if (has_child) throw std::logic_error{"Cannot append to list when a sublist is active"}; + using KeyType = std::remove_cv_tfirst)>>; + using ValType = std::decay_tsecond)>; + static_assert(std::is_convertible_vfirst), std::string_view>); + static_assert(std::is_convertible_v || std::is_integral_v); + using BadUnorderedMap = std::unordered_map; + static_assert(!( // Disallow unordered_map iterators because they are not going to be ordered. + std::is_same_v || + std::is_same_v)); + while (from != to) { + const auto& [k, v] = *from++; + append_impl(k); + check_incrementing_key(k.size()); + append_impl(v); + } + append_intermediate_ends(); + } + + /// Appends a sub-dict value to this dict with the given key. Returns a new bt_dict_producer + /// that references the parent dict. The parent cannot be added to until the subdict is + /// destroyed. Key must be (ascii-comparison) larger than the previous key. + /// + /// This is meant to be used via RAII: + /// + /// buf data[32]; + /// bt_dict_producer dict{data, sizeof(data)}; + /// { + /// auto subdict = dict.begin_dict("myKey"); + /// subdict.append("x", 42); + /// } + /// dict.append("y", ""); + /// // `data` now contains: `d5:myKeyd1:xi42ee1:y0:e` + /// + /// If doing more complex lifetime management, take care not to allow the child instance to + /// outlive the parent. + bt_dict_producer append_dict(std::string_view key) { + if (has_child) throw std::logic_error{"Cannot call append_dict while another nested list/dict is active"}; + append_impl(key); + check_incrementing_key(key.size()); + return bt_dict_producer{this}; + } + + /// Appends a list to this dict with the given key (which must be ascii-larger than the previous + /// key). Returns a new bt_list_producer that references the parent dict. The parent cannot be + /// added to until the sublist is destroyed. + /// + /// This is meant to be used via RAII (see append_dict() for details). 
+ /// + /// If doing more complex lifetime management, take care not to allow the child instance to + /// outlive the parent. + bt_list_producer append_list(std::string_view key) + { + if (has_child) throw std::logic_error{"Cannot call append_list while another nested list/dict is active"}; + append_impl(key); + check_incrementing_key(key.size()); + return bt_list_producer{this}; + } + + /// Appends a bt_value, bt_dict, or bt_list to this bt_dict. You must include the + /// bt_value_producer.h header (either directly or via bt.h) to use this method. + template + void append_bt(std::string_view key, const T& bt); + }; + + inline bt_list_producer::bt_list_producer(bt_list_producer* parent, std::string_view prefix) + : data{parent}, buffer{parent->buffer}, from{buffer.first} { + parent->has_child = true; + buffer_append(prefix); + append_intermediate_ends(); + } + + inline bt_list_producer::bt_list_producer(bt_dict_producer* parent, std::string_view prefix) + : data{parent}, buffer{parent->buffer}, from{buffer.first} { + parent->has_child = true; + buffer_append(prefix); + append_intermediate_ends(); + } + + inline bt_list_producer::bt_list_producer(bt_list_producer&& other) + : data{std::move(other.data)}, buffer{other.buffer}, from{other.from}, to{other.to} { + if (other.has_child) throw std::logic_error{"Cannot move bt_list/dict_producer with active sublists/subdicts"}; + var::visit([](auto& x) { + if constexpr (!std::is_same_v) + x = nullptr; + }, other.data); + } + + inline void bt_list_producer::buffer_append(std::string_view d, bool advance) { + var::visit( + [d, advance, this](auto& x) { + if constexpr (std::is_same_v) { + size_t avail = std::distance(x.first, x.second); + if (d.size() > avail) + throw std::length_error{"Cannot write bt_producer: buffer size exceeded"}; + std::copy(d.begin(), d.end(), x.first); + to = x.first + d.size(); + if (advance) + x.first += d.size(); + } else { + x->buffer_append(d, advance); + } + }, + data); + } + + inline void 
bt_list_producer::append_intermediate_ends(size_t count) { + static constexpr std::string_view eee = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"sv; + return var::visit([this, count](auto& x) mutable { + if constexpr (std::is_same_v) { + for (; count > eee.size(); count -= eee.size()) + buffer_append(eee, false); + buffer_append(eee.substr(0, count), false); + } else { + // x is a parent pointer + x->append_intermediate_ends(count + 1); + to = x->to - 1; // Our `to` should be one 'e' before our parent's `to`. + } + }, data); + } + + inline bt_list_producer::~bt_list_producer() { + var::visit( + [this](auto& x) { + if constexpr (!std::is_same_v) { + if (!x) + return; + assert(!has_child); + assert(x->has_child); + x->has_child = false; + // We've already written the intermediate 'e', so just increment + // the buffer to finalize it. + buffer.first++; + } + }, + data); + } + + inline bt_list_producer::bt_list_producer(char* begin, char* end, std::string_view prefix) + : data{buf_span{begin, end}}, buffer{*std::get_if(&data)}, from{buffer.first} { + buffer_append(prefix); + append_intermediate_ends(); + } + + inline bt_list_producer bt_list_producer::append_list() { + if (has_child) throw std::logic_error{"Cannot call append_list while another nested list/dict is active"}; + return bt_list_producer{this}; + } + + inline bt_dict_producer bt_list_producer::append_dict() { + if (has_child) throw std::logic_error{"Cannot call append_dict while another nested list/dict is active"}; + return bt_dict_producer{this}; + } + +} // namespace oxenc diff --git a/libsession-util/distribution/libsession-util-android/include/oxenc/bt_serialize.h b/libsession-util/distribution/libsession-util-android/include/oxenc/bt_serialize.h new file mode 100644 index 0000000000..a6107a43a4 --- /dev/null +++ b/libsession-util/distribution/libsession-util-android/include/oxenc/bt_serialize.h @@ -0,0 +1,1083 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include 
"variant.h" +#include +#include +#include +#include +#include +#include +#include + +#include "bt_value.h" + +namespace oxenc { + +using namespace std::literals; + +/** \file + * Oxenc serialization for internal commands is very simple: we support two primitive types, + * strings and integers, and two container types, lists and dicts with string keys. On the wire + * these go in BitTorrent byte encoding as described in BEP-0003 + * (https://www.bittorrent.org/beps/bep_0003.html#bencoding). + * + * On the C++ side, on input we allow strings, integral types, STL-like containers of these types, + * and STL-like containers of pairs with a string first value and any of these types as second + * value. We also accept std::variants of these. + * + * One minor deviation from BEP-0003 is that we don't support serializing values that don't fit in a + * 64-bit integer (BEP-0003 specifies arbitrary precision integers). + * + * On deserialization we can either deserialize into a special bt_value type supports everything + * (with arbitrary nesting), or we can fill a container of your given type (though this fails if the + * container isn't compatible with the deserialized data). + * + * There is also a stream deserialization that allows you to deserialize without needing heap + * allocations (as long as you know the precise data structure layout). + */ + +/// Exception throw if deserialization fails +class bt_deserialize_invalid : public std::invalid_argument { + using std::invalid_argument::invalid_argument; +}; + +/// A more specific subclass that is thown if the serialization type is an initial mismatch: for +/// example, trying deserializing an int but the next thing in input is a list. This is not, +/// however, thrown if the type initially looks fine but, say, a nested serialization fails. This +/// error will only be thrown when the input stream has not been advanced (and so can be tried for a +/// different type). 
+class bt_deserialize_invalid_type : public bt_deserialize_invalid { + using bt_deserialize_invalid::bt_deserialize_invalid; +}; + +namespace detail { + +/// Reads digits into an unsigned 64-bit int. +uint64_t extract_unsigned(std::string_view& s); +// (Provide non-constant lvalue and rvalue ref functions so that we only accept explicit +// string_views but not implicitly converted ones) +inline uint64_t extract_unsigned(std::string_view&& s) { return extract_unsigned(s); } + +// Fallback base case; we only get here if none of the partial specializations below work +template +struct bt_serialize { static_assert(!std::is_same_v, "Cannot serialize T: unsupported type for bt serialization"); }; + +template +struct bt_deserialize { static_assert(!std::is_same_v, "Cannot deserialize T: unsupported type for bt deserialization"); }; + +/// Checks that we aren't at the end of a string view and throws if we are. +inline void bt_need_more(const std::string_view &s) { + if (s.empty()) + throw bt_deserialize_invalid{"Unexpected end of string while deserializing"}; +} + +using some64 = union { int64_t i64; uint64_t u64; }; + +/// Deserializes a signed or unsigned 64-bit integer from a string. Sets the second bool to true +/// iff the value read was negative, false if positive; in either case the unsigned value is return +/// in .first. Throws an exception if the read value doesn't fit in a int64_t (if negative) or a +/// uint64_t (if positive). Removes consumed characters from the string_view. 
+std::pair bt_deserialize_integer(std::string_view& s); + +/// Integer specializations +template +struct bt_serialize>> { + static_assert(sizeof(T) <= sizeof(uint64_t), "Serialization of integers larger than uint64_t is not supported"); + void operator()(std::ostream &os, const T &val) { + // Cast 1-byte types to a larger type to avoid iostream interpreting them as single characters + using output_type = std::conditional_t<(sizeof(T) > 1), T, std::conditional_t, int, unsigned>>; + os << 'i' << static_cast(val) << 'e'; + } +}; + +template +struct bt_deserialize>> { + void operator()(std::string_view& s, T &val) { + constexpr uint64_t umax = static_cast(std::numeric_limits::max()); + constexpr int64_t smin = static_cast(std::numeric_limits::min()); + + auto [v, neg] = bt_deserialize_integer(s); + + if (std::is_signed_v) { + if (!neg) { + if (v.u64 > umax) + throw bt_deserialize_invalid("Integer deserialization failed: found too-large value " + std::to_string(v.u64) + " > " + std::to_string(umax)); + val = static_cast(v.u64); + } else { + auto& sval = v.i64; + if (!std::is_same_v && sval < smin) + throw bt_deserialize_invalid("Integer deserialization failed: found too-low value " + std::to_string(sval) + " < " + std::to_string(smin)); + val = static_cast(sval); + } + } else { + if (neg) + throw bt_deserialize_invalid("Integer deserialization failed: found negative value -" + std::to_string(v.i64) + " but type is unsigned"); + if (!std::is_same_v && v.u64 > umax) + throw bt_deserialize_invalid("Integer deserialization failed: found too-large value " + std::to_string(v.u64) + " > " + std::to_string(umax)); + val = static_cast(v.u64); + } + } +}; + +extern template struct bt_deserialize; +extern template struct bt_deserialize; + +template <> +struct bt_serialize { + void operator()(std::ostream &os, const std::string_view &val) { os << val.size(); os.put(':'); os.write(val.data(), val.size()); } +}; +template <> +struct bt_deserialize { + void 
operator()(std::string_view& s, std::string_view& val); +}; + +/// String specialization +template <> +struct bt_serialize { + void operator()(std::ostream &os, const std::string &val) { bt_serialize{}(os, val); } +}; +template <> +struct bt_deserialize { + void operator()(std::string_view& s, std::string& val) { std::string_view view; bt_deserialize{}(s, view); val = {view.data(), view.size()}; } +}; + +/// char * and string literals -- we allow serialization for convenience, but not deserialization +template <> +struct bt_serialize { + void operator()(std::ostream &os, const char *str) { bt_serialize{}(os, {str, std::strlen(str)}); } +}; +template +struct bt_serialize { + void operator()(std::ostream &os, const char *str) { bt_serialize{}(os, {str, N-1}); } +}; + +/// Partial dict validity; we don't check the second type for serializability, that will be handled +/// via the base case static_assert if invalid. +template struct is_bt_input_dict_container_impl : std::false_type {}; +template +struct is_bt_input_dict_container_impl> || + std::is_same_v>, + std::void_t>> +: std::true_type {}; + +/// Determines whether the type looks like something we can insert into (using `v.insert(v.end(), x)`) +template struct is_bt_insertable_impl : std::false_type {}; +template +struct is_bt_insertable_impl().insert(std::declval().end(), std::declval()))>> +: std::true_type {}; +template +constexpr bool is_bt_insertable = is_bt_insertable_impl::value; + +/// Determines whether the given type looks like a compatible map (i.e. has std::string keys) that +/// we can insert into. 
+template struct is_bt_output_dict_container_impl : std::false_type {}; +template +struct is_bt_output_dict_container_impl> && is_bt_insertable, + std::void_t>> +: std::true_type {}; + +template +constexpr bool is_bt_output_dict_container = is_bt_output_dict_container_impl::value; +template +constexpr bool is_bt_input_dict_container = is_bt_output_dict_container_impl::value; + +// Sanity checks: +static_assert(is_bt_input_dict_container); +static_assert(is_bt_output_dict_container); + +/// Specialization for a dict-like container (such as an unordered_map). We accept anything for a +/// dict that is const iterable over something that looks like a pair with std::string for first +/// value type. The value (i.e. second element of the pair) also must be serializable. +template +struct bt_serialize>> { + using second_type = typename T::value_type::second_type; + using ref_pair = std::reference_wrapper; + void operator()(std::ostream &os, const T &dict) { + os << 'd'; + std::vector pairs; + pairs.reserve(dict.size()); + for (const auto &pair : dict) + pairs.emplace(pairs.end(), pair); + std::sort(pairs.begin(), pairs.end(), [](ref_pair a, ref_pair b) { return a.get().first < b.get().first; }); + for (auto &ref : pairs) { + bt_serialize{}(os, ref.get().first); + bt_serialize{}(os, ref.get().second); + } + os << 'e'; + } +}; + +template +struct bt_deserialize>> { + using second_type = typename T::value_type::second_type; + void operator()(std::string_view& s, T& dict) { + // Smallest dict is 2 bytes "de", for an empty dict. 
+ if (s.size() < 2) throw bt_deserialize_invalid("Deserialization failed: end of string found where dict expected"); + if (s[0] != 'd') throw bt_deserialize_invalid_type("Deserialization failed: expected 'd', found '"s + s[0] + "'"s); + s.remove_prefix(1); + dict.clear(); + bt_deserialize key_deserializer; + bt_deserialize val_deserializer; + + while (!s.empty() && s[0] != 'e') { + std::string key; + second_type val; + key_deserializer(s, key); + val_deserializer(s, val); + dict.insert(dict.end(), typename T::value_type{std::move(key), std::move(val)}); + } + if (s.empty()) + throw bt_deserialize_invalid("Deserialization failed: encountered end of string before dict was finished"); + s.remove_prefix(1); // Consume the 'e' + } +}; + + +/// Accept anything that looks iterable; value serialization validity isn't checked here (it fails +/// via the base case static assert). +template struct is_bt_input_list_container_impl : std::false_type {}; +template +struct is_bt_input_list_container_impl && !std::is_same_v && !is_bt_input_dict_container, + std::void_t>> +: std::true_type {}; + +template struct is_bt_output_list_container_impl : std::false_type {}; +template +struct is_bt_output_list_container_impl && !is_bt_output_dict_container && is_bt_insertable>> +: std::true_type {}; + +template +constexpr bool is_bt_output_list_container = is_bt_output_list_container_impl::value; +template +constexpr bool is_bt_input_list_container = is_bt_input_list_container_impl::value; + +// Sanity checks: +static_assert(is_bt_input_list_container); +static_assert(is_bt_output_list_container); + +/// List specialization +template +struct bt_serialize>> { + void operator()(std::ostream& os, const T& list) { + os << 'l'; + for (const auto &v : list) + bt_serialize>{}(os, v); + os << 'e'; + } +}; +template +struct bt_deserialize>> { + using value_type = typename T::value_type; + void operator()(std::string_view& s, T& list) { + // Smallest list is 2 bytes "le", for an empty list. 
+ if (s.size() < 2) throw bt_deserialize_invalid("Deserialization failed: end of string found where list expected"); + if (s[0] != 'l') throw bt_deserialize_invalid_type("Deserialization failed: expected 'l', found '"s + s[0] + "'"s); + s.remove_prefix(1); + list.clear(); + bt_deserialize deserializer; + while (!s.empty() && s[0] != 'e') { + value_type v; + deserializer(s, v); + list.insert(list.end(), std::move(v)); + } + if (s.empty()) + throw bt_deserialize_invalid("Deserialization failed: encountered end of string before list was finished"); + s.remove_prefix(1); // Consume the 'e' + } +}; + +/// Serializes a tuple or pair of serializable values (as a list on the wire) + +/// Common implementation for both tuple and pair: +template typename Tuple, typename... T> +struct bt_serialize_tuple { +private: + template + void operator()(std::ostream& os, const Tuple& elems, std::index_sequence) { + os << 'l'; + (bt_serialize{}(os, std::get(elems)), ...); + os << 'e'; + } +public: + void operator()(std::ostream& os, const Tuple& elems) { + operator()(os, elems, std::index_sequence_for{}); + } +}; +template typename Tuple, typename... T> +struct bt_deserialize_tuple { +private: + template + void operator()(std::string_view& s, Tuple& elems, std::index_sequence) { + // Smallest list is 2 bytes "le", for an empty list. 
+ if (s.size() < 2) throw bt_deserialize_invalid("Deserialization failed: end of string found where tuple expected"); + if (s[0] != 'l') throw bt_deserialize_invalid_type("Deserialization of tuple failed: expected 'l', found '"s + s[0] + "'"s); + s.remove_prefix(1); + (bt_deserialize{}(s, std::get(elems)), ...); + if (s.empty()) + throw bt_deserialize_invalid("Deserialization failed: encountered end of string before tuple was finished"); + if (s[0] != 'e') + throw bt_deserialize_invalid("Deserialization failed: expected end of tuple but found something else"); + s.remove_prefix(1); // Consume the 'e' + } +public: + void operator()(std::string_view& s, Tuple& elems) { + operator()(s, elems, std::index_sequence_for{}); + } +}; +template +struct bt_serialize> : bt_serialize_tuple {}; +template +struct bt_deserialize> : bt_deserialize_tuple {}; +template +struct bt_serialize> : bt_serialize_tuple {}; +template +struct bt_deserialize> : bt_deserialize_tuple {}; + +template +inline constexpr bool is_bt_tuple = false; +template +inline constexpr bool is_bt_tuple> = true; +template +inline constexpr bool is_bt_tuple> = true; + + +template +constexpr bool is_bt_deserializable = std::is_same_v || std::is_integral_v || + is_bt_output_dict_container || is_bt_output_list_container || is_bt_tuple; + +// General template and base case; this base will only actually be invoked when Ts... is empty, +// which means we reached the end without finding any variant type capable of holding the value. 
+template +struct bt_deserialize_try_variant_impl { + void operator()(std::string_view&, Variant&) { + throw bt_deserialize_invalid("Deserialization failed: could not deserialize value into any variant type"); + } +}; + +template +void bt_deserialize_try_variant(std::string_view& s, Variant& variant) { + bt_deserialize_try_variant_impl{}(s, variant); +} + + +template +struct bt_deserialize_try_variant_impl>, Variant, T, Ts...> { + void operator()(std::string_view& s, Variant& variant) { + if ( is_bt_output_list_container ? s[0] == 'l' : + is_bt_tuple ? s[0] == 'l' : + is_bt_output_dict_container ? s[0] == 'd' : + std::is_integral_v ? s[0] == 'i' : + std::is_same_v ? s[0] >= '0' && s[0] <= '9' : + false) { + T val; + bt_deserialize{}(s, val); + variant = std::move(val); + } else { + bt_deserialize_try_variant(s, variant); + } + } +}; + +template +struct bt_deserialize_try_variant_impl>, Variant, T, Ts...> { + void operator()(std::string_view& s, Variant& variant) { + // Unsupported deserialization type, skip it + bt_deserialize_try_variant(s, variant); + } +}; + +// Serialization of a variant; all variant types must be bt-serializable. +template +struct bt_serialize, std::void_t...>> { + void operator()(std::ostream& os, const std::variant& val) { + var::visit( + [&os] (const auto& val) { + using T = std::remove_cv_t>; + bt_serialize{}(os, val); + }, + val); + } +}; + +// Deserialization to a variant; at least one variant type must be bt-deserializble. 
+template +struct bt_deserialize, std::enable_if_t<(is_bt_deserializable || ...)>> { + void operator()(std::string_view& s, std::variant& val) { + bt_deserialize_try_variant(s, val); + } +}; + +template <> +struct bt_serialize : bt_serialize {}; + +template <> +struct bt_deserialize { + void operator()(std::string_view& s, bt_value& val); +}; + +template +struct bt_stream_serializer { + const T &val; + explicit bt_stream_serializer(const T &val) : val{val} {} + operator std::string() const { + std::ostringstream oss; + oss << *this; + return oss.str(); + } +}; +template +std::ostream &operator<<(std::ostream &os, const bt_stream_serializer &s) { + bt_serialize{}(os, s.val); + return os; +} + +} // namespace detail + + +/// Returns a wrapper around a value reference that can serialize the value directly to an output +/// stream. This class is intended to be used inline (i.e. without being stored) as in: +/// +/// std::list my_list{{1,2,3}}; +/// std::cout << bt_serializer(my_list); +/// +/// While it is possible to store the returned object and use it, such as: +/// +/// auto encoded = bt_serializer(42); +/// std::cout << encoded; +/// +/// this approach is not generally recommended: the returned object stores a reference to the +/// passed-in type, which may not survive. If doing this note that it is the caller's +/// responsibility to ensure the serializer is not used past the end of the lifetime of the value +/// being serialized. +/// +/// Also note that serializing directly to an output stream is more efficient as no intermediate +/// string containing the entire serialization has to be constructed. +/// +template +detail::bt_stream_serializer bt_serializer(const T &val) { return detail::bt_stream_serializer{val}; } + +/// Serializes the given value into a std::string. 
+/// +/// int number = 42; +/// std::string encoded = bt_serialize(number); +/// // Equivalent: +/// //auto encoded = (std::string) bt_serialize(number); +/// +/// This takes any serializable type: integral types, strings, lists of serializable types, and +/// string->value maps of serializable types. +template +std::string bt_serialize(const T &val) { return bt_serializer(val); } + +/// Deserializes the given string view directly into `val`. Usage: +/// +/// std::string encoded = "i42e"; +/// int value; +/// bt_deserialize(encoded, value); // Sets value to 42 +/// +template , int> = 0> +void bt_deserialize(std::string_view s, T& val) { + return detail::bt_deserialize{}(s, val); +} + + +/// Deserializes the given string_view into a `T`, which is returned. +/// +/// std::string encoded = "li1ei2ei3ee"; // bt-encoded list of ints: [1,2,3] +/// auto mylist = bt_deserialize>(encoded); +/// +template +T bt_deserialize(std::string_view s) { + T val; + bt_deserialize(s, val); + return val; +} + +/// Deserializes the given value into a generic `bt_value` type (wrapped std::variant) which is +/// capable of holding all possible BT-encoded values (including recursion). +/// +/// Example: +/// +/// std::string encoded = "i42e"; +/// auto val = bt_get(encoded); +/// int v = get_int(val); // fails unless the encoded value was actually an integer that +/// // fits into an `int` +/// +inline bt_value bt_get(std::string_view s) { + return bt_deserialize(s); +} + +/// Helper functions to extract a value of some integral type from a bt_value which contains either +/// a int64_t or uint64_t. Does range checking, throwing std::overflow_error if the stored value is +/// outside the range of the target type. 
+/// +/// Example: +/// +/// std::string encoded = "i123456789e"; +/// auto val = bt_get(encoded); +/// auto v = get_int(val); // throws if the decoded value doesn't fit in a uint32_t +template , int> = 0> +IntType get_int(const bt_value &v) { + if (auto* value = std::get_if(&v)) { + if constexpr (!std::is_same_v) + if (*value > static_cast(std::numeric_limits::max())) + throw std::overflow_error("Unable to extract integer value: stored value is too large for the requested type"); + return static_cast(*value); + } + + int64_t value = var::get(v); // throws if no int contained + if constexpr (!std::is_same_v) + if (value > static_cast(std::numeric_limits::max()) + || value < static_cast(std::numeric_limits::min())) + throw std::overflow_error("Unable to extract integer value: stored value is outside the range of the requested type"); + return static_cast(value); +} + +namespace detail { +template +void get_tuple_impl(Tuple& t, const bt_list& l, std::index_sequence); +} + +/// Converts a bt_list into the given template std::tuple or std::pair. Throws a +/// std::invalid_argument if the list has the wrong size or wrong element types. Supports recursion +/// (i.e. if the tuple itself contains tuples or pairs). The tuple (or nested tuples) may only +/// contain integral types, strings, string_views, bt_list, bt_dict, and tuples/pairs of those. 
+template +Tuple get_tuple(const bt_list& x) { + Tuple t; + detail::get_tuple_impl(t, x, std::make_index_sequence>{}); + return t; +} +template +Tuple get_tuple(const bt_value& x) { + return get_tuple(var::get(static_cast(x))); +} + +namespace detail { +template +void get_tuple_impl_one(T& t, It& it) { + const bt_variant& v = *it++; + if constexpr (std::is_integral_v) { + t = oxenc::get_int(v); + } else if constexpr (is_bt_tuple) { + if (!std::holds_alternative(v)) + throw std::invalid_argument{"Unable to convert tuple: cannot create sub-tuple from non-bt_list"}; + // v holds a bt_list here, so recurse to build the nested tuple/pair from it. + t = get_tuple(var::get(v)); + } else if constexpr (std::is_same_v || std::is_same_v) { + // If we request a string/string_view, we might have the other one and need to copy/view it. + if (std::holds_alternative(v)) + t = var::get(v); + else + t = var::get(v); + } else { + t = var::get(v); + } +} +template +void get_tuple_impl(Tuple& t, const bt_list& l, std::index_sequence) { + if (l.size() != sizeof...(Is)) + throw std::invalid_argument{"Unable to convert tuple: bt_list has wrong size"}; + auto it = l.begin(); + (get_tuple_impl_one(std::get(t), it), ...); +} +} // namespace detail + + + +class bt_dict_consumer; + +/// Class that allows you to walk through a bt-encoded list in memory without copying or allocating +/// memory. It accesses existing memory directly and so the caller must ensure that the referenced +/// memory stays valid for the lifetime of the bt_list_consumer object. +class bt_list_consumer { +protected: + std::string_view data; + bt_list_consumer() = default; +public: + bt_list_consumer(std::string_view data_) : data{std::move(data_)} { + if (data.empty()) throw std::runtime_error{"Cannot create a bt_list_consumer with an empty string_view"}; + if (data[0] != 'l') + throw std::runtime_error{ + "Cannot create a bt_list_consumer with non-list data"}; + data.remove_prefix(1); + } + /// Copy constructor. 
Making a copy copies the current position so can be used for multipass + /// iteration through a list. + bt_list_consumer(const bt_list_consumer&) = default; + bt_list_consumer& operator=(const bt_list_consumer&) = default; + + /// Get a copy of the current buffer + std::string_view current_buffer() const { return data; } + + /// Returns true if the next value indicates the end of the list + bool is_finished() const { return data.front() == 'e'; } + /// Returns true if the next element looks like an encoded string + bool is_string() const { return data.front() >= '0' && data.front() <= '9'; } + /// Returns true if the next element looks like an encoded integer + bool is_integer() const { return data.front() == 'i'; } + /// Returns true if the next element looks like an encoded negative integer + bool is_negative_integer() const { return is_integer() && data.size() >= 2 && data[1] == '-'; } + /// Returns true if the next element looks like an encoded non-negative integer + bool is_unsigned_integer() const { return is_integer() && data.size() >= 2 && data[1] >= '0' && data[1] <= '9'; } + /// Returns true if the next element looks like an encoded list + bool is_list() const { return data.front() == 'l'; } + /// Returns true if the next element looks like an encoded dict + bool is_dict() const { return data.front() == 'd'; } + + /// Attempt to parse the next value as a string (and advance just past it). Throws if the next + /// value is not a string. 
+ std::string consume_string() { return std::string{consume_string_view()}; } + std::string_view consume_string_view() { + if (data.empty()) + throw bt_deserialize_invalid{"expected a string, but reached end of data"}; + else if (!is_string()) + throw bt_deserialize_invalid_type{"expected a string, but found "s + data.front()}; + std::string_view next{data}, result; + detail::bt_deserialize{}(next, result); + data = next; + return result; + }; + + /// Attempts to parse the next value as an integer (and advance just past it). Throws if the + /// next value is not an integer. + template + IntType consume_integer() { + if (!is_integer()) throw bt_deserialize_invalid_type{"next value is not an integer"}; + std::string_view next{data}; + IntType ret; + detail::bt_deserialize{}(next, ret); + data = next; + return ret; + } + + /// Consumes a list, return it as a list-like type. Can also be used for tuples/pairs. This + /// typically requires dynamic allocation, but only has to parse the data once. Compare with + /// consume_list_data() which allows alloc-free traversal, but requires parsing twice (if the + /// contents are to be used). + template + T consume_list() { + T list; + consume_list(list); + return list; + } + + /// Same as above, but takes a pre-existing list-like data type. + template + void consume_list(T& list) { + if (!is_list()) throw bt_deserialize_invalid_type{"next bt value is not a list"}; + std::string_view n{data}; + detail::bt_deserialize{}(n, list); + data = n; + } + + /// Consumes a dict, return it as a dict-like type. This typically requires dynamic allocation, + /// but only has to parse the data once. Compare with consume_dict_data() which allows + /// alloc-free traversal, but requires parsing twice (if the contents are to be used). + template + T consume_dict() { + T dict; + consume_dict(dict); + return dict; + } + + /// Same as above, but takes a pre-existing dict-like data type. 
+ template + void consume_dict(T& dict) { + if (!is_dict()) throw bt_deserialize_invalid_type{"next bt value is not a dict"}; + std::string_view n{data}; + detail::bt_deserialize{}(n, dict); + data = n; + } + + /// Consumes a value without returning it. + void skip_value() { + if (is_string()) + consume_string_view(); + else if (is_integer()) + detail::bt_deserialize_integer(data); + else if (is_list()) + consume_list_data(); + else if (is_dict()) + consume_dict_data(); + else + throw bt_deserialize_invalid_type{"next bt value has unknown type"}; + } + + /// Attempts to parse the next value as a list and returns the string_view that contains the + /// entire thing. This is recursive into both lists and dicts and likely to be quite + /// inefficient for large, nested structures (unless the values only need to be skipped but + /// aren't separately needed). This, however, does not require dynamic memory allocation. + std::string_view consume_list_data() { + auto start = data.begin(); + if (data.size() < 2 || !is_list()) throw bt_deserialize_invalid_type{"next bt value is not a list"}; + data.remove_prefix(1); // Descend into the sublist, consume the "l" + while (!is_finished()) { + skip_value(); + if (data.empty()) + throw bt_deserialize_invalid{"bt list consumption failed: hit the end of string before the list was done"}; + } + data.remove_prefix(1); // Back out from the sublist, consume the "e" + return {start, static_cast(std::distance(start, data.begin()))}; + } + + /// Attempts to parse the next value as a dict and returns the string_view that contains the + /// entire thing. This is recursive into both lists and dicts and likely to be quite + /// inefficient for large, nested structures (unless the values only need to be skipped but + /// aren't separately needed). This, however, does not require dynamic memory allocation. 
+ std::string_view consume_dict_data() { + auto start = data.begin(); + if (data.size() < 2 || !is_dict()) throw bt_deserialize_invalid_type{"next bt value is not a dict"}; + data.remove_prefix(1); // Descend into the dict, consume the "d" + while (!is_finished()) { + consume_string_view(); // Key is always a string + if (!data.empty()) + skip_value(); + if (data.empty()) + throw bt_deserialize_invalid{"bt dict consumption failed: hit the end of string before the dict was done"}; + } + data.remove_prefix(1); // Back out of the dict, consume the "e" + return {start, static_cast(std::distance(start, data.begin()))}; + } + + /// Shortcut for wrapping `consume_list_data()` in a new list consumer + bt_list_consumer consume_list_consumer(){ return consume_list_data(); } + /// Shortcut for wrapping `consume_dict_data()` in a new dict consumer + inline bt_dict_consumer consume_dict_consumer(); +}; + + +/// Class that allows you to walk through key-value pairs of a bt-encoded dict in memory without +/// copying or allocating memory. It accesses existing memory directly and so the caller must +/// ensure that the referenced memory stays valid for the lifetime of the bt_dict_consumer object. +class bt_dict_consumer : private bt_list_consumer { + std::string_view key_; + + /// Consume the key if not already consumed and there is a key present (rather than 'e'). + /// Throws exception if what should be a key isn't a string, or if the key consumes the entire + /// data (i.e. requires that it be followed by something). Returns true if the key was consumed + /// (either now or previously and cached). 
+ bool consume_key() { + if (key_.data()) + return true; + if (data.empty()) throw bt_deserialize_invalid_type{"expected a key or dict end, found end of string"}; + if (data[0] == 'e') return false; + key_ = bt_list_consumer::consume_string_view(); + if (data.empty() || data[0] == 'e') + throw bt_deserialize_invalid{"dict key isn't followed by a value"}; + return true; + } + + /// Clears the cached key and returns it. Must have already called consume_key directly or + /// indirectly via one of the `is_{...}` methods. + std::string_view flush_key() { + std::string_view k; + k.swap(key_); + return k; + } + +public: + bt_dict_consumer(std::string_view data_) { + data = std::move(data_); + if (data.empty()) throw std::runtime_error{"Cannot create a bt_dict_consumer with an empty string_view"}; + if (data.size() < 2 || data[0] != 'd') + throw std::runtime_error{ + "Cannot create a bt_dict_consumer with non-dict data"}; + data.remove_prefix(1); + } + + /// Copy constructor. Making a copy copies the current position so can be used for multipass + /// iteration through a list. 
+ bt_dict_consumer(const bt_dict_consumer&) = default; + bt_dict_consumer& operator=(const bt_dict_consumer&) = default; + + /// Returns true if the next value indicates the end of the dict + bool is_finished() { return !consume_key() && data.front() == 'e'; } + /// Operator bool is an alias for `!is_finished()` + operator bool() { return !is_finished(); } + /// Returns true if the next value looks like an encoded string + bool is_string() { return consume_key() && data.front() >= '0' && data.front() <= '9'; } + /// Returns true if the next element looks like an encoded integer + bool is_integer() { return consume_key() && data.front() == 'i'; } + /// Returns true if the next element looks like an encoded negative integer + bool is_negative_integer() { return is_integer() && data.size() >= 2 && data[1] == '-'; } + /// Returns true if the next element looks like an encoded non-negative integer + bool is_unsigned_integer() { return is_integer() && data.size() >= 2 && data[1] >= '0' && data[1] <= '9'; } + /// Returns true if the next element looks like an encoded list + bool is_list() { return consume_key() && data.front() == 'l'; } + /// Returns true if the next element looks like an encoded dict + bool is_dict() { return consume_key() && data.front() == 'd'; } + /// Returns the key of the next pair. This does not have to be called; it is also returned by + /// all of the next_* methods. The value is cached whether called here or by some + /// other method; accessing it multiple times simply accesses the cache until the next value is + /// consumed. + std::string_view key() { + if (!consume_key()) + throw bt_deserialize_invalid{"Cannot access next key: at the end of the dict"}; + return key_; + } + + /// Attempt to parse the next value as a string->string pair (and advance just past it). Throws + /// if the next value is not a string. 
+ std::pair next_string() { + if (!is_string()) + throw bt_deserialize_invalid_type{"expected a string, but found "s + data.front()}; + std::pair ret; + ret.second = bt_list_consumer::consume_string_view(); + ret.first = flush_key(); + return ret; + } + + /// Attempts to parse the next value as an string->integer pair (and advance just past it). + /// Throws if the next value is not an integer. + template + std::pair next_integer() { + if (!is_integer()) throw bt_deserialize_invalid_type{"next bt dict value is not an integer"}; + std::pair ret; + ret.second = bt_list_consumer::consume_integer(); + ret.first = flush_key(); + return ret; + } + + /// Consumes a string->list pair, return it as a list-like type. This typically requires + /// dynamic allocation, but only has to parse the data once. Compare with consume_list_data() + /// which allows alloc-free traversal, but requires parsing twice (if the contents are to be + /// used). + template + std::pair next_list() { + std::pair pair; + pair.first = next_list(pair.second); + return pair; + } + + /// Same as above, but takes a pre-existing list-like data type. Returns the key. + template + std::string_view next_list(T& list) { + if (!is_list()) throw bt_deserialize_invalid_type{"next bt value is not a list"}; + bt_list_consumer::consume_list(list); + return flush_key(); + } + + /// Consumes a string->dict pair, return it as a dict-like type. This typically requires + /// dynamic allocation, but only has to parse the data once. Compare with consume_dict_data() + /// which allows alloc-free traversal, but requires parsing twice (if the contents are to be + /// used). + template + std::pair next_dict() { + std::pair pair; + pair.first = next_dict(pair.second); + return pair; + } + + /// Same as above, but takes a pre-existing dict-like data type. Returns the key. 
+ template + std::string_view next_dict(T& dict) { + if (!is_dict()) throw bt_deserialize_invalid_type{"next bt value is not a dict"}; + bt_list_consumer::consume_dict(dict); + return flush_key(); + } + + /// Attempts to parse the next value as a string->list pair and returns the string_view that + /// contains the entire thing. This is recursive into both lists and dicts and likely to be + /// quite inefficient for large, nested structures (unless the values only need to be skipped + /// but aren't separately needed). This, however, does not require dynamic memory allocation. + std::pair next_list_data() { + if (data.size() < 2 || !is_list()) throw bt_deserialize_invalid_type{"next bt dict value is not a list"}; + return {flush_key(), bt_list_consumer::consume_list_data()}; + } + + /// Same as next_list_data(), but wraps the value in a bt_list_consumer for convenience + std::pair next_list_consumer() { return next_list_data(); } + + /// Attempts to parse the next value as a string->dict pair and returns the string_view that + /// contains the entire thing. This is recursive into both lists and dicts and likely to be + /// quite inefficient for large, nested structures (unless the values only need to be skipped + /// but aren't separately needed). This, however, does not require dynamic memory allocation. + std::pair next_dict_data() { + if (data.size() < 2 || !is_dict()) throw bt_deserialize_invalid_type{"next bt dict value is not a dict"}; + return {flush_key(), bt_list_consumer::consume_dict_data()}; + } + + /// Same as next_dict_data(), but wraps the value in a bt_dict_consumer for convenience + std::pair next_dict_consumer() { return next_dict_data(); } + + /// Skips ahead until we find the first key >= the given key or reach the end of the dict. + /// Returns true if we found an exact match, false if we reached some greater value or the end. 
+ /// If we didn't hit the end, the next `consume_*()` call will return the key-value pair we + /// found (either the exact match or the first key greater than the requested key). + /// + /// Two important notes: + /// + /// - properly encoded bt dicts must have lexicographically sorted keys, and this method assumes + /// that the input is correctly sorted (and thus if we find a greater value then your key does + /// not exist). + /// - this is irreversible; you cannot return to skipped values without reparsing. (You *can* + /// however, make a copy of the bt_dict_consumer before calling and use the copy to return to + /// the pre-skipped position). + bool skip_until(std::string_view find) { + while (consume_key() && key_ < find) { + flush_key(); + skip_value(); + } + // A null key_ means we ran off the end of the dict without caching a key; it must not be + // reported as a match for an empty `find` (an empty string_view compares equal to ""). + return key_.data() && key_ == find; + } + + /// The `consume_*` functions are wrappers around next_whatever that discard the returned key. + /// + /// Intended for use with skip_until such as: + /// + /// std::string value; + /// if (d.skip_until("key")) + /// value = d.consume_string(); + /// + + auto consume_string_view() { return next_string().second; } + auto consume_string() { return std::string{consume_string_view()}; } + + template + auto consume_integer() { return next_integer().second; } + + template + auto consume_list() { return next_list().second; } + + template + void consume_list(T& list) { next_list(list); } + + template + auto consume_dict() { return next_dict().second; } + + template + void consume_dict(T& dict) { next_dict(dict); } + + std::string_view consume_list_data() { return next_list_data().second; } + std::string_view consume_dict_data() { return next_dict_data().second; } + + /// Shortcut for wrapping `consume_list_data()` in a new list consumer + bt_list_consumer consume_list_consumer() { return consume_list_data(); } + /// Shortcut for wrapping `consume_dict_data()` in a new dict consumer + bt_dict_consumer consume_dict_consumer() { return consume_dict_data(); } +}; + +inline 
bt_dict_consumer bt_list_consumer::consume_dict_consumer() { return consume_dict_data(); } + +namespace detail { + +/// Reads digits into an unsigned 64-bit int. +inline uint64_t extract_unsigned(std::string_view& s) { + uint64_t uval = 0; + bool once = false; + while (!s.empty() && (s[0] >= '0' && s[0] <= '9')) { + once = true; + uint64_t bigger = uval * 10 + (s[0] - '0'); + s.remove_prefix(1); + if (bigger < uval) // overflow + throw bt_deserialize_invalid("Integer deserialization failed: value is too large for a 64-bit int"); + uval = bigger; + } + if (!once) + throw bt_deserialize_invalid{"Expected 0-9 was not found"}; + return uval; +} + +inline void bt_deserialize::operator()(std::string_view& s, std::string_view& val) { + if (s.size() < 2) throw bt_deserialize_invalid{"Deserialize failed: given data is not an bt-encoded string"}; + if (s[0] < '0' || s[0] > '9') + throw bt_deserialize_invalid_type{"Expected 0-9 but found '"s + s[0] + "'"}; + auto len = static_cast(extract_unsigned(s)); + if (s.empty() || s[0] != ':') + throw bt_deserialize_invalid{"Did not find expected ':' during string deserialization"}; + s.remove_prefix(1); + + if (len > s.size()) + throw bt_deserialize_invalid{"String deserialization failed: encoded string length is longer than the serialized data"}; + + val = {s.data(), len}; + s.remove_prefix(len); +} + +// Check that we are on a 2's complement architecture. It's highly unlikely that this code ever +// runs on a non-2s-complement architecture (especially since C++20 requires a two's complement +// signed value behaviour), but check at compile time anyway because we rely on these relations +// below. 
+static_assert(std::numeric_limits::min() + std::numeric_limits::max() == -1 && + static_cast(std::numeric_limits::max()) + uint64_t{1} == (uint64_t{1} << 63), + "Non 2s-complement architecture not supported!"); + +inline std::pair bt_deserialize_integer(std::string_view& s) { + // Smallest possible encoded integer is 3 chars: "i0e" + if (s.size() < 3) throw bt_deserialize_invalid("Deserialization failed: end of string found where integer expected"); + if (s[0] != 'i') throw bt_deserialize_invalid_type("Deserialization failed: expected 'i', found '"s + s[0] + '\''); + s.remove_prefix(1); + std::pair result; + auto& [val, negative] = result; + if (s[0] == '-') { + negative = true; + s.remove_prefix(1); + val.u64 = extract_unsigned(s); + if (val.u64 > (uint64_t{1} << 63)) + throw bt_deserialize_invalid("Deserialization of integer failed: negative integer value is too large for a 64-bit signed int"); + val.i64 = -static_cast(val.u64); + } else { + val.u64 = extract_unsigned(s); + } + + if (s.empty()) + throw bt_deserialize_invalid("Integer deserialization failed: encountered end of string before integer was finished"); + if (s[0] != 'e') + throw bt_deserialize_invalid("Integer deserialization failed: expected digit or 'e', found '"s + s[0] + '\''); + s.remove_prefix(1); + + return result; +} + +template struct bt_deserialize; +template struct bt_deserialize; + + +inline void bt_deserialize::operator()(std::string_view& s, bt_value& val) { + if (s.size() < 2) throw bt_deserialize_invalid("Deserialization failed: end of string found where bt-encoded value expected"); + + switch (s[0]) { + case 'd': { + bt_dict dict; + bt_deserialize{}(s, dict); + val = std::move(dict); + break; + } + case 'l': { + bt_list list; + bt_deserialize{}(s, list); + val = std::move(list); + break; + } + case 'i': { + auto [v, negative] = bt_deserialize_integer(s); + if (negative) val = v.i64; + else val = v.u64; + break; + } + case '0': case '1': case '2': case '3': case '4': case '5': case 
'6': case '7': case '8': case '9': { + std::string str; + bt_deserialize{}(s, str); + val = std::move(str); + break; + } + default: + throw bt_deserialize_invalid("Deserialize failed: encountered invalid value '"s + s[0] + "'; expected one of [0-9idl]"); + } +} + +} // namespace detail + +} // namespace oxenc diff --git a/libsession-util/distribution/libsession-util-android/include/oxenc/bt_value.h b/libsession-util/distribution/libsession-util-android/include/oxenc/bt_value.h new file mode 100644 index 0000000000..da073a7617 --- /dev/null +++ b/libsession-util/distribution/libsession-util-android/include/oxenc/bt_value.h @@ -0,0 +1,83 @@ +#pragma once + +// This header is here to provide just the basic bt_value/bt_dict/bt_list definitions without +// needing to include the full bt_serialize.h header. +#include +#include +#include +#include +#include +#include + +namespace oxenc { + +struct bt_value; + +/// The type used to store dictionaries inside bt_value. +using bt_dict = std::map; // NB: unordered_map doesn't work because it can't be used with a predeclared type +/// The type used to store list items inside bt_value. +using bt_list = std::list; + +/// The basic variant that can hold anything (recursively). 
+using bt_variant = std::variant< + std::string, + std::string_view, + int64_t, + uint64_t, + bt_list, + bt_dict +>; + +#ifdef __cpp_lib_remove_cvref // C++20 +using std::remove_cvref_t; +#else +template +using remove_cvref_t = std::remove_cv_t>; +#endif + +template +struct has_alternative; +template +struct has_alternative> : std::bool_constant<(std::is_same_v || ...)> {}; +template +constexpr bool has_alternative_v = has_alternative::value; + +namespace detail { + template + bt_list tuple_to_list(const Tuple& tuple, std::index_sequence) { + return {{bt_value{std::get(tuple)}...}}; + } + template constexpr bool is_tuple = false; + template constexpr bool is_tuple> = true; + template constexpr bool is_tuple> = true; +} + +/// Recursive generic type that can fully represent everything valid for a BT serialization. +/// This is basically just an empty wrapper around the std::variant, except we add some extra +/// converting constructors: +/// - integer constructors so that any unsigned value goes to the uint64_t and any signed value goes +/// to the int64_t. +/// - std::tuple and std::pair constructors that build a bt_list out of the tuple/pair elements. 
+struct bt_value : bt_variant { + using bt_variant::bt_variant; + using bt_variant::operator=; + + template , std::enable_if_t && std::is_unsigned_v, int> = 0> + bt_value(T&& uint) : bt_variant{static_cast(uint)} {} + + template , std::enable_if_t && std::is_signed_v, int> = 0> + bt_value(T&& sint) : bt_variant{static_cast(sint)} {} + + template + bt_value(const std::tuple& tuple) : bt_variant{detail::tuple_to_list(tuple, std::index_sequence_for{})} {} + + template + bt_value(const std::pair& pair) : bt_variant{detail::tuple_to_list(pair, std::index_sequence_for{})} {} + + template , std::enable_if_t && !detail::is_tuple, int> = 0> + bt_value(T&& v) : bt_variant{std::forward(v)} {} + + bt_value(const char* s) : bt_value{std::string_view{s}} {} +}; + +} diff --git a/libsession-util/distribution/libsession-util-android/include/oxenc/bt_value_producer.h b/libsession-util/distribution/libsession-util-android/include/oxenc/bt_value_producer.h new file mode 100644 index 0000000000..6c093cc70a --- /dev/null +++ b/libsession-util/distribution/libsession-util-android/include/oxenc/bt_value_producer.h @@ -0,0 +1,99 @@ +#include "bt_producer.h" +#include "bt_value.h" +#include "variant.h" +#include + +/// This header provides the implementations of append_bt(bt_value/bt_list/bt_dict) for +/// bt_serialize. (It is optional to avoid unnecessary includes when not wanted). 
+ +namespace oxenc { + + namespace detail { + + void serialize_list(bt_list_producer& out, const bt_list& l); + void serialize_dict(bt_dict_producer& out, const bt_dict& l); + + struct dict_appender { + bt_dict_producer& out; + std::string_view key; + dict_appender(bt_dict_producer& out, std::string_view key) + : out{out}, key{key} {} + + void operator()(const bt_dict& d) { + auto subdict = out.append_dict(key); + serialize_dict(subdict, d); + } + void operator()(const bt_list& l) { + auto sublist = out.append_list(key); + serialize_list(sublist, l); + } + template + void operator()(const T& other) { + out.append(key, other); + } + }; + + struct list_appender { + bt_list_producer& out; + explicit list_appender(bt_list_producer& out) + : out{out} {} + + void operator()(const bt_dict& d) { + auto subdict = out.append_dict(); + serialize_dict(subdict, d); + } + void operator()(const bt_list& l) { + auto sublist = out.append_list(); + serialize_list(sublist, l); + } + template + void operator()(const T& other) { + out.append(other); + } + }; + + inline void serialize_dict(bt_dict_producer& out, const bt_dict& d) { + for (const auto& [k, v]: d) + var::visit(dict_appender{out, k}, static_cast(v)); + } + + inline void serialize_list(bt_list_producer& out, const bt_list& l) { + for (auto& val : l) + var::visit(list_appender{out}, static_cast(val)); + } + } + + template <> + inline void bt_list_producer::append_bt(const bt_dict& bt) { + auto subdict = append_dict(); + detail::serialize_dict(subdict, bt); + } + + template <> + inline void bt_list_producer::append_bt(const bt_list& bt) { + auto sublist = append_list(); + detail::serialize_list(sublist, bt); + } + + template <> + inline void bt_list_producer::append_bt(const bt_value& bt) { + var::visit(detail::list_appender{*this}, static_cast(bt)); + } + + template <> + inline void bt_dict_producer::append_bt(std::string_view key, const bt_dict& bt) { + auto subdict = append_dict(key); + detail::serialize_dict(subdict, bt); 
+ } + + template <> + inline void bt_dict_producer::append_bt(std::string_view key, const bt_list& bt) { + auto sublist = append_list(key); + detail::serialize_list(sublist, bt); + } + + template <> + inline void bt_dict_producer::append_bt(std::string_view key, const bt_value& bt) { + var::visit(detail::dict_appender{*this, key}, static_cast(bt)); + } +} diff --git a/libsession-util/distribution/libsession-util-android/include/oxenc/byte_type.h b/libsession-util/distribution/libsession-util-android/include/oxenc/byte_type.h new file mode 100644 index 0000000000..1a6c0e6764 --- /dev/null +++ b/libsession-util/distribution/libsession-util-android/include/oxenc/byte_type.h @@ -0,0 +1,28 @@ +#pragma once + +// Specializations for assigning from a char into an output iterator, used by hex/base32z/base64 +// decoding to bytes. + +#include +#include + +namespace oxenc::detail { + +// Fallback - we just try a char +template +struct byte_type { using type = char; }; + +// Support for things like std::back_inserter: +template +struct byte_type> { + using type = typename OutputIt::container_type::value_type; }; + +// iterator, raw pointers: +template +struct byte_type::reference>>> { + using type = std::remove_reference_t::reference>; }; + +template +using byte_type_t = typename byte_type::type; + +} diff --git a/libsession-util/distribution/libsession-util-android/include/oxenc/endian.h b/libsession-util/distribution/libsession-util-android/include/oxenc/endian.h new file mode 100644 index 0000000000..761cfe03c7 --- /dev/null +++ b/libsession-util/distribution/libsession-util-android/include/oxenc/endian.h @@ -0,0 +1,204 @@ +#pragma once + +#include +#include +#include + +#if defined(_MSC_VER) && (!defined(__clang__) || defined(__c2__)) +# include + +# define bswap_16(x) _byteswap_ushort(x) +# define bswap_32(x) _byteswap_ulong(x) +# define bswap_64(x) _byteswap_uint64(x) +#elif defined(__clang__) || defined(__GNUC__) +# define bswap_16(x) __builtin_bswap16(x) +# define 
bswap_32(x) __builtin_bswap32(x) +# define bswap_64(x) __builtin_bswap64(x) +#elif defined(__linux__) +extern "C" { +# include +} // extern "C" +#else +# error "Don't know how to byteswap on this platform!" +#endif + +namespace oxenc { + + /// True if this is a little-endian platform + inline constexpr bool little_endian = +#if defined(__LITTLE_ENDIAN__) + true +#elif defined(__BIG_ENDIAN__) + false +#elif defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN + true +#elif defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + true +#elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && __BYTE_ORDER == __BIG_ENDIAN + false +#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + false +#elif defined(_WIN32) + true +#else +# error "Error: don't know which endian this is" +#endif + ; + + /// True if this is a big-endian platform + inline constexpr bool big_endian = !little_endian; + + /// True if the type is integral and of a size we support swapping. (We also allow size-1 + /// values to be passed here for completeness, though nothing is ever swapped for such a value). + template constexpr bool is_endian_swappable = + std::is_integral_v && (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8); + + /// Byte swaps an integer value unconditionally. You usually want to use one of the other + /// endian-aware functions rather than this. + template >> + void byteswap_inplace(T& val) { + if constexpr (sizeof(T) == 2) + val = bswap_16(val); + else if constexpr (sizeof(T) == 4) + val = bswap_32(val); + else if constexpr (sizeof(T) == 8) + val = bswap_64(val); + } + + /// Converts a host-order integer value into a little-endian value, mutating it. Does nothing + /// on little-endian platforms. 
+ template >> + void host_to_little_inplace(T& val) { + if constexpr (!little_endian) + byteswap_inplace(val); + } + + /// Converts a host-order integer value into a little-endian value, returning it. Does no + /// conversion on little-endian platforms. + template >> + T host_to_little(T val) { + host_to_little_inplace(val); + return val; + } + + /// Converts a little-endian integer value into a host-order (native) integer value, mutating + /// it. Does nothing on little-endian platforms. + template >> + void little_to_host_inplace(T& val) { + if constexpr (!little_endian) + byteswap_inplace(val); + } + + /// Converts a little-endian integer value into a host-order (native) integer value, returning + /// it. Does no conversion on little-endian platforms. + template >> + T little_to_host(T val) { + little_to_host_inplace(val); + return val; + } + + /// Converts a host-order integer value into a big-endian value, mutating it. Does nothing on + /// big-endian platforms. + template >> + void host_to_big_inplace(T& val) { + if constexpr (!big_endian) + byteswap_inplace(val); + } + + /// Converts a host-order integer value into a big-endian value, returning it. Does no + /// conversion on big-endian platforms. + template >> + T host_to_big(T val) { + host_to_big_inplace(val); + return val; + } + + /// Converts a big-endian value into a host-order (native) integer value, mutating it. Does + /// nothing on big-endian platforms. + template >> + void big_to_host_inplace(T& val) { + if constexpr (!big_endian) + byteswap_inplace(val); + } + + /// Converts a big-endian integer value into a host-order (native) integer value, returning it. + /// Does no conversion on big-endian platforms. + template >> + T big_to_host(T val) { + big_to_host_inplace(val); + return val; + } + + /// Loads a host-order integer value from a memory location containing little-endian bytes. + /// (There is no alignment requirement on the given pointer address). 
+ template >> + T load_little_to_host(const void* from) { + T val; + std::memcpy(&val, from, sizeof(T)); + little_to_host_inplace(val); + return val; + } + + /// Loads a little-endian integer value from a memory location containing host order bytes. + /// (There is no alignment requirement on the given pointer address). + template >> + T load_host_to_little(const void* from) { + T val; + std::memcpy(&val, from, sizeof(T)); + host_to_little_inplace(val); + return val; + } + + /// Loads a host-order integer value from a memory location containing big-endian bytes. (There + /// is no alignment requirement on the given pointer address). + template >> + T load_big_to_host(const void* from) { + T val; + std::memcpy(&val, from, sizeof(T)); + big_to_host_inplace(val); + return val; + } + + /// Loads a big-endian integer value from a memory location containing host order bytes. (There + /// is no alignment requirement on the given pointer address). + template >> + T load_host_to_big(const void* from) { + T val; + std::memcpy(&val, from, sizeof(T)); + host_to_big_inplace(val); + return val; + } + + /// Writes a little-endian integer value into the given memory location, copying and converting + /// it (if necessary) from the given host-order integer value. + template >> + void write_host_as_little(T val, void* to) { + host_to_little_inplace(val); + std::memcpy(to, &val, sizeof(T)); + } + + /// Writes a big-endian integer value into the given memory location, copying and converting it + /// (if necessary) from the given host-order integer value. + template >> + void write_host_as_big(T val, void* to) { + host_to_big_inplace(val); + std::memcpy(to, &val, sizeof(T)); + } + + /// Writes a host-order integer value into the given memory location, copying and converting it + /// (if necessary) from the given little-endian integer value. 
+ template >> + void write_little_as_host(T val, void* to) { + little_to_host_inplace(val); + std::memcpy(to, &val, sizeof(T)); + } + + /// Writes a host-order integer value into the given memory location, copying and converting it + /// (if necessary) from the given big-endian integer value. + template >> + void write_big_as_host(T val, void* to) { + big_to_host_inplace(val); + std::memcpy(to, &val, sizeof(T)); + } + +} // namespace oxenc diff --git a/libsession-util/distribution/libsession-util-android/include/oxenc/hex.h b/libsession-util/distribution/libsession-util-android/include/oxenc/hex.h new file mode 100644 index 0000000000..c7281736c5 --- /dev/null +++ b/libsession-util/distribution/libsession-util-android/include/oxenc/hex.h @@ -0,0 +1,233 @@ +#pragma once +#include +#include +#include +#include +#include +#include "byte_type.h" + +namespace oxenc { + +namespace detail { + +/// Compile-time generated lookup tables for hex conversion +struct hex_table { + char from_hex_lut[256]; + char to_hex_lut[16]; + constexpr hex_table() noexcept : from_hex_lut{}, to_hex_lut{} { + for (unsigned char c = 0; c < 10; c++) { + from_hex_lut[(unsigned char)('0' + c)] = 0 + c; + to_hex_lut[ (unsigned char)( 0 + c)] = '0' + c; + } + for (unsigned char c = 0; c < 6; c++) { + from_hex_lut[(unsigned char)('a' + c)] = 10 + c; + from_hex_lut[(unsigned char)('A' + c)] = 10 + c; + to_hex_lut[ (unsigned char)(10 + c)] = 'a' + c; + } + } + constexpr char from_hex(unsigned char c) const noexcept { return from_hex_lut[c]; } + constexpr char to_hex(unsigned char b) const noexcept { return to_hex_lut[b]; } +} constexpr hex_lut; + +// The main point of this static assert is to force the compiler to compile-time build the constexpr tables. +static_assert(hex_lut.from_hex('a') == 10 && hex_lut.from_hex('F') == 15 && hex_lut.to_hex(13) == 'd', ""); + +} // namespace detail + +/// Returns the number of characters required to encode a hex string from the given number of bytes. 
+inline constexpr size_t to_hex_size(size_t byte_size) { return byte_size * 2; } +/// Returns the number of bytes required to decode a hex string of the given size. +inline constexpr size_t from_hex_size(size_t hex_size) { return hex_size / 2; } + +/// Iterable object for on-the-fly hex encoding. Used internally, but also particularly useful when +/// converting from one encoding to another. Each input byte yields two characters: high nibble first, then low nibble. +template +struct hex_encoder final { +private: + InputIt _it, _end; + static_assert(sizeof(decltype(*_it)) == 1, "hex_encoder requires chars/bytes input iterator"); + uint8_t c = 0; // current input byte; cached by operator* when the high nibble is read, reused for the low nibble + bool second_half = false; // false: positioned on the high nibble; true: on the low nibble +public: + using iterator_category = std::input_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = char; + using reference = value_type; + using pointer = void; + hex_encoder(InputIt begin, InputIt end) : _it{std::move(begin)}, _end{std::move(end)} {} + + hex_encoder end() { return {_end, _end}; } + + bool operator==(const hex_encoder& i) const { return _it == i._it && second_half == i.second_half; } // const so that const encoders can be compared + bool operator!=(const hex_encoder& i) const { return !(*this == i); } + + hex_encoder& operator++() { + second_half = !second_half; + if (!second_half) // both nibbles of the current byte are done: move to the next input byte + ++_it; + return *this; + } + hex_encoder operator++(int) { hex_encoder copy{*this}; ++*this; return copy; } + char operator*() { // not const: reading the high nibble stores the input byte into `c` + return detail::hex_lut.to_hex(second_half + ? c & 0x0f + : (c = static_cast(*_it)) >> 4); + } +}; + +/// Creates hex digits from a character sequence given by iterators, writes them starting at `out`. +/// Returns the final value of out (i.e. the iterator positioned just after the last written +/// hex character). 
+template +OutputIt to_hex(InputIt begin, InputIt end, OutputIt out) { + static_assert(sizeof(decltype(*begin)) == 1, "to_hex requires chars/bytes"); + auto it = hex_encoder{begin, end}; + return std::copy(it, it.end(), out); +} + +/// Creates a string of hex digits from a character sequence iterator pair +template +std::string to_hex(It begin, It end) { + std::string hex; + if constexpr (std::is_base_of_v::iterator_category>) { + using std::distance; + hex.reserve(to_hex_size(distance(begin, end))); + } + to_hex(begin, end, std::back_inserter(hex)); + return hex; +} + +/// Creates a hex string from an iterable, std::string-like object +template +std::string to_hex(std::basic_string_view s) { return to_hex(s.begin(), s.end()); } +inline std::string to_hex(std::string_view s) { return to_hex<>(s); } + +/// Returns true if the given value is a valid hex digit. +template +constexpr bool is_hex_digit(CharT c) { + static_assert(sizeof(CharT) == 1, "is_hex requires chars/bytes"); + return detail::hex_lut.from_hex(static_cast(c)) != 0 || static_cast(c) == '0'; +} + +/// Returns true if all elements in the range are hex characters *and* the string length is a +/// multiple of 2, and thus suitable to pass to from_hex(). 
+template +constexpr bool is_hex(It begin, It end) { + static_assert(sizeof(decltype(*begin)) == 1, "is_hex requires chars/bytes"); + constexpr bool ra = std::is_base_of_v::iterator_category>; + if constexpr (ra) { + using std::distance; + if (distance(begin, end) % 2 != 0) + return false; + } + + size_t count = 0; + for (; begin != end; ++begin) { + if constexpr (!ra) ++count; + if (!is_hex_digit(*begin)) + return false; + } + if constexpr (!ra) + return count % 2 == 0; + return true; +} + +/// Returns true if all elements in the string-like value are hex characters +template +constexpr bool is_hex(std::basic_string_view s) { return is_hex(s.begin(), s.end()); } +constexpr bool is_hex(std::string_view s) { return is_hex(s.begin(), s.end()); } + +/// Convert a hex digit into its numeric (0-15) value +constexpr char from_hex_digit(unsigned char x) noexcept { + return detail::hex_lut.from_hex(x); +} + +/// Constructs a byte value from a pair of hex digits +constexpr char from_hex_pair(unsigned char a, unsigned char b) noexcept { return (from_hex_digit(a) << 4) | from_hex_digit(b); } + +/// Iterable object for on-the-fly hex decoding. Used internally but also particularly useful when +/// converting from one encoding to another. Undefined behaviour if the given iterator range is not +/// a valid hex string with even length (i.e. is_hex() should return true). 
+template +struct hex_decoder final { +private: + InputIt _it, _end; + static_assert(sizeof(decltype(*_it)) == 1, "hex_encoder requires chars/bytes input iterator"); + char byte; +public: + using iterator_category = std::input_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = char; + using reference = value_type; + using pointer = void; + hex_decoder(InputIt begin, InputIt end) : _it{std::move(begin)}, _end{std::move(end)} { + if (_it != _end) + load_byte(); + } + + hex_decoder end() { return {_end, _end}; } + + bool operator==(const hex_decoder& i) { return _it == i._it; } + bool operator!=(const hex_decoder& i) { return _it != i._it; } + + hex_decoder& operator++() { + if (++_it != _end) + load_byte(); + return *this; + } + hex_decoder operator++(int) { hex_decoder copy{*this}; ++*this; return copy; } + char operator*() const { return byte; } + +private: + void load_byte() { + auto a = *_it; + auto b = *++_it; + byte = from_hex_pair(static_cast(a), static_cast(b)); + } + +}; + +/// Converts a sequence of hex digits to bytes. Undefined behaviour if any characters are not in +/// [0-9a-fA-F] or if the input sequence length is not even: call `is_hex` first if you need to +/// check. It is permitted for the input and output ranges to overlap as long as out is no later +/// than begin. Returns the final value of out (that is, the iterator positioned just after the +/// last written character). +template +OutputIt from_hex(InputIt begin, InputIt end, OutputIt out) { + assert(is_hex(begin, end)); + auto it = hex_decoder(begin, end); + const auto hend = it.end(); + while (it != hend) + *out++ = static_cast>(*it++); + return out; +} + +/// Converts a sequence of hex digits to a string of bytes and returns it. Undefined behaviour if +/// the input sequence is not an even-length sequence of [0-9a-fA-F] characters. 
+template +std::string from_hex(It begin, It end) { + std::string bytes; + if constexpr (std::is_base_of_v::iterator_category>) { + using std::distance; + bytes.reserve(from_hex_size(distance(begin, end))); + } + from_hex(begin, end, std::back_inserter(bytes)); + return bytes; +} + +/// Converts hex digits from a std::string-like object into a std::string of bytes. Undefined +/// behaviour if any characters are not in [0-9a-fA-F] or if the input sequence length is not even. +template +std::string from_hex(std::basic_string_view s) { return from_hex(s.begin(), s.end()); } +inline std::string from_hex(std::string_view s) { return from_hex<>(s); } + +inline namespace literals { + inline std::string operator""_hex(const char* x, size_t n) { + std::string_view in{x, n}; + if (!is_hex(in)) + throw std::invalid_argument{"hex literal is not hex"}; + return from_hex(in); + } +} + +} diff --git a/libsession-util/distribution/libsession-util-android/include/oxenc/variant.h b/libsession-util/distribution/libsession-util-android/include/oxenc/variant.h new file mode 100644 index 0000000000..fb4c9fe6e8 --- /dev/null +++ b/libsession-util/distribution/libsession-util-android/include/oxenc/variant.h @@ -0,0 +1,103 @@ +#pragma once +// Workarounds for macos compatibility. On macOS we aren't allowed to touch anything in +// std::variant that could throw if compiling with a target <10.14 because Apple fails hard at +// properly updating their STL. Thus, if compiling in such a mode, we have to introduce +// workarounds. +// +// This header defines a `var` namespace with `var::get` and `var::visit` implementations. On +// everything except broken backwards macos, this is just an alias to `std`. On broken backwards +// macos, we provide implementations that throw std::runtime_error in failure cases since the +// std::bad_variant_access exception can't be touched. +// +// You also get a BROKEN_APPLE_VARIANT macro defined if targetting a problematic mac architecture. 
+ +#include + +#ifdef __APPLE__ +# include +# if defined(__APPLE__) && MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_14 +# define BROKEN_APPLE_VARIANT +# endif +#endif + +#ifndef BROKEN_APPLE_VARIANT + +namespace var = std; // Oh look, actual C++17 support + +#else + +// Oh look, apple. + +namespace var { + +// Apple won't let us use std::visit or std::get if targetting some version of macos earlier than +// 10.14 because Apple is awful about not updating their STL. So we have to provide our own, and +// then call these without `std::` -- on crappy macos we'll come here, on everything else we'll ADL +// to the std:: implementation. +template +constexpr T& get(std::variant& var) { + if (auto* v = std::get_if(&var)) return *v; + throw std::runtime_error{"Bad variant access -- variant does not contain the requested type"}; +} +template +constexpr const T& get(const std::variant& var) { + if (auto* v = std::get_if(&var)) return *v; + throw std::runtime_error{"Bad variant access -- variant does not contain the requested type"}; +} +template +constexpr const T&& get(const std::variant&& var) { + if (auto* v = std::get_if(&var)) return std::move(*v); + throw std::runtime_error{"Bad variant access -- variant does not contain the requested type"}; +} +template +constexpr T&& get(std::variant&& var) { + if (auto* v = std::get_if(&var)) return std::move(*v); + throw std::runtime_error{"Bad variant access -- variant does not contain the requested type"}; +} +template +constexpr auto& get(std::variant& var) { + if (auto* v = std::get_if(&var)) return *v; + throw std::runtime_error{"Bad variant access -- variant does not contain the requested type"}; +} +template +constexpr const auto& get(const std::variant& var) { + if (auto* v = std::get_if(&var)) return *v; + throw std::runtime_error{"Bad variant access -- variant does not contain the requested type"}; +} +template +constexpr const auto&& get(const std::variant&& var) { + if (auto* v = std::get_if(&var)) return 
std::move(*v); + throw std::runtime_error{"Bad variant access -- variant does not contain the requested type"}; +} +template +constexpr auto&& get(std::variant&& var) { + if (auto* v = std::get_if(&var)) return std::move(*v); + throw std::runtime_error{"Bad variant access -- variant does not contain the requested type"}; +} + +template +constexpr auto visit_helper(Visitor&& vis, Variant&& var) { + if (var.index() == I) + return vis(var::get(std::forward(var))); + else if constexpr (sizeof...(More) > 0) + return visit_helper(std::forward(vis), std::forward(var)); + else + throw std::runtime_error{"Bad visit -- variant is valueless"}; +} + +template +constexpr auto visit_helper(Visitor&& vis, Variant&& var, std::index_sequence) { + return visit_helper(std::forward(vis), std::forward(var)); +} + +// Only handle a single variant here because multi-variant invocation is notably harder (and we +// don't need it). +template +constexpr auto visit(Visitor&& vis, Variant&& var) { + return visit_helper(std::forward(vis), std::forward(var), + std::make_index_sequence>>{}); +} + +} // namespace var + +#endif diff --git a/libsession-util/distribution/libsession-util-android/include/oxenc/version.h b/libsession-util/distribution/libsession-util-android/include/oxenc/version.h new file mode 100644 index 0000000000..c05bed1a98 --- /dev/null +++ b/libsession-util/distribution/libsession-util-android/include/oxenc/version.h @@ -0,0 +1,5 @@ +namespace oxenc { +constexpr int VERSION_MAJOR = 1; +constexpr int VERSION_MINOR = 0; +constexpr int VERSION_PATCH = 6; +} diff --git a/libsession-util/distribution/libsession-util-android/include/session/bt_merge.hpp b/libsession-util/distribution/libsession-util-android/include/session/bt_merge.hpp new file mode 100644 index 0000000000..e5042a5d1a --- /dev/null +++ b/libsession-util/distribution/libsession-util-android/include/session/bt_merge.hpp @@ -0,0 +1,58 @@ +#pragma once +#include + +#include +#ifndef NDEBUG +#include +#endif + +namespace 
session::bt { + +using oxenc::bt_dict; +using oxenc::bt_list; + +/// Merges two bt dicts together: the returned dict includes all keys in a or b. Keys in *both* +/// dicts get their value from `a`, otherwise the value is that of the dict that contains the key. +bt_dict merge(const bt_dict& a, const bt_dict& b); + +/// Merges two ordered bt_lists together using a predicate to determine order. The input lists must +/// be sorted to begin with. `cmp` must be callable with a pair of `const bt_value&` arguments and +/// must return true if the first argument should be considered less than the second argument. By +/// default this skips elements from b that compare equal to a value of a, but you can include all +/// the duplicates by specifying the `duplicates` parameter as true. +template +bt_list merge_sorted(const bt_list& a, const bt_list& b, Compare cmp, bool duplicates = false) { + bt_list result; + auto it_a = a.begin(); + auto it_b = b.begin(); + + assert(std::is_sorted(it_a, a.end(), cmp)); + assert(std::is_sorted(it_b, b.end(), cmp)); + + if (duplicates) { + while (it_a != a.end() && it_b != b.end()) { + if (!cmp(*it_a, *it_b)) // *b <= *a + result.push_back(*it_b++); + else // *a < *b + result.push_back(*it_a++); + } + } else { + while (it_a != a.end() && it_b != b.end()) { + if (cmp(*it_b, *it_a)) // *b < *a + result.push_back(*it_b++); + else if (cmp(*it_a, *it_b)) // *a < *b + result.push_back(*it_a++); + else // *a == *b + ++it_b; // skip it + } + } + + if (it_a != a.end()) + result.insert(result.end(), it_a, a.end()); + else if (it_b != b.end()) + result.insert(result.end(), it_b, b.end()); + + return result; +} + +} // namespace session::bt diff --git a/libsession-util/distribution/libsession-util-android/include/session/config.hpp b/libsession-util/distribution/libsession-util-android/include/session/config.hpp new file mode 100644 index 0000000000..90ab8676aa --- /dev/null +++ 
b/libsession-util/distribution/libsession-util-android/include/session/config.hpp @@ -0,0 +1,369 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace session::config { + +inline constexpr int MAX_MESSAGE_SIZE = 76800; // 76.8kB = Storage server's limit + +// Application data data types: +using scalar = std::variant; + +using set = std::set; +struct dict_value; +using dict = std::map; +using dict_variant = std::variant; +struct dict_value : dict_variant { + using dict_variant::dict_variant; + using dict_variant::operator=; +}; + +// Helpers for gcc-10 and earlier which don't like visiting a std::variant subtype: +constexpr inline dict_variant& unwrap(dict_value& v) { + return static_cast(v); +} +constexpr inline const dict_variant& unwrap(const dict_value& v) { + return static_cast(v); +} + +using seqno_t = std::int64_t; +using hash_t = std::array; +using seqno_hash_t = std::pair; + +using ustring = std::basic_string; +using ustring_view = std::basic_string_view; + +class MutableConfigMessage; + +/// Base type for all errors that can happen during config parsing +struct config_error : std::runtime_error { + using std::runtime_error::runtime_error; +}; +/// Type thrown for bad signatures (bad or missing signature). +struct signature_error : config_error { + using config_error::config_error; +}; +/// Type thrown for a missing signature when a signature is required. +struct missing_signature : signature_error { + using signature_error::signature_error; +}; +/// Type thrown for an unparseable config (e.g. keys with invalid types, or keys before "#" or after +/// "~"). +struct config_parse_error : config_error { + using config_error::config_error; +}; + +/// Class for a parsed, read-only config message; also serves as the base class of a +/// MutableConfigMessage which allows setting values. 
+class ConfigMessage { + public: + using lagged_diffs_t = std::map; + +#ifndef SESSION_TESTING_EXPOSE_INTERNALS + protected: +#endif + dict data_; + + // diff data for *this* message, parsed during construction. Subclasses may use this for + // managing their own diff in the `diff()` method. + oxenc::bt_dict diff_; + + // diffs of previous messages that are included in this message. + lagged_diffs_t lagged_diffs_; + + // Unknown top-level config keys which we preserve even though we don't understand what they + // mean. + oxenc::bt_dict unknown_; + + /// Seqno and hash of the message; we calculate this when loading. Subclasses put the hash here + /// (so that they can return a reference to it). + seqno_hash_t seqno_hash_{0, {0}}; + + bool verified_signature_ = false; + + bool merged_ = false; + + public: + constexpr static int DEFAULT_DIFF_LAGS = 5; + + /// Verification function: this is passed the data that should have been signed and the 64-byte + /// signature. Should return true to accept the signature, false to reject it and skip the + /// message. It can also throw to abort message construction (that is: returning false skips + /// the message when loading multiple messages, but can still continue with other messages; + /// throwing aborts the entire construction). + using verify_callable = std::function; + + /// Signing function: this is passed the data to be signed and returns the 64-byte signature. + using sign_callable = std::function; + + ConfigMessage(); + ConfigMessage(const ConfigMessage&) = default; + ConfigMessage& operator=(const ConfigMessage&) = default; + ConfigMessage(ConfigMessage&&) = default; + ConfigMessage& operator=(ConfigMessage&&) = default; + + virtual ~ConfigMessage() = default; + + /// Initializes a config message by parsing a serialized message. Throws on any error. See the + /// vector version below for argument descriptions. 
+ explicit ConfigMessage( + std::string_view serialized, + verify_callable verifier = nullptr, + sign_callable signer = nullptr, + int lag = DEFAULT_DIFF_LAGS, + bool signature_optional = false); + + /// Constructs a new ConfigMessage by loading and potentially merging multiple serialized + /// ConfigMessages together, according to the config conflict resolution rules. The result + /// of this call can either be one of the config messages directly (if one is found that + /// includes all the others), or can be a new config message that merges multiple configs + /// together. You can check `.merged()` to see which happened. + /// + /// This constructor always requires at least one valid config from the given inputs; if none + /// of them is usable (e.g. *all* messages fail verification) construction throws. + /// + /// verifier - a signature verification function. If provided and not nullptr this will be + /// called to verify each signature in the provided messages: any that are missing a signature + /// or for which the verifier returns false will be dropped from consideration for merging. If + /// *all* messages fail verification an exception is raised. + /// + /// signer - a signature generation function. This is not used directly by the ConfigMessage, + /// but providing it will allow it to be passed automatically to any MutableConfigMessage + /// derived from this ConfigMessage. + /// + /// lag - the lag setting controlling the config merging rules. Any config message with lagged + /// diffs exceeding this lag value will have those early lagged diffs dropped during + /// loading. + /// + /// signature_optional - if true then accept a message with no signature even when a verifier is + /// set, thus allowing unsigned messages (though messages with an invalid signature are still + /// not allowed). This option is ignored when verifier is not set. 
+ /// + /// error_handler - if set then any config message parsing error will be passed to this + /// function for handling: the callback typically warns and, if the overall construction should + /// abort, rethrows the error. If this function is omitted then the default skips (without + /// failing) individual parse errors and only aborts construction if *all* messages fail to + /// parse. A simple handler such as `[](const auto& e) { throw e; }` can be used to make any + /// parse error of any message fatal. + explicit ConfigMessage( + const std::vector& configs, + verify_callable verifier = nullptr, + sign_callable signer = nullptr, + int lag = DEFAULT_DIFF_LAGS, + bool signature_optional = false, + std::function error_handler = nullptr); + + /// Returns a read-only reference to the contained data. (To get a mutable config object use + /// MutableConfigMessage). + const dict& data() const { return data_; } + + /// The verify function; if loading a message with a signature and this is set then it will + /// be called to verify the signature of the message. Takes a pointer to the signing data, + /// the data length, and a pointer to the 64-byte signature. + verify_callable verifier; + + /// The signing function; this is not directly used by the non-mutable base class, but will be + /// propagated to mutable config messages that are derived e.g. by calling `.increment()`. This + /// is called when serializing a config message to add a signature. If it is nullptr then no + /// signature is added to the serialized data. + sign_callable signer; + + /// How many lagged config diffs should be carried forward to resolve conflicts, + /// including this message. If 0 then config messages won't have any diffs and will not be + /// mergeable. + int lag = DEFAULT_DIFF_LAGS; + + /// The diff structure for changes in *this* config message. 
Subclasses that need to override + /// should populate into `diff_` and return a reference to it (internal code assumes `diff_` is + /// correct immediately after a call to this). + virtual const oxenc::bt_dict& diff(); + + /// Returns the seqno of this message + const seqno_t& seqno() const { return seqno_hash_.first; } + + /// Calculates the hash of the current message. For a ConfigMessage this is calculated when the + /// message is first loaded; for a MutableConfigMessage this serializes the current value to + /// properly compute the current hash. Subclasses must ensure that seqno_hash_.second is set to + /// the correct value when this is called (and typically return a reference to it). + virtual const hash_t& hash() { return seqno_hash_.second; } + + /// After loading multiple config files this flag indicates whether or not we had to produce a + /// new, merged configuration message (true) or did not need to merge (false). (For config + /// messages that were not loaded from serialized data this is always true). + bool merged() const { return merged_; } + + /// Returns true if this message contained a valid, verified signature when it was parsed. + /// Returns false otherwise (e.g. not loaded from verification at all; loaded without a + /// verification function; or had no signature and a signature wasn't required). + bool verified_signature() const { return verified_signature_; } + + /// Constructs a new MutableConfigMessage from this config message with an incremented seqno. + /// The new config message's diff will reflect changes made after this construction. + virtual MutableConfigMessage increment() const; + + /// Serializes this config's data. Note that if the ConfigMessage was constructed from signed, + /// serialized input, this will only produce an exact copy of the original serialized input if + /// it uses the identical, deterministic signing function used to construct the original. 
+ /// + /// The optional `enable_signing` argument can be specified as false to disable signing (this is + /// typically for a local serialization value that isn't being pushed to the server). Note that + /// signing is always disabled if there is no signing callback set, regardless of the value of + /// this argument. + virtual std::string serialize(bool enable_signing = true); + + protected: + std::string serialize_impl(const oxenc::bt_dict& diff, bool enable_signing = true); +}; + +// Constructor tag +struct increment_seqno_t {}; +inline constexpr increment_seqno_t increment_seqno{}; + +class MutableConfigMessage : public ConfigMessage { + protected: + dict orig_data_{data_}; + + friend class ConfigMessage; + + public: + MutableConfigMessage(const MutableConfigMessage&) = default; + MutableConfigMessage& operator=(const MutableConfigMessage&) = default; + MutableConfigMessage(MutableConfigMessage&&) = default; + MutableConfigMessage& operator=(MutableConfigMessage&&) = default; + + /// Constructs a new, empty config message. Takes various fields to pre-fill the various + /// properties during construction (these are for convenience and equivalent to setting them via + /// properties/methods after construction). + /// + /// seqno -- the message's seqno, default 0 + /// lags -- number of lags to keep (when deriving messages, e.g. via increment()) + /// signer -- if specified and not nullptr then this message will be signed when serialized + /// using the given signing function. If omitted no signing takes place. + explicit MutableConfigMessage( + seqno_t seqno = 0, int lag = DEFAULT_DIFF_LAGS, sign_callable signer = nullptr) { + this->lag = lag; + this->seqno(seqno); + this->signer = signer; + } + + /// Wraps the ConfigMessage constructor with the same arguments but always produces a + /// MutableConfigMessage. 
In particular this means that if the base constructor performed a + /// merge (and thus incremented seqno) then the config stays as is, but contained in a Mutable + /// message that can be changed. If it did *not* merge (i.e. the highest seqno message it found + /// did not conflict with any other messages) then this construction is equivalent to doing a + /// base load followed by a .increment() call. In other words: this constructor *always* gives + /// you an incremented seqno value from the highest valid input config message. + /// + /// This is almost equivalent to ConfigMessage{args...}.increment(), except that this + /// constructor only increments seqno once while the indirect version would increment twice in + /// the case of a required merge conflict resolution. + explicit MutableConfigMessage( + const std::vector& configs, + verify_callable verifier = nullptr, + sign_callable signer = nullptr, + int lag = DEFAULT_DIFF_LAGS, + bool signature_optional = false, + std::function error_handler = nullptr); + + /// Wrapper around the above that takes a single string view to load a single message, doesn't + /// take an error handler and instead always throws on parse errors (the above also throws for + /// an erroneous single message, but with a less specific "no valid config messages" error). + explicit MutableConfigMessage( + std::string_view config, + verify_callable verifier = nullptr, + sign_callable signer = nullptr, + int lag = DEFAULT_DIFF_LAGS, + bool signature_optional = false); + + /// Does the same as the base incrementing, but also records any diff info from the current + /// MutableConfigMessage. *this* object gets pruned and signed as part of this call. If the + /// sign argument is omitted/nullptr then the current object's `sign` callback gets copied into + /// the new object. After this call you typically do not want to further modify *this (because + /// any modifications will change the hash, making *this no longer a parent of the new object). 
+ MutableConfigMessage increment() const override; + + /// Constructor that does the same thing as the `m.increment()` factory method. The second + /// value should be the literal `increment_seqno` value (to select this constructor). + explicit MutableConfigMessage(const ConfigMessage& m, increment_seqno_t); + + using ConfigMessage::data; + /// Returns a mutable reference to the underlying config data. + dict& data() { return data_; } + + using ConfigMessage::seqno; + + /// Sets the seqno of the message to a specific value. You usually want to use `.increment()` + /// from an existing config message rather than manually adjusting the seqno. + void seqno(seqno_t new_seqno) { seqno_hash_.first = new_seqno; } + + /// Returns the current diff for this data relative to its original data. The data is pruned + /// implicitly by this call. + const oxenc::bt_dict& diff() override; + + /// Prunes empty dicts/sets from data. This is called automatically when serializing or + /// calculating a diff. Returns true if the data was actually changed, false if nothing needed + /// pruning. + bool prune(); + + /// Calculates the hash of the current message. Can optionally be given the already-serialized + /// value, if available; if empty/omitted, `serialize()` will be called to compute it. + const hash_t& hash() override; + + protected: + const hash_t& hash(std::string_view serialized); + void increment_impl(); +}; + +/// Encrypts a config message using XChaCha20-Poly1305, using a blake2b keyed hash of the message +/// for the nonce (rather than pure random) so that different clients will encrypt the same data to +/// the same encrypted value (thus allowing for server-side deduplication of identical messages). +/// +/// `key_base` must be 32 bytes. This value is a fixed key that all clients that might receive this +/// message can calculate independently (for instance a value derived from a secret key, or a shared +/// random key). 
This key will be hashed with the message size and domain suffix (see below) to +/// determine the actual encryption key. +/// +/// `domain` is a short string (1-24 chars) used for the keyed hash. Typically this is the type of +/// config, e.g. "closed-group" or "contacts". The full key will be +/// "session-config-encrypted-message-[domain]". This value is also used for the encrypted key (see +/// above). +/// +/// The returned result will consist of encrypted data with authentication tag and appended nonce, +/// suitable for being passed to decrypt() to authenticate and decrypt. +/// +/// Throw std::invalid_argument on bad input (i.e. from invalid key_base or domain). +ustring encrypt(ustring_view message, ustring_view key_base, std::string_view domain); + +/// Same as above but works with strings/string_views instead of ustring/ustring_view +std::string encrypt(std::string_view message, std::string_view key_base, std::string_view domain); + +/// Thrown if decrypt() fails. +struct decrypt_error : std::runtime_error { + using std::runtime_error::runtime_error; +}; + +/// Takes a value produced by `encrypt()` and decrypts it. `key_base` and `domain` must be the same +/// given to encrypt or else decryption fails. 
Upon decryption failure a `decrypt_error` is thrown. +ustring decrypt(ustring_view ciphertext, ustring_view key_base, std::string_view domain); + +/// Same as above but using std::string/string_view +std::string decrypt( + std::string_view ciphertext, std::string_view key_base, std::string_view domain); + +} // namespace session::config + +namespace oxenc::detail { + +template <> +struct bt_serialize : bt_serialize {}; + +} // namespace oxenc::detail diff --git a/libsession-util/distribution/libsession-util-android/include/session/config/base.h b/libsession-util/distribution/libsession-util-android/include/session/config/base.h new file mode 100644 index 0000000000..55649ecc82 --- /dev/null +++ b/libsession-util/distribution/libsession-util-android/include/session/config/base.h @@ -0,0 +1,85 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +#if defined(_WIN32) || defined(WIN32) +#define LIBSESSION_EXPORT __declspec(dllexport) +#else +#define LIBSESSION_EXPORT __attribute__((visibility("default"))) +#endif +#define LIBSESSION_C_API extern "C" LIBSESSION_EXPORT + +typedef int64_t seqno_t; + +// Config object base type: this type holds the internal object and is initialized by the various +// config-dependent settings (e.g. config_user_profile_init) then passed to the various functions. +typedef struct config_object { + // Internal opaque object pointer; calling code should leave this alone. + void* internals; + // When an error occurs in the C API this string will be set to the specific error message. May + // be NULL. + const char* last_error; +} config_object; + +// Common functions callable on any config instance: + +/// Frees a config object created with one of the config-dependent ..._init functions (e.g. +/// user_profile_init). +void config_free(config_object* conf); + +/// Returns the numeric namespace in which config messages of this type should be stored. 
+int16_t config_storage_namespace(const config_object* conf); + +/// Merges the config object with one or more remotely obtained config strings. After this call the +/// config object may be unchanged, completely replaced, or updated and needing a push, depending on +/// the messages that are merged; the caller should check config_needs_push(). +/// +/// `configs` is an array of pointers to the start of the strings; `lengths` is an array of string +/// lengths; `count` is the length of those two arrays. +void config_merge(config_object* conf, const char** configs, const size_t* lengths, size_t count); + +/// Returns true if this config object contains updated data that has not yet been confirmed stored +/// on the server. +bool config_needs_push(const config_object* conf); + +/// Obtains the configuration data that needs to be pushed to the server. The output is written +/// to a new malloc'ed buffer of the appropriate size; the buffer and the output length are set +/// in the `out` and `outlen` parameters. Note that this is binary data, *not* a null-terminated +/// C string. +/// +/// Generally this call should be guarded by a call to `config_needs_push`, however it can be used +/// to re-obtain the current serialized config even if no push is needed (for example, if the client +/// wants to re-submit it after a network error). +/// +/// NB: The returned buffer belongs to the caller: that is, the caller *MUST* free() it when done +/// with it. +seqno_t config_push(config_object* conf, char** out, size_t* outlen); + +/// Reports that data obtained from `config_push` has been successfully stored on the server. The +/// seqno value is the one returned by the config_push call that yielded the config data. +void config_confirm_pushed(config_object* conf, seqno_t seqno); + +/// Returns a binary dump of the current state of the config object. 
This dump can be used to +/// resurrect the object at a later point (e.g. after a restart). Allocates a new buffer and sets +/// it in `out` and the length in `outlen`. Note that this is binary data, *not* a null-terminated +/// C string. +/// +/// NB: It is the caller's responsibility to `free()` the buffer when done with it. +/// +/// Immediately after this is called `config_needs_dump` will start returning false (until the +/// configuration is next modified). +void config_dump(config_object* conf, char** out, size_t* outlen); + +/// Returns true if something has changed since the last call to `dump()` that requires calling +/// and saving the `config_dump()` data again. +bool config_needs_dump(const config_object* conf); + +#ifdef __cplusplus +} // extern "C" +#endif diff --git a/libsession-util/distribution/libsession-util-android/include/session/config/base.hpp b/libsession-util/distribution/libsession-util-android/include/session/config/base.hpp new file mode 100644 index 0000000000..dc14a37bc0 --- /dev/null +++ b/libsession-util/distribution/libsession-util-android/include/session/config/base.hpp @@ -0,0 +1,503 @@ +#pragma once + +#include +#include +#include +#include + +#include "base.h" +#include "namespaces.hpp" + +namespace session::config { + +template +static constexpr bool is_one_of = (std::is_same_v || ...); + +/// True for a dict_value direct subtype, but not scalar sub-subtypes. +template +static constexpr bool is_dict_subtype = is_one_of; + +/// True for a dict_value or any of the types containable within a dict value +template +static constexpr bool is_dict_value = + is_dict_subtype || is_one_of; + +// Levels for the logging callback +enum class LogLevel { debug, info, warning, error }; + +/// Our current config state +enum class ConfigState : int { + /// Clean means the config is confirmed stored on the server and we haven't changed anything. 
+ Clean = 0, + + /// Dirty means we have local changes, and the changes haven't been serialized yet for sending + /// to the server. + Dirty = 1, + + /// Waiting is halfway in-between clean and dirty: the caller has serialized the data, but + /// hasn't yet reported back that the data has been stored, *and* we haven't made any changes + /// since the data was serialized. + Waiting = 2, +}; + +/// Base config type for client-side configs containing common functionality needed by all config +/// sub-types. +class ConfigBase { + private: + // The object (either base config message or MutableConfigMessage) that stores the current + // config message. Subclasses do not directly access this: instead they call `dirty()` if they + // intend to make changes, or the `set_config_field` wrapper. + std::unique_ptr _config; + + // Tracks our current state + ConfigState _state = ConfigState::Clean; + + protected: + // Constructs an empty base config with no config settings and seqno set to 0. + ConfigBase(); + + // Constructs a base config by loading the data from a dump as produced by `dump()`. + explicit ConfigBase(std::string_view dump); + + // Tracks whether we need to dump again; most mutating methods should set this to true (unless + // calling set_state, which sets it to true implicitly). + bool _needs_dump = false; + + // Sets the current state; this also sets _needs_dump to true. + void set_state(ConfigState s) { + _state = s; + _needs_dump = true; + } + + // If set then we log things by calling this callback + std::function logger; + + // Invokes the above if set, does nothing if there is no logger. + void log(LogLevel lvl, std::string msg) { + if (logger) + logger(lvl, std::move(msg)); + } + + // Returns a reference to the current MutableConfigMessage. If the current message is not + // already dirty (i.e. Clean or Waiting) then calling this increments the seqno counter. 
+ MutableConfigMessage& dirty(); + + // class for proxying subfield access; this class should never be stored but only used + // ephemerally (most of its methods are rvalue-qualified). This lets constructs such as + // foo["abc"]["def"]["ghi"] = 12; + // work, auto-vivifying (or trampling, if not a dict) subdicts to reach the target. It also + // allows non-vivifying value retrieval via .string(), .integer(), etc. methods. + class DictFieldProxy { + private: + ConfigBase& _conf; + std::vector _inter_keys; + std::string _last_key; + + // See if we can find the key without needing to create anything, so that we can attempt to + // access values without mutating anything (which allows, among other things, for assigning + // of the existing value to not dirty anything). Returns nullptr if the value or something + // along its path would need to be created, or has the wrong type; otherwise a const pointer + // to the value. The templated type, if provided, can be one of the types a dict_value can + // hold to also check that the returned value has a particular type; if omitted you get back + // the dict_value pointer itself. + template >> + const T* get_clean() const { + const config::dict* data = &_conf._config->data(); + // All but the last need to be dicts: + for (const auto& key : _inter_keys) { + auto it = data->find(key); + data = it != data->end() ? std::get_if(&it->second) : nullptr; + if (!data) + return nullptr; + } + + const dict_value* val; + // The last can be any value type: + if (auto it = data->find(_last_key); it != data->end()) + val = &it->second; + else + return nullptr; + + if constexpr (std::is_same_v) + return val; + else if constexpr (is_dict_subtype) { + if (auto* v = std::get_if(val)) + return v; + } else { // int64 or std::string, i.e. the config::scalar sub-types. 
+ if (auto* scalar = std::get_if(val)) + return std::get_if(scalar); + } + return nullptr; + } + + // Returns a lvalue reference to the value, stomping its way through the dict as it goes to + // create subdicts as needed to reach the target value. If given a template type then we + // also cast the final dict_value variant into the given type (and replace if with a + // default-constructed value if it has the wrong type) then return a reference to that. + template >> + T& get_dirty() { + config::dict* data = &_conf.dirty().data(); + for (const auto& key : _inter_keys) { + auto& val = (*data)[key]; + data = std::get_if(&val); + if (!data) + data = &val.emplace(); + } + auto& val = (*data)[_last_key]; + + if constexpr (std::is_same_v) + return val; + else if constexpr (is_dict_subtype) { + if (auto* v = std::get_if(&val)) + return *v; + return val.emplace(); + } else { // int64 or std::string, i.e. the config::scalar sub-types. + if (auto* scalar = std::get_if(&val)) { + if (auto* v = std::get_if(scalar)) + return *v; + return scalar->emplace(); + } + return val.emplace().emplace(); + } + } + + template + void assign_if_changed(T value) { + // Try to avoiding dirtying the config if this assignment isn't changing anything + if (!_conf.is_dirty()) + if (auto current = get_clean(); current && *current == value) + return; + + get_dirty() = std::move(value); + } + + void insert_if_missing(config::scalar&& value) { + if (!_conf.is_dirty()) + if (auto current = get_clean(); current && current->count(value)) + return; + + get_dirty().insert(std::move(value)); + } + + void set_erase_impl(const config::scalar& value) { + if (!_conf.is_dirty()) + if (auto current = get_clean(); current && !current->count(value)) + return; + + config::dict* data = &_conf.dirty().data(); + + for (const auto& key : _inter_keys) { + auto it = data->find(key); + data = it != data->end() ? 
std::get_if(&it->second) : nullptr; + if (!data) + return; + } + + auto it = data->find(_last_key); + if (it == data->end()) + return; + auto& val = it->second; + if (auto* current = std::get_if(&val)) + current->erase(value); + else + val.emplace(); + } + + public: + DictFieldProxy(ConfigBase& b, std::string key) : _conf{b}, _last_key{std::move(key)} {} + + /// Descends into a dict, returning a copied proxy object for the path to the requested + /// field. Nothing is created by doing this unless you actually assign to a value. + DictFieldProxy operator[](std::string subkey) const& { + DictFieldProxy subfield{_conf, std::move(subkey)}; + subfield._inter_keys.reserve(_inter_keys.size() + 1); + subfield._inter_keys.insert( + subfield._inter_keys.end(), _inter_keys.begin(), _inter_keys.end()); + subfield._inter_keys.push_back(_last_key); + return subfield; + } + + // Same as above, but when called on an rvalue reference we just mutate the current proxy to + // the new dict path. + DictFieldProxy&& operator[](std::string subkey) && { + _inter_keys.push_back(std::move(_last_key)); + _last_key = std::move(subkey); + return std::move(*this); + } + + /// Returns a const pointer to the string if one exists at the given location, nullptr + /// otherwise. + const std::string* string() const { return get_clean(); } + + /// returns the value as a string_view or a fallback if the value doesn't exist (or isn't a + /// string). The returned view is directly into the value (or fallback) and so mustn't be + /// used beyond the validity of either. + std::string_view string_view_or(std::string_view fallback) const { + if (auto* s = string()) + return {*s}; + return fallback; + } + + /// Returns a copy of the value as a string, if it exists and is a string; returns + /// `fallback` otherwise. 
+ std::string string_or(std::string fallback) const { + if (auto* s = string()) + return *s; + return std::move(fallback); + } + + /// Returns a const pointer to the integer if one exists at the given location, nullptr + /// otherwise. + const int64_t* integer() const { return get_clean(); } + + /// Returns the value as an integer or a fallback if the value doesn't exist (or isn't an + /// integer). + int64_t integer_or(int64_t fallback) const { + if (auto* i = integer()) + return *i; + return fallback; + } + + /// Returns a const pointer to the set if one exists at the given location, nullptr + /// otherwise. + const config::set* set() const { return get_clean(); } + /// Returns a const pointer to the dict if one exists at the given location, nullptr + /// otherwise. (You typically don't need to use this but can rather just use [] to descend + /// into the dict). + const config::dict* dict() const { return get_clean(); } + + /// Replaces the current value with the given string. This also auto-vivifies any + /// intermediate dicts needed to reach the given key, including replacing non-dict values if + /// they currently exist along the path. + void operator=(std::string value) { assign_if_changed(std::move(value)); } + /// Same as above, but takes a string_view for convenience. + void operator=(std::string_view value) { *this = std::string{value}; } + /// Replace the current value with the given integer. See above. + void operator=(int64_t value) { assign_if_changed(value); } + /// Replace the current value with the given set. See above. + void operator=(config::set value) { assign_if_changed(std::move(value)); } + /// Replace the current value with the given dict. See above. This often isn't needed + /// because of how other assignment operations work. + void operator=(config::dict value) { assign_if_changed(std::move(value)); } + + /// Returns true if there is a value at the current key. 
If a template type T is given, it + /// only returns true if that value also is a `T`. + template >> + bool exists() const { + return get_clean() != nullptr; + } + + // Alias for `exists()` + template + bool is() const { + return exists(); + } + + /// Removes the value at the current location, regardless of what it currently is. This + /// does nothing if the current location does not have a value. + void erase() { + if (!_conf.is_dirty() && !get_clean()) + return; + + config::dict* data = &_conf.dirty().data(); + for (const auto& key : _inter_keys) { + auto it = data->find(key); + data = it != data->end() ? std::get_if(&it->second) : nullptr; + if (!data) + return; + } + data->erase(_last_key); + } + + /// Adds a value to the set at the current location. If the current value is not a set or + /// does not exist then dicts will be created to reach it and a new set will be created. + void set_insert(std::string_view value) { + insert_if_missing(config::scalar{std::string{value}}); + } + void set_insert(int64_t value) { insert_if_missing(config::scalar{value}); } + + /// Removes a value from the set at the current location. If the current value does not + /// exist then nothing happens. If it does exist, but is not a set, it will be replaced + /// with an empty set. Otherwise the given value will be removed from the set, if present. + void set_erase(std::string_view value) { + set_erase_impl(config::scalar{std::string{value}}); + } + void set_erase(int64_t value) { set_erase_impl(scalar{value}); } + + /// Emplaces a value at the current location. As with assignment, this creates dicts as + /// needed along the keys to reach the target. The existing value (if present) is destroyed + /// to make room for the new one. + template < + typename T, + typename... Args, + typename = std::enable_if_t< + is_one_of>> + T& emplace(Args&&... 
args) { + if constexpr (is_one_of) + return get_dirty().emplace(std::forward(args)...); + + return get_dirty().emplace(std::forward(args)...); + } + }; + + /// Wrapper for the ConfigBase's root `data` field to provide data access. Only provides a [] + /// that gets you into a DictFieldProxy. + class DictFieldRoot { + ConfigBase& _conf; + DictFieldRoot(DictFieldRoot&&) = delete; + DictFieldRoot(const DictFieldRoot&) = delete; + DictFieldRoot& operator=(DictFieldRoot&&) = delete; + DictFieldRoot& operator=(const DictFieldRoot&) = delete; + + public: + DictFieldRoot(ConfigBase& b) : _conf{b} {} + + /// Access a dict element. This returns a proxy object for accessing the value, but does + /// *not* auto-vivify the path (unless/until you assign to it). + DictFieldProxy operator[](std::string key) const& { + return DictFieldProxy{_conf, std::move(key)}; + } + }; + + // Called when dumping to obtain any extra data that a subclass needs to store to reconstitute + // the object. The base implementation does nothing. The counterpart to this, + // `load_extra_data()`, is called when loading from a dump that has extra data; a subclass + // should either override both (if it needs to serialize extra data) or neither (if it needs no + // extra data). Internally this extra data (if non-empty) is stored in the "+" key of the dump. + virtual oxenc::bt_dict extra_data() const { return {}; } + + // Called when constructing from a dump that has extra data. The base implementation does + // nothing. + virtual void load_extra_data(oxenc::bt_dict extra) {} + + public: + virtual ~ConfigBase() = default; + + // Proxy class providing read and write access to the contained config data. + const DictFieldRoot data{*this}; + + // Accesses the storage namespace where this config type is to be stored/loaded from. See + // namespaces.hpp for the underlying integer values. + virtual Namespace storage_namespace() const = 0; + + // How many config lags should be used for this object; default to 5. 
Implementing subclasses + // can override to return a different constant if desired. More lags require more "diff" + // storage in the config messages, but also allow for a higher tolerance of simultaneous message + // conflicts. + virtual int config_lags() const { return 5; } + + // This takes all of the messages pulled down from the server and does whatever is necessary to + // merge (or replace) the current values. + // + // After this call the caller should check `needs_push()` to see if the data on hand was updated + // and needs to be pushed to the server again. + // + // Will throw on serious error (i.e. if neither the current nor any of the given configs are + // parseable). + virtual void merge(const std::vector& configs); + + // Returns true if we are currently dirty (i.e. have made changes that haven't been serialized + // yet). + bool is_dirty() const { return _state == ConfigState::Dirty; } + + // Returns true if we are currently clean (i.e. our current config is stored on the server and + // unmodified). + bool is_clean() const { return _state == ConfigState::Clean; } + + // Returns true if this object contains updated data that has not yet been confirmed stored on + // the server. This will be true whenever `is_clean()` is false: that is, if we are currently + // "dirty" (i.e. have changes that haven't been pushed) or are still awaiting confirmation of + // storage of the most recent serialized push data. + virtual bool needs_push() const; + + // Returns the data to push to the server along with the seqno value of the data. If the config + // is currently dirty (i.e. has previously unsent modifications) then this marks it as + // awaiting-confirmation instead of dirty so that any future change immediately increments the + // seqno. + virtual std::pair push(); + + // Should be called after the push is confirmed stored on the storage server swarm to let the + // object know the data is stored. 
(Once this is called `needs_push` will start returning false + // until something changes). Takes the seqno that was pushed so that the object can ensure that + // the latest version was pushed (i.e. in case there have been other changes since the `push()` + // call that returned this seqno). + // + // It is safe to call this multiple times with the same seqno value, and with out-of-order + // seqnos (e.g. calling with seqno 122 after having called with 123; the duplicates and earlier + // ones will just be ignored). + virtual void confirm_pushed(seqno_t seqno); + + // Returns a dump of the current state for storage in the database; this value would get passed + // into the constructor to reconstitute the object (including the push/not pushed status). This + // method is *not* virtual: if subclasses need to store extra data they should override + // `extra_data()` and `load_extra_data()`. + std::string dump(); + + // Returns true if something has changed since the last call to `dump()` that requires calling + // and saving the `dump()` data again. + virtual bool needs_dump() const { return _needs_dump; } +}; + +// The C++ struct we hold opaquely inside the C internals struct. This is designed so that any +// internals has the same layout so that it doesn't matter whether we unbox to an +// internals or internals. 
+template < + typename ConfigT = ConfigBase, + std::enable_if_t, int> = 0> +struct internals final { + std::unique_ptr config; + std::string error; + + // Dereferencing falls through to the ConfigBase object + ConfigT* operator->() { + if constexpr (std::is_same_v) + return config.get(); + else { + auto* c = dynamic_cast(config.get()); + assert(c); + return c; + } + } + const ConfigT* operator->() const { + if constexpr (std::is_same_v) + return config.get(); + else { + auto* c = dynamic_cast(config.get()); + assert(c); + return c; + } + } + ConfigT& operator*() { return *operator->(); } + const ConfigT& operator*() const { return *operator->(); } +}; + +template , int> = 0> +inline internals& unbox(config_object* conf) { + return *static_cast*>(conf->internals); +} +template , int> = 0> +inline const internals& unbox(const config_object* conf) { + return *static_cast*>(conf->internals); +} + +// Sets an error message in the internals.error string and updates the last_error pointer in the +// outer (C) config_object struct to point at it. +void set_error(config_object* conf, std::string e); + +// Same as above, but gets the error string out of an exception and passed through a return value. +// Intended to simplify catch-and-return-error such as: +// try { +// whatever(); +// } catch (const std::exception& e) { +// return set_error(conf, LIB_SESSION_ERR_OHNOES, e); +// } +inline int set_error(config_object* conf, int errcode, const std::exception& e) { + set_error(conf, e.what()); + return errcode; +} + +// Copies a value contained in a string into a new malloced char buffer, returning the buffer and +// size via the two pointer arguments. 
+void copy_out(const std::string& data, char** out, size_t* outlen); + +} // namespace session::config diff --git a/libsession-util/distribution/libsession-util-android/include/session/config/error.h b/libsession-util/distribution/libsession-util-android/include/session/config/error.h new file mode 100644 index 0000000000..598a1539aa --- /dev/null +++ b/libsession-util/distribution/libsession-util-android/include/session/config/error.h @@ -0,0 +1,23 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +enum config_error { + /// Value returned for no error + SESSION_ERR_NONE = 0, + /// Error indicating that initialization failed because the dumped data being loaded is invalid. + SESSION_ERR_INVALID_DUMP = 1, + /// Error indicating a bad value, e.g. if trying to set something invalid in a config field. + SESSION_ERR_BAD_VALUE = 2, +}; + +// Returns a generic string for a given integer error code as returned by some functions. Depending +// on the call, a more detailed error string may be available in the config_object's `last_error` +// field. 
+const char* config_errstr(int err); + +#ifdef __cplusplus +} // extern "C" +#endif diff --git a/libsession-util/distribution/libsession-util-android/include/session/config/namespaces.hpp b/libsession-util/distribution/libsession-util-android/include/session/config/namespaces.hpp new file mode 100644 index 0000000000..f5ab57c200 --- /dev/null +++ b/libsession-util/distribution/libsession-util-android/include/session/config/namespaces.hpp @@ -0,0 +1,12 @@ +#pragma once + +#include + +namespace session::config { + +enum class Namespace : std::int16_t { + UserProfile = 2, + ClosedGroupInfo = 11, +}; + +} // namespace session::config diff --git a/libsession-util/distribution/libsession-util-android/include/session/config/user_profile.h b/libsession-util/distribution/libsession-util-android/include/session/config/user_profile.h new file mode 100644 index 0000000000..6a01230271 --- /dev/null +++ b/libsession-util/distribution/libsession-util-android/include/session/config/user_profile.h @@ -0,0 +1,51 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include "base.h" + +/// Constructs a user profile config object and sets a pointer to it in `conf`. To restore an +/// existing dump produced by a past instantiation's call to `dump()` pass the dump value via `dump` +/// and `dumplen`; to construct a new, empty profile pass NULL and 0. +/// +/// `error` must either be NULL or a pointer to a buffer of at least 256 bytes. +/// +/// Returns 0 on success; returns a non-zero error code and sets error (if not NULL) to the +/// exception message on failure. +/// +/// When done with the object the `config_object` must be destroyed by passing the pointer to +/// config_free() (in `session/config/base.h`). +int user_profile_init(config_object** conf, const char* dump, size_t dumplen, char* error) + __attribute__((warn_unused_result)); + +/// Returns a pointer to the currently-set name (null-terminated), or NULL if there is no name at +/// all. 
Should be copied right away as the pointer may not remain valid beyond other API calls. +const char* user_profile_get_name(const config_object* conf); + +/// Sets the user profile name to the null-terminated C string. Returns 0 on success, non-zero on +/// error (and sets the config_object's error string). +int user_profile_set_name(config_object* conf, const char* name); + +typedef struct user_profile_pic { + // Null-terminated C string containing the uploaded URL of the pic. Will be NULL if there is no + // profile pic. + const char* url; + // The profile pic decryption key, in bytes. This is a byte buffer of length `keylen`, *not* a + // null-terminated C string. Will be NULL if there is no profile pic. + const char* key; + size_t keylen; +} user_profile_pic; + +// Obtains the current profile pic. The pointers in the returned struct will be NULL if a profile +// pic is not currently set, and otherwise should be copied right away (they will not be valid +// beyond other API calls on this config object). 
+user_profile_pic user_profile_get_pic(const config_object* conf); + +// Sets a user profile +int user_profile_set_pic(config_object* conf, user_profile_pic pic); + +#ifdef __cplusplus +} // extern "C" +#endif diff --git a/libsession-util/distribution/libsession-util-android/include/session/config/user_profile.hpp b/libsession-util/distribution/libsession-util-android/include/session/config/user_profile.hpp new file mode 100644 index 0000000000..b4cdba80b7 --- /dev/null +++ b/libsession-util/distribution/libsession-util-android/include/session/config/user_profile.hpp @@ -0,0 +1,43 @@ +#pragma once + +#include +#include + +#include "base.hpp" +#include "namespaces.hpp" + +namespace session::config { + +/// keys used in this config, either currently or in the past (so that we don't reuse): +/// +/// n - user profile name +/// p - user profile url +/// q - user profile decryption key (binary) + +class UserProfile final : public ConfigBase { + + public: + /// Constructs a new, blank user profile. + UserProfile() = default; + + /// Constructs a user profile from existing data + explicit UserProfile(std::string_view dumped) : ConfigBase{dumped} {} + + Namespace storage_namespace() const override { return Namespace::UserProfile; } + + /// Returns the user profile name, or nullptr if there is no profile name set. + const std::string* get_name() const; + + /// Sets the user profile name + void set_name(std::string_view new_name); + + /// Gets the user's current profile pic URL and decryption key. Returns nullptr for *both* + /// values if *either* value is unset or empty in the config data. + std::pair get_profile_pic() const; + + /// Sets the user's current profile pic to a new URL and decryption key. Clears both if either + /// one is empty. 
+ void set_profile_pic(std::string url, std::string key); +}; + +} // namespace session::config diff --git a/libsession-util/distribution/libsession-util-android/include/session/fields.hpp b/libsession-util/distribution/libsession-util-android/include/session/fields.hpp new file mode 100644 index 0000000000..6ca71a245b --- /dev/null +++ b/libsession-util/distribution/libsession-util-android/include/session/fields.hpp @@ -0,0 +1,43 @@ +#pragma once + +#include +#include +#include +#include + +namespace session { + +using namespace std::literals; + +/// An uploaded file is its URL + decryption key +struct Uploaded { + std::string url; + std::string key; +}; + +/// A conversation disappearing messages setting +struct Disappearing { + /// The possible modes of a disappearing messages setting. + enum class Mode : int { None = 0, AfterSend = 1, AfterRead = 2 }; + + /// The mode itself + Mode mode = Mode::None; + + /// The timer value; this is only used when mode is not None. + std::chrono::seconds timer = 0s; +}; + +/// A Session ID: an x25519 pubkey, with a 05 identifying prefix. On the wire we send just the +/// 32-byte pubkey value (i.e. not hex, without the prefix). +struct SessionID { + /// The fixed session netid, 0x05 + static constexpr unsigned char netid = 0x05; + + /// The raw x25519 pubkey, as bytes + std::array pubkey; + + /// Returns the full pubkey in hex, including the netid prefix. + std::string hex() const; +}; + +} // namespace session diff --git a/libsession-util/distribution/libsession-util-android/include/session/xed25519.h b/libsession-util/distribution/libsession-util-android/include/session/xed25519.h new file mode 100644 index 0000000000..5348dafa0c --- /dev/null +++ b/libsession-util/distribution/libsession-util-android/include/session/xed25519.h @@ -0,0 +1,34 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +/// XEd25519-signs a message given a curve25519 privkey and message.
Writes the 64-byte signature +/// to `signature` on success and returns 0. Returns non-zero on failure. +__attribute__((warn_unused_result)) int session_xed25519_sign( + unsigned char* signature /* 64 byte buffer */, + const unsigned char* curve25519_privkey /* 32 bytes */, + const unsigned char* msg, + const unsigned int msg_len); + +/// Verifies an XEd25519-signed message given a 64-byte signature, 32-byte curve25519 pubkey, and +/// message. Returns 0 if the signature verifies successfully, non-zero on failure. +__attribute__((warn_unused_result)) int session_xed25519_verify( + const unsigned char* signature /* 64 bytes */, + const unsigned char* pubkey /* 32-bytes */, + const unsigned char* msg, + const unsigned int msg_len); + +/// Given a curve25519 pubkey, this writes the associated XEd25519-derived Ed25519 pubkey into +/// ed25519_pubkey. Note, however, that there are *two* possible Ed25519 pubkeys that could result +/// in a given curve25519 pubkey: this always returns the positive value. You can get the other +/// possibility (the negative) by flipping the sign bit, i.e. `returned_pubkey[31] |= 0x80`. +/// Returns 0 on success, non-0 on failure. +__attribute__((warn_unused_result)) int session_xed25519_pubkey( + unsigned char* ed25519_pubkey /* 32-byte output buffer */, + const unsigned char* curve25519_pubkey /* 32 bytes */); + +#ifdef __cplusplus +} +#endif diff --git a/libsession-util/distribution/libsession-util-android/include/session/xed25519.hpp b/libsession-util/distribution/libsession-util-android/include/session/xed25519.hpp new file mode 100644 index 0000000000..9889113c2f --- /dev/null +++ b/libsession-util/distribution/libsession-util-android/include/session/xed25519.hpp @@ -0,0 +1,38 @@ +#pragma once +#include +#include +#include + +namespace session::xed25519 { + +using ustring_view = std::basic_string_view; + +/// XEd25519-signs a message given the curve25519 privkey and message.
+std::array sign( + ustring_view curve25519_privkey /* 32 bytes */, ustring_view msg); + +/// "Softer" version that takes and returns strings of regular chars +std::string sign(std::string_view curve25519_privkey /* 32 bytes */, std::string_view msg); + +/// Verifies a message allegedly XEd25519-signed by the given curve25519 pubkey +[[nodiscard]] bool verify( + ustring_view signature /* 64 bytes */, + ustring_view curve25519_pubkey /* 32 bytes */, + ustring_view msg); + +/// "Softer" version that takes strings of regular chars +[[nodiscard]] bool verify( + std::string_view signature /* 64 bytes */, + std::string_view curve25519_pubkey /* 32 bytes */, + std::string_view msg); + +/// Given a curve25519 pubkey, this returns the associated XEd25519-derived Ed25519 pubkey. Note, +/// however, that there are *two* possible Ed25519 pubkeys that could result in a given curve25519 +/// pubkey: this always returns the positive value. You can get the other possibility (the +/// negative) by flipping the sign bit, i.e. `returned_pubkey[31] |= 0x80`.
+std::array pubkey(ustring_view curve25519_pubkey); + +/// "Softer" version that takes/returns strings of regular chars +std::string pubkey(std::string_view curve25519_pubkey); + +} // namespace session::xed25519 diff --git a/libsession-util/distribution/libsession-util-android/lib/arm64-v8a/libsession-util.a b/libsession-util/distribution/libsession-util-android/lib/arm64-v8a/libsession-util.a new file mode 100644 index 0000000000..1ab0ff8c3b Binary files /dev/null and b/libsession-util/distribution/libsession-util-android/lib/arm64-v8a/libsession-util.a differ diff --git a/libsession-util/distribution/libsession-util-android/lib/armeabi-v7a/libsession-util.a b/libsession-util/distribution/libsession-util-android/lib/armeabi-v7a/libsession-util.a new file mode 100644 index 0000000000..fdbd9df39f Binary files /dev/null and b/libsession-util/distribution/libsession-util-android/lib/armeabi-v7a/libsession-util.a differ diff --git a/libsession-util/distribution/libsession-util-android/lib/x86/libsession-util.a b/libsession-util/distribution/libsession-util-android/lib/x86/libsession-util.a new file mode 100644 index 0000000000..0ad9aa2182 Binary files /dev/null and b/libsession-util/distribution/libsession-util-android/lib/x86/libsession-util.a differ diff --git a/libsession-util/distribution/libsession-util-android/lib/x86_64/libsession-util.a b/libsession-util/distribution/libsession-util-android/lib/x86_64/libsession-util.a new file mode 100644 index 0000000000..91adc74d1c Binary files /dev/null and b/libsession-util/distribution/libsession-util-android/lib/x86_64/libsession-util.a differ diff --git a/libsession-util/src/androidTest/java/network/loki/messenger/libsession_util/ExampleInstrumentedTest.kt b/libsession-util/src/androidTest/java/network/loki/messenger/libsession_util/ExampleInstrumentedTest.kt new file mode 100644 index 0000000000..ac3ff38165 --- /dev/null +++ 
b/libsession-util/src/androidTest/java/network/loki/messenger/libsession_util/ExampleInstrumentedTest.kt @@ -0,0 +1,30 @@ +package network.loki.messenger.libsession_util + +import androidx.test.platform.app.InstrumentationRegistry +import androidx.test.ext.junit.runners.AndroidJUnit4 + +import org.junit.Test +import org.junit.runner.RunWith + +import org.junit.Assert.* + +/** + * Instrumented test, which will execute on an Android device. + * + * See [testing documentation](http://d.android.com/tools/testing). + */ +@RunWith(AndroidJUnit4::class) +class ExampleInstrumentedTest { + @Test + fun useAppContext() { + // Context of the app under test. + val appContext = InstrumentationRegistry.getInstrumentation().targetContext + assertEquals("network.loki.messenger.libsession_util.test", appContext.packageName) + } + + @Test + fun jni_accessible() { + assertEquals("Hello from C++", NativeLib().stringFromJNI()) + } + +} \ No newline at end of file diff --git a/libsession-util/src/main/AndroidManifest.xml b/libsession-util/src/main/AndroidManifest.xml new file mode 100644 index 0000000000..65483324a6 --- /dev/null +++ b/libsession-util/src/main/AndroidManifest.xml @@ -0,0 +1,5 @@ + + + + \ No newline at end of file diff --git a/libsession-util/src/main/cpp/CMakeLists.txt b/libsession-util/src/main/cpp/CMakeLists.txt new file mode 100644 index 0000000000..c6161ba4f7 --- /dev/null +++ b/libsession-util/src/main/cpp/CMakeLists.txt @@ -0,0 +1,57 @@ +# For more information about using CMake with Android Studio, read the +# documentation: https://d.android.com/studio/projects/add-native-code.html + +# Sets the minimum version of CMake required to build the native library. + +cmake_minimum_required(VERSION 3.18.1) + +# Declares and names the project. + +project("session_util") + +# Creates and names a library, sets it as either STATIC +# or SHARED, and provides the relative paths to its source code. +# You can define multiple libraries, and CMake builds them for you. 
+# Gradle automatically packages shared libraries with your APK. + +add_library( # Sets the name of the library. + session_util + + # Sets the library as a shared library. + SHARED + + # Provides a relative path to your source file(s). + session_util.cpp) + +# Searches for a specified prebuilt library and stores the path as a +# variable. Because CMake includes system libraries in the search path by +# default, you only need to specify the name of the public NDK library +# you want to add. CMake verifies that the library exists before +# completing its build. + +find_library( # Sets the name of the path variable. + log-lib + + # Specifies the name of the NDK library that + # you want CMake to locate. + log) + +# Add the libsession-util library here +set(distribution_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../../distribution) +add_library(external-libsession-util STATIC IMPORTED) +set_target_properties(external-libsession-util PROPERTIES IMPORTED_LOCATION + ${distribution_DIR}/libsession-util-android/lib/${ANDROID_ABI}/libsession-util.a) + +target_include_directories(session_util PRIVATE + ${distribution_DIR}/libsession-util-android/include) + +# Specifies libraries CMake should link to your target library. You +# can link multiple libraries, such as libraries you define in this +# build script, prebuilt third-party libraries, or system libraries. + +target_link_libraries( # Specifies the target library. + session_util + external-libsession-util + # Links the target library to the log library + # included in the NDK. 
+ ${log-lib}) \ No newline at end of file diff --git a/libsession-util/src/main/cpp/session_util.cpp b/libsession-util/src/main/cpp/session_util.cpp new file mode 100644 index 0000000000..d057826204 --- /dev/null +++ b/libsession-util/src/main/cpp/session_util.cpp @@ -0,0 +1,19 @@ +#include +#include +#include "session/config/user_profile.hpp" + +extern "C" JNIEXPORT jobject JNICALL +Java_network_loki_messenger_libsession_1util_Config_00024Companion_newInstance( + JNIEnv* env, + jobject /*this*/) { + + auto* profile = new session::config::UserProfile(); + + jclass configClass = env->FindClass("network/loki/messenger/libsession_util/Config"); + jobject newConfig = env->AllocObject(configClass); + + jfieldID pointerField = env->GetFieldID(configClass, "pointer", "J"); + env->SetLongField(newConfig, pointerField, reinterpret_cast(profile)); + + return newConfig; +} \ No newline at end of file diff --git a/libsession-util/src/main/java/network/loki/messenger/libsession_util/Config.kt b/libsession-util/src/main/java/network/loki/messenger/libsession_util/Config.kt new file mode 100644 index 0000000000..a697b44261 --- /dev/null +++ b/libsession-util/src/main/java/network/loki/messenger/libsession_util/Config.kt @@ -0,0 +1,13 @@ +package network.loki.messenger.libsession_util + +data class Config(private val /* yucky */ pointer: Long) { + + companion object { + external fun newInstance(): Config + } + + var lastError: String? 
= null + + + +} \ No newline at end of file diff --git a/libsession-util/src/main/java/network/loki/messenger/libsession_util/NativeLib.kt b/libsession-util/src/main/java/network/loki/messenger/libsession_util/NativeLib.kt new file mode 100644 index 0000000000..19ed94386b --- /dev/null +++ b/libsession-util/src/main/java/network/loki/messenger/libsession_util/NativeLib.kt @@ -0,0 +1,17 @@ +package network.loki.messenger.libsession_util + +class NativeLib { + + /** + * A native method that is implemented by the 'libsession_util' native library, + * which is packaged with this application. + */ + external fun stringFromJNI(): String + + companion object { + // Used to load the 'libsession_util' library on application startup. + init { + System.loadLibrary("session_util") + } + } +} \ No newline at end of file diff --git a/libsession-util/src/test/java/network/loki/messenger/libsession_util/ExampleUnitTest.kt b/libsession-util/src/test/java/network/loki/messenger/libsession_util/ExampleUnitTest.kt new file mode 100644 index 0000000000..3d156bfd4d --- /dev/null +++ b/libsession-util/src/test/java/network/loki/messenger/libsession_util/ExampleUnitTest.kt @@ -0,0 +1,14 @@ +package network.loki.messenger.libsession_util + +import org.junit.Test + +import org.junit.Assert.* + +/** + * Example local unit test, which will execute on the development machine (host). + * + * See [testing documentation](http://d.android.com/tools/testing). + */ +class ExampleUnitTest { + +} \ No newline at end of file diff --git a/settings.gradle b/settings.gradle index 3a42510472..7ab26e097c 100644 --- a/settings.gradle +++ b/settings.gradle @@ -3,4 +3,5 @@ rootProject.name = "session-android" include ':app' include ':liblazysodium' include ':libsession' -include ':libsignal' \ No newline at end of file +include ':libsignal' +include ':libsession-util'