From 02c2763c26c6395a7f43ea05591165b6ee78909a Mon Sep 17 00:00:00 2001 From: frekky Date: Sat, 29 Aug 2015 13:03:08 +0800 Subject: [PATCH] Unsigned types for encoding data --- src/base128.c | 67 +++++++++++++++++++--------------------- src/base32.c | 34 ++++++++++---------- src/base64.c | 35 ++++++++++----------- src/encoding.c | 84 +++++++++++++++++++++++++++----------------------- src/encoding.h | 22 +++++++------ 5 files changed, 122 insertions(+), 120 deletions(-) diff --git a/src/base128.c b/src/base128.c index 4e8e409..98784a3 100644 --- a/src/base128.c +++ b/src/base128.c @@ -42,22 +42,23 @@ * accent chars since they might readily be entered in normal use, * don't use 254-255 because of possible function overloading in DNS systems. */ -static const unsigned char cb128[] = +static const uint8_t cb128[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" "\274\275\276\277" "\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317" "\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337" "\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357" "\360\361\362\363\364\365\366\367\370\371\372\373\374\375"; -static unsigned char rev128[256]; +static uint8_t rev128[256]; static int reverse_init = 0; -static int base128_encode(char *, size_t *, const void *, size_t); -static int base128_decode(void *, size_t *, const char *, size_t); +static size_t base128_encode(uint8_t *, size_t *, const uint8_t *, size_t); +static size_t base128_decode(uint8_t *, size_t *, const uint8_t *, size_t); static int base128_handles_dots(); -static int base128_blksize_raw(); -static int base128_blksize_enc(); +static size_t base128_blksize_raw(); +static size_t base128_blksize_enc(); static size_t base128_encoded_length(size_t inputlen); +static size_t base128_raw_length(size_t inputlen); static struct encoder base128_encoder = { @@ -84,13 +85,13 @@ base128_handles_dots() return 0; } -static int +static size_t base128_blksize_raw() { return 
BLKSIZE_RAW; } -static int +static size_t base128_blksize_enc() { return BLKSIZE_ENC; @@ -99,13 +100,13 @@ base128_blksize_enc() static size_t base128_encoded_length(size_t inputlen) { - return (BLKSIZE_ENC * inputlen) / BLKSIZE_RAW + ((BLKSIZE_ENC * inputlen) % BLKSIZE_RAW) ? 1 : 0; + return (BLKSIZE_ENC * inputlen) / BLKSIZE_RAW + (((BLKSIZE_ENC * inputlen) % BLKSIZE_RAW) ? 1 : 0); } static size_t base128_raw_length(size_t inputlen) { - return (BLKSIZE_RAW * inputlen) / BLKSIZE_ENC + ((BLKSIZE_RAW * inputlen) % BLKSIZE_ENC) ? 1 : 0; + return (BLKSIZE_RAW * inputlen) / BLKSIZE_ENC + (((BLKSIZE_RAW * inputlen) % BLKSIZE_ENC) ? 1 : 0); } inline static void @@ -124,8 +125,8 @@ base128_reverse_init() } } -static int -base128_encode(char *buf, size_t *buflen, const void *data, size_t size) +static size_t +base128_encode(uint8_t *ubuf, size_t *buflen, const uint8_t *udata, size_t size) /* * Fills *buf with max. *buflen characters, encoding size bytes of *data. * @@ -136,10 +137,8 @@ base128_encode(char *buf, size_t *buflen, const void *data, size_t size) * sets *buflen to : #bytes encoded from data */ { - unsigned char *ubuf = (unsigned char *) buf; - unsigned char *udata = (unsigned char *) data; - int iout = 0; /* to-be-filled output char */ - int iin = 0; /* one more than last input byte that can be + size_t iout = 0; /* to-be-filled output char */ + size_t iin = 0; /* one more than last input byte that can be successfully decoded */ /* Note: Don't bother to optimize manually. GCC optimizes @@ -218,8 +217,8 @@ base128_encode(char *buf, size_t *buflen, const void *data, size_t size) #define REV128(x) rev128[(int) (x)] -static int -base128_decode(void *buf, size_t *buflen, const char *str, size_t slen) +static size_t +base128_decode(uint8_t *buf, size_t *buflen, const uint8_t *str, size_t slen) /* * Fills *buf with max. *buflen bytes, decoded from slen chars in *str. * Decoding stops early when *str contains \0. 
@@ -232,8 +231,6 @@ base128_decode(void *buf, size_t *buflen, const char *str, size_t slen) * return value : #bytes filled in buf (excluding \0) */ { - unsigned char *ustr = (unsigned char *) str; - unsigned char *ubuf = (unsigned char *) buf; int iout = 0; /* to-be-filled output byte */ int iin = 0; /* next input char to use in decoding */ @@ -246,61 +243,61 @@ base128_decode(void *buf, size_t *buflen, const char *str, size_t slen) if (iout >= *buflen || iin + 1 >= slen || str[iin] == '\0' || str[iin + 1] == '\0') break; - ubuf[iout] = ((REV128(ustr[iin]) & 0x7f) << 1) | - ((REV128(ustr[iin + 1]) & 0x40) >> 6); + buf[iout] = ((REV128(str[iin]) & 0x7f) << 1) | + ((REV128(str[iin + 1]) & 0x40) >> 6); iin++; /* 0 used up, iin=1 */ iout++; if (iout >= *buflen || iin + 1 >= slen || str[iin] == '\0' || str[iin + 1] == '\0') break; - ubuf[iout] = ((REV128(ustr[iin]) & 0x3f) << 2) | - ((REV128(ustr[iin + 1]) & 0x60) >> 5); + buf[iout] = ((REV128(str[iin]) & 0x3f) << 2) | + ((REV128(str[iin + 1]) & 0x60) >> 5); iin++; /* 1 used up, iin=2 */ iout++; if (iout >= *buflen || iin + 1 >= slen || str[iin] == '\0' || str[iin + 1] == '\0') break; - ubuf[iout] = ((REV128(ustr[iin]) & 0x1f) << 3) | - ((REV128(ustr[iin + 1]) & 0x70) >> 4); + buf[iout] = ((REV128(str[iin]) & 0x1f) << 3) | + ((REV128(str[iin + 1]) & 0x70) >> 4); iin++; /* 2 used up, iin=3 */ iout++; if (iout >= *buflen || iin + 1 >= slen || str[iin] == '\0' || str[iin + 1] == '\0') break; - ubuf[iout] = ((REV128(ustr[iin]) & 0x0f) << 4) | - ((REV128(ustr[iin + 1]) & 0x78) >> 3); + buf[iout] = ((REV128(str[iin]) & 0x0f) << 4) | + ((REV128(str[iin + 1]) & 0x78) >> 3); iin++; /* 3 used up, iin=4 */ iout++; if (iout >= *buflen || iin + 1 >= slen || str[iin] == '\0' || str[iin + 1] == '\0') break; - ubuf[iout] = ((REV128(ustr[iin]) & 0x07) << 5) | - ((REV128(ustr[iin + 1]) & 0x7c) >> 2); + buf[iout] = ((REV128(str[iin]) & 0x07) << 5) | + ((REV128(str[iin + 1]) & 0x7c) >> 2); iin++; /* 4 used up, iin=5 */ iout++; if (iout >= 
*buflen || iin + 1 >= slen || str[iin] == '\0' || str[iin + 1] == '\0') break; - ubuf[iout] = ((REV128(ustr[iin]) & 0x03) << 6) | - ((REV128(ustr[iin + 1]) & 0x7e) >> 1); + buf[iout] = ((REV128(str[iin]) & 0x03) << 6) | + ((REV128(str[iin + 1]) & 0x7e) >> 1); iin++; /* 5 used up, iin=6 */ iout++; if (iout >= *buflen || iin + 1 >= slen || str[iin] == '\0' || str[iin + 1] == '\0') break; - ubuf[iout] = ((REV128(ustr[iin]) & 0x01) << 7) | - ((REV128(ustr[iin + 1]) & 0x7f)); + buf[iout] = ((REV128(str[iin]) & 0x01) << 7) | + ((REV128(str[iin + 1]) & 0x7f)); iin += 2; /* 6,7 used up, iin=8 */ iout++; } - ubuf[iout] = '\0'; + buf[iout] = '\0'; return iout; } diff --git a/src/base32.c b/src/base32.c index a49963d..ccb2133 100644 --- a/src/base32.c +++ b/src/base32.c @@ -33,11 +33,11 @@ static const char cb32_ucase[] = static unsigned char rev32[256]; static int reverse_init = 0; -static int base32_encode(char *, size_t *, const void *, size_t); -static int base32_decode(void *, size_t *, const char *, size_t); +static size_t base32_encode(uint8_t *, size_t *, const uint8_t *, size_t); +static size_t base32_decode(uint8_t *, size_t *, const uint8_t *, size_t); static int base32_handles_dots(); -static int base32_blksize_raw(); -static int base32_blksize_enc(); +static size_t base32_blksize_raw(); +static size_t base32_blksize_enc(); static size_t base32_encoded_length(size_t inputlen); static size_t base32_raw_length(size_t inputlen); @@ -67,13 +67,13 @@ base32_handles_dots() return 0; } -static int +static size_t base32_blksize_raw() { return BLKSIZE_RAW; } -static int +static size_t base32_blksize_enc() { return BLKSIZE_ENC; @@ -82,13 +82,13 @@ base32_blksize_enc() static size_t base32_encoded_length(size_t inputlen) { - return (BLKSIZE_ENC * inputlen) / BLKSIZE_RAW + ((BLKSIZE_ENC * inputlen) % BLKSIZE_RAW) ? 1 : 0; + return (BLKSIZE_ENC * inputlen) / BLKSIZE_RAW + (((BLKSIZE_ENC * inputlen) % BLKSIZE_RAW) ? 
1 : 0); } static size_t base32_raw_length(size_t inputlen) { - return (BLKSIZE_RAW * inputlen) / BLKSIZE_ENC + ((BLKSIZE_RAW * inputlen) % BLKSIZE_ENC) ? 1 : 0; + return (BLKSIZE_RAW * inputlen) / BLKSIZE_ENC + (((BLKSIZE_RAW * inputlen) % BLKSIZE_ENC) ? 1 : 0); } @@ -123,8 +123,8 @@ b32_8to5(int in) return rev32[in]; } -static int -base32_encode(char *buf, size_t *buflen, const void *data, size_t size) +static size_t +base32_encode(uint8_t *buf, size_t *buflen, const uint8_t *udata, size_t size) /* * Fills *buf with max. *buflen characters, encoding size bytes of *data. * @@ -135,9 +135,8 @@ base32_encode(char *buf, size_t *buflen, const void *data, size_t size) * sets *buflen to : #bytes encoded from data */ { - unsigned char *udata = (unsigned char *) data; - int iout = 0; /* to-be-filled output char */ - int iin = 0; /* one more than last input byte that can be + size_t iout = 0; /* to-be-filled output char */ + size_t iin = 0; /* one more than last input byte that can be successfully decoded */ /* Note: Don't bother to optimize manually. GCC optimizes @@ -214,8 +213,8 @@ base32_encode(char *buf, size_t *buflen, const void *data, size_t size) #define REV32(x) rev32[(int) (x)] -static int -base32_decode(void *buf, size_t *buflen, const char *str, size_t slen) +static size_t +base32_decode(uint8_t *ubuf, size_t *buflen, const uint8_t *str, size_t slen) /* * Fills *buf with max. *buflen bytes, decoded from slen chars in *str. * Decoding stops early when *str contains \0. 
@@ -228,9 +227,8 @@ base32_decode(void *buf, size_t *buflen, const char *str, size_t slen) * return value : #bytes filled in buf (excluding \0) */ { - unsigned char *ubuf = (unsigned char *) buf; - int iout = 0; /* to-be-filled output byte */ - int iin = 0; /* next input char to use in decoding */ + size_t iout = 0; /* to-be-filled output byte */ + size_t iin = 0; /* next input char to use in decoding */ base32_reverse_init (); diff --git a/src/base64.c b/src/base64.c index c57a3f6..fb189e3 100644 --- a/src/base64.c +++ b/src/base64.c @@ -33,12 +33,13 @@ static const char cb64[] = static unsigned char rev64[256]; static int reverse_init = 0; -static int base64_encode(char *, size_t *, const void *, size_t); -static int base64_decode(void *, size_t *, const char *, size_t); +static size_t base64_encode(uint8_t *, size_t *, const uint8_t *, size_t); +static size_t base64_decode(uint8_t *, size_t *, const uint8_t *, size_t); static int base64_handles_dots(); -static int base64_blksize_raw(); -static int base64_blksize_enc(); +static size_t base64_blksize_raw(); +static size_t base64_blksize_enc(); static size_t base64_encoded_length(size_t inputlen); +static size_t base64_raw_length(size_t inputlen); static struct encoder base64_encoder = { @@ -65,13 +66,13 @@ base64_handles_dots() return 0; } -static int +static size_t base64_blksize_raw() { return BLKSIZE_RAW; } -static int +static size_t base64_blksize_enc() { return BLKSIZE_ENC; @@ -80,13 +81,13 @@ base64_blksize_enc() static size_t base64_encoded_length(size_t inputlen) { - return (BLKSIZE_ENC * inputlen) / BLKSIZE_RAW + ((BLKSIZE_ENC * inputlen) % BLKSIZE_RAW) ? 1 : 0; + return (BLKSIZE_ENC * inputlen) / BLKSIZE_RAW + (((BLKSIZE_ENC * inputlen) % BLKSIZE_RAW) ? 1 : 0); } static size_t base64_raw_length(size_t inputlen) { - return (BLKSIZE_RAW * inputlen) / BLKSIZE_ENC + ((BLKSIZE_RAW * inputlen) % BLKSIZE_ENC) ? 1 : 0; + return (BLKSIZE_RAW * inputlen) / BLKSIZE_ENC + (((BLKSIZE_RAW * inputlen) % BLKSIZE_ENC) ? 
1 : 0); } inline static void @@ -105,8 +106,8 @@ base64_reverse_init() } } -static int -base64_encode(char *buf, size_t *buflen, const void *data, size_t size) +static size_t +base64_encode(uint8_t *buf, size_t *buflen, const uint8_t *udata, size_t size) /* * Fills *buf with max. *buflen characters, encoding size bytes of *data. * @@ -117,9 +118,8 @@ base64_encode(char *buf, size_t *buflen, const void *data, size_t size) * sets *buflen to : #bytes encoded from data */ { - unsigned char *udata = (unsigned char *) data; - int iout = 0; /* to-be-filled output char */ - int iin = 0; /* one more than last input byte that can be + size_t iout = 0; /* to-be-filled output char */ + size_t iin = 0; /* one more than last input byte that can be successfully decoded */ /* Note: Don't bother to optimize manually. GCC optimizes @@ -166,8 +166,8 @@ base64_encode(char *buf, size_t *buflen, const void *data, size_t size) #define REV64(x) rev64[(int) (x)] -static int -base64_decode(void *buf, size_t *buflen, const char *str, size_t slen) +static size_t +base64_decode(uint8_t *ubuf, size_t *buflen, const uint8_t *str, size_t slen) /* * Fills *buf with max. *buflen bytes, decoded from slen chars in *str. * Decoding stops early when *str contains \0. 
@@ -180,9 +180,8 @@ base64_decode(void *buf, size_t *buflen, const char *str, size_t slen) * return value : #bytes filled in buf (excluding \0) */ { - unsigned char *ubuf = (unsigned char *) buf; - int iout = 0; /* to-be-filled output byte */ - int iin = 0; /* next input char to use in decoding */ + size_t iout = 0; /* to-be-filled output byte */ + size_t iin = 0; /* next input char to use in decoding */ base64_reverse_init (); diff --git a/src/encoding.c b/src/encoding.c index 58530b4..3ebfa8b 100644 --- a/src/encoding.c +++ b/src/encoding.c @@ -20,52 +20,57 @@ #include "encoding.h" size_t -get_raw_length(size_t enc_bytes, struct encoder *enc, const char *topdomain) -/* Returns the maximum length of raw data that can be encoded into max_enc_bytes */ +get_raw_length_from_dns(size_t enc_bytes, struct encoder *enc, const char *topdomain) +/* Returns the maximum length of raw data that can be encoded into enc_bytes */ { - size_t enc_datalen = enc_bytes - strlen(topdomain); + /* 2 byte for something - seems necessary */ + size_t enc_datalen = enc_bytes - strlen(topdomain) - 2; /* Number of dots in length of encoded data */ - size_t dots = enc_datalen / (DNS_MAXLABEL + 1); + size_t dots = 1; if (!enc->eats_dots()) /* Dots are not included in encoded data length */ - enc_datalen -= dots; + dots += enc_datalen / (DNS_MAXLABEL); + enc_datalen -= dots; return enc->get_raw_length(enc_datalen); } size_t -get_encoded_length(size_t raw_bytes, struct encoder *enc, const char *topdomain) +get_encoded_dns_length(size_t raw_bytes, struct encoder *enc, const char *topdomain) /* Returns length of encoded data from original data length orig_len; */ { size_t dots = 1; /* dot before topdomain */ - size_t len = enc->get_encoded_length(raw_bytes) + strlen(topdomain); + size_t len = enc->get_encoded_length(raw_bytes); if (!enc->places_dots()) - dots += len / 63; /* number of dots needed in data */ - return len; + dots += len / DNS_MAXLABEL; /* number of dots needed in data */ + return len 
+ dots + strlen(topdomain); } -int -build_hostname(char *buf, size_t buflen, const char *data, const size_t datalen, +size_t +build_hostname(uint8_t *buf, size_t buflen, const uint8_t *data, const size_t datalen, const char *topdomain, struct encoder *encoder, size_t maxlen, size_t header_len) /* Builds DNS-compatible hostname for data using specified encoder and topdomain - * NB: Does not account for header length. Data is encoded at start of buf to - * (buf + MIN(maxlen, buflen)). */ + * Encoded data is placed into buf. */ { - size_t space; - char *b; + size_t space, enc; + uint8_t *b; - space = get_encoded_length(MIN(maxlen, buflen), encoder, topdomain); - buf += header_len; buflen -= header_len; + buf += header_len; maxlen -= header_len; - memset(buf, 0, buflen); - encoder->encode(buf, &space, data, datalen); + maxlen = MIN(maxlen, buflen); + + /* 1 byte for dot before topdomain + 1 byte extra for something */ + space = maxlen - strlen(topdomain) - (maxlen / DNS_MAXLABEL) - 2; + + enc = encoder->encode(buf, &space, data, datalen); +// warnx("build_hostname: enc %lu, predicted %lu; maxlen %lu, header %lu, datalen %lu, space %lu", +// encdata_len, encoder->get_encoded_length(datalen), maxlen, header_len, datalen, space); if (!encoder->places_dots()) - inline_dotify(buf, buflen); + enc = inline_dotify(buf - header_len, buflen + header_len) - header_len; - b = buf; - b += strlen(buf); + b = buf + enc; /* move b back one step to see if the dot is there */ b--; @@ -74,48 +79,49 @@ build_hostname(char *buf, size_t buflen, const char *data, const size_t datalen, b++; /* move b ahead of the string so we can copy to it */ - strncpy(b, topdomain, strlen(topdomain)+1); + strncpy((char *)b, topdomain, strlen(topdomain)+1); +// warnx("build_hostname: host '%s' (sl %lu, actual %lu), topdomain '%s'", +// buf - header_len, strlen(buf - header_len), encdata_len + header_len + strlen(topdomain)+1, b); return space; } -int -unpack_data(char *buf, size_t buflen, char *data, size_t 
datalen, struct encoder *enc) +size_t +unpack_data(uint8_t *buf, size_t buflen, uint8_t *data, size_t datalen, struct encoder *enc) { if (!enc->eats_dots()) datalen = inline_undotify(data, datalen); return enc->decode(buf, &buflen, data, datalen); } -int -inline_dotify(char *buf, size_t buflen) +size_t +inline_dotify(uint8_t *buf, size_t buflen) { unsigned dots; - unsigned pos; - unsigned total; - char *reader, *writer; + size_t pos, total; + uint8_t *reader, *writer; - total = strlen(buf); - dots = total / 63; + total = strlen((char *)buf); + dots = total / DNS_MAXLABEL; writer = buf; writer += total; writer += dots; total += dots; - if (strlen(buf) + dots > buflen) { + if (strlen((char *)buf) + dots > buflen) { writer = buf; writer += buflen; total = buflen; } reader = writer - dots; - pos = (unsigned) (reader - buf) + 1; + pos = (reader - buf) + 1; while (dots) { *writer-- = *reader--; pos--; - if (pos % 63 == 0) { + if (pos % DNS_MAXLABEL == 0) { *writer-- = '.'; dots--; } @@ -125,12 +131,12 @@ inline_dotify(char *buf, size_t buflen) return total; } -int -inline_undotify(char *buf, size_t len) +size_t +inline_undotify(uint8_t *buf, size_t len) { - unsigned pos; + size_t pos; unsigned dots; - char *reader, *writer; + uint8_t *reader, *writer; writer = buf; reader = writer; diff --git a/src/encoding.h b/src/encoding.h index 584d697..aed7c28 100644 --- a/src/encoding.h +++ b/src/encoding.h @@ -18,6 +18,8 @@ #ifndef _ENCODING_H_ #define _ENCODING_H_ +#include <stdint.h> + /* All-0, all-1, 01010101, 10101010: each 4 times to make sure the pattern spreads across multiple encoded chars -> 16 bytes total. Followed by 32 bytes from my /dev/random; should be enough. 
@@ -29,23 +31,23 @@ struct encoder { char name[8]; - int (*encode) (char *, size_t *, const void *, size_t); - int (*decode) (void *, size_t *, const char *, size_t); + size_t (*encode) (uint8_t *, size_t *, const uint8_t *, size_t); + size_t (*decode) (uint8_t *, size_t *, const uint8_t *, size_t); int (*places_dots) (void); int (*eats_dots) (void); - int (*blocksize_raw)(void); - int (*blocksize_encoded)(void); + size_t (*blocksize_raw)(void); + size_t (*blocksize_encoded)(void); size_t (*get_encoded_length)(size_t); size_t (*get_raw_length)(size_t); }; -size_t get_raw_length(size_t enc_bytes, struct encoder *enc, const char *topdomain); -size_t get_encoded_length(size_t raw_bytes, struct encoder *enc, const char *topdomain); +size_t get_raw_length_from_dns(size_t enc_bytes, struct encoder *enc, const char *topdomain); +size_t get_encoded_dns_length(size_t raw_bytes, struct encoder *enc, const char *topdomain); -int build_hostname(char *, size_t, const char *, const size_t, const char *, struct encoder *, size_t); -int unpack_data(char *, size_t, char *, size_t, struct encoder *); -int inline_dotify(char *, size_t); -int inline_undotify(char *, size_t); +size_t build_hostname(uint8_t *, size_t, const uint8_t *, const size_t, const char *, struct encoder *, size_t, size_t); +size_t unpack_data(uint8_t *, size_t, uint8_t *, size_t, struct encoder *); +size_t inline_dotify(uint8_t *, size_t); +size_t inline_undotify(uint8_t *, size_t); #endif /* _ENCODING_H_ */