Unsigned types for encoding data

This commit is contained in:
frekky 2015-08-29 13:03:08 +08:00
parent ce0c0dc03b
commit 02c2763c26
5 changed files with 122 additions and 120 deletions

View File

@ -42,22 +42,23 @@
* accent chars since they might readily be entered in normal use,
* don't use 254-255 because of possible function overloading in DNS systems.
*/
/* Base128 alphabet: the 62 ASCII alphanumerics followed by the 66 high
 * bytes \274..\375 (octal), i.e. 128 symbols in total.  Because the
 * initializer is a string literal, the array also holds a trailing NUL.
 *
 * Diff view: the first declaration line below is the text before the
 * commit, the second (uint8_t) is the line that replaces it. */
static const unsigned char cb128[] =
static const uint8_t cb128[] =
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
"\274\275\276\277"
"\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317"
"\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337"
"\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357"
"\360\361\362\363\364\365\366\367\370\371\372\373\374\375";
/* File-scope state and forward declarations for the base128 encoder.
 * Diff view: where a declaration appears twice, the int / char / void
 * variant is the pre-commit text and the size_t / uint8_t variant is its
 * replacement. */
/* Reverse lookup table, indexed by byte value through REV128(). */
static unsigned char rev128[256];
static uint8_t rev128[256];
/* Starts at 0; presumably set once base128_reverse_init() has filled
 * rev128 (that function is not fully visible here -- confirm). */
static int reverse_init = 0;
static int base128_encode(char *, size_t *, const void *, size_t);
static int base128_decode(void *, size_t *, const char *, size_t);
static size_t base128_encode(uint8_t *, size_t *, const uint8_t *, size_t);
static size_t base128_decode(uint8_t *, size_t *, const uint8_t *, size_t);
static int base128_handles_dots();
static int base128_blksize_raw();
static int base128_blksize_enc();
static size_t base128_blksize_raw();
static size_t base128_blksize_enc();
/* New in this commit: the two length helpers get forward declarations. */
static size_t base128_encoded_length(size_t inputlen);
static size_t base128_raw_length(size_t inputlen);
static struct encoder base128_encoder =
{
@ -84,13 +85,13 @@ base128_handles_dots()
return 0;
}
/* Returns BLKSIZE_RAW (a macro defined outside the visible hunks;
 * presumably the number of raw bytes per encoding block -- confirm).
 * Diff view: the commit changes the return type from int (first line)
 * to size_t (second line). */
static int
static size_t
base128_blksize_raw()
{
return BLKSIZE_RAW;
}
static int
static size_t
base128_blksize_enc()
{
return BLKSIZE_ENC;
@ -99,13 +100,13 @@ base128_blksize_enc()
/* Returns the number of encoded bytes for inputlen raw bytes:
 * (BLKSIZE_ENC * inputlen) / BLKSIZE_RAW, plus 1 when the division
 * leaves a remainder (i.e. rounded up).
 *
 * Diff view: the first return line is the pre-commit text and the second
 * is its replacement. */
static size_t
base128_encoded_length(size_t inputlen)
{
/* Old line: `?:` binds looser than `+`, so the whole sum is the
 * condition and the result is only ever 1 or 0. */
return (BLKSIZE_ENC * inputlen) / BLKSIZE_RAW + ((BLKSIZE_ENC * inputlen) % BLKSIZE_RAW) ? 1 : 0;
/* New line: the conditional is parenthesized, so it only contributes
 * the round-up term. */
return (BLKSIZE_ENC * inputlen) / BLKSIZE_RAW + (((BLKSIZE_ENC * inputlen) % BLKSIZE_RAW) ? 1 : 0);
}
/* Returns the number of raw bytes corresponding to inputlen encoded
 * bytes: (BLKSIZE_RAW * inputlen) / BLKSIZE_ENC, plus 1 when the
 * division leaves a remainder (i.e. rounded up).
 *
 * Diff view: the first return line is the pre-commit text and the second
 * is its replacement. */
static size_t
base128_raw_length(size_t inputlen)
{
/* Old line: `?:` binds looser than `+`, so the whole sum is the
 * condition and the result is only ever 1 or 0. */
return (BLKSIZE_RAW * inputlen) / BLKSIZE_ENC + ((BLKSIZE_RAW * inputlen) % BLKSIZE_ENC) ? 1 : 0;
/* New line: the conditional is parenthesized, so it only contributes
 * the round-up term. */
return (BLKSIZE_RAW * inputlen) / BLKSIZE_ENC + (((BLKSIZE_RAW * inputlen) % BLKSIZE_ENC) ? 1 : 0);
}
inline static void
@ -124,8 +125,8 @@ base128_reverse_init()
}
}
static int
base128_encode(char *buf, size_t *buflen, const void *data, size_t size)
static size_t
base128_encode(uint8_t *ubuf, size_t *buflen, const uint8_t *udata, size_t size)
/*
* Fills *buf with max. *buflen characters, encoding size bytes of *data.
*
@ -136,10 +137,8 @@ base128_encode(char *buf, size_t *buflen, const void *data, size_t size)
* sets *buflen to : #bytes encoded from data
*/
{
unsigned char *ubuf = (unsigned char *) buf;
unsigned char *udata = (unsigned char *) data;
int iout = 0; /* to-be-filled output char */
int iin = 0; /* one more than last input byte that can be
size_t iout = 0; /* to-be-filled output char */
size_t iin = 0; /* one more than last input byte that can be
successfully decoded */
/* Note: Don't bother to optimize manually. GCC optimizes
@ -218,8 +217,8 @@ base128_encode(char *buf, size_t *buflen, const void *data, size_t size)
/* Reverse-table lookup: yields rev128[] at the index given by x, cast to
 * int.  Callers below mask the result to at most 7 bits (0x7f). */
#define REV128(x) rev128[(int) (x)]
static int
base128_decode(void *buf, size_t *buflen, const char *str, size_t slen)
static size_t
base128_decode(uint8_t *buf, size_t *buflen, const uint8_t *str, size_t slen)
/*
* Fills *buf with max. *buflen bytes, decoded from slen chars in *str.
* Decoding stops early when *str contains \0.
@ -232,8 +231,6 @@ base128_decode(void *buf, size_t *buflen, const char *str, size_t slen)
* return value : #bytes filled in buf (excluding \0)
*/
{
unsigned char *ustr = (unsigned char *) str;
unsigned char *ubuf = (unsigned char *) buf;
int iout = 0; /* to-be-filled output byte */
int iin = 0; /* next input char to use in decoding */
@ -246,61 +243,61 @@ base128_decode(void *buf, size_t *buflen, const char *str, size_t slen)
if (iout >= *buflen || iin + 1 >= slen ||
str[iin] == '\0' || str[iin + 1] == '\0')
break;
ubuf[iout] = ((REV128(ustr[iin]) & 0x7f) << 1) |
((REV128(ustr[iin + 1]) & 0x40) >> 6);
buf[iout] = ((REV128(str[iin]) & 0x7f) << 1) |
((REV128(str[iin + 1]) & 0x40) >> 6);
iin++; /* 0 used up, iin=1 */
iout++;
if (iout >= *buflen || iin + 1 >= slen ||
str[iin] == '\0' || str[iin + 1] == '\0')
break;
ubuf[iout] = ((REV128(ustr[iin]) & 0x3f) << 2) |
((REV128(ustr[iin + 1]) & 0x60) >> 5);
buf[iout] = ((REV128(str[iin]) & 0x3f) << 2) |
((REV128(str[iin + 1]) & 0x60) >> 5);
iin++; /* 1 used up, iin=2 */
iout++;
if (iout >= *buflen || iin + 1 >= slen ||
str[iin] == '\0' || str[iin + 1] == '\0')
break;
ubuf[iout] = ((REV128(ustr[iin]) & 0x1f) << 3) |
((REV128(ustr[iin + 1]) & 0x70) >> 4);
buf[iout] = ((REV128(str[iin]) & 0x1f) << 3) |
((REV128(str[iin + 1]) & 0x70) >> 4);
iin++; /* 2 used up, iin=3 */
iout++;
if (iout >= *buflen || iin + 1 >= slen ||
str[iin] == '\0' || str[iin + 1] == '\0')
break;
ubuf[iout] = ((REV128(ustr[iin]) & 0x0f) << 4) |
((REV128(ustr[iin + 1]) & 0x78) >> 3);
buf[iout] = ((REV128(str[iin]) & 0x0f) << 4) |
((REV128(str[iin + 1]) & 0x78) >> 3);
iin++; /* 3 used up, iin=4 */
iout++;
if (iout >= *buflen || iin + 1 >= slen ||
str[iin] == '\0' || str[iin + 1] == '\0')
break;
ubuf[iout] = ((REV128(ustr[iin]) & 0x07) << 5) |
((REV128(ustr[iin + 1]) & 0x7c) >> 2);
buf[iout] = ((REV128(str[iin]) & 0x07) << 5) |
((REV128(str[iin + 1]) & 0x7c) >> 2);
iin++; /* 4 used up, iin=5 */
iout++;
if (iout >= *buflen || iin + 1 >= slen ||
str[iin] == '\0' || str[iin + 1] == '\0')
break;
ubuf[iout] = ((REV128(ustr[iin]) & 0x03) << 6) |
((REV128(ustr[iin + 1]) & 0x7e) >> 1);
buf[iout] = ((REV128(str[iin]) & 0x03) << 6) |
((REV128(str[iin + 1]) & 0x7e) >> 1);
iin++; /* 5 used up, iin=6 */
iout++;
if (iout >= *buflen || iin + 1 >= slen ||
str[iin] == '\0' || str[iin + 1] == '\0')
break;
ubuf[iout] = ((REV128(ustr[iin]) & 0x01) << 7) |
((REV128(ustr[iin + 1]) & 0x7f));
buf[iout] = ((REV128(str[iin]) & 0x01) << 7) |
((REV128(str[iin + 1]) & 0x7f));
iin += 2; /* 6,7 used up, iin=8 */
iout++;
}
ubuf[iout] = '\0';
buf[iout] = '\0';
return iout;
}

View File

@ -33,11 +33,11 @@ static const char cb32_ucase[] =
/* File-scope state and forward declarations for the base32 encoder.
 * Diff view: where a declaration appears twice, the int / char / void
 * variant is the pre-commit text and the size_t / uint8_t variant is its
 * replacement. */
/* Reverse lookup table, indexed by byte value (see REV32 and b32_8to5). */
static unsigned char rev32[256];
/* Starts at 0; presumably set once base32_reverse_init() has filled
 * rev32 (that function is not visible here -- confirm). */
static int reverse_init = 0;
static int base32_encode(char *, size_t *, const void *, size_t);
static int base32_decode(void *, size_t *, const char *, size_t);
static size_t base32_encode(uint8_t *, size_t *, const uint8_t *, size_t);
static size_t base32_decode(uint8_t *, size_t *, const uint8_t *, size_t);
static int base32_handles_dots();
static int base32_blksize_raw();
static int base32_blksize_enc();
static size_t base32_blksize_raw();
static size_t base32_blksize_enc();
static size_t base32_encoded_length(size_t inputlen);
static size_t base32_raw_length(size_t inputlen);
@ -67,13 +67,13 @@ base32_handles_dots()
return 0;
}
/* Returns BLKSIZE_RAW (a macro defined outside the visible hunks;
 * presumably the number of raw bytes per encoding block -- confirm).
 * Diff view: the commit changes the return type from int (first line)
 * to size_t (second line). */
static int
static size_t
base32_blksize_raw()
{
return BLKSIZE_RAW;
}
static int
static size_t
base32_blksize_enc()
{
return BLKSIZE_ENC;
@ -82,13 +82,13 @@ base32_blksize_enc()
/* Returns the number of encoded bytes for inputlen raw bytes:
 * (BLKSIZE_ENC * inputlen) / BLKSIZE_RAW, plus 1 when the division
 * leaves a remainder (i.e. rounded up).
 *
 * Diff view: the first return line is the pre-commit text and the second
 * is its replacement. */
static size_t
base32_encoded_length(size_t inputlen)
{
/* Old line: `?:` binds looser than `+`, so the whole sum is the
 * condition and the result is only ever 1 or 0. */
return (BLKSIZE_ENC * inputlen) / BLKSIZE_RAW + ((BLKSIZE_ENC * inputlen) % BLKSIZE_RAW) ? 1 : 0;
/* New line: the conditional is parenthesized, so it only contributes
 * the round-up term. */
return (BLKSIZE_ENC * inputlen) / BLKSIZE_RAW + (((BLKSIZE_ENC * inputlen) % BLKSIZE_RAW) ? 1 : 0);
}
/* Returns the number of raw bytes corresponding to inputlen encoded
 * bytes: (BLKSIZE_RAW * inputlen) / BLKSIZE_ENC, plus 1 when the
 * division leaves a remainder (i.e. rounded up).
 *
 * Diff view: the first return line is the pre-commit text and the second
 * is its replacement. */
static size_t
base32_raw_length(size_t inputlen)
{
/* Old line: `?:` binds looser than `+`, so the whole sum is the
 * condition and the result is only ever 1 or 0. */
return (BLKSIZE_RAW * inputlen) / BLKSIZE_ENC + ((BLKSIZE_RAW * inputlen) % BLKSIZE_ENC) ? 1 : 0;
/* New line: the conditional is parenthesized, so it only contributes
 * the round-up term. */
return (BLKSIZE_RAW * inputlen) / BLKSIZE_ENC + (((BLKSIZE_RAW * inputlen) % BLKSIZE_ENC) ? 1 : 0);
}
@ -123,8 +123,8 @@ b32_8to5(int in)
return rev32[in];
}
static int
base32_encode(char *buf, size_t *buflen, const void *data, size_t size)
static size_t
base32_encode(uint8_t *buf, size_t *buflen, const uint8_t *udata, size_t size)
/*
* Fills *buf with max. *buflen characters, encoding size bytes of *data.
*
@ -135,9 +135,8 @@ base32_encode(char *buf, size_t *buflen, const void *data, size_t size)
* sets *buflen to : #bytes encoded from data
*/
{
unsigned char *udata = (unsigned char *) data;
int iout = 0; /* to-be-filled output char */
int iin = 0; /* one more than last input byte that can be
size_t iout = 0; /* to-be-filled output char */
size_t iin = 0; /* one more than last input byte that can be
successfully decoded */
/* Note: Don't bother to optimize manually. GCC optimizes
@ -214,8 +213,8 @@ base32_encode(char *buf, size_t *buflen, const void *data, size_t size)
/* Reverse-table lookup: yields rev32[] at the index given by x, cast to
 * int. */
#define REV32(x) rev32[(int) (x)]
static int
base32_decode(void *buf, size_t *buflen, const char *str, size_t slen)
static size_t
base32_decode(uint8_t *ubuf, size_t *buflen, const uint8_t *str, size_t slen)
/*
* Fills *buf with max. *buflen bytes, decoded from slen chars in *str.
* Decoding stops early when *str contains \0.
@ -228,9 +227,8 @@ base32_decode(void *buf, size_t *buflen, const char *str, size_t slen)
* return value : #bytes filled in buf (excluding \0)
*/
{
unsigned char *ubuf = (unsigned char *) buf;
int iout = 0; /* to-be-filled output byte */
int iin = 0; /* next input char to use in decoding */
size_t iout = 0; /* to-be-filled output byte */
size_t iin = 0; /* next input char to use in decoding */
base32_reverse_init ();

View File

@ -33,12 +33,13 @@ static const char cb64[] =
/* File-scope state and forward declarations for the base64 encoder.
 * Diff view: where a declaration appears twice, the int / char / void
 * variant is the pre-commit text and the size_t / uint8_t variant is its
 * replacement. */
/* Reverse lookup table, indexed by byte value through REV64(). */
static unsigned char rev64[256];
/* Starts at 0; presumably set once base64_reverse_init() has filled
 * rev64 (that function is not fully visible here -- confirm). */
static int reverse_init = 0;
static int base64_encode(char *, size_t *, const void *, size_t);
static int base64_decode(void *, size_t *, const char *, size_t);
static size_t base64_encode(uint8_t *, size_t *, const uint8_t *, size_t);
static size_t base64_decode(uint8_t *, size_t *, const uint8_t *, size_t);
static int base64_handles_dots();
static int base64_blksize_raw();
static int base64_blksize_enc();
static size_t base64_blksize_raw();
static size_t base64_blksize_enc();
/* New in this commit: the two length helpers get forward declarations. */
static size_t base64_encoded_length(size_t inputlen);
static size_t base64_raw_length(size_t inputlen);
static struct encoder base64_encoder =
{
@ -65,13 +66,13 @@ base64_handles_dots()
return 0;
}
/* Returns BLKSIZE_RAW (a macro defined outside the visible hunks;
 * presumably the number of raw bytes per encoding block -- confirm).
 * Diff view: the commit changes the return type from int (first line)
 * to size_t (second line). */
static int
static size_t
base64_blksize_raw()
{
return BLKSIZE_RAW;
}
static int
static size_t
base64_blksize_enc()
{
return BLKSIZE_ENC;
@ -80,13 +81,13 @@ base64_blksize_enc()
/* Returns the number of encoded bytes for inputlen raw bytes:
 * (BLKSIZE_ENC * inputlen) / BLKSIZE_RAW, plus 1 when the division
 * leaves a remainder (i.e. rounded up).
 *
 * Diff view: the first return line is the pre-commit text and the second
 * is its replacement. */
static size_t
base64_encoded_length(size_t inputlen)
{
/* Old line: `?:` binds looser than `+`, so the whole sum is the
 * condition and the result is only ever 1 or 0. */
return (BLKSIZE_ENC * inputlen) / BLKSIZE_RAW + ((BLKSIZE_ENC * inputlen) % BLKSIZE_RAW) ? 1 : 0;
/* New line: the conditional is parenthesized, so it only contributes
 * the round-up term. */
return (BLKSIZE_ENC * inputlen) / BLKSIZE_RAW + (((BLKSIZE_ENC * inputlen) % BLKSIZE_RAW) ? 1 : 0);
}
/* Returns the number of raw bytes corresponding to inputlen encoded
 * bytes: (BLKSIZE_RAW * inputlen) / BLKSIZE_ENC, plus 1 when the
 * division leaves a remainder (i.e. rounded up).
 *
 * Diff view: the first return line is the pre-commit text and the second
 * is its replacement. */
static size_t
base64_raw_length(size_t inputlen)
{
/* Old line: `?:` binds looser than `+`, so the whole sum is the
 * condition and the result is only ever 1 or 0. */
return (BLKSIZE_RAW * inputlen) / BLKSIZE_ENC + ((BLKSIZE_RAW * inputlen) % BLKSIZE_ENC) ? 1 : 0;
/* New line: the conditional is parenthesized, so it only contributes
 * the round-up term. */
return (BLKSIZE_RAW * inputlen) / BLKSIZE_ENC + (((BLKSIZE_RAW * inputlen) % BLKSIZE_ENC) ? 1 : 0);
}
inline static void
@ -105,8 +106,8 @@ base64_reverse_init()
}
}
static int
base64_encode(char *buf, size_t *buflen, const void *data, size_t size)
static size_t
base64_encode(uint8_t *buf, size_t *buflen, const uint8_t *udata, size_t size)
/*
* Fills *buf with max. *buflen characters, encoding size bytes of *data.
*
@ -117,9 +118,8 @@ base64_encode(char *buf, size_t *buflen, const void *data, size_t size)
* sets *buflen to : #bytes encoded from data
*/
{
unsigned char *udata = (unsigned char *) data;
int iout = 0; /* to-be-filled output char */
int iin = 0; /* one more than last input byte that can be
size_t iout = 0; /* to-be-filled output char */
size_t iin = 0; /* one more than last input byte that can be
successfully decoded */
/* Note: Don't bother to optimize manually. GCC optimizes
@ -166,8 +166,8 @@ base64_encode(char *buf, size_t *buflen, const void *data, size_t size)
/* Reverse-table lookup: yields rev64[] at the index given by x, cast to
 * int. */
#define REV64(x) rev64[(int) (x)]
static int
base64_decode(void *buf, size_t *buflen, const char *str, size_t slen)
static size_t
base64_decode(uint8_t *ubuf, size_t *buflen, const uint8_t *str, size_t slen)
/*
* Fills *buf with max. *buflen bytes, decoded from slen chars in *str.
* Decoding stops early when *str contains \0.
@ -180,9 +180,8 @@ base64_decode(void *buf, size_t *buflen, const char *str, size_t slen)
* return value : #bytes filled in buf (excluding \0)
*/
{
unsigned char *ubuf = (unsigned char *) buf;
int iout = 0; /* to-be-filled output byte */
int iin = 0; /* next input char to use in decoding */
size_t iout = 0; /* to-be-filled output byte */
size_t iin = 0; /* next input char to use in decoding */
base64_reverse_init ();

View File

@ -20,52 +20,57 @@
#include "encoding.h"
/* Computes how many raw bytes fit into a DNS name of enc_bytes bytes:
 * subtracts the topdomain length (and, after this commit, 2 more bytes),
 * subtracts the dots, and passes what is left to enc->get_raw_length().
 *
 * Diff view: the function is renamed from get_raw_length to
 * get_raw_length_from_dns; where a line appears in two variants, the
 * first is the pre-commit text and the second is its replacement.
 *
 * NOTE(review): all arithmetic is on size_t and no guard is visible in
 * this function, so enc_bytes < strlen(topdomain) + 2, or dots greater
 * than enc_datalen, would wrap around -- confirm that callers rule this
 * out. */
size_t
get_raw_length(size_t enc_bytes, struct encoder *enc, const char *topdomain)
/* Returns the maximum length of raw data that can be encoded into max_enc_bytes */
get_raw_length_from_dns(size_t enc_bytes, struct encoder *enc, const char *topdomain)
/* Returns the maximum length of raw data that can be encoded into enc_bytes */
{
size_t enc_datalen = enc_bytes - strlen(topdomain);
/* 2 byte for something - seems necessary */
size_t enc_datalen = enc_bytes - strlen(topdomain) - 2;
/* Number of dots in length of encoded data */
/* Old: one dot per (DNS_MAXLABEL + 1) bytes, regardless of encoder. */
size_t dots = enc_datalen / (DNS_MAXLABEL + 1);
/* New: always count 1 dot, and add one per DNS_MAXLABEL bytes only when
 * the encoder reports that it does not eat dots. */
size_t dots = 1;
if (!enc->eats_dots()) /* Dots are not included in encoded data length */
dots += enc_datalen / (DNS_MAXLABEL);
enc_datalen -= dots;
return enc->get_raw_length(enc_datalen);
}
/* Diff view: the function is renamed from get_encoded_length to
 * get_encoded_dns_length; where a line appears in two variants, the
 * first is the pre-commit text and the second is its replacement. */
size_t
get_encoded_length(size_t raw_bytes, struct encoder *enc, const char *topdomain)
get_encoded_dns_length(size_t raw_bytes, struct encoder *enc, const char *topdomain)
/* Returns the length of the DNS name needed to carry raw_bytes bytes of
 * data: encoded payload, plus dots, plus the topdomain. */
{
size_t dots = 1; /* dot before topdomain */
/* Old: topdomain length was folded into len before counting dots. */
size_t len = enc->get_encoded_length(raw_bytes) + strlen(topdomain);
/* New: len is the encoded payload only. */
size_t len = enc->get_encoded_length(raw_bytes);
/* Dots are only counted here when the encoder does not place them
 * itself. */
if (!enc->places_dots())
dots += len / 63; /* number of dots needed in data */
/* Old return: dots was computed but never added to the result. */
return len;
dots += len / DNS_MAXLABEL; /* number of dots needed in data */
return len + dots + strlen(topdomain);
}
int
build_hostname(char *buf, size_t buflen, const char *data, const size_t datalen,
size_t
build_hostname(uint8_t *buf, size_t buflen, const uint8_t *data, const size_t datalen,
const char *topdomain, struct encoder *encoder, size_t maxlen, size_t header_len)
/* Builds DNS-compatible hostname for data using specified encoder and topdomain
* NB: Does not account for header length. Data is encoded at start of buf to
* (buf + MIN(maxlen, buflen)). */
* Encoded data is placed into buf. */
{
size_t space;
char *b;
size_t space, enc;
uint8_t *b;
space = get_encoded_length(MIN(maxlen, buflen), encoder, topdomain);
buf += header_len;
buflen -= header_len;
buf += header_len;
maxlen -= header_len;
memset(buf, 0, buflen);
encoder->encode(buf, &space, data, datalen);
maxlen = MIN(maxlen, buflen);
/* 1 byte for dot before topdomain + 1 byte extra for something */
space = maxlen - strlen(topdomain) - (maxlen / DNS_MAXLABEL) - 2;
enc = encoder->encode(buf, &space, data, datalen);
// warnx("build_hostname: enc %lu, predicted %lu; maxlen %lu, header %lu, datalen %lu, space %lu",
// encdata_len, encoder->get_encoded_length(datalen), maxlen, header_len, datalen, space);
if (!encoder->places_dots())
inline_dotify(buf, buflen);
enc = inline_dotify(buf - header_len, buflen + header_len) - header_len;
b = buf;
b += strlen(buf);
b = buf + enc;
/* move b back one step to see if the dot is there */
b--;
@ -74,48 +79,49 @@ build_hostname(char *buf, size_t buflen, const char *data, const size_t datalen,
b++;
/* move b ahead of the string so we can copy to it */
strncpy(b, topdomain, strlen(topdomain)+1);
strncpy((char *)b, topdomain, strlen(topdomain)+1);
// warnx("build_hostname: host '%s' (sl %lu, actual %lu), topdomain '%s'",
// buf - header_len, strlen(buf - header_len), encdata_len + header_len + strlen(topdomain)+1, b);
return space;
}
/* Decodes datalen bytes of encoded data into buf using enc->decode() and
 * returns whatever decode() returns.  buflen is passed to decode() by
 * address, but it is a by-value parameter here, so any update decode()
 * makes to it is not visible to the caller.
 *
 * When the encoder reports that it does not eat dots, the input is first
 * run through inline_undotify() and its return value becomes the new
 * datalen (data is non-const, so it is presumably rewritten in place --
 * the body of inline_undotify is not fully visible here).
 *
 * Diff view: the first signature (int / char *) is the pre-commit text,
 * the second (size_t / uint8_t *) is its replacement. */
int
unpack_data(char *buf, size_t buflen, char *data, size_t datalen, struct encoder *enc)
size_t
unpack_data(uint8_t *buf, size_t buflen, uint8_t *data, size_t datalen, struct encoder *enc)
{
if (!enc->eats_dots())
datalen = inline_undotify(data, datalen);
return enc->decode(buf, &buflen, data, datalen);
}
int
inline_dotify(char *buf, size_t buflen)
size_t
inline_dotify(uint8_t *buf, size_t buflen)
{
unsigned dots;
unsigned pos;
unsigned total;
char *reader, *writer;
size_t pos, total;
uint8_t *reader, *writer;
total = strlen(buf);
dots = total / 63;
total = strlen((char *)buf);
dots = total / DNS_MAXLABEL;
writer = buf;
writer += total;
writer += dots;
total += dots;
if (strlen(buf) + dots > buflen) {
if (strlen((char *)buf) + dots > buflen) {
writer = buf;
writer += buflen;
total = buflen;
}
reader = writer - dots;
pos = (unsigned) (reader - buf) + 1;
pos = (reader - buf) + 1;
while (dots) {
*writer-- = *reader--;
pos--;
if (pos % 63 == 0) {
if (pos % DNS_MAXLABEL == 0) {
*writer-- = '.';
dots--;
}
@ -125,12 +131,12 @@ inline_dotify(char *buf, size_t buflen)
return total;
}
int
inline_undotify(char *buf, size_t len)
size_t
inline_undotify(uint8_t *buf, size_t len)
{
unsigned pos;
size_t pos;
unsigned dots;
char *reader, *writer;
uint8_t *reader, *writer;
writer = buf;
reader = writer;

View File

@ -18,6 +18,8 @@
#ifndef _ENCODING_H_
#define _ENCODING_H_
#include <stdint.h>
/* All-0, all-1, 01010101, 10101010: each 4 times to make sure the pattern
spreads across multiple encoded chars -> 16 bytes total.
Followed by 32 bytes from my /dev/random; should be enough.
@ -29,23 +31,23 @@
/* Table of function pointers describing one encoding (base32, base64,
 * base128, ...).
 *
 * Diff view: where a member appears twice, the int / char / void variant
 * is the pre-commit text and the size_t / uint8_t variant is its
 * replacement. */
struct encoder {
/* Short encoder name; at most 7 characters if it is NUL-terminated. */
char name[8];
int (*encode) (char *, size_t *, const void *, size_t);
int (*decode) (void *, size_t *, const char *, size_t);
/* encode(buf, &buflen, data, size): fills buf with at most *buflen
 * characters encoding size bytes of data; *buflen is updated. */
size_t (*encode) (uint8_t *, size_t *, const uint8_t *, size_t);
/* decode(buf, &buflen, str, slen): fills buf with at most *buflen bytes
 * decoded from slen characters of str; returns the bytes written. */
size_t (*decode) (uint8_t *, size_t *, const uint8_t *, size_t);
/* Non-zero if the encoder inserts dots itself; when it returns 0 the
 * callers in encoding.c add the dots (inline_dotify). */
int (*places_dots) (void);
/* Non-zero if decode copes with dots in its input; when it returns 0
 * unpack_data() strips them first (inline_undotify). */
int (*eats_dots) (void);
int (*blocksize_raw)(void);
int (*blocksize_encoded)(void);
/* Return the encoder's BLKSIZE_RAW / BLKSIZE_ENC constants. */
size_t (*blocksize_raw)(void);
size_t (*blocksize_encoded)(void);
/* Encoded length for a given raw length, and the reverse. */
size_t (*get_encoded_length)(size_t);
size_t (*get_raw_length)(size_t);
};
/* Public prototypes of encoding.c.
 * Diff view: each group lists the pre-commit prototypes first, followed
 * by the prototypes that replace them. */
/* Old names. */
size_t get_raw_length(size_t enc_bytes, struct encoder *enc, const char *topdomain);
size_t get_encoded_length(size_t raw_bytes, struct encoder *enc, const char *topdomain);
/* New names; parameter lists are unchanged. */
size_t get_raw_length_from_dns(size_t enc_bytes, struct encoder *enc, const char *topdomain);
size_t get_encoded_dns_length(size_t raw_bytes, struct encoder *enc, const char *topdomain);
/* Old prototypes: int results and char buffers.  This build_hostname
 * prototype has 7 parameters. */
int build_hostname(char *, size_t, const char *, const size_t, const char *, struct encoder *, size_t);
int unpack_data(char *, size_t, char *, size_t, struct encoder *);
int inline_dotify(char *, size_t);
int inline_undotify(char *, size_t);
/* New prototypes: size_t results and uint8_t buffers.  build_hostname now
 * has 8 parameters, in the order buf, buflen, data, datalen, topdomain,
 * encoder, maxlen, header_len (names taken from its definition). */
size_t build_hostname(uint8_t *, size_t, const uint8_t *, const size_t, const char *, struct encoder *, size_t, size_t);
size_t unpack_data(uint8_t *, size_t, uint8_t *, size_t, struct encoder *);
size_t inline_dotify(uint8_t *, size_t);
size_t inline_undotify(uint8_t *, size_t);
#endif /* _ENCODING_H_ */