Optimize stream for full-file writes

This commit is contained in:
topjohnwu 2021-11-23 18:08:14 -08:00
parent 048b2af0fc
commit 24984ea4f2
5 changed files with 50 additions and 23 deletions

View File

@ -19,14 +19,14 @@ uint64_t dyn_img_hdr::j64 = 0;
static void decompress(format_t type, int fd, const void *in, size_t size) { static void decompress(format_t type, int fd, const void *in, size_t size) {
auto ptr = get_decoder(type, make_unique<fd_stream>(fd)); auto ptr = get_decoder(type, make_unique<fd_stream>(fd));
ptr->write(in, size); ptr->write(in, size, true);
} }
static off_t compress(format_t type, int fd, const void *in, size_t size) { static off_t compress(format_t type, int fd, const void *in, size_t size) {
auto prev = lseek(fd, 0, SEEK_CUR); auto prev = lseek(fd, 0, SEEK_CUR);
{ {
auto strm = get_encoder(type, make_unique<fd_stream>(fd)); auto strm = get_encoder(type, make_unique<fd_stream>(fd));
strm->write(in, size); strm->write(in, size, true);
} }
auto now = lseek(fd, 0, SEEK_CUR); auto now = lseek(fd, 0, SEEK_CUR);
return now - prev; return now - prev;

View File

@ -17,7 +17,7 @@
using namespace std; using namespace std;
#define bwrite filter_stream::write #define bwrite base->write
constexpr size_t CHUNK = 0x40000; constexpr size_t CHUNK = 0x40000;
constexpr size_t LZ4_UNCOMPRESSED = 0x800000; constexpr size_t LZ4_UNCOMPRESSED = 0x800000;
@ -108,8 +108,7 @@ class zopfli_encoder : public chunk_out_stream {
public: public:
explicit zopfli_encoder(stream_ptr &&base) : explicit zopfli_encoder(stream_ptr &&base) :
chunk_out_stream(std::move(base), ZOPFLI_MASTER_BLOCK_SIZE), chunk_out_stream(std::move(base), ZOPFLI_MASTER_BLOCK_SIZE),
zo{}, out(nullptr), outsize(0), crc(crc32_z(0L, Z_NULL, 0)), zo{}, out(nullptr), outsize(0), crc(crc32_z(0L, Z_NULL, 0)), in_total(0), bp(0) {
in_total(0), bp(0), final(false) {
ZopfliInitOptions(&zo); ZopfliInitOptions(&zo);
// 5 iterations is reasonable for large files // 5 iterations is reasonable for large files
@ -130,7 +129,6 @@ public:
} }
~zopfli_encoder() override { ~zopfli_encoder() override {
final = true;
finalize(); finalize();
/* CRC */ /* CRC */
@ -150,7 +148,7 @@ public:
} }
protected: protected:
bool write_chunk(const void *buf, size_t len) override { bool write_chunk(const void *buf, size_t len, bool final) override {
if (len == 0) if (len == 0)
return true; return true;
@ -178,7 +176,6 @@ private:
unsigned long crc; unsigned long crc;
uint32_t in_total; uint32_t in_total;
unsigned char bp; unsigned char bp;
bool final;
}; };
class bz_strm : public out_stream { class bz_strm : public out_stream {
@ -465,7 +462,7 @@ public:
} }
protected: protected:
bool write_chunk(const void *buf, size_t len) override { bool write_chunk(const void *buf, size_t len, bool final) override {
// This is an error // This is an error
if (len != chunk_sz) if (len != chunk_sz)
return false; return false;
@ -514,22 +511,27 @@ public:
} }
protected: protected:
bool write_chunk(const void *buf, size_t len) override { bool write_chunk(const void *buf, size_t len, bool final) override {
int r = LZ4_compress_HC((const char *) buf, out_buf, len, LZ4_COMPRESSED, LZ4HC_CLEVEL_MAX); auto in = static_cast<const char *>(buf);
if (r == 0) { uint32_t block_sz = LZ4_compress_HC(in, out_buf, len, LZ4_COMPRESSED, LZ4HC_CLEVEL_MAX);
if (block_sz == 0) {
LOGW("LZ4HC compression failure\n"); LOGW("LZ4HC compression failure\n");
return false; return false;
} }
return bwrite(&r, sizeof(r)) && bwrite(out_buf, r); if (bwrite(&block_sz, sizeof(block_sz)) && bwrite(out_buf, block_sz)) {
in_total += sizeof(block_sz) + block_sz;
return true;
}
return false;
} }
private: private:
char *out_buf; char *out_buf;
bool lg; bool lg;
unsigned in_total; uint32_t in_total;
}; };
stream_ptr get_encoder(format_t type, stream_ptr &&base) { filter_strm_ptr get_encoder(format_t type, stream_ptr &&base) {
switch (type) { switch (type) {
case XZ: case XZ:
return make_unique<xz_encoder>(std::move(base)); return make_unique<xz_encoder>(std::move(base));
@ -551,7 +553,7 @@ stream_ptr get_encoder(format_t type, stream_ptr &&base) {
} }
} }
stream_ptr get_decoder(format_t type, stream_ptr &&base) { filter_strm_ptr get_decoder(format_t type, stream_ptr &&base) {
switch (type) { switch (type) {
case XZ: case XZ:
case LZMA: case LZMA:

View File

@ -4,9 +4,9 @@
#include "format.hpp" #include "format.hpp"
stream_ptr get_encoder(format_t type, stream_ptr &&base); filter_strm_ptr get_encoder(format_t type, stream_ptr &&base);
stream_ptr get_decoder(format_t type, stream_ptr &&base); filter_strm_ptr get_decoder(format_t type, stream_ptr &&base);
void compress(const char *method, const char *infile, const char *outfile); void compress(const char *method, const char *infile, const char *outfile);

View File

@ -26,6 +26,7 @@ public:
ssize_t read(void *buf, size_t len) override; ssize_t read(void *buf, size_t len) override;
bool write(const void *buf, size_t len) override; bool write(const void *buf, size_t len) override;
virtual bool write(const void *buf, size_t len, bool final);
// Seeking while filtering does not make sense // Seeking while filtering does not make sense
off_t seek(off_t off, int whence) final { return stream::seek(off, whence); } off_t seek(off_t off, int whence) final { return stream::seek(off, whence); }
@ -34,6 +35,8 @@ protected:
stream_ptr base; stream_ptr base;
}; };
using filter_strm_ptr = std::unique_ptr<filter_stream>;
// Buffered output stream, writing in chunks // Buffered output stream, writing in chunks
class chunk_out_stream : public filter_stream { class chunk_out_stream : public filter_stream {
public: public:
@ -48,11 +51,12 @@ public:
// Reading does not make sense // Reading does not make sense
ssize_t read(void *buf, size_t len) final { return stream::read(buf, len); } ssize_t read(void *buf, size_t len) final { return stream::read(buf, len); }
bool write(const void *buf, size_t len) final; bool write(const void *buf, size_t len) final;
bool write(const void *buf, size_t len, bool final) final;
protected: protected:
// Classes inheriting this class have to call finalize() in their destructors // Classes inheriting this class have to call finalize() in their destructors
void finalize(); void finalize();
virtual bool write_chunk(const void *buf, size_t len) = 0; virtual bool write_chunk(const void *buf, size_t len, bool final) = 0;
size_t chunk_sz; size_t chunk_sz;

View File

@ -107,13 +107,21 @@ bool filter_stream::write(const void *buf, size_t len) {
return base->write(buf, len); return base->write(buf, len);
} }
bool chunk_out_stream::write(const void *_in, size_t len) { bool filter_stream::write(const void *buf, size_t len, bool final) {
return write(buf, len);
}
bool chunk_out_stream::write(const void *buf, size_t len) {
return write(buf, len, false);
}
bool chunk_out_stream::write(const void *_in, size_t len, bool final) {
auto in = static_cast<const uint8_t *>(_in); auto in = static_cast<const uint8_t *>(_in);
while (len) { while (len) {
if (buf_off + len >= chunk_sz) { if (buf_off + len >= chunk_sz) {
// Enough input for a chunk
const uint8_t *src; const uint8_t *src;
if (buf_off) { if (buf_off) {
// Copy the rest of the chunk to internal buffer
src = _buf; src = _buf;
auto copy = chunk_sz - buf_off; auto copy = chunk_sz - buf_off;
memcpy(_buf + buf_off, in, copy); memcpy(_buf + buf_off, in, copy);
@ -125,8 +133,21 @@ bool chunk_out_stream::write(const void *_in, size_t len) {
in += chunk_sz; in += chunk_sz;
len -= chunk_sz; len -= chunk_sz;
} }
if (!write_chunk(src, chunk_sz)) if (!write_chunk(src, chunk_sz, final && len == 0))
return false; return false;
} else if (final) {
// Final input data, write regardless of whether it is chunk sized
if (buf_off) {
memcpy(_buf + buf_off, in, len);
auto avail = buf_off + len;
buf_off = 0;
if (!write_chunk(_buf, avail, true))
return false;
} else {
if (!write_chunk(in, len, true))
return false;
}
break;
} else { } else {
// Buffer internally // Buffer internally
if (!_buf) { if (!_buf) {
@ -142,7 +163,7 @@ bool chunk_out_stream::write(const void *_in, size_t len) {
void chunk_out_stream::finalize() { void chunk_out_stream::finalize() {
if (buf_off) { if (buf_off) {
write_chunk(_buf, buf_off); write_chunk(_buf, buf_off, true);
delete[] _buf; delete[] _buf;
_buf = nullptr; _buf = nullptr;
buf_off = 0; buf_off = 0;