MESSAGE
DATE | 2016-12-15 |
FROM | Christopher League
|
SUBJECT | Subject: [Learn] zlib demo with vector::resize
|
From learn-bounces-at-nylxs.com Thu Dec 15 17:49:11 2016 Return-Path: X-Original-To: archive-at-mrbrklyn.com Delivered-To: archive-at-mrbrklyn.com Received: from www.mrbrklyn.com (www.mrbrklyn.com [96.57.23.82]) by mrbrklyn.com (Postfix) with ESMTP id 77B35161312; Thu, 15 Dec 2016 17:49:11 -0500 (EST) X-Original-To: learn-at-nylxs.com Delivered-To: learn-at-nylxs.com Received: from liucs.net (contrapunctus.net [174.136.110.10]) by mrbrklyn.com (Postfix) with ESMTP id 7C858160E77 for ; Thu, 15 Dec 2016 17:49:06 -0500 (EST) Received: from localhost (112.sub-70-214-66.myvzw.com [70.214.66.112]) by liucs.net (Postfix) with ESMTPSA id 11E9FE096 for ; Thu, 15 Dec 2016 17:49:03 -0500 (EST) From: Christopher League To: learn-at-nylxs.com User-Agent: Notmuch/0.21 (http://notmuchmail.org) Emacs/25.1.1 (x86_64-unknown-linux-gnu) Date: Thu, 15 Dec 2016 16:33:31 -0500 Message-ID: <87r35898t0.fsf-at-contrapunctus.net> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="=-=-=" Subject: [Learn] zlib demo with vector::resize X-BeenThere: learn-at-nylxs.com X-Mailman-Version: 2.1.17 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: learn-bounces-at-nylxs.com Sender: "Learn"
--=-=-= Content-Type: multipart/alternative; boundary="==-=-="
--==-=-= Content-Type: text/plain
Here's a cool little demo that generates some pseudo-random chars in a buffer (represented by `vector<uint8_t>`), compresses them using zlib, decompresses, and checks that the final result matches the input.
Thanks Ruben, for the afternoon hack. :/
CL
~~~~ {.cpp}
// zdemo
// % make zdemo CXXFLAGS="-g -Wall" LDFLAGS=-lz
#include <iostream>
#include <iomanip>
#include <vector>
#include <cassert>
#include <cstdint>
#include "zlib.h"
using namespace std;
// This many bytes per kilobyte (kibibyte)
const unsigned KIBI = 1024;
// Since the deflating and inflating procedures have a lot in common, this is // an attempt to capture the commonality. Yes I wrote a class, try not to // faint. class Flater { public: const int LEVEL = Z_DEFAULT_COMPRESSION; const bool TRACE_RESIZES = true; const unsigned MINIMAL_CHUNK = 16 * KIBI; // Start point for output size Flater(); virtual ~Flater() { }; void run(vector& in, vector& out); protected: virtual int flate() = 0; z_stream zs; };
// Prepares `zs` for the deflateInit()/inflateInit() call made by the
// subclass constructor. The whole struct is value-initialised first so that
// next_in and avail_in, which zlib requires to be set before inflateInit(),
// are not left indeterminate. The allocator hooks are then set to Z_NULL
// explicitly.
Flater::Flater()
    : zs()
{
    zs.zalloc = Z_NULL;
    zs.zfree = Z_NULL;
    zs.opaque = Z_NULL;
}
// Here's the generic algorithm. The input buffer is fully available. The // output buffer doubles its size as needed. It can start out empty, and we'll // allocate MINIMAL_CHUNK bytes up front. void Flater::run(vector& in, vector& out) { zs.avail_in = in.size(); // #bytes available to be read zs.next_in = in.data(); // ptr to bytes available to be read
unsigned int bytes_written = 0; do { out.resize(max(MINIMAL_CHUNK, 2*(unsigned)out.size())); if(TRACE_RESIZES) { cout << " Resize " << out.size() << '\n'; }
zs.avail_out = out.size() - bytes_written; zs.next_out = out.data() + bytes_written;
int r = flate(); assert(Z_STREAM_ERROR != r);
bytes_written = out.size() - zs.avail_out;
} while (zs.avail_out == 0); assert(zs.avail_in == 0); // All input will be used
out.resize(bytes_written); // Reduce size if(TRACE_RESIZES) { cout << " Finish " << out.size() << '\n'; } }
////////////////////////////////////////////////////////////////
// Here's the customizations for deflating (compression).
class Deflater : public Flater { public: Deflater(); ~Deflater(); int flate(); };
Deflater::Deflater() { int r = deflateInit(&zs, LEVEL); assert(Z_OK == r); }
int Deflater::flate() { return deflate(&zs, Z_FINISH); }
Deflater::~Deflater() { deflateEnd(&zs); }
////////////////////////////////////////////////////////////////
// Here's the customizations for inflating (decompression).
class Inflater : public Flater { public: Inflater(); ~Inflater(); int flate(); };
Inflater::Inflater() { int r = inflateInit(&zs); assert(Z_OK == r); }
int Inflater::flate() { return inflate(&zs, Z_FINISH); }
Inflater::~Inflater() { inflateEnd(&zs); }
////////////////////////////////////////////////////////////////
// For testing purposes, fill buffer with a mixture of lowercase letters and
// random bytes. If the data is completely uniform-random in the range 0-255,
// then we wouldn't be able to compress. Having a significant portion of
// lower-case letters (maybe) makes it more compressible.
//
// buffer: must be non-empty (asserted); every element is overwritten and the
//         size is left unchanged.
//
// The generator is re-seeded from the clock on every call.
void generate_data(std::vector<std::uint8_t>& buffer)
{
    assert(buffer.size() > 0);
    const unsigned RANDOM_RATIO = 10;  // roughly 1 byte in 10 is arbitrary
    srand(time(0));
    // Range-for instead of an `unsigned` index, so buffers of 2^32 bytes or
    // more cannot wrap the counter.
    for(std::uint8_t& byte : buffer) {
        if(rand() % RANDOM_RATIO == 0) {
            // Arbitrary byte: keep only the low 8 bits of rand().
            byte = static_cast<std::uint8_t>(rand());
        } else {
            byte = static_cast<std::uint8_t>('a' + (rand() % 26));
        }
    }
}
// Print sizes, and first several bytes in hex and ASCII, similar to a hex // dump. void output_sample(const vector& buffer) { cout << " " << buffer.size() << " bytes (~" << buffer.size()/KIBI << "K, ~" << buffer.size()/KIBI/KIBI << "M)\n";
const unsigned SEVERAL = min(16, (int)buffer.size()); cout.fill('0'); cout << hex << " "; for(unsigned i = 0; i < SEVERAL; i++) { cout << setw(2) << (int)buffer[i] << ' '; } for(unsigned i = 0; i < SEVERAL; i++) { cout << (char)(isprint(buffer[i])? buffer[i] : '.'); } cout << dec << '\n'; }
////////////////////////////////////////////////////////////////
// Round-trip demo: generate test data, compress it, decompress it, and
// compare the result with the original. Returns 0 on an exact match and 1
// otherwise. The comparison is an ordinary runtime check, so it still
// happens when assert is compiled out.
int main()
{
    const float INPUT_SIZE_MB = 25.7;
    const unsigned INPUT_SIZE_BYTES = INPUT_SIZE_MB * KIBI * KIBI;

    std::cout << "Generating some data...\n";
    std::vector<std::uint8_t> original(INPUT_SIZE_BYTES);
    generate_data(original);
    output_sample(original);

    std::cout << "Compressing...\n";
    std::vector<std::uint8_t> compressed;
    Deflater().run(original, compressed);
    output_sample(compressed);

    double ratio = static_cast<double>(compressed.size()) / original.size();
    std::cout << "Compression ratio: "
              << std::fixed << std::setprecision(3) << ratio << '\n';

    std::vector<std::uint8_t> copy;
    std::cout << "Decompressing...\n";
    Inflater().run(compressed, copy);
    output_sample(copy);

    if(copy.size() != INPUT_SIZE_BYTES || copy != original) {
        std::cerr << "Round-trip mismatch!\n";
        return 1;
    }
    std::cout << "Exact match!\n";

    return 0;
}
~~~~
--==-=-= Content-Type: text/html; charset=utf-8 Content-Transfer-Encoding: quoted-printable
1.0, user-scalable=3Dyes">
Here’s a cool little demo that generates some pseudo-random chars in a buffer (represented by vector<uint8_t>), compresses them using zlib, decompresses, and checks that the final result matches the input.
Thanks Ruben, for the afternoon hack. :/
CL
ceCode cpp">// zdemo // % make zdemo CXXFLAGS=3D"-g -Wall" LDFLAGS= =3D-lz #include <iostream> #include <iomanip> #include <vector> #include <cassert> #include <cstdint> #include "zlib.h" using namespace std;
// This many bytes per kilobyte (kibibyte) const unsigned KIBI =3D= 1024;
// Since the deflating and inflating procedures have a l= ot in common, this is // an attempt to capture the commonality. Yes I wrote a = class, try not to // faint. class Flater { public: const int LEVEL =3D= Z_DEFAULT_COMPRESSION; const bool TRACE_RE= SIZES =3D true; const unsigned MINI= MAL_CHUNK =3D 16 * KIBI; // St= art point for output size Flater(); virtual ~Flater() { }; void run(vector<uint8_t= >& in, vector<uint8_t>& o= ut); protected: virtual int flate()= =3D 0; z_stream zs; };
Flater::Flater() { zs.zalloc =3D Z_NULL; zs.zfree =3D Z_NULL; zs.opaque =3D Z_NULL; }
// Here's the generic algorithm. The input buffer is= fully available. The // output buffer doubles its size as needed. It can star= t out empty, and we'll // allocate MINIMAL_CHUNK bytes up front. void Flater::run(vector<uin= t8_t>& in, vector<uint8_t>&am= p; out) { zs.avail_in =3D in.size(); // #bytes available to = be read zs.next_in =3D in.data(); // ptr to bytes availab= le to be read
unsigned int bytes_= written =3D 0; do { out.resize(max(MINIMAL_CHUNK, 2*(ass=3D"dt">unsigned)out.size())); if(TRACE_RESIZES) { cout << " Resize " &= lt;< out.size() << 'h">\n'; }
zs.avail_out =3D out.size() - bytes_written; zs.next_out =3D out.data() + bytes_written;
int r =3D flate(); assert(Z_STREAM_ERROR !=3D r);
bytes_written =3D out.size() - zs.avail_out;
} while (zs.avail_out =3D=3D v">0); assert(zs.avail_in =3D=3D 0); =3D"co">// All input will be used
out.resize(bytes_written); // Reduce size if(TRACE_RESIZES) { cout << " Finish " <&= lt; out.size() << '\= n'; } }
////////////////////////////////////////////////////////= ////////
// Here's the customizations for deflating (compress= ion).
class Deflater : public= Flater { public: Deflater(); ~Deflater(); int flate(); };
Deflater::Deflater() { int r =3D deflateInit(&zs, LEVEL); assert(Z_OK =3D=3D r); }
int Deflater::flate() { return deflate(&zs, Z_FINISH); }
Deflater::~Deflater() { deflateEnd(&zs); }
////////////////////////////////////////////////////////= ////////
// Here's the customizations for inflating (decompre= ssion).
class Inflater : public= Flater { public: Inflater(); ~Inflater(); int flate(); };
Inflater::Inflater() { int r =3D inflateInit(&zs); assert(Z_OK =3D=3D r); }
int Inflater::flate() { return inflate(&zs, Z_FINISH); }
Inflater::~Inflater() { inflateEnd(&zs); }
////////////////////////////////////////////////////////= ////////
// For testing purposes, fill buffer with a mixture of l= owercase letters and // random bytes. If the data is completely uniform-rando= m in the range 0-255, // then we wouldn't be able to compress. Having a si= gnificant portion of // lower-case letters (maybe) makes it more compressible= . void generate_data(vector<u= int8_t>& buffer) { assert(buffer.size() > 0); const unsigned RAND= OM_RATIO =3D 10; srand(time(0)); for(unsigned i =3D = 0; i < buffer.size(); i++) { buffer[i] =3D (rand()%RANDOM_RATIO =3D=3D 0) ? rand() : 'a' + (rand()%dv">26); } }
// Print sizes, and first several bytes in hex and ASCII= , similar to a hex // dump. void output_sample(const> vector<uint8_t>& buffer) { cout << " " << buffer.size() << " bytes (~= " << buffer.size()/KIBI << "K, ~= " << buffer.size()/KIBI/KIBI << "= ;M)\n";
const unsigned SEVE= RAL =3D min(16, (int)bu= ffer.size()); cout.fill('0'); cout << hex << " "; for(unsigned i =3D = 0; i < SEVERAL; i++) { cout << setw(2) << (ss=3D"dt">int)buffer[i] << ' 'n>; } for(unsigned i =3D = 0; i < SEVERAL; i++) { cout << (char)(isprint(buffer[i])? = buffer[i] : '.'); } cout << dec << '=3D"ch">\n'; }
////////////////////////////////////////////////////////= ////////
int main() { const float INPUT_S= IZE_MB =3D 25.7; const unsigned INPU= T_SIZE_BYTES =3D INPUT_SIZE_MB * KIBI * KIBI;
cout << "Generating some data...pan class=3D"ch">\n"; vector<uint8_t> original(INPUT_SIZE_BYT= ES); generate_data(original); output_sample(original);
cout << "Compressing...=3D"ch">\n"; vector<uint8_t> compressed; Deflater().run(original, compressed); output_sample(compressed);
double ratio =3D (double= span>)compressed.size() / original.size(); cout << "Compression ratio: "= << fixed << setprecision(3) <<= ratio << '\n= ';
vector<uint8_t> copy; cout << "Decompressing...ss=3D"ch">\n"; Inflater().run(compressed, copy); output_sample(copy);
assert(copy.size() =3D=3D INPUT_SIZE_BYTES); assert(copy =3D=3D original); cout << "Exact match!=3D"ch">\n";
return 0; }
--==-=-=--
--=-=-= Content-Type: text/x-c++src Content-Disposition: attachment; filename=zdemo.cpp
// zdemo
// % make zdemo CXXFLAGS="-g -Wall" LDFLAGS=-lz
#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <vector>
#include "zlib.h"
using namespace std;
// This many bytes per kilobyte (kibibyte)
const unsigned KIBI = 1024;
// Since the deflating and inflating procedures have a lot in common, this is // an attempt to capture the commonality. Yes I wrote a class, try not to // faint. class Flater { public: const int LEVEL = Z_DEFAULT_COMPRESSION; const bool TRACE_RESIZES = true; const unsigned MINIMAL_CHUNK = 16 * KIBI; // Start point for output size Flater(); virtual ~Flater() { }; void run(vector& in, vector& out); protected: virtual int flate() = 0; z_stream zs; };
// Prepares `zs` for the deflateInit()/inflateInit() call made by the
// subclass constructor. The whole struct is value-initialised first so that
// next_in and avail_in, which zlib requires to be set before inflateInit(),
// are not left indeterminate. The allocator hooks are then set to Z_NULL
// explicitly.
Flater::Flater()
    : zs()
{
    zs.zalloc = Z_NULL;
    zs.zfree = Z_NULL;
    zs.opaque = Z_NULL;
}
// Here's the generic algorithm. The input buffer is fully available. The // output buffer doubles its size as needed. It can start out empty, and we'll // allocate MINIMAL_CHUNK bytes up front. void Flater::run(vector& in, vector& out) { zs.avail_in = in.size(); // #bytes available to be read zs.next_in = in.data(); // ptr to bytes available to be read
unsigned int bytes_written = 0; do { out.resize(max(MINIMAL_CHUNK, 2*(unsigned)out.size())); if(TRACE_RESIZES) { cout << " Resize " << out.size() << '\n'; }
zs.avail_out = out.size() - bytes_written; zs.next_out = out.data() + bytes_written;
int r = flate(); assert(Z_STREAM_ERROR != r);
bytes_written = out.size() - zs.avail_out;
} while (zs.avail_out == 0); assert(zs.avail_in == 0); // All input will be used
out.resize(bytes_written); // Reduce size if(TRACE_RESIZES) { cout << " Finish " << out.size() << '\n'; } }
////////////////////////////////////////////////////////////////
// Here's the customizations for deflating (compression).
class Deflater : public Flater { public: Deflater(); ~Deflater(); int flate(); };
Deflater::Deflater() { int r = deflateInit(&zs, LEVEL); assert(Z_OK == r); }
int Deflater::flate() { return deflate(&zs, Z_FINISH); }
Deflater::~Deflater() { deflateEnd(&zs); }
////////////////////////////////////////////////////////////////
// Here's the customizations for inflating (decompression).
class Inflater : public Flater { public: Inflater(); ~Inflater(); int flate(); };
Inflater::Inflater() { int r = inflateInit(&zs); assert(Z_OK == r); }
int Inflater::flate() { return inflate(&zs, Z_FINISH); }
Inflater::~Inflater() { inflateEnd(&zs); }
////////////////////////////////////////////////////////////////
// For testing purposes, fill buffer with a mixture of lowercase letters and
// random bytes. If the data is completely uniform-random in the range 0-255,
// then we wouldn't be able to compress. Having a significant portion of
// lower-case letters (maybe) makes it more compressible.
//
// buffer: must be non-empty (asserted); every element is overwritten and the
//         size is left unchanged.
//
// The generator is re-seeded from the clock on every call.
void generate_data(std::vector<std::uint8_t>& buffer)
{
    assert(buffer.size() > 0);
    const unsigned RANDOM_RATIO = 10;  // roughly 1 byte in 10 is arbitrary
    srand(time(0));
    // Range-for instead of an `unsigned` index, so buffers of 2^32 bytes or
    // more cannot wrap the counter.
    for(std::uint8_t& byte : buffer) {
        if(rand() % RANDOM_RATIO == 0) {
            // Arbitrary byte: keep only the low 8 bits of rand().
            byte = static_cast<std::uint8_t>(rand());
        } else {
            byte = static_cast<std::uint8_t>('a' + (rand() % 26));
        }
    }
}
// Print sizes, and first several bytes in hex and ASCII, similar to a hex // dump. void output_sample(const vector& buffer) { cout << " " << buffer.size() << " bytes (~" << buffer.size()/KIBI << "K, ~" << buffer.size()/KIBI/KIBI << "M)\n";
const unsigned SEVERAL = min(16, (int)buffer.size()); cout.fill('0'); cout << hex << " "; for(unsigned i = 0; i < SEVERAL; i++) { cout << setw(2) << (int)buffer[i] << ' '; } for(unsigned i = 0; i < SEVERAL; i++) { cout << (char)(isprint(buffer[i])? buffer[i] : '.'); } cout << dec << '\n'; }
////////////////////////////////////////////////////////////////
// Round-trip demo: generate test data, compress it, decompress it, and
// compare the result with the original. Returns 0 on an exact match and 1
// otherwise. The comparison is an ordinary runtime check, so it still
// happens when assert is compiled out.
int main()
{
    const float INPUT_SIZE_MB = 25.7;
    const unsigned INPUT_SIZE_BYTES = INPUT_SIZE_MB * KIBI * KIBI;

    std::cout << "Generating some data...\n";
    std::vector<std::uint8_t> original(INPUT_SIZE_BYTES);
    generate_data(original);
    output_sample(original);

    std::cout << "Compressing...\n";
    std::vector<std::uint8_t> compressed;
    Deflater().run(original, compressed);
    output_sample(compressed);

    double ratio = static_cast<double>(compressed.size()) / original.size();
    std::cout << "Compression ratio: "
              << std::fixed << std::setprecision(3) << ratio << '\n';

    std::vector<std::uint8_t> copy;
    std::cout << "Decompressing...\n";
    Inflater().run(compressed, copy);
    output_sample(copy);

    if(copy.size() != INPUT_SIZE_BYTES || copy != original) {
        std::cerr << "Round-trip mismatch!\n";
        return 1;
    }
    std::cout << "Exact match!\n";

    return 0;
}
--=-=-= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Disposition: inline
_______________________________________________ Learn mailing list Learn-at-nylxs.com http://lists.mrbrklyn.com/mailman/listinfo/learn
--=-=-=--
|
|