Boost logo

Boost-Commit :

From: technews_at_[hidden]
Date: 2008-05-31 18:53:58


Author: turkanis
Date: 2008-05-31 18:53:58 EDT (Sat, 31 May 2008)
New Revision: 46001
URL: http://svn.boost.org/trac/boost/changeset/46001

Log:
added support for archives with multiple members; added tests for metadata and for multiple members (fixes #1896)
Text files modified:
   trunk/boost/iostreams/filter/gzip.hpp | 516 +++++++++++++++++++++++++++++----------
   trunk/libs/iostreams/test/gzip_test.cpp | 65 ++++
   2 files changed, 432 insertions(+), 149 deletions(-)

Modified: trunk/boost/iostreams/filter/gzip.hpp
==============================================================================
--- trunk/boost/iostreams/filter/gzip.hpp (original)
+++ trunk/boost/iostreams/filter/gzip.hpp 2008-05-31 18:53:58 EDT (Sat, 31 May 2008)
@@ -19,6 +19,7 @@
 #include <boost/config.hpp> // STATIC_CONSTANT, STDC_NAMESPACE,
                             // DINKUMWARE_STDLIB, __STL_CONFIG_H.
 #include <algorithm> // min.
+#include <cassert>
 #include <cstdio> // EOF.
 #include <cstddef> // size_t.
 #include <ctime> // std::time_t.
@@ -34,7 +35,8 @@
 #include <boost/iostreams/operations.hpp>
 #include <boost/iostreams/device/back_inserter.hpp>
 #include <boost/iostreams/filter/zlib.hpp>
-#include <boost/iostreams/pipeline.hpp>
+#include <boost/iostreams/pipeline.hpp>
+#include <boost/iostreams/putback.hpp>
 
 // Must come last.
 #if defined(BOOST_MSVC)
@@ -47,6 +49,8 @@
 #endif
 
 namespace boost { namespace iostreams {
+
+//------------------Definitions of constants----------------------------------//
 
 namespace gzip {
 
@@ -59,6 +63,7 @@
 const int bad_length = 3; // Recorded length doesn't match data.
 const int bad_header = 4; // Malformed header.
 const int bad_footer = 5; // Malformed footer.
+const int bad_method = 6; // Unsupported compression method.
 
 namespace magic {
 
@@ -118,6 +123,8 @@
 
 } // End namespace gzip.
 
+//------------------Definition of gzip_params---------------------------------//
+
 //
 // Class name: gzip_params.
 // Description: Subclass of zlib_params with an additional field
@@ -142,6 +149,8 @@
     std::time_t mtime;
 };
 
+//------------------Definition of gzip_error----------------------------------//
+
 //
 // Class name: gzip_error.
 // Description: Subclass of std::ios_base::failure thrown to indicate
@@ -163,6 +172,8 @@
     int zlib_error_code_;
 };
 
+//------------------Definition of gzip_compressor-----------------------------//
+
 //
 // Template name: gzip_compressor
 // Description: Model of OutputFilter implementing compression in the
@@ -283,7 +294,7 @@
         flags_ = 0;
     }
 
- enum flag_type {
+ enum state_type {
         f_header_done = 1,
         f_body_done = f_header_done << 1,
         f_footer_done = f_body_done << 1
@@ -297,6 +308,81 @@
 
 typedef basic_gzip_compressor<> gzip_compressor;
 
+//------------------Definition of helper templates for decompression----------//
+
+namespace detail {
+
+// Incremental parser for the header of one gzip member. Bytes are supplied
+// one at a time through process(); once done() returns true the accessors
+// expose the fields that were collected.
+class gzip_header {
+public:
+    gzip_header() { reset(); }
+
+    // Parsing interface
+    void process(char c);                            // Consumes one header byte.
+    bool done() const { return state_ == s_done; }   // True once the header is complete.
+    void reset();                                    // Returns to the initial state.
+
+    // Read access to the parsed fields
+    std::string file_name() const { return file_name_; }
+    std::string comment() const { return comment_; }
+    bool text() const { return 0 != (flags_ & gzip::flags::text); }
+    int os() const { return os_; }
+    std::time_t mtime() const { return mtime_; }
+private:
+    // Parser states; the enumerators are numbered consecutively from 1.
+    enum state_type {
+        s_id1 = 1,
+        s_id2,
+        s_cm,
+        s_flg,
+        s_mtime,
+        s_xfl,
+        s_os,
+        s_xlen,
+        s_extra,
+        s_name,
+        s_comment,
+        s_hcrc,
+        s_done
+    };
+    std::string file_name_;
+    std::string comment_;
+    int os_;
+    std::time_t mtime_;
+    int flags_;
+    int state_;     // Current state_type value.
+    int offset_;    // Offset within fixed-length region.
+    int xlen_;      // Bytes remaining in extra field.
+};
+
+// Incremental parser for the footer of one gzip member: four bytes of CRC
+// followed by four bytes of uncompressed size, each assembled with the
+// least significant byte first. Bytes are supplied one at a time through
+// process().
+class gzip_footer {
+public:
+    gzip_footer() { reset(); }
+
+    // Parsing interface
+    void process(char c);                            // Consumes one footer byte.
+    bool done() const { return state_ == s_done; }   // True once all bytes were seen.
+    void reset();                                    // Returns to the initial state.
+
+    // Read access to the parsed fields
+    zlib::ulong crc() const { return crc_; }
+    zlib::ulong uncompressed_size() const { return isize_; }
+private:
+    // Parser states; the enumerators are numbered consecutively from 1.
+    enum state_type {
+        s_crc = 1,
+        s_isize,
+        s_done
+    };
+    zlib::ulong crc_;
+    zlib::ulong isize_;
+    int state_;     // Current state_type value.
+    int offset_;    // Index of the next byte within the field being read.
+};
+
+} // End namespace boost::iostreams::detail.
+
+//------------------Definition of basic_gzip_decompressor---------------------//
+
 //
 // Template name: basic_gzip_decompressor
 // Description: Model of InputFilter implementing decompression in the
@@ -304,6 +390,9 @@
 //
 template<typename Alloc = std::allocator<char> >
 class basic_gzip_decompressor : basic_zlib_decompressor<Alloc> {
+private:
+ typedef basic_zlib_decompressor<Alloc> base_type;
+ typedef typename base_type::string_type string_type;
 public:
     typedef char char_type;
     struct category
@@ -316,35 +405,70 @@
     template<typename Source>
     std::streamsize read(Source& src, char_type* s, std::streamsize n)
     {
- if ((flags_ & f_header_read) == 0) {
- non_blocking_adapter<Source> nb(src);
- read_header(nb);
- flags_ |= f_header_read;
- }
-
- if ((flags_ & f_footer_read) != 0)
- return -1;
-
- try {
- std::streamsize result = 0;
- std::streamsize amt;
- if ((amt = base_type::read(src, s, n)) != -1) {
- result += amt;
- if (amt < n) { // Double check for EOF.
- amt = base_type::read(src, s + result, n - result);
- if (amt != -1)
+ typedef char_traits<char> traits_type;
+ std::streamsize result = 0;
+ peekable_source<Source> peek(src, putback_);
+ while (result < n && state_ != s_done) {
+ if (state_ == s_start) {
+ state_ = s_header;
+ header_.reset();
+ footer_.reset();
+ }
+ if (state_ == s_header) {
+ int c = boost::iostreams::get(peek);
+ if (traits_type::is_eof(c)) {
+ throw gzip_error(gzip::bad_header);
+ } else if (traits_type::would_block(c)) {
+ break;
+ }
+ header_.process(c);
+ if (header_.done())
+ state_ = s_body;
+ } else if (state_ == s_body) {
+ try {
+ std::streamsize amt =
+ base_type::read(peek, s + result, n - result);
+ if (amt != -1) {
                         result += amt;
+ if (amt < n - result)
+ break;
+ } else {
+ peek.putback(this->unconsumed_input());
+ state_ = s_footer;
+ }
+ } catch (const zlib_error& e) {
+ throw gzip_error(e);
+ }
+ } else { // state_ == s_footer
+ int c = boost::iostreams::get(peek);
+ if (traits_type::is_eof(c)) {
+ throw gzip_error(gzip::bad_footer);
+ } else if (traits_type::would_block(c)) {
+ break;
+ }
+ footer_.process(c);
+ if (footer_.done()) {
+ int c = boost::iostreams::get(peek);
+ if (traits_type::is_eof(c)) {
+ state_ = s_done;
+ } else {
+ peek.putback(c);
+ base_type::close(peek, BOOST_IOS::in);
+ state_ = s_start;
+ header_.reset();
+ footer_.reset();
+ }
                 }
             }
- if (amt == -1) {
- non_blocking_adapter<Source> nb(src);
- read_footer(nb);
- flags_ |= f_footer_read;
- }
- return result;
- } catch (const zlib_error& e) {
- throw gzip_error(e);
         }
+ if (peek.has_unconsumed_input()) {
+ putback_ = peek.unconsumed_input();
+ } else {
+ putback_.clear();
+ }
+ return result != 0 || state_ != s_done ?
+ result :
+ -1;
     }
 
     template<typename Source>
@@ -353,136 +477,95 @@
         try {
             base_type::close(src, BOOST_IOS::in);
         } catch (const zlib_error& e) {
- flags_ = 0;
+ state_ = s_start;
+ header_.reset();
+ footer_.reset();
             throw gzip_error(e);
         }
- flags_ = 0;
+ state_ = s_start;
     }
 
- std::string file_name() const { return file_name_; }
- std::string comment() const { return comment_; }
- bool text() const { return (flags_ & gzip::flags::text) != 0; }
- int os() const { return os_; }
- std::time_t mtime() const { return mtime_; }
+ std::string file_name() const { return header_.file_name(); }
+ std::string comment() const { return header_.comment(); }
+ bool text() const { return header_.text(); }
+ int os() const { return header_.os(); }
+ std::time_t mtime() const { return header_.mtime(); }
 private:
- typedef basic_zlib_decompressor<Alloc> base_type;
- typedef BOOST_IOSTREAMS_CHAR_TRAITS(char) traits_type;
- static bool is_eof(int c) { return traits_type::eq_int_type(c, EOF); }
     static gzip_params make_params(int window_bits);
 
+ // Source adapter allowing an arbitrary character sequence to be put back.
     template<typename Source>
- static uint8_t read_uint8(Source& src, int error)
- {
- int c;
- if ((c = boost::iostreams::get(src)) == EOF || c == WOULD_BLOCK)
- throw gzip_error(error);
- return static_cast<uint8_t>(traits_type::to_char_type(c));
- }
+ struct peekable_source {
+ typedef char char_type;
+ struct category : source_tag, peekable_tag { };
+ explicit peekable_source(Source& src, const string_type& putback = "")
+ : src_(src), putback_(putback), offset_(0)
+ { }
+ std::streamsize read(char* s, std::streamsize n)
+ {
+ std::streamsize result = 0;
 
- template<typename Source>
- static uint32_t read_uint32(Source& src, int error)
- {
- uint8_t b1 = read_uint8(src, error);
- uint8_t b2 = read_uint8(src, error);
- uint8_t b3 = read_uint8(src, error);
- uint8_t b4 = read_uint8(src, error);
- return b1 + (b2 << 8) + (b3 << 16) + (b4 << 24);
- }
+ // Copy characters from putback buffer
+ std::streamsize pbsize =
+ static_cast<std::streamsize>(putback_.size());
+ if (offset_ < pbsize) {
+ result = (std::min)(n, pbsize - offset_);
+ BOOST_IOSTREAMS_CHAR_TRAITS(char)::copy(
+ s, putback_.data() + offset_, result);
+ offset_ += result;
+ if (result == n)
+ return result;
+ }
 
- template<typename Source>
- std::string read_string(Source& src)
- {
- std::string result;
- while (true) {
- int c;
- if (is_eof(c = boost::iostreams::get(src)))
- throw gzip_error(gzip::bad_header);
- else if (c == 0)
- return result;
- else
- result += static_cast<char>(c);
+ // Read characters from src_
+ std::streamsize amt =
+ boost::iostreams::read(src_, s + result, n - result);
+ return amt != -1 ?
+ result + amt :
+ result ? result : -1;
+ }
+ bool putback(char c)
+ {
+ if (offset_) {
+ putback_[--offset_] = c;
+ return true;
+ } else {
+ return boost::iostreams::putback(src_, c);
+ }
+ }
+ void putback(const string_type& s)
+ {
+ putback_.replace(0, offset_, s);
+ offset_ = 0;
         }
- }
-
- template<typename Source>
- void read_header(Source& src) // Source is non-blocking.
- {
- // Reset saved values.
- #if BOOST_WORKAROUND(__GNUC__, == 2) && defined(__STL_CONFIG_H) || \
- BOOST_WORKAROUND(BOOST_DINKUMWARE_STDLIB, == 1) \
- /**/
- file_name_.erase(0, std::string::npos);
- comment_.erase(0, std::string::npos);
- #else
- file_name_.clear();
- comment_.clear();
- #endif
- os_ = gzip::os_unknown;
- mtime_ = 0;
-
- int flags;
 
- // Read header, without checking header crc.
- if ( boost::iostreams::get(src) != gzip::magic::id1 || // ID1.
- boost::iostreams::get(src) != gzip::magic::id2 || // ID2.
- is_eof(boost::iostreams::get(src)) || // CM.
- is_eof(flags = boost::iostreams::get(src)) ) // FLG.
+ // Returns true if some characters have been putback but not re-read.
+ bool has_unconsumed_input() const
         {
- throw gzip_error(gzip::bad_header);
+ return offset_ < static_cast<std::streamsize>(putback_.size());
         }
- mtime_ = read_uint32(src, gzip::bad_header); // MTIME.
- read_uint8(src, gzip::bad_header); // XFL.
- os_ = read_uint8(src, gzip::bad_header); // OS.
- if (flags & boost::iostreams::gzip::flags::text)
- flags_ |= f_text;
-
- // Skip extra field. (From J. Halleaux; see note at top.)
- if (flags & gzip::flags::extra) {
- int length =
- static_cast<int>(
- read_uint8(src, gzip::bad_header) +
- (read_uint8(src, gzip::bad_header) << 8)
- );
- // length is garbage if EOF but the loop below will quit anyway.
- do { }
- while (length-- != 0 && !is_eof(boost::iostreams::get(src)));
- }
-
- if (flags & gzip::flags::name) // Read file name.
- file_name_ = read_string(src);
- if (flags & gzip::flags::comment) // Read comment.
- comment_ = read_string(src);
- if (flags & gzip::flags::header_crc) { // Skip header crc.
- read_uint8(src, gzip::bad_header);
- read_uint8(src, gzip::bad_header);
+
+ // Returns the sequence of characters that have been put back but not re-read.
+ string_type unconsumed_input() const
+ {
+ return string_type(putback_, offset_, putback_.size() - offset_);
         }
- }
+ Source& src_;
+ string_type putback_;
+ std::streamsize offset_;
+ };
 
- template<typename Source>
- void read_footer(Source& src)
- {
- typename base_type::string_type footer =
- this->unconsumed_input();
- int c;
- while (!is_eof(c = boost::iostreams::get(src)))
- footer += c;
- detail::range_adapter<input, std::string>
- rng(footer.begin(), footer.end());
- if (read_uint32(rng, gzip::bad_footer) != this->crc())
- throw gzip_error(gzip::bad_crc);
- if (static_cast<int>(read_uint32(rng, gzip::bad_footer)) != this->total_out())
- throw gzip_error(gzip::bad_length);
- }
- enum flag_type {
- f_header_read = 1,
- f_footer_read = f_header_read << 1,
- f_text = f_footer_read << 1
+ enum state_type {
+ s_start = 1,
+ s_header = s_start + 1,
+ s_body = s_header + 1,
+ s_footer = s_body + 1,
+ s_done = s_footer + 1
     };
- std::string file_name_;
- std::string comment_;
- int os_;
- std::time_t mtime_;
- int flags_;
+ detail::gzip_header header_;
+ detail::gzip_footer footer_;
+ string_type putback_;
+ int state_;
 };
 BOOST_IOSTREAMS_PIPABLE(basic_gzip_decompressor, 1)
 
@@ -574,13 +657,164 @@
     return amt;
 }
 
+//------------------Implementation of gzip_header-----------------------------//
+
+namespace detail {
+
+// Advances the header state machine by one byte.
+//   c - the next byte of the gzip member header.
+// Throws gzip_error(gzip::bad_header) when either magic byte is wrong and
+// gzip_error(gzip::bad_method) when the compression method is not deflate.
+// The XFL byte and the two header-CRC bytes are consumed but not inspected.
+// Calling this in any state not handled below (e.g. after done()) asserts.
+void gzip_header::process(char c)
+{
+    uint8_t value = static_cast<uint8_t>(c);
+
+    // Optional fields that may legally follow each section.
+    const int after_extra =
+        gzip::flags::name | gzip::flags::comment | gzip::flags::header_crc;
+    const int after_name = gzip::flags::comment | gzip::flags::header_crc;
+    const int after_comment = gzip::flags::header_crc;
+
+    // Set when the byte just consumed finishes a section that can be
+    // followed by optional fields; 'candidates' then holds the flag bits of
+    // the optional fields that are still allowed to come next.
+    bool select_next = false;
+    int candidates = 0;
+
+    switch (state_) {
+    case s_id1:
+        if (value != gzip::magic::id1)
+            throw gzip_error(gzip::bad_header);
+        state_ = s_id2;
+        break;
+    case s_id2:
+        if (value != gzip::magic::id2)
+            throw gzip_error(gzip::bad_header);
+        state_ = s_cm;
+        break;
+    case s_cm:
+        if (value != gzip::method::deflate)
+            throw gzip_error(gzip::bad_method);
+        state_ = s_flg;
+        break;
+    case s_flg:
+        flags_ = value;
+        state_ = s_mtime;
+        break;
+    case s_mtime:
+        // Least significant byte first. Widen before shifting so that a
+        // byte >= 0x80 shifted by 24 cannot overflow a 32-bit int.
+        mtime_ += static_cast<std::time_t>(
+                      static_cast<unsigned long>(value) << (offset_ * 8));
+        if (offset_ == 3) {
+            state_ = s_xfl;
+            offset_ = 0;
+        } else {
+            ++offset_;
+        }
+        break;
+    case s_xfl:
+        state_ = s_os;
+        break;
+    case s_os:
+        os_ = value;
+        select_next = true;
+        candidates = flags_;
+        break;
+    case s_xlen:
+        xlen_ += value << (offset_ * 8);
+        if (offset_ == 1) {
+            offset_ = 0;
+            if (xlen_ != 0) {
+                state_ = s_extra;
+            } else {
+                // Empty extra field: there are no bytes to skip.
+                select_next = true;
+                candidates = flags_ & after_extra;
+            }
+        } else {
+            ++offset_;
+        }
+        break;
+    case s_extra:
+        // The extra field's content is skipped, not stored.
+        if (--xlen_ == 0) {
+            select_next = true;
+            candidates = flags_ & after_extra;
+        }
+        break;
+    case s_name:
+        if (c != 0) {
+            file_name_ += c;
+        } else {
+            select_next = true;
+            candidates = flags_ & after_name;
+        }
+        break;
+    case s_comment:
+        if (c != 0) {
+            comment_ += c;
+        } else {
+            select_next = true;
+            candidates = flags_ & after_comment;
+        }
+        break;
+    case s_hcrc:
+        if (offset_ == 1) {
+            state_ = s_done;
+            offset_ = 0;
+        } else {
+            ++offset_;
+        }
+        break;
+    default:
+        assert(0);
+    }
+
+    // Pick the first optional field that is present, in header order. The
+    // extra field starts with its two-byte length, hence s_xlen.
+    if (select_next) {
+        if (candidates & gzip::flags::extra) {
+            state_ = s_xlen;
+        } else if (candidates & gzip::flags::name) {
+            state_ = s_name;
+        } else if (candidates & gzip::flags::comment) {
+            state_ = s_comment;
+        } else if (candidates & gzip::flags::header_crc) {
+            state_ = s_hcrc;
+        } else {
+            state_ = s_done;
+        }
+    }
+}
+
+// Puts the parser back into its initial state: the next byte is expected to
+// be the first magic byte and all collected fields are cleared.
+void gzip_header::reset()
+{
+    state_ = s_id1;
+    offset_ = 0;
+    xlen_ = 0;
+    flags_ = 0;
+    os_ = 0;
+    mtime_ = 0;
+    comment_.clear();
+    file_name_.clear();
+}
+
+//------------------Implementation of gzip_footer-----------------------------//
+
+// Advances the footer state machine by one byte.
+//   c - the next byte of the gzip member footer.
+// The first four bytes are accumulated into the CRC and the next four into
+// the uncompressed size, least significant byte first. Calling this after
+// done() returns true is a programming error and asserts.
+void gzip_footer::process(char c)
+{
+    // Widen before shifting: shifting a promoted int by 24 would overflow
+    // for bytes >= 0x80 and sign-extend into the upper bits of a wider
+    // zlib::ulong.
+    const zlib::ulong value = static_cast<uint8_t>(c);
+    if (state_ == s_crc) {
+        crc_ += value << (offset_ * 8);
+        if (offset_ == 3) {
+            state_ = s_isize;
+            offset_ = 0;
+        } else {
+            ++offset_;
+        }
+    } else if (state_ == s_isize) {  // Comparison; this was an assignment.
+        isize_ += value << (offset_ * 8);
+        if (offset_ == 3) {
+            state_ = s_done;
+            offset_ = 0;
+        } else {
+            ++offset_;
+        }
+    } else {
+        assert(0);
+    }
+}
+
+// Puts the parser back into its initial state. The accumulated CRC and size
+// must be zeroed as well: process() builds them up with '+=', so stale (or,
+// on first use, uninitialized) values would otherwise leak into the result.
+void gzip_footer::reset()
+{
+    crc_ = 0;
+    isize_ = 0;
+    state_ = s_crc;
+    offset_ = 0;
+}
+
+} // End namespace boost::iostreams::detail.
+
 //------------------Implementation of gzip_decompressor-----------------------//
 
 template<typename Alloc>
 basic_gzip_decompressor<Alloc>::basic_gzip_decompressor
     (int window_bits, int buffer_size)
     : base_type(make_params(window_bits), buffer_size),
- os_(gzip::os_unknown), mtime_(0), flags_(0)
+ state_(s_start)
     { }
 
 template<typename Alloc>

Modified: trunk/libs/iostreams/test/gzip_test.cpp
==============================================================================
--- trunk/libs/iostreams/test/gzip_test.cpp (original)
+++ trunk/libs/iostreams/test/gzip_test.cpp 2008-05-31 18:53:58 EDT (Sat, 31 May 2008)
@@ -6,8 +6,14 @@
 // See http://www.boost.org/libs/iostreams for documentation.
 
 #include <string>
+#include <boost/iostreams/copy.hpp>
+#include <boost/iostreams/device/array.hpp>
+#include <boost/iostreams/device/back_inserter.hpp>
 #include <boost/iostreams/filter/gzip.hpp>
 #include <boost/iostreams/filter/test.hpp>
+#include <boost/iostreams/filtering_stream.hpp>
+#include <boost/ref.hpp>
+#include <boost/range/iterator_range.hpp>
 #include <boost/test/test_tools.hpp>
 #include <boost/test/unit_test.hpp>
 #include "detail/sequence.hpp"
@@ -16,18 +22,36 @@
 using namespace boost;
 using namespace boost::iostreams;
 using namespace boost::iostreams::test;
+namespace io = boost::iostreams;
 using boost::unit_test::test_suite;
 
 struct gzip_alloc : std::allocator<char> { };
 
-void gzip_test()
+void compression_test()
 {
- text_sequence data;
- BOOST_CHECK(
- test_filter_pair( gzip_compressor(),
- gzip_decompressor(),
- std::string(data.begin(), data.end()) )
- );
+ text_sequence data;
+
+ // Test compression and decompression with metadata
+ for (int i = 0; i < 4; ++i) {
+ gzip_params params;
+ if (i & 1) {
+ params.file_name = "original file name";
+ }
+ if (i & 2) {
+ params.comment = "detailed file description";
+ }
+ gzip_compressor out(params);
+ gzip_decompressor in;
+ BOOST_CHECK(
+ test_filter_pair( boost::ref(out),
+ boost::ref(in),
+ std::string(data.begin(), data.end()) )
+ );
+ BOOST_CHECK(in.file_name() == params.file_name);
+ BOOST_CHECK(in.comment() == params.comment);
+ }
+
+ // Test compression and decompression with custom allocator
     BOOST_CHECK(
         test_filter_pair( basic_gzip_compressor<gzip_alloc>(),
                           basic_gzip_decompressor<gzip_alloc>(),
@@ -35,9 +59,34 @@
     );
 }
 
+// Checks that a stream consisting of two gzip members written back to back
+// decompresses to two consecutive copies of the original data.
+void multiple_member_test()
+{
+    text_sequence data;
+    std::vector<char> temp, dest;
+
+    // Write compressed data to temp, twice in succession
+    filtering_ostream out;
+    out.push(gzip_compressor());
+    out.push(io::back_inserter(temp));
+    io::copy(make_iterator_range(data), out);
+    out.push(io::back_inserter(temp));
+    io::copy(make_iterator_range(data), out);
+
+    // &temp[0] below is only valid if something was actually written
+    BOOST_REQUIRE(!temp.empty());
+
+    // Read compressed data from temp into dest
+    filtering_istream in;
+    in.push(gzip_decompressor());
+    in.push(array_source(&temp[0], temp.size()));
+    io::copy(in, io::back_inserter(dest));
+
+    // Check that dest consists of two copies of data. The size is verified
+    // first so that std::equal never reads past the end of dest and the
+    // second comparison starts exactly where the second copy must begin.
+    const std::string expected(data.begin(), data.end());
+    BOOST_REQUIRE(dest.size() == 2 * expected.size());
+    BOOST_CHECK(std::equal(expected.begin(), expected.end(), dest.begin()));
+    BOOST_CHECK(std::equal(expected.begin(), expected.end(),
+                           dest.begin() + expected.size()));
+}
+
 test_suite* init_unit_test_suite(int, char* [])
 {
     test_suite* test = BOOST_TEST_SUITE("gzip test");
- test->add(BOOST_TEST_CASE(&gzip_test));
+ test->add(BOOST_TEST_CASE(&compression_test));
+ test->add(BOOST_TEST_CASE(&multiple_member_test));
     return test;
 }


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk