Boost logo

Boost-Commit :

From: technews_at_[hidden]
Date: 2008-05-27 16:18:02


Author: turkanis
Date: 2008-05-27 16:18:02 EDT (Tue, 27 May 2008)
New Revision: 45833
URL: http://svn.boost.org/trac/boost/changeset/45833

Log:
Added grep_filter and tests (issue #1627). line_filter had to be modified to be usable as a base class for grep_filter. The commented-out tests for grep_filter::count() are incorrect because they query the original filter rather than the copy used for I/O; I have verified independently that count() works and will fix the tests later.
Added:
   trunk/boost/iostreams/filter/grep.hpp (contents, props changed)
   trunk/libs/iostreams/test/grep_test.cpp (contents, props changed)
Text files modified:
   trunk/boost/iostreams/filter/line.hpp | 37 ++++++++++++++++++++++---------------
   trunk/libs/iostreams/test/Jamfile.v2 | 3 +++
   2 files changed, 25 insertions(+), 15 deletions(-)

Added: trunk/boost/iostreams/filter/grep.hpp
==============================================================================
--- (empty file)
+++ trunk/boost/iostreams/filter/grep.hpp 2008-05-27 16:18:02 EDT (Tue, 27 May 2008)
@@ -0,0 +1,109 @@
+/*
+ * Distributed under the Boost Software License, Version 1.0.(See accompanying
+ * file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt.)
+ *
+ * See http://www.boost.org/libs/iostreams for documentation.
+
+ * File: boost/iostreams/filter/grep.hpp
+ * Date: Mon May 26 17:48:45 MDT 2008
+ * Copyright: 2008 CodeRage, LLC
+ * Author: Jonathan Turkanis
+ * Contact: turkanis at coderage dot com
+ *
+ * Defines the class template basic_grep_filter and its specializations
+ * grep_filter and wgrep_filter.
+ */
+
+#ifndef BOOST_IOSTREAMS_GREP_FILTER_HPP_INCLUDED
+#define BOOST_IOSTREAMS_GREP_FILTER_HPP_INCLUDED
+
+#if defined(_MSC_VER) && (_MSC_VER >= 1020)
+# pragma once
+#endif
+
+#include <iostream>
+
+#include <memory> // allocator.
+#include <boost/iostreams/char_traits.hpp>
+#include <boost/iostreams/filter/line.hpp>
+#include <boost/iostreams/pipeline.hpp>
+#include <boost/regex.hpp>
+
+namespace boost { namespace iostreams {
+
+namespace grep {
+
+const int invert = 1; // Option bit: select the lines that do NOT match the regex.
+const int whole_line = invert << 1; // Option bit: the regex must match the entire line (regex_match) instead of any part of it (regex_search).
+
+} // End namespace grep.
+
+template< typename Ch,
+ typename Tr = regex_traits<Ch>,
+ typename Alloc = std::allocator<Ch> >
+class basic_grep_filter : public basic_line_filter<Ch, Alloc> { // Line filter that passes through only the lines selected by a regular expression.
+private:
+ typedef basic_line_filter<Ch, Alloc> base_type;
+public:
+ typedef typename base_type::char_type char_type;
+ typedef typename base_type::category category;
+ typedef char_traits<char_type> traits_type;
+ typedef typename base_type::string_type string_type;
+ typedef basic_regex<Ch, Tr> regex_type;
+ typedef regex_constants::match_flag_type match_flag_type;
+ basic_grep_filter( const regex_type& re, // re: expression each line is tested against.
+ match_flag_type match_flags = // match_flags: forwarded to regex_match / regex_search.
+ regex_constants::match_default,
+ int options = 0 ); // options: bitwise OR of grep::invert and grep::whole_line.
+ int count() const { return count_; } // Lines selected since the last reset; the reset happens on the first line filtered after construction or close().
+
+ template<typename Sink>
+ void close(Sink& snk, BOOST_IOS::openmode which)
+ {
+ base_type::close(snk, which);
+ options_ &= ~f_initialized; // Re-arm the lazy reset: count_ keeps its value until the next do_filter() call.
+ }
+private:
+ virtual string_type do_filter(const string_type& line)
+ {
+ if ((options_ & f_initialized) == 0) { // First line since construction or close(): start counting from zero.
+ options_ |= f_initialized;
+ count_ = 0;
+ }
+ bool matches = (options_ & grep::whole_line) ?
+ regex_match(line, re_, match_flags_) :
+ regex_search(line, re_, match_flags_);
+ if (options_ & grep::invert)
+ matches = !matches;
+ if (matches)
+ ++count_;
+ return matches ? line + traits_type::newline() : string_type(); // Selected lines get their newline appended here; rejected lines become empty strings.
+ }
+
+ // Private flags bitwise OR'd with constants from namespace grep
+ enum flags_ {
+ f_initialized = 65536 // Set once count_ has been zeroed for the current run; cleared again by close().
+ };
+
+ regex_type re_;
+ match_flag_type match_flags_;
+ int options_; // grep:: option bits plus the private f_initialized bit.
+ int count_;
+};
+BOOST_IOSTREAMS_PIPABLE(basic_grep_filter, 3)
+
+typedef basic_grep_filter<char> grep_filter;
+typedef basic_grep_filter<wchar_t> wgrep_filter;
+
+//------------------Implementation of basic_grep_filter-----------------------//
+
+template<typename Ch, typename Tr, typename Alloc>
+basic_grep_filter<Ch, Tr, Alloc>::basic_grep_filter
+ (const regex_type& re, match_flag_type match_flags, int options)
+ : base_type(true), re_(re), match_flags_(match_flags), // true = suppress_newlines: the base class must not append newlines, do_filter() adds them itself.
+ options_(options), count_(0)
+ { }
+
+} } // End namespaces iostreams, boost.
+
+#endif // #ifndef BOOST_IOSTREAMS_GREP_FILTER_HPP_INCLUDED

Modified: trunk/boost/iostreams/filter/line.hpp
==============================================================================
--- trunk/boost/iostreams/filter/line.hpp (original)
+++ trunk/boost/iostreams/filter/line.hpp 2008-05-27 16:18:02 EDT (Tue, 27 May 2008)
@@ -18,6 +18,7 @@
 #include <string>
 #include <boost/config.hpp> // BOOST_STATIC_CONSTANT.
 #include <boost/iostreams/categories.hpp>
+#include <boost/iostreams/checked_operations.hpp>
 #include <boost/iostreams/detail/ios.hpp> // openmode, streamsize.
 #include <boost/iostreams/read.hpp> // check_eof
 #include <boost/iostreams/pipeline.hpp>
@@ -61,7 +62,10 @@
           closable_tag
         { };
 protected:
- basic_line_filter() : pos_(string_type::npos), state_(0) { }
+ basic_line_filter(bool suppress_newlines = false) // suppress_newlines: when true, the filter does not append a newline to filtered lines itself.
+ : pos_(string_type::npos),
+ flags_(suppress_newlines ? f_suppress : 0)
+ { }
 public:
     virtual ~basic_line_filter() { }
 
@@ -69,8 +73,8 @@
     std::streamsize read(Source& src, char_type* s, std::streamsize n)
     {
         using namespace std;
- assert(!(state_ & f_write));
- state_ |= f_read;
+ assert(!(flags_ & f_write));
+ flags_ |= f_read;
 
         // Handle unfinished business.
         std::streamsize result = 0;
@@ -80,7 +84,7 @@
         typename traits_type::int_type status = traits_type::good();
         while (result < n && !traits_type::is_eof(status)) {
 
- // Call next_line() to retrieve a line of filtered test, and
+ // Call next_line() to retrieve a line of filtered text, and
             // read_line() to copy it into buffer s.
             if (traits_type::would_block(status = next_line(src)))
                 return result;
@@ -94,8 +98,8 @@
     std::streamsize write(Sink& snk, const char_type* s, std::streamsize n)
     {
         using namespace std;
- assert(!(state_ & f_read));
- state_ |= f_write;
+ assert(!(flags_ & f_read));
+ flags_ |= f_write;
 
         // Handle unfinished business.
         if (pos_ != string_type::npos && !write_line(snk))
@@ -122,10 +126,10 @@
     template<typename Sink>
     void close(Sink& snk, BOOST_IOS::openmode which)
     {
- if ((state_ & f_read) && which == BOOST_IOS::in)
+ if ((flags_ & f_read) && which == BOOST_IOS::in)
             close_impl();
 
- if ((state_ & f_write) && which == BOOST_IOS::out) {
+ if ((flags_ & f_write) && which == BOOST_IOS::out) {
             try {
                 if (!cur_line_.empty())
                     write_line(snk);
@@ -168,7 +172,7 @@
         if (!traits_type::would_block(c)) {
             if (!cur_line_.empty() || c == traits_type::newline())
                 cur_line_ = do_filter(cur_line_);
- if (c == traits_type::newline())
+ if (c == traits_type::newline() && (flags_ & f_suppress) == 0)
                 cur_line_ += c;
         }
         return c; // status indicator.
@@ -179,9 +183,11 @@
     template<typename Sink>
     bool write_line(Sink& snk)
     {
- string_type line = do_filter(cur_line_) + traits_type::newline();
+ string_type line = do_filter(cur_line_);
+ if ((flags_ & f_suppress) == 0)
+ line += traits_type::newline();
         std::streamsize amt = static_cast<std::streamsize>(line.size());
- bool result = iostreams::write(snk, line.data(), amt) == amt;
+ bool result = iostreams::write_if(snk, line.data(), amt) == amt;
         if (result)
             clear();
         return result;
@@ -190,7 +196,7 @@
     void close_impl()
     {
         clear();
- state_ = 0;
+ flags_ &= ~f_suppress;
     }
 
     void clear()
@@ -200,13 +206,14 @@
     }
 
     enum flag_type {
- f_read = 1,
- f_write = f_read << 1
+ f_read = 1, // Set by read(): the filter is being used for input.
+ f_write = f_read << 1, // Set by write(): the filter is being used for output.
+ f_suppress = f_write << 1 // Set at construction when suppress_newlines is true: no newline is appended after do_filter().
     };
 
     string_type cur_line_;
     typename string_type::size_type pos_;
- int state_;
+ int flags_;
 };
 BOOST_IOSTREAMS_PIPABLE(basic_line_filter, 2)
 

Modified: trunk/libs/iostreams/test/Jamfile.v2
==============================================================================
--- trunk/libs/iostreams/test/Jamfile.v2 (original)
+++ trunk/libs/iostreams/test/Jamfile.v2 2008-05-27 16:18:02 EDT (Tue, 27 May 2008)
@@ -55,6 +55,9 @@
           [ test-iostreams filtering_stream_test.cpp ]
           [ test-iostreams finite_state_filter_test.cpp ]
           [ test-iostreams flush_test.cpp ]
+ [ test-iostreams
+ grep_test.cpp
+ /boost/regex//boost_regex ]
           [ test-iostreams invert_test.cpp ]
           [ test-iostreams line_filter_test.cpp ]
           [ test-iostreams mapped_file_test.cpp

Added: trunk/libs/iostreams/test/grep_test.cpp
==============================================================================
--- (empty file)
+++ trunk/libs/iostreams/test/grep_test.cpp 2008-05-27 16:18:02 EDT (Tue, 27 May 2008)
@@ -0,0 +1,282 @@
+/*
+ * Distributed under the Boost Software License, Version 1.0.(See accompanying
+ * file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt.)
+ *
+ * See http://www.boost.org/libs/iostreams for documentation.
+
+ * File: libs/iostreams/test/grep_test.cpp
+ * Date: Mon May 26 17:48:45 MDT 2008
+ * Copyright: 2008 CodeRage, LLC
+ * Author: Jonathan Turkanis
+ * Contact: turkanis at coderage dot com
+ *
+ * Tests the class template basic_grep_filter.
+ */
+
+#include <iostream>
+
+#include <boost/config.hpp> // Make sure ptrdiff_t is in std.
+#include <algorithm>
+#include <cstddef> // std::ptrdiff_t
+#include <string>
+#include <boost/iostreams/compose.hpp>
+#include <boost/iostreams/copy.hpp>
+#include <boost/iostreams/device/array.hpp>
+#include <boost/iostreams/device/back_inserter.hpp>
+#include <boost/iostreams/filter/grep.hpp>
+#include <boost/iostreams/filter/test.hpp>
+#include <boost/ref.hpp>
+#include <boost/regex.hpp>
+#include <boost/test/test_tools.hpp>
+#include <boost/test/unit_test.hpp>
+
+using namespace boost;
+using namespace boost::iostreams;
+namespace io = boost::iostreams;
+using boost::unit_test::test_suite;
+
+// List of addresses of US Appeals Courts, from uscourts.gov
+std::string addresses =
+ "John Joseph Moakley United States Courthouse, Suite 2500\n"
+ "One Courthouse Way\n"
+ "Boston, MA 02210-3002\n"
+ "\n"
+ "Thurgood Marshall United States Courthouse, 18th Floor\n"
+ "40 Centre Street\n"
+ "New York, NY 10007-1501\n"
+ "\n"
+ "21400 James A. Byrne United States Courthouse\n"
+ "601 Market Street\n"
+ "Philadelphia, PA 19106-1729\n"
+ "\n"
+ "Lewis F. Powell, Jr. United States Courthouse Annex, Suite 501\n"
+ "1100 East Main Street\n"
+ "Richmond, VA 23219-3525\n"
+ "\n"
+ "F. Edward Hebert Federal Bldg\n"
+ "600 South Maestri Place\n"
+ "New Orleans, LA 70130\n"
+ "\n"
+ "Bob Casey United States Courthouse, 1st Floor\n"
+ "515 Rusk Street\n"
+ "Houston, TX 77002-2600\n"
+ "\n"
+ "Potter Stewart United States Courthouse, Suite 540\n"
+ "100 East Fifth Street\n"
+ "Cincinnati, OH 45202\n"
+ "\n"
+ "2722 Everett McKinley Dirksen United States Courthouse\n"
+ "219 South Dearborn Street\n"
+ "Chicago, IL 60604\n";
+
+// Lines containing "United States Courthouse"
+std::string us_courthouse =
+ "John Joseph Moakley United States Courthouse, Suite 2500\n"
+ "Thurgood Marshall United States Courthouse, 18th Floor\n"
+ "21400 James A. Byrne United States Courthouse\n"
+ "Lewis F. Powell, Jr. United States Courthouse Annex, Suite 501\n"
+ "Bob Casey United States Courthouse, 1st Floor\n"
+ "Potter Stewart United States Courthouse, Suite 540\n"
+ "2722 Everett McKinley Dirksen United States Courthouse\n";
+
+// Lines not containing "United States Courthouse"
+std::string us_courthouse_inv =
+ "One Courthouse Way\n"
+ "Boston, MA 02210-3002\n"
+ "\n"
+ "40 Centre Street\n"
+ "New York, NY 10007-1501\n"
+ "\n"
+ "601 Market Street\n"
+ "Philadelphia, PA 19106-1729\n"
+ "\n"
+ "1100 East Main Street\n"
+ "Richmond, VA 23219-3525\n"
+ "\n"
+ "F. Edward Hebert Federal Bldg\n"
+ "600 South Maestri Place\n"
+ "New Orleans, LA 70130\n"
+ "\n"
+ "515 Rusk Street\n"
+ "Houston, TX 77002-2600\n"
+ "\n"
+ "100 East Fifth Street\n"
+ "Cincinnati, OH 45202\n"
+ "\n"
+ "219 South Dearborn Street\n"
+ "Chicago, IL 60604\n";
+
+// Lines containing a state and zip
+std::string state_and_zip =
+ "Boston, MA 02210-3002\n"
+ "New York, NY 10007-1501\n"
+ "Philadelphia, PA 19106-1729\n"
+ "Richmond, VA 23219-3525\n"
+ "New Orleans, LA 70130\n"
+ "Houston, TX 77002-2600\n"
+ "Cincinnati, OH 45202\n"
+ "Chicago, IL 60604\n";
+
+// Lines not containing a state and zip
+std::string state_and_zip_inv =
+ "John Joseph Moakley United States Courthouse, Suite 2500\n"
+ "One Courthouse Way\n"
+ "\n"
+ "Thurgood Marshall United States Courthouse, 18th Floor\n"
+ "40 Centre Street\n"
+ "\n"
+ "21400 James A. Byrne United States Courthouse\n"
+ "601 Market Street\n"
+ "\n"
+ "Lewis F. Powell, Jr. United States Courthouse Annex, Suite 501\n"
+ "1100 East Main Street\n"
+ "\n"
+ "F. Edward Hebert Federal Bldg\n"
+ "600 South Maestri Place\n"
+ "\n"
+ "Bob Casey United States Courthouse, 1st Floor\n"
+ "515 Rusk Street\n"
+ "\n"
+ "Potter Stewart United States Courthouse, Suite 540\n"
+ "100 East Fifth Street\n"
+ "\n"
+ "2722 Everett McKinley Dirksen United States Courthouse\n"
+ "219 South Dearborn Street\n";
+
+// Lines containing at least three words
+std::string three_words =
+ "John Joseph Moakley United States Courthouse, Suite 2500\n"
+ "One Courthouse Way\n"
+ "Thurgood Marshall United States Courthouse, 18th Floor\n"
+ "40 Centre Street\n"
+ "21400 James A. Byrne United States Courthouse\n"
+ "601 Market Street\n"
+ "Lewis F. Powell, Jr. United States Courthouse Annex, Suite 501\n"
+ "1100 East Main Street\n"
+ "F. Edward Hebert Federal Bldg\n"
+ "600 South Maestri Place\n"
+ "Bob Casey United States Courthouse, 1st Floor\n"
+ "515 Rusk Street\n"
+ "Potter Stewart United States Courthouse, Suite 540\n"
+ "100 East Fifth Street\n"
+ "2722 Everett McKinley Dirksen United States Courthouse\n"
+ "219 South Dearborn Street\n";
+
+// Lines containing exactly three words
+std::string exactly_three_words =
+ "One Courthouse Way\n"
+ "40 Centre Street\n"
+ "601 Market Street\n"
+ "515 Rusk Street\n";
+
+// Lines that don't contain exactly three words
+std::string exactly_three_words_inv =
+ "John Joseph Moakley United States Courthouse, Suite 2500\n"
+ "Boston, MA 02210-3002\n"
+ "\n"
+ "Thurgood Marshall United States Courthouse, 18th Floor\n"
+ "New York, NY 10007-1501\n"
+ "\n"
+ "21400 James A. Byrne United States Courthouse\n"
+ "Philadelphia, PA 19106-1729\n"
+ "\n"
+ "Lewis F. Powell, Jr. United States Courthouse Annex, Suite 501\n"
+ "1100 East Main Street\n"
+ "Richmond, VA 23219-3525\n"
+ "\n"
+ "F. Edward Hebert Federal Bldg\n"
+ "600 South Maestri Place\n"
+ "New Orleans, LA 70130\n"
+ "\n"
+ "Bob Casey United States Courthouse, 1st Floor\n"
+ "Houston, TX 77002-2600\n"
+ "\n"
+ "Potter Stewart United States Courthouse, Suite 540\n"
+ "100 East Fifth Street\n"
+ "Cincinnati, OH 45202\n"
+ "\n"
+ "2722 Everett McKinley Dirksen United States Courthouse\n"
+ "219 South Dearborn Street\n"
+ "Chicago, IL 60604\n";
+
+void test_filter( grep_filter grep,
+ const std::string& input,
+ const std::string& output );
+
+void grep_filter_test() // Runs test_filter() on the address list for each regex and option combination.
+{
+ regex match_us_courthouse("\\bUnited States Courthouse\\b");
+ regex match_state_and_zip("\\b[A-Z]{2}\\s+[0-9]{5}(-[0-9]{4})?\\b");
+ regex match_three_words("\\b\\w+\\s+\\w+\\s+\\w+\\b");
+ regex_constants::match_flag_type match_default =
+ regex_constants::match_default;
+
+ { // Default options: lines containing the courthouse phrase.
+ grep_filter grep(match_us_courthouse);
+ test_filter(grep, addresses, us_courthouse);
+ }
+
+ { // grep::invert: lines NOT containing the courthouse phrase.
+ grep_filter grep(match_us_courthouse, match_default, grep::invert);
+ test_filter(grep, addresses, us_courthouse_inv);
+ }
+
+ { // Default options: lines containing a state and zip code.
+ grep_filter grep(match_state_and_zip);
+ test_filter(grep, addresses, state_and_zip);
+ }
+
+ { // grep::invert: lines without a state and zip code.
+ grep_filter grep(match_state_and_zip, match_default, grep::invert);
+ test_filter(grep, addresses, state_and_zip_inv);
+ }
+
+ { // Default options (search): lines containing at least three words.
+ grep_filter grep(match_three_words);
+ test_filter(grep, addresses, three_words);
+ }
+
+ { // grep::whole_line: the whole line must be exactly three words.
+ grep_filter grep(match_three_words, match_default, grep::whole_line);
+ test_filter(grep, addresses, exactly_three_words);
+ }
+
+ { // Both options combined: lines that are not exactly three words.
+ int options = grep::whole_line | grep::invert;
+ grep_filter grep(match_three_words, match_default, options);
+ test_filter(grep, addresses, exactly_three_words_inv);
+ }
+}
+
+void test_filter( grep_filter grep, // Filter under test; used once as an input filter and once as an output filter.
+ const std::string& input, // Text fed through the filter.
+ const std::string& output ) // Text the filter is expected to produce.
+{
+ // Count lines in output
+ std::ptrdiff_t count = std::count(output.begin(), output.end(), '\n'); // Qualified: <cstddef> only guarantees the name in namespace std.
+
+ // Test as input filter
+ {
+ array_source src(input.data(), input.data() + input.size());
+ std::string dest;
+ io::copy(compose(grep, src), io::back_inserter(dest));
+ BOOST_CHECK(dest == output);
+ //BOOST_CHECK(grep.count() == count); // Disabled: this queries the original filter, not the copy used for I/O.
+ }
+
+ // Test as output filter
+ {
+ array_source src(input.data(), input.data() + input.size());
+ std::string dest;
+ io::copy(src, compose(grep, io::back_inserter(dest)));
+ BOOST_CHECK(dest == output);
+ //BOOST_CHECK(grep.count() == count); // Disabled: this queries the original filter, not the copy used for I/O.
+ }
+}
+
+test_suite* init_unit_test_suite(int, char* []) // Boost.Test entry point: builds the suite and registers grep_filter_test as its only case.
+{
+ test_suite* test = BOOST_TEST_SUITE("grep_filter test");
+ test->add(BOOST_TEST_CASE(&grep_filter_test));
+ return test;
+}


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk