Boost-Commit :

Date view	Thread view	Subject view	Author view

Subject: [Boost-commit] svn:boost r49598 - branches/release/tools/inspect
From: daniel_james_at_[hidden]
Date: 2008-11-05 08:47:38

Author: danieljames
Date: 2008-11-05 08:47:38 EST (Wed, 05 Nov 2008)
New Revision: 49598
URL: http://svn.boost.org/trac/boost/changeset/49598

Log:
Merge inspect from trunk, up to version 49597
Text files modified:
   branches/release/tools/inspect/inspect.cpp | 26 +++++--
   branches/release/tools/inspect/link_check.cpp | 128 ++++++++++++++++++++++++++++-----------
   branches/release/tools/inspect/link_check.hpp | 2
   branches/release/tools/inspect/path_name_check.cpp | 1
   4 files changed, 113 insertions(+), 44 deletions(-)

Modified: branches/release/tools/inspect/inspect.cpp
==============================================================================
--- branches/release/tools/inspect/inspect.cpp (original)
+++ branches/release/tools/inspect/inspect.cpp 2008-11-05 08:47:38 EST (Wed, 05 Nov 2008)
@@ -104,13 +104,7 @@

// get info (as a string) if inspect_root is svn working copy --------------//

- string info( const fs::path & inspect_root )
- {
- string rev;
- string repos;
- fs::path entries( inspect_root / ".svn" / "entries" );
- fs::ifstream entries_file( entries );
- if ( entries_file )
+ void extract_info( fs::ifstream & entries_file, string & rev, string & repos )
     {
       std::getline( entries_file, rev );
       std::getline( entries_file, rev );
@@ -118,6 +112,22 @@
       std::getline( entries_file, rev ); // revision number as a string
       std::getline( entries_file, repos ); // repository as a string
     }
+
+ string info( const fs::path & inspect_root )
+ {
+ string rev( "?" );
+ string repos( "unknown" );
+ fs::path entries( inspect_root / ".svn" / "entries" );
+ fs::ifstream entries_file( entries );
+ if ( entries_file )
+ extract_info( entries_file, rev, repos );
+ else
+ {
+ entries = inspect_root / ".." / "svn_info" / ".svn" / "entries";
+ fs::ifstream entries_file( entries );
+ if ( entries_file )
+ extract_info( entries_file, rev, repos );
+ }
     return repos + " at revision " + rev;
   }

@@ -146,6 +156,8 @@
       && leaf != ".htaccess"
       // ignore svn files:
       && leaf != ".svn"
+ // ignore OS X directory info files:
+ && leaf != ".DS_Store"
       ;
   }

Modified: branches/release/tools/inspect/link_check.cpp
==============================================================================
--- branches/release/tools/inspect/link_check.cpp (original)
+++ branches/release/tools/inspect/link_check.cpp 2008-11-05 08:47:38 EST (Wed, 05 Nov 2008)
@@ -15,10 +15,13 @@

namespace
{
- boost::regex url_regex(
+ boost::regex html_url_regex(
     "<\\s*[^>]*\\s+(?:HREF|SRC)" // HREF or SRC
     "\\s*=\\s*(['\"])(.*?)\\1",
     boost::regbase::normal | boost::regbase::icase);
+ boost::regex css_url_regex(
+ "(\\@import\\s*[\"']|url\\s*\\(\\s*[\"']?)([^\"')]*)",
+ boost::regbase::normal | boost::regbase::icase);

   // Regular expression for parsing URLS from:
   // http://tools.ietf.org/html/rfc3986#appendix-B
@@ -26,15 +29,36 @@
     "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?$",
     boost::regbase::normal);

- // Decode percent encoded characters and html escapsed ampersands,
- // returns an empty string if there's an error.
- // The urls should really be fully HTML decoded at the beginning.
- std::string decode_url(std::string const& url_path) {
+ // Decode html escapsed ampersands, returns an empty string if there's an error.
+ std::string decode_ampersands(std::string const& url_path) {
+ std::string::size_type pos = 0, next;
+ std::string result;
+ result.reserve(url_path.length());
+
+ while((next = url_path.find('&', pos)) != std::string::npos) {
+ result.append(url_path, pos, next - pos);
+ pos = next;
+ if(url_path.substr(pos, 5) == "&amp;") {
+ result += '&'; pos += 5;
+ }
+ else {
+ result += '&'; pos += 1;
+ }
+ break;
+ }
+
+ result.append(url_path, pos, url_path.length());
+
+ return result;
+ }
+
+ // Decode percent encoded characters, returns an empty string if there's an error.
+ std::string decode_percents(std::string const& url_path) {
     std::string::size_type pos = 0, next;
     std::string result;
     result.reserve(url_path.length());

- while((next = url_path.find_first_of("&%", pos)) != std::string::npos) {
+ while((next = url_path.find('%', pos)) != std::string::npos) {
       result.append(url_path, pos, next - pos);
       pos = next;
       switch(url_path[pos]) {
@@ -47,15 +71,6 @@
           pos = next + 3;
           break;
         }
- case '&': {
- if(url_path.substr(pos, 5) == "&amp;") {
- result += '&'; pos += 5;
- }
- else {
- result += '&'; pos += 1;
- }
- break;
- }
       }
     }

@@ -64,6 +79,10 @@
     return result;
   }

+ bool is_css(const path & p) {
+ return p.extension() == ".css";
+ }
+
} // unnamed namespace

namespace boost
@@ -77,6 +96,9 @@
      : m_broken_errors(0), m_unlinked_errors(0), m_invalid_errors(0),
        m_bookmark_errors(0)
    {
+ // HTML signatures are already registered by the base class,
+ // 'hypertext_inspector'
+ register_signature(".css");
    }

// inspect (all) -----------------------------------------------------------//
@@ -90,7 +112,7 @@
         m_paths[ relative_to( full_path, fs::initial_path() ) ] |= m_present;
     }

-// inspect ( .htm, .html ) -------------------------------------------------//
+// inspect ( .htm, .html, .shtml, .css ) -----------------------------------//

    void link_check::inspect(
       const string & library_name,
@@ -108,6 +130,9 @@
       boost::match_results< string::const_iterator > what;
       boost::match_flag_type flags = boost::match_default;

+ boost::regex const& url_regex =
+ is_css(full_path) ? css_url_regex : html_url_regex;
+
       while( boost::regex_search( start, end, what, url_regex, flags) )
       {
         // what[0] contains the whole string iterators.
@@ -127,11 +152,27 @@
       const path & source_path, bool no_link_errors )
         // precondition: source_path.is_complete()
     {
+ if(!no_link_errors && url.empty()) {
+ ++m_invalid_errors;
+ error( library_name, source_path, string(name()) + " empty URL." );
+ return;
+ }
+
+ // Decode ampersand encoded characters.
+ string decoded_url = is_css(source_path) ? url : decode_ampersands(url);
+ if(decoded_url.empty()) {
+ if(!no_link_errors) {
+ ++m_invalid_errors;
+ error( library_name, source_path, string(name()) + " invalid URL (invalid ampersand encodings): " + url );
+ }
+ return;
+ }
+
       boost::smatch m;
- if(!boost::regex_match(url, m, url_decompose_regex)) {
+ if(!boost::regex_match(decoded_url, m, url_decompose_regex)) {
         if(!no_link_errors) {
           ++m_invalid_errors;
- error( library_name, source_path, string(name()) + " invalid URL: " + url );
+ error( library_name, source_path, string(name()) + " invalid URL: " + decoded_url );
         }
         return;
       }
@@ -156,7 +197,7 @@
           if(!authority_matched) {
             if(!no_link_errors) {
               ++m_invalid_errors;
- error( library_name, source_path, string(name()) + " no hostname: " + url );
+ error( library_name, source_path, string(name()) + " no hostname: " + decoded_url );
             }
           }

@@ -165,13 +206,19 @@
         else if(scheme == "file") {
           if(!no_link_errors) {
             ++m_invalid_errors;
- error( library_name, source_path, string(name()) + " invalid URL (hardwired file): " + url );
+ error( library_name, source_path, string(name()) + " invalid URL (hardwired file): " + decoded_url );
+ }
+ }
+ else if(scheme == "mailto" || scheme == "ftp" || scheme == "news" || scheme == "javascript") {
+ if ( !no_link_errors && is_css(source_path) ) {
+ ++m_invalid_errors;
+ error( library_name, source_path, string(name()) + " invalid protocol for css: " + decoded_url );
           }
         }
- else if(!(scheme == "mailto" || scheme == "ftp" || scheme == "news" || scheme == "javascript")) {
+ else {
           if(!no_link_errors) {
             ++m_invalid_errors;
- error( library_name, source_path, string(name()) + " unknown protocol: " + url );
+ error( library_name, source_path, string(name()) + " unknown protocol: " + decoded_url );
           }
         }

@@ -182,16 +229,24 @@
       if(authority_matched) {
         if(!no_link_errors) {
           ++m_invalid_errors;
- error( library_name, source_path, string(name()) + " invalid URL (hostname without protocol): " + url );
+ error( library_name, source_path, string(name()) + " invalid URL (hostname without protocol): " + decoded_url );
         }
       }

       // Check the fragment identifier
- if(fragment_matched) {
- if ( !no_link_errors && fragment.find( '#' ) != string::npos )
- {
- ++m_bookmark_errors;
- error( library_name, source_path, string(name()) + " invalid bookmark: " + url );
+ if ( fragment_matched ) {
+ if ( is_css(source_path) ) {
+ if ( !no_link_errors ) {
+ ++m_invalid_errors;
+ error( library_name, source_path, string(name()) + " fragment link in CSS: " + decoded_url );
+ }
+ }
+ else {
+ if ( !no_link_errors && fragment.find( '#' ) != string::npos )
+ {
+ ++m_bookmark_errors;
+ error( library_name, source_path, string(name()) + " invalid bookmark: " + decoded_url );
+ }
         }

         // No more to do if it's just a fragment identifier
@@ -199,26 +254,26 @@
       }

       // Detect characters banned by RFC2396:
- if ( !no_link_errors && url.find_first_of( " <>\"{}|\\^[]'" ) != string::npos )
+ if ( !no_link_errors && decoded_url.find_first_of( " <>\"{}|\\^[]'" ) != string::npos )
       {
         ++m_invalid_errors;
- error( library_name, source_path, string(name()) + " invalid character in URL: " + url );
+ error( library_name, source_path, string(name()) + " invalid character in URL: " + decoded_url );
       }

       // Check that we actually have a path.
       if(url_path.empty()) {
         if(!no_link_errors) {
           ++m_invalid_errors;
- error( library_name, source_path, string(name()) + " invalid URL (empty path in relative url): " + url );
+ error( library_name, source_path, string(name()) + " invalid URL (empty path in relative url): " + decoded_url );
         }
       }

       // Decode percent and ampersand encoded characters.
- string decoded_path = decode_url(url_path);
+ string decoded_path = decode_percents(url_path);
       if(decoded_path.empty()) {
         if(!no_link_errors) {
           ++m_invalid_errors;
- error( library_name, source_path, string(name()) + " invalid URL (invalid character encodings): " + url );
+ error( library_name, source_path, string(name()) + " invalid URL (invalid character encodings): " + decoded_url );
         }
         return;
       }
@@ -234,7 +289,7 @@
       {
         if(!no_link_errors) {
           ++m_invalid_errors;
- error( library_name, source_path, string(name()) + " invalid URL (error resolving path): " + url );
+ error( library_name, source_path, string(name()) + " invalid URL (error resolving path): " + decoded_url );
         }
         return;
       }
@@ -256,7 +311,7 @@
       if ( !no_link_errors && (itr->second & m_present) == 0 )
       {
         ++m_broken_errors;
- error( library_name, source_path, string(name()) + " broken link: " + url );
+ error( library_name, source_path, string(name()) + " broken link: " + decoded_url );
       }
     }

@@ -271,7 +326,8 @@
        if ( (itr->second & m_linked_to) != m_linked_to
          && (itr->second & m_nounlinked_errors) != m_nounlinked_errors
          && (itr->first.rfind( ".html" ) == itr->first.size()-5
- || itr->first.rfind( ".htm" ) == itr->first.size()-4)
+ || itr->first.rfind( ".htm" ) == itr->first.size()-4
+ || itr->first.rfind( ".css" ) == itr->first.size()-4)
          // because they may be redirectors, it is OK if these are unlinked:
          && itr->first.rfind( "index.html" ) == string::npos
          && itr->first.rfind( "index.htm" ) == string::npos )

Modified: branches/release/tools/inspect/link_check.hpp
==============================================================================
--- branches/release/tools/inspect/link_check.hpp (original)
+++ branches/release/tools/inspect/link_check.hpp 2008-11-05 08:47:38 EST (Wed, 05 Nov 2008)
@@ -36,7 +36,7 @@
     public:

       link_check();
- virtual const char * name() const { return "*HTML*"; }
+ virtual const char * name() const { return "*LINK*"; }
       virtual const char * desc() const { return "invalid bookmarks, invalid urls, broken links, unlinked files"; }

       virtual void inspect(

Modified: branches/release/tools/inspect/path_name_check.cpp
==============================================================================
--- branches/release/tools/inspect/path_name_check.cpp (original)
+++ branches/release/tools/inspect/path_name_check.cpp 2008-11-05 08:47:38 EST (Wed, 05 Nov 2008)
@@ -15,6 +15,7 @@
#include <string>
#include <algorithm>
#include <cctype>
+#include <cstring>

using std::string;

Date view	Thread view	Subject view	Author view

Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk