Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r52664 - trunk/tools/inspect
From: daniel_james_at_[hidden]
Date: 2009-04-29 17:12:32


Author: danieljames
Date: 2009-04-29 17:12:32 EDT (Wed, 29 Apr 2009)
New Revision: 52664
URL: http://svn.boost.org/trac/boost/changeset/52664

Log:
Check for external links.
Text files modified:
   trunk/tools/inspect/link_check.cpp | 32 ++++++++++++++++++++++++--------
   trunk/tools/inspect/link_check.hpp | 3 ++-
   2 files changed, 26 insertions(+), 9 deletions(-)

Modified: trunk/tools/inspect/link_check.cpp
==============================================================================
--- trunk/tools/inspect/link_check.cpp (original)
+++ trunk/tools/inspect/link_check.cpp 2009-04-29 17:12:32 EDT (Wed, 29 Apr 2009)
@@ -9,6 +9,7 @@
 #include "link_check.hpp"
 #include "boost/regex.hpp"
 #include "boost/filesystem/operations.hpp"
+#include <boost/algorithm/string/case_conv.hpp>
 #include <cstdlib>
 
 namespace fs = boost::filesystem;
@@ -16,8 +17,8 @@
 namespace
 {
   boost::regex html_url_regex(
- "<\\s*[^>]*\\s+(?:HREF|SRC)" // HREF or SRC
- "\\s*=\\s*(['\"])(.*?)\\1",
+ "<([^\\s<>]*)\\s*[^<>]*\\s+(?:HREF|SRC)" // HREF or SRC
+ "\\s*=\\s*(['\"])(.*?)\\2",
     boost::regbase::normal | boost::regbase::icase);
   boost::regex css_url_regex(
     "(\\@import\\s*[\"']|url\\s*\\(\\s*[\"']?)([^\"')]*)",
@@ -130,14 +131,21 @@
       boost::match_results< string::const_iterator > what;
       boost::match_flag_type flags = boost::match_default;
 
- if(is_css(full_path))
+ if(!is_css(full_path))
       {
         while( boost::regex_search( start, end, what, html_url_regex, flags) )
         {
           // what[0] contains the whole string iterators.
- // what[2] contains the URL iterators.
- do_url( string( what[2].first, what[2].second ),
- library_name, full_path, no_link_errors );
+ // what[1] contains the element type iterators.
+ // what[3] contains the URL iterators.
+
+ string type( what[1].first, what[1].second );
+ boost::algorithm::to_lower(type);
+
+ // TODO: Complain if 'link' tags use external stylesheets.
+ do_url( string( what[3].first, what[3].second ),
+ library_name, full_path, no_link_errors,
+ type == "a" || type == "link" );
 
           start = what[0].second; // update search position
           flags |= boost::match_prev_avail; // update flags
@@ -150,7 +158,7 @@
         // what[0] contains the whole string iterators.
         // what[2] contains the URL iterators.
         do_url( string( what[2].first, what[2].second ),
- library_name, full_path, no_link_errors );
+ library_name, full_path, no_link_errors, false );
 
         start = what[0].second; // update search position
         flags |= boost::match_prev_avail; // update flags
@@ -161,7 +169,7 @@
 // do_url ------------------------------------------------------------------//
 
     void link_check::do_url( const string & url, const string & library_name,
- const path & source_path, bool no_link_errors )
+ const path & source_path, bool no_link_errors, bool allow_external_links )
         // precondition: source_path.is_complete()
     {
       if(!no_link_errors && url.empty()) {
@@ -200,6 +208,14 @@
         //query(m[7]),
         fragment(m[9]);
 
+ // Check for external links
+ if(!allow_external_links && (authority_matched || scheme_matched)) {
+ if(!no_link_errors) {
+ ++m_invalid_errors;
+ error( library_name, source_path, string(name()) + " invalid external link: " + decoded_url );
+ }
+ }
+
       // Protocol checks
       if(scheme_matched) {
         if(scheme == "http" || scheme == "https") {

Modified: trunk/tools/inspect/link_check.hpp
==============================================================================
--- trunk/tools/inspect/link_check.hpp (original)
+++ trunk/tools/inspect/link_check.hpp 2009-04-29 17:12:32 EDT (Wed, 29 Apr 2009)
@@ -32,7 +32,8 @@
       m_path_map m_paths; // first() is relative initial_path()
 
       void do_url( const string & url, const string & library_name,
- const path & full_source_path, bool no_link_errors );
+ const path & full_source_path, bool no_link_errors,
+ bool allow_external_links );
     public:
 
       link_check();


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk