Boost-Commit :
Subject: [Boost-commit] svn:boost r52664 - trunk/tools/inspect
From: daniel_james_at_[hidden]
Date: 2009-04-29 17:12:32
Author: danieljames
Date: 2009-04-29 17:12:32 EDT (Wed, 29 Apr 2009)
New Revision: 52664
URL: http://svn.boost.org/trac/boost/changeset/52664
Log:
Check for external links.
Text files modified:
trunk/tools/inspect/link_check.cpp | 32 ++++++++++++++++++++++++--------
trunk/tools/inspect/link_check.hpp | 3 ++-
2 files changed, 26 insertions(+), 9 deletions(-)
Modified: trunk/tools/inspect/link_check.cpp
==============================================================================
--- trunk/tools/inspect/link_check.cpp (original)
+++ trunk/tools/inspect/link_check.cpp 2009-04-29 17:12:32 EDT (Wed, 29 Apr 2009)
@@ -9,6 +9,7 @@
#include "link_check.hpp"
#include "boost/regex.hpp"
#include "boost/filesystem/operations.hpp"
+#include <boost/algorithm/string/case_conv.hpp>
#include <cstdlib>
namespace fs = boost::filesystem;
@@ -16,8 +17,8 @@
namespace
{
boost::regex html_url_regex(
- "<\\s*[^>]*\\s+(?:HREF|SRC)" // HREF or SRC
- "\\s*=\\s*(['\"])(.*?)\\1",
+ "<([^\\s<>]*)\\s*[^<>]*\\s+(?:HREF|SRC)" // HREF or SRC
+ "\\s*=\\s*(['\"])(.*?)\\2",
boost::regbase::normal | boost::regbase::icase);
boost::regex css_url_regex(
"(\\@import\\s*[\"']|url\\s*\\(\\s*[\"']?)([^\"')]*)",
@@ -130,14 +131,21 @@
boost::match_results< string::const_iterator > what;
boost::match_flag_type flags = boost::match_default;
- if(is_css(full_path))
+ if(!is_css(full_path))
{
while( boost::regex_search( start, end, what, html_url_regex, flags) )
{
// what[0] contains the whole string iterators.
- // what[2] contains the URL iterators.
- do_url( string( what[2].first, what[2].second ),
- library_name, full_path, no_link_errors );
+ // what[1] contains the element type iterators.
+ // what[3] contains the URL iterators.
+
+ string type( what[1].first, what[1].second );
+ boost::algorithm::to_lower(type);
+
+ // TODO: Complain if 'link' tags use external stylesheets.
+ do_url( string( what[3].first, what[3].second ),
+ library_name, full_path, no_link_errors,
+ type == "a" || type == "link" );
start = what[0].second; // update search position
flags |= boost::match_prev_avail; // update flags
@@ -150,7 +158,7 @@
// what[0] contains the whole string iterators.
// what[2] contains the URL iterators.
do_url( string( what[2].first, what[2].second ),
- library_name, full_path, no_link_errors );
+ library_name, full_path, no_link_errors, false );
start = what[0].second; // update search position
flags |= boost::match_prev_avail; // update flags
@@ -161,7 +169,7 @@
// do_url ------------------------------------------------------------------//
void link_check::do_url( const string & url, const string & library_name,
- const path & source_path, bool no_link_errors )
+ const path & source_path, bool no_link_errors, bool allow_external_links )
// precondition: source_path.is_complete()
{
if(!no_link_errors && url.empty()) {
@@ -200,6 +208,14 @@
//query(m[7]),
fragment(m[9]);
+ // Check for external links
+ if(!allow_external_links && (authority_matched || scheme_matched)) {
+ if(!no_link_errors) {
+ ++m_invalid_errors;
+ error( library_name, source_path, string(name()) + " invalid external link: " + decoded_url );
+ }
+ }
+
// Protocol checks
if(scheme_matched) {
if(scheme == "http" || scheme == "https") {
Modified: trunk/tools/inspect/link_check.hpp
==============================================================================
--- trunk/tools/inspect/link_check.hpp (original)
+++ trunk/tools/inspect/link_check.hpp 2009-04-29 17:12:32 EDT (Wed, 29 Apr 2009)
@@ -32,7 +32,8 @@
m_path_map m_paths; // first() is relative initial_path()
void do_url( const string & url, const string & library_name,
- const path & full_source_path, bool no_link_errors );
+ const path & full_source_path, bool no_link_errors,
+ bool allow_external_links );
public:
link_check();
Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk