|
Boost-Commit : |
Subject: [Boost-commit] svn:boost r49598 - branches/release/tools/inspect
From: daniel_james_at_[hidden]
Date: 2008-11-05 08:47:38
Author: danieljames
Date: 2008-11-05 08:47:38 EST (Wed, 05 Nov 2008)
New Revision: 49598
URL: http://svn.boost.org/trac/boost/changeset/49598
Log:
Merge inspect from trunk, up to version 49597
Text files modified:
branches/release/tools/inspect/inspect.cpp | 26 +++++--
branches/release/tools/inspect/link_check.cpp | 128 ++++++++++++++++++++++++++++-----------
branches/release/tools/inspect/link_check.hpp | 2
branches/release/tools/inspect/path_name_check.cpp | 1
4 files changed, 113 insertions(+), 44 deletions(-)
Modified: branches/release/tools/inspect/inspect.cpp
==============================================================================
--- branches/release/tools/inspect/inspect.cpp (original)
+++ branches/release/tools/inspect/inspect.cpp 2008-11-05 08:47:38 EST (Wed, 05 Nov 2008)
@@ -104,13 +104,7 @@
// get info (as a string) if inspect_root is svn working copy --------------//
- string info( const fs::path & inspect_root )
- {
- string rev;
- string repos;
- fs::path entries( inspect_root / ".svn" / "entries" );
- fs::ifstream entries_file( entries );
- if ( entries_file )
+ void extract_info( fs::ifstream & entries_file, string & rev, string & repos )
{
std::getline( entries_file, rev );
std::getline( entries_file, rev );
@@ -118,6 +112,22 @@
std::getline( entries_file, rev ); // revision number as a string
std::getline( entries_file, repos ); // repository as a string
}
+
+ string info( const fs::path & inspect_root )
+ {
+ string rev( "?" );
+ string repos( "unknown" );
+ fs::path entries( inspect_root / ".svn" / "entries" );
+ fs::ifstream entries_file( entries );
+ if ( entries_file )
+ extract_info( entries_file, rev, repos );
+ else
+ {
+ entries = inspect_root / ".." / "svn_info" / ".svn" / "entries";
+ fs::ifstream entries_file( entries );
+ if ( entries_file )
+ extract_info( entries_file, rev, repos );
+ }
return repos + " at revision " + rev;
}
@@ -146,6 +156,8 @@
&& leaf != ".htaccess"
// ignore svn files:
&& leaf != ".svn"
+ // ignore OS X directory info files:
+ && leaf != ".DS_Store"
;
}
Modified: branches/release/tools/inspect/link_check.cpp
==============================================================================
--- branches/release/tools/inspect/link_check.cpp (original)
+++ branches/release/tools/inspect/link_check.cpp 2008-11-05 08:47:38 EST (Wed, 05 Nov 2008)
@@ -15,10 +15,13 @@
namespace
{
- boost::regex url_regex(
+ boost::regex html_url_regex(
"<\\s*[^>]*\\s+(?:HREF|SRC)" // HREF or SRC
"\\s*=\\s*(['\"])(.*?)\\1",
boost::regbase::normal | boost::regbase::icase);
+ boost::regex css_url_regex(
+ "(\\@import\\s*[\"']|url\\s*\\(\\s*[\"']?)([^\"')]*)",
+ boost::regbase::normal | boost::regbase::icase);
// Regular expression for parsing URLS from:
// http://tools.ietf.org/html/rfc3986#appendix-B
@@ -26,15 +29,36 @@
"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?$",
boost::regbase::normal);
- // Decode percent encoded characters and html escapsed ampersands,
- // returns an empty string if there's an error.
- // The urls should really be fully HTML decoded at the beginning.
- std::string decode_url(std::string const& url_path) {
+ // Decode html escapsed ampersands, returns an empty string if there's an error.
+ std::string decode_ampersands(std::string const& url_path) {
+ std::string::size_type pos = 0, next;
+ std::string result;
+ result.reserve(url_path.length());
+
+ while((next = url_path.find('&', pos)) != std::string::npos) {
+ result.append(url_path, pos, next - pos);
+ pos = next;
+ if(url_path.substr(pos, 5) == "&amp;") {
+ result += '&'; pos += 5;
+ }
+ else {
+ result += '&'; pos += 1;
+ }
+ break;
+ }
+
+ result.append(url_path, pos, url_path.length());
+
+ return result;
+ }
+
+ // Decode percent encoded characters, returns an empty string if there's an error.
+ std::string decode_percents(std::string const& url_path) {
std::string::size_type pos = 0, next;
std::string result;
result.reserve(url_path.length());
- while((next = url_path.find_first_of("&%", pos)) != std::string::npos) {
+ while((next = url_path.find('%', pos)) != std::string::npos) {
result.append(url_path, pos, next - pos);
pos = next;
switch(url_path[pos]) {
@@ -47,15 +71,6 @@
pos = next + 3;
break;
}
- case '&': {
- if(url_path.substr(pos, 5) == "&amp;") {
- result += '&'; pos += 5;
- }
- else {
- result += '&'; pos += 1;
- }
- break;
- }
}
}
@@ -64,6 +79,10 @@
return result;
}
+ bool is_css(const path & p) {
+ return p.extension() == ".css";
+ }
+
} // unnamed namespace
namespace boost
@@ -77,6 +96,9 @@
: m_broken_errors(0), m_unlinked_errors(0), m_invalid_errors(0),
m_bookmark_errors(0)
{
+ // HTML signatures are already registered by the base class,
+ // 'hypertext_inspector'
+ register_signature(".css");
}
// inspect (all) -----------------------------------------------------------//
@@ -90,7 +112,7 @@
m_paths[ relative_to( full_path, fs::initial_path() ) ] |= m_present;
}
-// inspect ( .htm, .html ) -------------------------------------------------//
+// inspect ( .htm, .html, .shtml, .css ) -----------------------------------//
void link_check::inspect(
const string & library_name,
@@ -108,6 +130,9 @@
boost::match_results< string::const_iterator > what;
boost::match_flag_type flags = boost::match_default;
+ boost::regex const& url_regex =
+ is_css(full_path) ? css_url_regex : html_url_regex;
+
while( boost::regex_search( start, end, what, url_regex, flags) )
{
// what[0] contains the whole string iterators.
@@ -127,11 +152,27 @@
const path & source_path, bool no_link_errors )
// precondition: source_path.is_complete()
{
+ if(!no_link_errors && url.empty()) {
+ ++m_invalid_errors;
+ error( library_name, source_path, string(name()) + " empty URL." );
+ return;
+ }
+
+ // Decode ampersand encoded characters.
+ string decoded_url = is_css(source_path) ? url : decode_ampersands(url);
+ if(decoded_url.empty()) {
+ if(!no_link_errors) {
+ ++m_invalid_errors;
+ error( library_name, source_path, string(name()) + " invalid URL (invalid ampersand encodings): " + url );
+ }
+ return;
+ }
+
boost::smatch m;
- if(!boost::regex_match(url, m, url_decompose_regex)) {
+ if(!boost::regex_match(decoded_url, m, url_decompose_regex)) {
if(!no_link_errors) {
++m_invalid_errors;
- error( library_name, source_path, string(name()) + " invalid URL: " + url );
+ error( library_name, source_path, string(name()) + " invalid URL: " + decoded_url );
}
return;
}
@@ -156,7 +197,7 @@
if(!authority_matched) {
if(!no_link_errors) {
++m_invalid_errors;
- error( library_name, source_path, string(name()) + " no hostname: " + url );
+ error( library_name, source_path, string(name()) + " no hostname: " + decoded_url );
}
}
@@ -165,13 +206,19 @@
else if(scheme == "file") {
if(!no_link_errors) {
++m_invalid_errors;
- error( library_name, source_path, string(name()) + " invalid URL (hardwired file): " + url );
+ error( library_name, source_path, string(name()) + " invalid URL (hardwired file): " + decoded_url );
+ }
+ }
+ else if(scheme == "mailto" || scheme == "ftp" || scheme == "news" || scheme == "javascript") {
+ if ( !no_link_errors && is_css(source_path) ) {
+ ++m_invalid_errors;
+ error( library_name, source_path, string(name()) + " invalid protocol for css: " + decoded_url );
}
}
- else if(!(scheme == "mailto" || scheme == "ftp" || scheme == "news" || scheme == "javascript")) {
+ else {
if(!no_link_errors) {
++m_invalid_errors;
- error( library_name, source_path, string(name()) + " unknown protocol: " + url );
+ error( library_name, source_path, string(name()) + " unknown protocol: " + decoded_url );
}
}
@@ -182,16 +229,24 @@
if(authority_matched) {
if(!no_link_errors) {
++m_invalid_errors;
- error( library_name, source_path, string(name()) + " invalid URL (hostname without protocol): " + url );
+ error( library_name, source_path, string(name()) + " invalid URL (hostname without protocol): " + decoded_url );
}
}
// Check the fragment identifier
- if(fragment_matched) {
- if ( !no_link_errors && fragment.find( '#' ) != string::npos )
- {
- ++m_bookmark_errors;
- error( library_name, source_path, string(name()) + " invalid bookmark: " + url );
+ if ( fragment_matched ) {
+ if ( is_css(source_path) ) {
+ if ( !no_link_errors ) {
+ ++m_invalid_errors;
+ error( library_name, source_path, string(name()) + " fragment link in CSS: " + decoded_url );
+ }
+ }
+ else {
+ if ( !no_link_errors && fragment.find( '#' ) != string::npos )
+ {
+ ++m_bookmark_errors;
+ error( library_name, source_path, string(name()) + " invalid bookmark: " + decoded_url );
+ }
}
// No more to do if it's just a fragment identifier
@@ -199,26 +254,26 @@
}
// Detect characters banned by RFC2396:
- if ( !no_link_errors && url.find_first_of( " <>\"{}|\\^[]'" ) != string::npos )
+ if ( !no_link_errors && decoded_url.find_first_of( " <>\"{}|\\^[]'" ) != string::npos )
{
++m_invalid_errors;
- error( library_name, source_path, string(name()) + " invalid character in URL: " + url );
+ error( library_name, source_path, string(name()) + " invalid character in URL: " + decoded_url );
}
// Check that we actually have a path.
if(url_path.empty()) {
if(!no_link_errors) {
++m_invalid_errors;
- error( library_name, source_path, string(name()) + " invalid URL (empty path in relative url): " + url );
+ error( library_name, source_path, string(name()) + " invalid URL (empty path in relative url): " + decoded_url );
}
}
// Decode percent and ampersand encoded characters.
- string decoded_path = decode_url(url_path);
+ string decoded_path = decode_percents(url_path);
if(decoded_path.empty()) {
if(!no_link_errors) {
++m_invalid_errors;
- error( library_name, source_path, string(name()) + " invalid URL (invalid character encodings): " + url );
+ error( library_name, source_path, string(name()) + " invalid URL (invalid character encodings): " + decoded_url );
}
return;
}
@@ -234,7 +289,7 @@
{
if(!no_link_errors) {
++m_invalid_errors;
- error( library_name, source_path, string(name()) + " invalid URL (error resolving path): " + url );
+ error( library_name, source_path, string(name()) + " invalid URL (error resolving path): " + decoded_url );
}
return;
}
@@ -256,7 +311,7 @@
if ( !no_link_errors && (itr->second & m_present) == 0 )
{
++m_broken_errors;
- error( library_name, source_path, string(name()) + " broken link: " + url );
+ error( library_name, source_path, string(name()) + " broken link: " + decoded_url );
}
}
@@ -271,7 +326,8 @@
if ( (itr->second & m_linked_to) != m_linked_to
&& (itr->second & m_nounlinked_errors) != m_nounlinked_errors
&& (itr->first.rfind( ".html" ) == itr->first.size()-5
- || itr->first.rfind( ".htm" ) == itr->first.size()-4)
+ || itr->first.rfind( ".htm" ) == itr->first.size()-4
+ || itr->first.rfind( ".css" ) == itr->first.size()-4)
// because they may be redirectors, it is OK if these are unlinked:
&& itr->first.rfind( "index.html" ) == string::npos
&& itr->first.rfind( "index.htm" ) == string::npos )
Modified: branches/release/tools/inspect/link_check.hpp
==============================================================================
--- branches/release/tools/inspect/link_check.hpp (original)
+++ branches/release/tools/inspect/link_check.hpp 2008-11-05 08:47:38 EST (Wed, 05 Nov 2008)
@@ -36,7 +36,7 @@
public:
link_check();
- virtual const char * name() const { return "*HTML*"; }
+ virtual const char * name() const { return "*LINK*"; }
virtual const char * desc() const { return "invalid bookmarks, invalid urls, broken links, unlinked files"; }
virtual void inspect(
Modified: branches/release/tools/inspect/path_name_check.cpp
==============================================================================
--- branches/release/tools/inspect/path_name_check.cpp (original)
+++ branches/release/tools/inspect/path_name_check.cpp 2008-11-05 08:47:38 EST (Wed, 05 Nov 2008)
@@ -15,6 +15,7 @@
#include <string>
#include <algorithm>
#include <cctype>
+#include <cstring>
using std::string;
Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk