|
Boost-Commit : |
Subject: [Boost-commit] svn:boost r59099 - in trunk/tools/inspect: . build/msvc
From: bdawes_at_[hidden]
Date: 2010-01-17 14:48:08
Author: bemandawes
Date: 2010-01-17 14:48:08 EST (Sun, 17 Jan 2010)
New Revision: 59099
URL: http://svn.boost.org/trac/boost/changeset/59099
Log:
Add checks for broken or duplicate bookmarks
Added:
trunk/tools/inspect/link_check_test.html (contents, props changed)
Text files modified:
trunk/tools/inspect/build/msvc/boost_inspect.vcproj | 28 ++++-----------
trunk/tools/inspect/build/msvc/readme.txt | 2
trunk/tools/inspect/link_check.cpp | 67 +++++++++++++++++++++++++++++++++++++++
trunk/tools/inspect/link_check.hpp | 10 ++++-
4 files changed, 83 insertions(+), 24 deletions(-)
Modified: trunk/tools/inspect/build/msvc/boost_inspect.vcproj
==============================================================================
--- trunk/tools/inspect/build/msvc/boost_inspect.vcproj (original)
+++ trunk/tools/inspect/build/msvc/boost_inspect.vcproj 2010-01-17 14:48:08 EST (Sun, 17 Jan 2010)
@@ -2,7 +2,7 @@
<VisualStudioProject
ProjectType="Visual C++"
Version="9.00"
- Name="boost_inspect"
+ Name="inspect"
ProjectGUID="{0EC8AC1C-6D1F-47FC-A06A-9CC3F924BD82}"
RootNamespace="boost_inspect"
Keyword="Win32Proj"
@@ -42,7 +42,7 @@
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..\..\..\.."
- PreprocessorDefinitions="BOOST_SYSTEM_NO_LIB;BOOST_FILESYSTEM_NO_LIB;WIN32;_DEBUG;_CONSOLE"
+ PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
ExceptionHandling="2"
BasicRuntimeChecks="3"
@@ -118,7 +118,7 @@
Optimization="2"
EnableIntrinsicFunctions="true"
AdditionalIncludeDirectories="..\..\..\.."
- PreprocessorDefinitions="BOOST_SYSTEM_NO_LIB;BOOST_FILESYSTEM_NO_LIB;WIN32;NDEBUG;_CONSOLE"
+ PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
ExceptionHandling="2"
RuntimeLibrary="2"
EnableFunctionLevelLinking="true"
@@ -177,6 +177,10 @@
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
+ RelativePath="..\..\apple_macro_check.cpp"
+ >
+ </File>
+ <File
RelativePath="..\..\ascii_check.cpp"
>
</File>
@@ -189,7 +193,7 @@
>
</File>
<File
- RelativePath="..\..\..\..\libs\system\src\error_code.cpp"
+ RelativePath="..\..\end_check.cpp"
>
</File>
<File
@@ -209,22 +213,10 @@
>
</File>
<File
- RelativePath="..\..\..\..\libs\filesystem\src\operations.cpp"
- >
- </File>
- <File
- RelativePath="..\..\..\..\libs\filesystem\src\path.cpp"
- >
- </File>
- <File
RelativePath="..\..\path_name_check.cpp"
>
</File>
<File
- RelativePath="..\..\..\..\libs\filesystem\src\portability.cpp"
- >
- </File>
- <File
RelativePath="..\..\tab_check.cpp"
>
</File>
@@ -232,10 +224,6 @@
RelativePath="..\..\unnamed_namespace_check.cpp"
>
</File>
- <File
- RelativePath="..\..\..\..\libs\filesystem\src\utf8_codecvt_facet.cpp"
- >
- </File>
</Filter>
<Filter
Name="Header Files"
Modified: trunk/tools/inspect/build/msvc/readme.txt
==============================================================================
--- trunk/tools/inspect/build/msvc/readme.txt (original)
+++ trunk/tools/inspect/build/msvc/readme.txt 2010-01-17 14:48:08 EST (Sun, 17 Jan 2010)
@@ -1,3 +1,3 @@
The provided Microsoft VC++ solution assumes the following has been run in the root directory"
- bjam --toolset=msvc-9.0express --build-type=complete --with-regex stage
\ No newline at end of file
+ bjam --toolset=msvc-9.0express --build-type=complete --with-filesystem,regex stage
\ No newline at end of file
Modified: trunk/tools/inspect/link_check.cpp
==============================================================================
--- trunk/tools/inspect/link_check.cpp (original)
+++ trunk/tools/inspect/link_check.cpp 2010-01-17 14:48:08 EST (Sun, 17 Jan 2010)
@@ -11,11 +11,17 @@
#include "boost/filesystem/operations.hpp"
#include <boost/algorithm/string/case_conv.hpp>
#include <cstdlib>
+#include <set>
+
+// #include <iostream>
namespace fs = boost::filesystem;
namespace
{
+ boost::regex html_bookmark_regex(
+ "<([^\\s<>]*)\\s*[^<>]*\\s+(?:NAME|ID)\\s*=\\s*(['\"])(.*?)\\2",
+ boost::regbase::normal | boost::regbase::icase);
boost::regex html_url_regex(
"<([^\\s<>]*)\\s*[^<>]*\\s+(?:HREF|SRC)" // HREF or SRC
"\\s*=\\s*(['\"])(.*?)\\2",
@@ -30,6 +36,10 @@
"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?$",
boost::regbase::normal);
+ typedef std::set<std::string> bookmark_set;
+ bookmark_set bookmarks;
+ bookmark_set bookmarks_lowercase; // duplicate check needs case insensitive
+
// Decode html escapsed ampersands, returns an empty string if there's an error.
std::string decode_ampersands(std::string const& url_path) {
std::string::size_type pos = 0, next;
@@ -95,7 +105,7 @@
link_check::link_check()
: m_broken_errors(0), m_unlinked_errors(0), m_invalid_errors(0),
- m_bookmark_errors(0)
+ m_bookmark_errors(0), m_duplicate_bookmark_errors(0)
{
// HTML signatures are already registered by the base class,
// 'hypertext_inspector'
@@ -126,6 +136,53 @@
bool no_link_errors =
(contents.find( "boostinspect:" "nolink" ) != string::npos);
+ // build bookmarks databases
+ bookmarks.clear();
+ bookmarks_lowercase.clear();
+ string::const_iterator a_start( contents.begin() );
+ string::const_iterator a_end( contents.end() );
+ boost::match_results< string::const_iterator > a_what;
+ boost::match_flag_type a_flags = boost::match_default;
+
+ if(!is_css(full_path))
+ {
+ while( boost::regex_search( a_start, a_end, a_what, html_bookmark_regex, a_flags) )
+ {
+ // a_what[0] contains the whole string iterators.
+ // a_what[1] contains the tag iterators.
+ // a_what[3] contains the bookmark iterators.
+
+ string tag( a_what[1].first, a_what[1].second );
+ boost::algorithm::to_lower(tag);
+
+ if ( tag != "meta" )
+ {
+ string bookmark( a_what[3].first, a_what[3].second );
+ bookmarks.insert( bookmark );
+// std::cout << "******************* " << bookmark << '\n';
+
+ // w3.org recommends case-insensitive checking for duplicate bookmarks
+ // since some browsers do a case-insensitive match.
+ string bookmark_lowercase( bookmark );
+ boost::algorithm::to_lower(bookmark_lowercase);
+
+ std::pair<bookmark_set::iterator, bool> result
+ = bookmarks_lowercase.insert( bookmark_lowercase );
+ if (!result.second)
+ {
+ ++m_duplicate_bookmark_errors;
+ error( library_name, full_path, string(name()) +
+ " duplicate bookmark: " + bookmark );
+ }
+ }
+
+ a_start = a_what[0].second; // update search position
+ a_flags |= boost::match_prev_avail; // update flags
+ a_flags |= boost::match_not_bob;
+ }
+ }
+
+ // process urls
string::const_iterator start( contents.begin() );
string::const_iterator end( contents.end() );
boost::match_results< string::const_iterator > what;
@@ -275,6 +332,14 @@
++m_bookmark_errors;
error( library_name, source_path, string(name()) + " invalid bookmark: " + decoded_url );
}
+ if ( !no_link_errors && url_path.empty()
+ // w3.org recommends case-sensitive broken bookmark checking
+ // since some browsers do a case-sensitive match.
+ && bookmarks.find(fragment) == bookmarks.end() )
+ {
+ ++m_bookmark_errors;
+ error( library_name, source_path, string(name()) + " unknown bookmark: " + decoded_url );
+ }
}
// No more to do if it's just a fragment identifier
Modified: trunk/tools/inspect/link_check.hpp
==============================================================================
--- trunk/tools/inspect/link_check.hpp (original)
+++ trunk/tools/inspect/link_check.hpp 2010-01-17 14:48:08 EST (Sun, 17 Jan 2010)
@@ -27,6 +27,7 @@
long m_unlinked_errors;
long m_invalid_errors;
long m_bookmark_errors;
+ long m_duplicate_bookmark_errors;
typedef std::map< string, int > m_path_map;
m_path_map m_paths; // first() is relative initial_path()
@@ -38,7 +39,9 @@
link_check();
virtual const char * name() const { return "*LINK*"; }
- virtual const char * desc() const { return "invalid bookmarks, invalid urls, broken links, unlinked files"; }
+ virtual const char * desc() const
+ { return "invalid bookmarks, duplicate bookmarks,"
+ " invalid urls, broken links, unlinked files"; }
virtual void inspect(
const std::string & library_name,
@@ -53,7 +56,10 @@
virtual ~link_check()
{
- std::cout << " " << m_bookmark_errors << " bookmarks with invalid characters" << line_break();
+ std::cout << " " << m_bookmark_errors
+ << " bookmarks with invalid characters" << line_break();
+ std::cout << " " << m_duplicate_bookmark_errors
+ << " duplicate bookmarks" << line_break();
std::cout << " " << m_invalid_errors << " invalid urls" << line_break();
std::cout << " " << m_broken_errors << " broken links" << line_break();
std::cout << " " << m_unlinked_errors << " unlinked files" << line_break();
Added: trunk/tools/inspect/link_check_test.html
==============================================================================
--- (empty file)
+++ trunk/tools/inspect/link_check_test.html 2010-01-17 14:48:08 EST (Sun, 17 Jan 2010)
@@ -0,0 +1,24 @@
+<html>
+
+<head>
+<meta http-equiv="Content-Language" content="en-us">
+<meta name="GENERATOR" content="Microsoft FrontPage 5.0">
+<meta name="ProgId" content="FrontPage.Editor.Document">
+<meta http-equiv="Content-Type" content="text/html; charset=windows-1252">
+<title>valid link</title>
+</head>
+
+<body>
+
+<p>valid bookmark link</p>
+<p>valid relative link</p>
+<p>broken relative link with bookmark</p>
+<p>broken bookmark link</p>
+<p><a name="link-target">bookmark</a></p>
+<p><a name="second-target">second bookmark</a></p>
+<p><a name="SECOND-TARGET">duplicate second bookmark</a></p>
+<p> </p>
+
+</body>
+
+</html>
\ No newline at end of file
Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk