Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r59099 - in trunk/tools/inspect: . build/msvc
From: bdawes_at_[hidden]
Date: 2010-01-17 14:48:08


Author: bemandawes
Date: 2010-01-17 14:48:08 EST (Sun, 17 Jan 2010)
New Revision: 59099
URL: http://svn.boost.org/trac/boost/changeset/59099

Log:
Add checks for broken or duplicate bookmarks
Added:
   trunk/tools/inspect/link_check_test.html (contents, props changed)
Text files modified:
   trunk/tools/inspect/build/msvc/boost_inspect.vcproj | 28 ++++-----------
   trunk/tools/inspect/build/msvc/readme.txt | 2
   trunk/tools/inspect/link_check.cpp | 67 +++++++++++++++++++++++++++++++++++++++
   trunk/tools/inspect/link_check.hpp | 10 ++++-
   4 files changed, 83 insertions(+), 24 deletions(-)

Modified: trunk/tools/inspect/build/msvc/boost_inspect.vcproj
==============================================================================
--- trunk/tools/inspect/build/msvc/boost_inspect.vcproj (original)
+++ trunk/tools/inspect/build/msvc/boost_inspect.vcproj 2010-01-17 14:48:08 EST (Sun, 17 Jan 2010)
@@ -2,7 +2,7 @@
 <VisualStudioProject
         ProjectType="Visual C++"
         Version="9.00"
- Name="boost_inspect"
+ Name="inspect"
         ProjectGUID="{0EC8AC1C-6D1F-47FC-A06A-9CC3F924BD82}"
         RootNamespace="boost_inspect"
         Keyword="Win32Proj"
@@ -42,7 +42,7 @@
                                 Name="VCCLCompilerTool"
                                 Optimization="0"
                                 AdditionalIncludeDirectories="..\..\..\.."
- PreprocessorDefinitions="BOOST_SYSTEM_NO_LIB;BOOST_FILESYSTEM_NO_LIB;WIN32;_DEBUG;_CONSOLE"
+ PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
                                 MinimalRebuild="true"
                                 ExceptionHandling="2"
                                 BasicRuntimeChecks="3"
@@ -118,7 +118,7 @@
                                 Optimization="2"
                                 EnableIntrinsicFunctions="true"
                                 AdditionalIncludeDirectories="..\..\..\.."
- PreprocessorDefinitions="BOOST_SYSTEM_NO_LIB;BOOST_FILESYSTEM_NO_LIB;WIN32;NDEBUG;_CONSOLE"
+ PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
                                 ExceptionHandling="2"
                                 RuntimeLibrary="2"
                                 EnableFunctionLevelLinking="true"
@@ -177,6 +177,10 @@
                         UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
                         <File
+ RelativePath="..\..\apple_macro_check.cpp"
+ >
+ </File>
+ <File
                                 RelativePath="..\..\ascii_check.cpp"
>
                         </File>
@@ -189,7 +193,7 @@
>
                         </File>
                         <File
- RelativePath="..\..\..\..\libs\system\src\error_code.cpp"
+ RelativePath="..\..\end_check.cpp"
>
                         </File>
                         <File
@@ -209,22 +213,10 @@
>
                         </File>
                         <File
- RelativePath="..\..\..\..\libs\filesystem\src\operations.cpp"
- >
- </File>
- <File
- RelativePath="..\..\..\..\libs\filesystem\src\path.cpp"
- >
- </File>
- <File
                                 RelativePath="..\..\path_name_check.cpp"
>
                         </File>
                         <File
- RelativePath="..\..\..\..\libs\filesystem\src\portability.cpp"
- >
- </File>
- <File
                                 RelativePath="..\..\tab_check.cpp"
>
                         </File>
@@ -232,10 +224,6 @@
                                 RelativePath="..\..\unnamed_namespace_check.cpp"
>
                         </File>
- <File
- RelativePath="..\..\..\..\libs\filesystem\src\utf8_codecvt_facet.cpp"
- >
- </File>
                 </Filter>
                 <Filter
                         Name="Header Files"

Modified: trunk/tools/inspect/build/msvc/readme.txt
==============================================================================
--- trunk/tools/inspect/build/msvc/readme.txt (original)
+++ trunk/tools/inspect/build/msvc/readme.txt 2010-01-17 14:48:08 EST (Sun, 17 Jan 2010)
@@ -1,3 +1,3 @@
 The provided Microsoft VC++ solution assumes the following has been run in the root directory:
 
- bjam --toolset=msvc-9.0express --build-type=complete --with-regex stage
\ No newline at end of file
+ bjam --toolset=msvc-9.0express --build-type=complete --with-filesystem,regex stage
\ No newline at end of file

Modified: trunk/tools/inspect/link_check.cpp
==============================================================================
--- trunk/tools/inspect/link_check.cpp (original)
+++ trunk/tools/inspect/link_check.cpp 2010-01-17 14:48:08 EST (Sun, 17 Jan 2010)
@@ -11,11 +11,17 @@
 #include "boost/filesystem/operations.hpp"
 #include <boost/algorithm/string/case_conv.hpp>
 #include <cstdlib>
+#include <set>
+
+// #include <iostream>
 
 namespace fs = boost::filesystem;
 
 namespace
 {
+ boost::regex html_bookmark_regex(
+ "<([^\\s<>]*)\\s*[^<>]*\\s+(?:NAME|ID)\\s*=\\s*(['\"])(.*?)\\2",
+ boost::regbase::normal | boost::regbase::icase);
   boost::regex html_url_regex(
     "<([^\\s<>]*)\\s*[^<>]*\\s+(?:HREF|SRC)" // HREF or SRC
     "\\s*=\\s*(['\"])(.*?)\\2",
@@ -30,6 +36,10 @@
     "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?$",
     boost::regbase::normal);
 
+ typedef std::set<std::string> bookmark_set;
+ bookmark_set bookmarks;
+ bookmark_set bookmarks_lowercase; // duplicate check needs case insensitive
+
   // Decode HTML-escaped ampersands; returns an empty string if there's an error.
   std::string decode_ampersands(std::string const& url_path) {
     std::string::size_type pos = 0, next;
@@ -95,7 +105,7 @@
 
    link_check::link_check()
      : m_broken_errors(0), m_unlinked_errors(0), m_invalid_errors(0),
- m_bookmark_errors(0)
+ m_bookmark_errors(0), m_duplicate_bookmark_errors(0)
    {
        // HTML signatures are already registered by the base class,
        // 'hypertext_inspector'
@@ -126,6 +136,53 @@
       bool no_link_errors =
           (contents.find( "boostinspect:" "nolink" ) != string::npos);
 
+ // build bookmarks databases
+ bookmarks.clear();
+ bookmarks_lowercase.clear();
+ string::const_iterator a_start( contents.begin() );
+ string::const_iterator a_end( contents.end() );
+ boost::match_results< string::const_iterator > a_what;
+ boost::match_flag_type a_flags = boost::match_default;
+
+ if(!is_css(full_path))
+ {
+ while( boost::regex_search( a_start, a_end, a_what, html_bookmark_regex, a_flags) )
+ {
+ // a_what[0] spans the whole match.
+ // a_what[1] spans the tag name.
+ // a_what[3] spans the bookmark (the NAME or ID attribute value).
+
+ string tag( a_what[1].first, a_what[1].second );
+ boost::algorithm::to_lower(tag);
+
+ if ( tag != "meta" )
+ {
+ string bookmark( a_what[3].first, a_what[3].second );
+ bookmarks.insert( bookmark );
+// std::cout << "******************* " << bookmark << '\n';
+
+ // w3.org recommends case-insensitive checking for duplicate bookmarks
+ // since some browsers do a case-insensitive match.
+ string bookmark_lowercase( bookmark );
+ boost::algorithm::to_lower(bookmark_lowercase);
+
+ std::pair<bookmark_set::iterator, bool> result
+ = bookmarks_lowercase.insert( bookmark_lowercase );
+ if (!result.second)
+ {
+ ++m_duplicate_bookmark_errors;
+ error( library_name, full_path, string(name()) +
+ " duplicate bookmark: " + bookmark );
+ }
+ }
+
+ a_start = a_what[0].second; // update search position
+ a_flags |= boost::match_prev_avail; // update flags
+ a_flags |= boost::match_not_bob;
+ }
+ }
+
+ // process urls
       string::const_iterator start( contents.begin() );
       string::const_iterator end( contents.end() );
       boost::match_results< string::const_iterator > what;
@@ -275,6 +332,14 @@
             ++m_bookmark_errors;
             error( library_name, source_path, string(name()) + " invalid bookmark: " + decoded_url );
           }
+ if ( !no_link_errors && url_path.empty()
+ // w3.org recommends case-sensitive broken bookmark checking
+ // since some browsers do a case-sensitive match.
+ && bookmarks.find(fragment) == bookmarks.end() )
+ {
+ ++m_bookmark_errors;
+ error( library_name, source_path, string(name()) + " unknown bookmark: " + decoded_url );
+ }
         }
 
         // No more to do if it's just a fragment identifier

Modified: trunk/tools/inspect/link_check.hpp
==============================================================================
--- trunk/tools/inspect/link_check.hpp (original)
+++ trunk/tools/inspect/link_check.hpp 2010-01-17 14:48:08 EST (Sun, 17 Jan 2010)
@@ -27,6 +27,7 @@
       long m_unlinked_errors;
       long m_invalid_errors;
       long m_bookmark_errors;
+ long m_duplicate_bookmark_errors;
 
       typedef std::map< string, int > m_path_map;
       m_path_map m_paths; // first() is relative initial_path()
@@ -38,7 +39,9 @@
 
       link_check();
       virtual const char * name() const { return "*LINK*"; }
- virtual const char * desc() const { return "invalid bookmarks, invalid urls, broken links, unlinked files"; }
+ virtual const char * desc() const
+ { return "invalid bookmarks, duplicate bookmarks,"
+ " invalid urls, broken links, unlinked files"; }
 
       virtual void inspect(
         const std::string & library_name,
@@ -53,7 +56,10 @@
 
       virtual ~link_check()
         {
- std::cout << " " << m_bookmark_errors << " bookmarks with invalid characters" << line_break();
+ std::cout << " " << m_bookmark_errors
+ << " bookmarks with invalid characters" << line_break();
+ std::cout << " " << m_duplicate_bookmark_errors
+ << " duplicate bookmarks" << line_break();
           std::cout << " " << m_invalid_errors << " invalid urls" << line_break();
           std::cout << " " << m_broken_errors << " broken links" << line_break();
           std::cout << " " << m_unlinked_errors << " unlinked files" << line_break();

Added: trunk/tools/inspect/link_check_test.html
==============================================================================
--- (empty file)
+++ trunk/tools/inspect/link_check_test.html 2010-01-17 14:48:08 EST (Sun, 17 Jan 2010)
@@ -0,0 +1,24 @@
+<html>
+
+<head>
+<meta http-equiv="Content-Language" content="en-us">
+<meta name="GENERATOR" content="Microsoft FrontPage 5.0">
+<meta name="ProgId" content="FrontPage.Editor.Document">
+<meta http-equiv="Content-Type" content="text/html; charset=windows-1252">
+<title>valid link</title>
+</head>
+
+<body>
+
+<p>valid bookmark link</p>
+<p>valid relative link</p>
+<p>broken relative link with bookmark</p>
+<p>broken bookmark link</p>
+<p><a name="link-target">bookmark</a></p>
+<p><a name="second-target">second bookmark</a></p>
+<p><a name="SECOND-TARGET">duplicate second bookmark</a></p>
+<p>&nbsp;</p>
+
+</body>
+
+</html>
\ No newline at end of file


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk