Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r50225 - in sandbox/tools/auto_index: build src
From: john_at_[hidden]
Date: 2008-12-09 13:00:55


Author: johnmaddock
Date: 2008-12-09 13:00:54 EST (Tue, 09 Dec 2008)
New Revision: 50225
URL: http://svn.boost.org/trac/boost/changeset/50225

Log:
Split up and organise code a bit better.
Added:
   sandbox/tools/auto_index/src/auto_index.hpp (contents, props changed)
   sandbox/tools/auto_index/src/file_scanning.cpp (contents, props changed)
   sandbox/tools/auto_index/src/index_generator.cpp (contents, props changed)
Text files modified:
   sandbox/tools/auto_index/build/Jamfile.v2 | 8
   sandbox/tools/auto_index/src/auto_index.cpp | 592 ++++-----------------------------------
   2 files changed, 72 insertions(+), 528 deletions(-)

Modified: sandbox/tools/auto_index/build/Jamfile.v2
==============================================================================
--- sandbox/tools/auto_index/build/Jamfile.v2 (original)
+++ sandbox/tools/auto_index/build/Jamfile.v2 2008-12-09 13:00:54 EST (Tue, 09 Dec 2008)
@@ -1,6 +1,12 @@
 import quickbook ;
 
-exe auto_index : ../src/auto_index.cpp ../src/tiny_xml.cpp /boost/regex /boost/filesystem
+exe auto_index :
+ ../src/auto_index.cpp
+ ../src/file_scanning.cpp
+ ../src/index_generator.cpp
+ ../src/tiny_xml.cpp
+ /boost/regex
+ /boost/filesystem
 : <define>BOOST_ALL_NO_LIB=1 <link>static
 : release
 ;

Modified: sandbox/tools/auto_index/src/auto_index.cpp
==============================================================================
--- sandbox/tools/auto_index/src/auto_index.cpp (original)
+++ sandbox/tools/auto_index/src/auto_index.cpp 2008-12-09 13:00:54 EST (Tue, 09 Dec 2008)
@@ -1,15 +1,15 @@
+// Copyright 2008 John Maddock
+//
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt
+// or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#include "tiny_xml.hpp"
-#include <boost/regex.hpp>
-#include <boost/filesystem.hpp>
-#include <fstream>
-#include <cctype>
-#include <map>
-#include <set>
-#include <sstream>
+#include "auto_index.hpp"
 
 int help()
 {
+ std::cout << "Please refer to the documentation for the correct command line syntax" << std::endl;
    return 1;
 }
 
@@ -63,85 +63,9 @@
    }
    return result;
 }
-
-struct index_info
-{
- std::string term;
- boost::regex search_text;
- boost::regex search_id;
- std::string category;
-};
-bool operator < (const index_info& a, const index_info& b)
-{
- return a.term < b.term;
-}
-
-std::multiset<index_info> index_terms;
-std::set<std::pair<std::string, std::string> > found_terms;
-bool no_duplicates = false;
-bool verbose = false;
-
-struct index_entry;
-typedef boost::shared_ptr<index_entry> index_entry_ptr;
-bool operator < (const index_entry_ptr& a, const index_entry_ptr& b);
-typedef std::set<index_entry_ptr> index_entry_set;
-
-std::string make_upper_key(const std::string& s)
-{
- std::string result;
- for(std::string::const_iterator i = s.begin(); i != s.end(); ++i)
- result.append(1, std::toupper(*i));
- return result;
-}
-
-struct index_entry
-{
- std::string key;
- std::string sort_key;
- std::string id;
- std::string category;
- index_entry_set sub_keys;
-
- index_entry(){}
- index_entry(const std::string& k) : key(k) { sort_key = make_upper_key(key); }
- index_entry(const std::string& k, const std::string& i) : key(k), id(i) { sort_key = make_upper_key(key); }
- index_entry(const std::string& k, const std::string& i, const std::string& c) : key(k), id(i), category(c) { sort_key = make_upper_key(key); }
-};
-
-index_entry_set index_entries;
-
-bool operator < (const index_entry_ptr& a, const index_entry_ptr& b)
-{
- return a->sort_key < b->sort_key;
-}
-
-boost::tiny_xml::element_list indexes;
-
-struct id_rewrite_rule
-{
- bool base_on_id;
- boost::regex id;
- std::string new_name;
-
- id_rewrite_rule(const std::string& i, const std::string& n, bool b)
- : base_on_id(b), id(i), new_name(n) {}
-};
-std::list<id_rewrite_rule> id_rewrite_list;
-
-bool internal_indexes = false;
-
-struct node_id
-{
- const std::string* id;
- node_id* prev;
-};
-
-struct title_info
-{
- std::string title;
- title_info* prev;
-};
-
+//
+// Find attribute named "name" in node "node":
+//
 const std::string* find_attr(boost::tiny_xml::element_ptr node, const char* name)
 {
    for(boost::tiny_xml::attribute_list::const_iterator i = node->attributes.begin();
@@ -152,21 +76,30 @@
    }
    return 0;
 }
-
+//
+// Get the ID of the current block scope: walk up the chain of enclosing
+// nodes until one with an ID is found (or the chain runs out):
+//
 const std::string* get_current_block_id(node_id const* id)
 {
    while((id->id == 0) && (id->prev))
       id = id->prev;
    return id->id;
 }
-
+//
+// Get the title of the current block scope: walk up the chain of enclosing
+// nodes until one with a non-empty title is found (or the chain runs out):
+//
 const std::string& get_current_block_title(title_info const* id)
 {
    while((id->title.size() == 0) && (id->prev))
       id = id->prev;
    return id->title;
 }
-
+//
+// Get all the content under this node, with any inline XML
+// stripped out:
+//
 std::string get_consolidated_content(boost::tiny_xml::element_ptr node)
 {
    std::string result(node->content);
@@ -179,7 +112,9 @@
    static const boost::regex e("(^[[:space:]]+)|([[:space:]]+)|([[:space:]]+$)");
    return regex_replace(result, e, "(?2 )", boost::regex_constants::format_all);
 }
-
+//
+// Rewrite a title based on any rewrite rules we may have:
+//
 std::string rewrite_title(const std::string& title, const std::string& id)
 {
    for(std::list<id_rewrite_rule>::const_iterator i = id_rewrite_list.begin(); i != id_rewrite_list.end(); ++i)
@@ -197,9 +132,15 @@
    }
    return title;
 }
-
+//
+// This does most of the work: process the node pointed to, and any children
+// that it may have:
+//
 void process_node(boost::tiny_xml::element_ptr node, node_id* prev, title_info* pt, boost::tiny_xml::element_ptr parent_node = boost::tiny_xml::element_ptr())
 {
+ //
+ // Store the current ID and title as nested scoped objects:
+ //
    node_id id = { 0, prev };
    id.id = find_attr(node, "id");
    title_info title = { "", pt};
@@ -215,21 +156,25 @@
    }
    else if(node->name == "index")
    {
+ // Keep track of all the indexes we see:
       indexes.push_back(node);
       if(parent_node->name == "para")
          parent_node->name = "";
    }
 
    //
- // Search content for items:
+ // Search content for items: we only search if the name of this node is
+ // empty, and the content is not empty, and the content is not whitespace
+ // alone.
    //
    static const boost::regex space_re("[[:space:]]+");
    if((node->name == "") && node->content.size() && !regex_match(node->content, space_re))
    {
+ // Save block ID and title in case we find some hits:
       const std::string* pid = get_current_block_id(&id);
       const std::string& rtitle = get_current_block_title(&title);
       const std::string simple_title = rewrite_title(rtitle, *pid);
-
+ // Scan for each index term:
       for(std::multiset<index_info>::const_iterator i = index_terms.begin();
             i != index_terms.end(); ++i)
       {
@@ -237,24 +182,24 @@
          {
             //
             // We need to check to see if this term has already been indexed
- // in this zone, in order to prevent duplicate entries:
+ // in this zone, in order to prevent duplicate entries; also check
+ // that any constraint placed on the term's ID is satisfied:
             //
             std::pair<std::string, std::string> item_index(*pid, i->term);
             if(((no_duplicates == false) || (0 == found_terms.count(item_index)))
                && (i->search_id.empty() || regex_search(*pid, i->search_id)))
             {
+ // We have something to index!
                found_terms.insert(item_index);
- /*
- std::cout << "<indexterm zone=\"" << *pid << "\">\n <primary>"
- << rtitle << "</primary>\n"
- << " <secondary>" << i->first << "</secondary>\n</indexterm>" << std::endl;
- std::cout << "<indexterm zone=\"" << *pid << "\">\n <primary>"
- << i->first << "</primary>\n"
- << " <secondary>" << rtitle << "</secondary>\n</indexterm>" << std::endl;
- */
 
+ //
+ // First off insert index entry with primary term
+ // consisting of the section title, and secondary term the
+ // actual index term:
+ //
                if(internal_indexes == false)
                {
+ // Insert an <indexterm> into the XML:
                   boost::tiny_xml::element_ptr p(new boost::tiny_xml::element());
                   p->name = "indexterm";
                   boost::tiny_xml::element_ptr prim(new boost::tiny_xml::element());
@@ -271,6 +216,7 @@
                   if(parent_node)
                      parent_node->elements.push_front(p);
                }
+ // Track the entry in our internal index:
                index_entry_ptr item1(new index_entry(simple_title));
                index_entry_ptr item2(new index_entry(i->term, *pid));
                if(index_entries.find(item1) == index_entries.end())
@@ -279,8 +225,15 @@
                }
                (**index_entries.find(item1)).sub_keys.insert(item2);
 
+ //
+ // Now insert another index entry with the index term
+ // as the primary key, and the section title as the
+ // secondary key, this one gets assigned to the
+ // appropriate index category if there is one:
+ //
                if(internal_indexes == false)
                {
+ // Insert <indexterm> into the XML:
                   boost::tiny_xml::element_ptr p2(new boost::tiny_xml::element());
                   p2->name = "indexterm";
                   if(i->category.size())
@@ -301,6 +254,7 @@
                   if(parent_node)
                      parent_node->elements.push_front(p2);
                }
+ // Track the entry in our internal index:
                index_entry_ptr item3(new index_entry(i->term));
                index_entry_ptr item4(new index_entry(rtitle, *pid));
                if(index_entries.find(item3) == index_entries.end())
@@ -333,431 +287,15 @@
    process_node(node, &id, &t);
 }
 
-void load_file(std::string& s, std::istream& is)
-{
- s.erase();
- if(is.bad()) return;
- s.reserve(is.rdbuf()->in_avail());
- char c;
- while(is.get(c))
- {
- if(s.capacity() == s.size())
- s.reserve(s.capacity() * 3);
- s.append(1, c);
- }
-}
-
-void scan_file(const char* file)
-{
- if(verbose)
- std::cout << "Scanning file... " << file << std::endl;
- static const boost::regex class_e(
- // possibly leading whitespace:
- "^[[:space:]]*"
- // possible template declaration:
- "(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
- // class or struct:
- "(class|struct)[[:space:]]*"
- // leading declspec macros etc:
- "("
- "\\<\\w+\\>"
- "("
- "[[:blank:]]*\\([^)]*\\)"
- ")?"
- "[[:space:]]*"
- ")*"
- // the class name
- "(\\<\\w*\\>)[[:space:]]*"
- // template specialisation parameters
- "(<[^;:{]+>)?[[:space:]]*"
- // terminate in { or :
- "(\\{|:[^;\\{()]*\\{)"
- );
- std::string text;
- std::ifstream is(file);
- load_file(text, is);
- {
- if(verbose)
- std::cout << "Scanning for class names... " << std::endl;
- boost::sregex_token_iterator i(text.begin(), text.end(), class_e, 5), j;
- while(i != j)
- {
- index_info info;
- info.term = i->str();
- info.search_text = "\\<" + i->str() + "\\>";
- info.category = "class_name";
- if(index_terms.count(info) == 0)
- {
- if(verbose)
- std::cout << "Indexing class " << info.term << std::endl;
- index_terms.insert(info);
- }
- ++i;
- }
- }
-
- //
- // Now typedefs:
- //
- {
- if(verbose)
- std::cout << "Scanning for typedef names... " << std::endl;
- static const boost::regex typedef_exp(
- "typedef[^;{}#]+?(\\w+)\\s*;");
- boost::sregex_token_iterator i(text.begin(), text.end(), typedef_exp, 1), j;
- while(i != j)
- {
- index_info info;
- info.term = i->str();
- info.search_text = "\\<" + i->str() + "\\>";
- info.category = "typedef_name";
- if(index_terms.count(info) == 0)
- {
- if(verbose)
- std::cout << "Indexing typedef " << info.term << std::endl;
- index_terms.insert(info);
- }
- ++i;
- }
- }
-
- //
- // Now macros:
- //
- {
- if(verbose)
- std::cout << "Scanning for macro names... " << std::endl;
- static const boost::regex e(
- "^\\s*#\\s*define\\s+(\\w+)"
- );
- boost::sregex_token_iterator i(text.begin(), text.end(), e, 1), j;
- while(i != j)
- {
- index_info info;
- info.term = i->str();
- info.search_text = "\\<" + i->str() + "\\>";
- info.category = "macro_name";
- if(index_terms.count(info) == 0)
- {
- if(verbose)
- std::cout << "Indexing macro " << info.term << std::endl;
- index_terms.insert(info);
- }
- ++i;
- }
- }
- //
- // Now functions:
- //
- {
- if(verbose)
- std::cout << "Scanning for function names... " << std::endl;
- static const boost::regex e(
- "\\w+\\s+(\\w+)\\s*\\([^\\)]*\\)\\s*\\{"
- );
- boost::sregex_token_iterator i(text.begin(), text.end(), e, 1), j;
- while(i != j)
- {
- index_info info;
- info.term = i->str();
- info.search_text = "\\<" + i->str() + "\\>";
- info.category = "function_name";
- if(index_terms.count(info) == 0)
- {
- if(verbose)
- std::cout << "Indexing function " << info.term << std::endl;
- index_terms.insert(info);
- }
- ++i;
- }
- }
-}
-
-void scan_dir(const std::string& dir, const std::string& mask, bool recurse)
-{
- using namespace boost::filesystem;
- boost::regex e(mask);
- directory_iterator i(dir), j;
-
- while(i != j)
- {
- if(regex_match(i->path().filename(), e))
- {
- scan_file(i->path().directory_string().c_str());
- }
- else if(recurse && is_directory(i->status()))
- {
- scan_dir(i->path().directory_string(), mask, recurse);
- }
- ++i;
- }
-}
-
-std::string unquote(const std::string& s)
-{
- std::string result(s);
- if((s.size() >= 2) && (*s.begin() == '\"') && (*s.rbegin() == '\"'))
- {
- result.erase(result.begin());
- result.erase(--result.end());
- }
- return result;
-}
-
-void process_script(const char* script)
-{
- static const boost::regex scan_parser(
- "!scan[[:space:]]+"
- "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"
- );
- static const boost::regex scan_dir_parser(
- "!scan-path[[:space:]]+"
- "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"
- "[[:space:]]+"
- "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"
- "(?:"
- "[[:space:]]+"
- "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"
- ")?"
- );
- static const boost::regex entry_parser(
- "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"
- "(?:"
- "[[:space:]]+"
- "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)*\")"
- "(?:"
- "[[:space:]]+"
- "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)*\")"
- "(?:"
- "[[:space:]]+"
- "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)*\")"
- ")?"
- ")?"
- ")?"
- "[[:space:]]*");
- static const boost::regex rewrite_parser(
- "!(rewrite-name|rewrite-id)\\s+"
- "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s+"
- "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"
- );
- if(verbose)
- std::cout << "Processing script " << script << std::endl;
- boost::smatch what;
- std::string line;
- std::ifstream is(script);
- if(is.bad())
- {
- throw std::runtime_error("Could not open script file");
- }
- while(std::getline(is, line).good())
- {
- if(regex_match(line, what, scan_parser))
- {
- std::string f = unquote(what[1].str());
- boost::filesystem::path base(script);
- base.remove_filename();
- base /= f;
- f = base.file_string();
- scan_file(f.c_str());
- }
- else if(regex_match(line, what, scan_dir_parser))
- {
- std::string d = unquote(what[1].str());
- std::string m = unquote(what[2].str());
- bool r = unquote(what[3].str()) == "true";
- boost::filesystem::path base(script);
- base.remove_filename();
- base /= d;
- d = base.directory_string();
- if(verbose)
- std::cout << "Scanning directory " << d << std::endl;
- scan_dir(d, m, r);
- }
- else if(regex_match(line, what, rewrite_parser))
- {
- bool id = what[1] == "rewrite-id";
- std::string a = unquote(what[2].str());
- std::string b = unquote(what[3].str());
- id_rewrite_list.push_back(id_rewrite_rule(a, b, id));
- }
- else if(line.compare(0, 9, "!exclude ") == 0)
- {
- static const boost::regex delim("([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")");
- boost::sregex_token_iterator i(line.begin() + 9, line.end(), delim, 0), j;
- while(i != j)
- {
- index_info info;
- info.term = unquote(*i);
- index_terms.erase(info);
- ++i;
- }
- }
- else if(regex_match(line, what, entry_parser))
- {
- // what[1] is the Index entry
- // what[2] is the regex to search for (optional)
- // what[3] is a section id that must be matched
- // in order for the term to be indexed (optional)
- // what[4] is the index category to place the term in (optional).
- index_info info;
- info.term = unquote(what.str(1));
- std::string s = unquote(what.str(2));
- if(s.size())
- info.search_text = boost::regex(s, boost::regex::icase|boost::regex::perl);
- else
- info.search_text = boost::regex("\\<" + what.str(1) + "\\>", boost::regex::icase|boost::regex::perl);
-
- s = unquote(what.str(3));
- if(s.size())
- info.search_id = s;
- if(what[4].matched)
- info.category = unquote(what.str(4));
- index_terms.insert(info);
- }
- }
-}
-
-std::string get_next_index_id()
-{
- static int index_id_count = 0;
- std::stringstream s;
- s << "idx_id_" << index_id_count;
- ++index_id_count;
- return s.str();
-}
-
-void generate_indexes()
-{
- for(boost::tiny_xml::element_list::const_iterator i = indexes.begin(); i != indexes.end(); ++i)
- {
- boost::tiny_xml::element_ptr node = *i;
- const std::string* category = find_attr(node, "type");
- bool has_title = false;
-
- for(boost::tiny_xml::element_list::const_iterator k = (*i)->elements.begin(); k != (*i)->elements.end(); ++k)
- {
- if((**k).name == "title")
- {
- has_title = true;
- break;
- }
- }
-
- boost::tiny_xml::element_ptr navbar(new boost::tiny_xml::element());
- navbar->name = "para";
- node->elements.push_back(navbar);
-
- char last_c = 0;
- boost::tiny_xml::element_ptr list(new boost::tiny_xml::element());
- list->name = "variablelist";
- boost::tiny_xml::element_ptr listentry;
- boost::tiny_xml::element_ptr listitem;
- boost::tiny_xml::element_ptr sublist;
- node->elements.push_back(list);
-
- for(index_entry_set::const_iterator i = index_entries.begin(); i != index_entries.end(); ++i)
- {
- if((0 == category) || (category->size() == 0) || (category && (**i).category == *category))
- {
- if(std::toupper((**i).key[0]) != last_c)
- {
- std::string id_name = get_next_index_id();
- last_c = std::toupper((**i).key[0]);
- listentry.reset(new boost::tiny_xml::element());
- listentry->name = "varlistentry";
- boost::tiny_xml::attribute id;
- id.name = "id";
- id.value = id_name;
- listentry->attributes.push_back(id);
- boost::tiny_xml::element_ptr term(new boost::tiny_xml::element());
- term->name = "term";
- term->content.assign(&last_c, 1);
- listentry->elements.push_front(term);
- list->elements.push_back(listentry);
- listitem.reset(new boost::tiny_xml::element());
- listitem->name = "listitem";
- sublist.reset(new boost::tiny_xml::element());
- sublist->name = "variablelist";
- listitem->elements.push_back(sublist);
- listentry->elements.push_back(listitem);
-
- boost::tiny_xml::element_ptr nav(new boost::tiny_xml::element());
- nav->name = "";
- nav->content = " ";
- boost::tiny_xml::element_ptr navlink(new boost::tiny_xml::element());
- navlink->name = "link";
- navlink->content = term->content;
- boost::tiny_xml::attribute navid;
- navid.name = "linkend";
- navid.value = id_name;
- navlink->attributes.push_back(navid);
- navbar->elements.push_back(navlink);
- navbar->elements.push_back(nav);
- }
- boost::tiny_xml::element_ptr subentry(new boost::tiny_xml::element());
- subentry->name = "varlistentry";
- boost::tiny_xml::element_ptr subterm(new boost::tiny_xml::element());
- subterm->name = "term";
- if((**i).id.empty())
- subterm->content = (**i).key;
- else
- {
- boost::tiny_xml::element_ptr link(new boost::tiny_xml::element());
- link->name = "link";
- link->content = (**i).key;
- boost::tiny_xml::attribute at;
- at.name = "linkend";
- at.value = (**i).id;
- link->attributes.push_back(at);
- subterm->elements.push_back(link);
- }
- subentry->elements.push_back(subterm);
- boost::tiny_xml::element_ptr subitem(new boost::tiny_xml::element());
- subitem->name = "listitem";
- subentry->elements.push_back(subitem);
- sublist->elements.push_back(subentry);
-
- boost::tiny_xml::element_ptr secondary_list(new boost::tiny_xml::element());
- secondary_list->name = "simplelist";
- subitem->elements.push_back(secondary_list);
-
- for(index_entry_set::const_iterator k = (**i).sub_keys.begin(); k != (**i).sub_keys.end(); ++k)
- {
- boost::tiny_xml::element_ptr member(new boost::tiny_xml::element());
- member->name = "member";
- boost::tiny_xml::element_ptr para(new boost::tiny_xml::element());
- para->name = "para";
- if((**k).id.empty())
- para->content = (**k).key;
- else
- {
- boost::tiny_xml::element_ptr link(new boost::tiny_xml::element());
- link->name = "link";
- boost::tiny_xml::attribute at;
- at.name = "linkend";
- at.value = (**k).id;
- link->attributes.push_back(at);
- link->content = (**k).key;
- para->elements.push_back(link);
- }
- member->elements.push_back(para);
- secondary_list->elements.push_back(member);
- }
- }
- }
- node->name = "section";
- node->attributes.clear();
- if(!has_title)
- {
- boost::tiny_xml::element_ptr t(new boost::tiny_xml::element());
- t->name = "title";
- t->content = "Index";
- node->elements.push_front(t);
- }
- }
-}
-
 std::string infile, outfile;
+std::multiset<index_info> index_terms;
+std::set<std::pair<std::string, std::string> > found_terms;
+bool no_duplicates = false;
+bool verbose = false;
+index_entry_set index_entries;
+boost::tiny_xml::element_list indexes;
+std::list<id_rewrite_rule> id_rewrite_list;
+bool internal_indexes = false;
 
 int main(int argc, char* argv[])
 {

Added: sandbox/tools/auto_index/src/auto_index.hpp
==============================================================================
--- (empty file)
+++ sandbox/tools/auto_index/src/auto_index.hpp 2008-12-09 13:00:54 EST (Tue, 09 Dec 2008)
@@ -0,0 +1,103 @@
+// Copyright 2008 John Maddock
+//
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt
+// or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_AUTO_INDEX_HPP
+#define BOOST_AUTO_INDEX_HPP
+
+#include "tiny_xml.hpp"
+#include <boost/regex.hpp>
+#include <boost/filesystem.hpp>
+#include <fstream>
+#include <cctype>
+#include <map>
+#include <set>
+#include <sstream>
+
+struct index_info
+{
+ std::string term;
+ boost::regex search_text;
+ boost::regex search_id;
+ std::string category;
+};
+inline bool operator < (const index_info& a, const index_info& b)
+{
+ return a.term < b.term;
+}
+
+
+struct index_entry;
+typedef boost::shared_ptr<index_entry> index_entry_ptr;
+bool operator < (const index_entry_ptr& a, const index_entry_ptr& b);
+typedef std::set<index_entry_ptr> index_entry_set;
+
+inline std::string make_upper_key(const std::string& s)
+{
+ std::string result;
+ for(std::string::const_iterator i = s.begin(); i != s.end(); ++i)
+ result.append(1, std::toupper(*i));
+ return result;
+}
+
+struct index_entry
+{
+ std::string key;
+ std::string sort_key;
+ std::string id;
+ std::string category;
+ index_entry_set sub_keys;
+
+ index_entry(){}
+ index_entry(const std::string& k) : key(k) { sort_key = make_upper_key(key); }
+ index_entry(const std::string& k, const std::string& i) : key(k), id(i) { sort_key = make_upper_key(key); }
+ index_entry(const std::string& k, const std::string& i, const std::string& c) : key(k), id(i), category(c) { sort_key = make_upper_key(key); }
+};
+
+
+inline bool operator < (const index_entry_ptr& a, const index_entry_ptr& b)
+{
+ return a->sort_key < b->sort_key;
+}
+
+struct id_rewrite_rule
+{
+ bool base_on_id;
+ boost::regex id;
+ std::string new_name;
+
+ id_rewrite_rule(const std::string& i, const std::string& n, bool b)
+ : base_on_id(b), id(i), new_name(n) {}
+};
+
+struct node_id
+{
+ const std::string* id;
+ node_id* prev;
+};
+
+struct title_info
+{
+ std::string title;
+ title_info* prev;
+};
+
+void process_script(const char* script);
+void scan_dir(const std::string& dir, const std::string& mask, bool recurse);
+void scan_file(const char* file);
+void generate_indexes();
+const std::string* find_attr(boost::tiny_xml::element_ptr node, const char* name);
+
+extern std::multiset<index_info> index_terms;
+extern std::set<std::pair<std::string, std::string> > found_terms;
+extern bool no_duplicates;
+extern bool verbose;
+extern index_entry_set index_entries;
+extern boost::tiny_xml::element_list indexes;
+extern std::list<id_rewrite_rule> id_rewrite_list;
+extern bool internal_indexes;
+
+#endif

Added: sandbox/tools/auto_index/src/file_scanning.cpp
==============================================================================
--- (empty file)
+++ sandbox/tools/auto_index/src/file_scanning.cpp 2008-12-09 13:00:54 EST (Tue, 09 Dec 2008)
@@ -0,0 +1,304 @@
+// Copyright 2008 John Maddock
+//
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt
+// or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include "auto_index.hpp"
+
+//
+// Helper to dump file contents into a std::string:
+//
+void load_file(std::string& s, std::istream& is)
+{
+ s.erase();
+ if(is.bad()) return;
+ s.reserve(is.rdbuf()->in_avail());
+ char c;
+ while(is.get(c))
+ {
+ if(s.capacity() == s.size())
+ s.reserve(s.capacity() * 3);
+ s.append(1, c);
+ }
+}
+//
+// Scan a source file for things to index:
+//
+void scan_file(const char* file)
+{
+ if(verbose)
+ std::cout << "Scanning file... " << file << std::endl;
+ static const boost::regex class_e(
+ // possibly leading whitespace:
+ "^[[:space:]]*"
+ // possible template declaration:
+ "(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
+ // class or struct:
+ "(class|struct)[[:space:]]*"
+ // leading declspec macros etc:
+ "("
+ "\\<\\w+\\>"
+ "("
+ "[[:blank:]]*\\([^)]*\\)"
+ ")?"
+ "[[:space:]]*"
+ ")*"
+ // the class name
+ "(\\<\\w*\\>)[[:space:]]*"
+ // template specialisation parameters
+ "(<[^;:{]+>)?[[:space:]]*"
+ // terminate in { or :
+ "(\\{|:[^;\\{()]*\\{)"
+ );
+ std::string text;
+ std::ifstream is(file);
+ load_file(text, is);
+ {
+ if(verbose)
+ std::cout << "Scanning for class names... " << std::endl;
+ boost::sregex_token_iterator i(text.begin(), text.end(), class_e, 5), j;
+ while(i != j)
+ {
+ index_info info;
+ info.term = i->str();
+ info.search_text = "\\<" + i->str() + "\\>";
+ info.category = "class_name";
+ if(index_terms.count(info) == 0)
+ {
+ if(verbose)
+ std::cout << "Indexing class " << info.term << std::endl;
+ index_terms.insert(info);
+ }
+ ++i;
+ }
+ }
+
+ //
+ // Now typedefs:
+ //
+ {
+ if(verbose)
+ std::cout << "Scanning for typedef names... " << std::endl;
+ static const boost::regex typedef_exp(
+ "typedef[^;{}#]+?(\\w+)\\s*;");
+ boost::sregex_token_iterator i(text.begin(), text.end(), typedef_exp, 1), j;
+ while(i != j)
+ {
+ index_info info;
+ info.term = i->str();
+ info.search_text = "\\<" + i->str() + "\\>";
+ info.category = "typedef_name";
+ if(index_terms.count(info) == 0)
+ {
+ if(verbose)
+ std::cout << "Indexing typedef " << info.term << std::endl;
+ index_terms.insert(info);
+ }
+ ++i;
+ }
+ }
+
+ //
+ // Now macros:
+ //
+ {
+ if(verbose)
+ std::cout << "Scanning for macro names... " << std::endl;
+ static const boost::regex e(
+ "^\\s*#\\s*define\\s+(\\w+)"
+ );
+ boost::sregex_token_iterator i(text.begin(), text.end(), e, 1), j;
+ while(i != j)
+ {
+ index_info info;
+ info.term = i->str();
+ info.search_text = "\\<" + i->str() + "\\>";
+ info.category = "macro_name";
+ if(index_terms.count(info) == 0)
+ {
+ if(verbose)
+ std::cout << "Indexing macro " << info.term << std::endl;
+ index_terms.insert(info);
+ }
+ ++i;
+ }
+ }
+ //
+ // Now functions:
+ //
+ {
+ if(verbose)
+ std::cout << "Scanning for function names... " << std::endl;
+ static const boost::regex e(
+ "\\w+\\s+(\\w+)\\s*\\([^\\)]*\\)\\s*\\{"
+ );
+ boost::sregex_token_iterator i(text.begin(), text.end(), e, 1), j;
+ while(i != j)
+ {
+ index_info info;
+ info.term = i->str();
+ info.search_text = "\\<" + i->str() + "\\>";
+ info.category = "function_name";
+ if(index_terms.count(info) == 0)
+ {
+ if(verbose)
+ std::cout << "Indexing function " << info.term << std::endl;
+ index_terms.insert(info);
+ }
+ ++i;
+ }
+ }
+}
+//
+// Scan a whole directory for files to search:
+//
+void scan_dir(const std::string& dir, const std::string& mask, bool recurse)
+{
+ using namespace boost::filesystem;
+ boost::regex e(mask);
+ directory_iterator i(dir), j;
+
+ while(i != j)
+ {
+ if(regex_match(i->path().filename(), e))
+ {
+ scan_file(i->path().directory_string().c_str());
+ }
+ else if(recurse && is_directory(i->status()))
+ {
+ scan_dir(i->path().directory_string(), mask, recurse);
+ }
+ ++i;
+ }
+}
+//
+// Remove quotes from a string:
+//
+std::string unquote(const std::string& s)
+{
+ std::string result(s);
+ if((s.size() >= 2) && (*s.begin() == '\"') && (*s.rbegin() == '\"'))
+ {
+ result.erase(result.begin());
+ result.erase(--result.end());
+ }
+ return result;
+}
+//
+// Load and process a script file:
+//
+void process_script(const char* script)
+{
+ static const boost::regex scan_parser(
+ "!scan[[:space:]]+"
+ "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"
+ );
+ static const boost::regex scan_dir_parser(
+ "!scan-path[[:space:]]+"
+ "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"
+ "[[:space:]]+"
+ "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"
+ "(?:"
+ "[[:space:]]+"
+ "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"
+ ")?"
+ );
+ static const boost::regex entry_parser(
+ "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"
+ "(?:"
+ "[[:space:]]+"
+ "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)*\")"
+ "(?:"
+ "[[:space:]]+"
+ "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)*\")"
+ "(?:"
+ "[[:space:]]+"
+ "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)*\")"
+ ")?"
+ ")?"
+ ")?"
+ "[[:space:]]*");
+ static const boost::regex rewrite_parser(
+ "!(rewrite-name|rewrite-id)\\s+"
+ "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s+"
+ "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"
+ );
+ if(verbose)
+ std::cout << "Processing script " << script << std::endl;
+ boost::smatch what;
+ std::string line;
+ std::ifstream is(script);
+ if(is.bad())
+ {
+ throw std::runtime_error("Could not open script file");
+ }
+ while(std::getline(is, line).good())
+ {
+ if(regex_match(line, what, scan_parser))
+ {
+ std::string f = unquote(what[1].str());
+ boost::filesystem::path base(script);
+ base.remove_filename();
+ base /= f;
+ f = base.file_string();
+ scan_file(f.c_str());
+ }
+ else if(regex_match(line, what, scan_dir_parser))
+ {
+ std::string d = unquote(what[1].str());
+ std::string m = unquote(what[2].str());
+ bool r = unquote(what[3].str()) == "true";
+ boost::filesystem::path base(script);
+ base.remove_filename();
+ base /= d;
+ d = base.directory_string();
+ if(verbose)
+ std::cout << "Scanning directory " << d << std::endl;
+ scan_dir(d, m, r);
+ }
+ else if(regex_match(line, what, rewrite_parser))
+ {
+ bool id = what[1] == "rewrite-id";
+ std::string a = unquote(what[2].str());
+ std::string b = unquote(what[3].str());
+ id_rewrite_list.push_back(id_rewrite_rule(a, b, id));
+ }
+ else if(line.compare(0, 9, "!exclude ") == 0)
+ {
+ static const boost::regex delim("([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")");
+ boost::sregex_token_iterator i(line.begin() + 9, line.end(), delim, 0), j;
+ while(i != j)
+ {
+ index_info info;
+ info.term = unquote(*i);
+ index_terms.erase(info);
+ ++i;
+ }
+ }
+ else if(regex_match(line, what, entry_parser))
+ {
+ // what[1] is the Index entry
+ // what[2] is the regex to search for (optional)
+ // what[3] is a section id that must be matched
+ // in order for the term to be indexed (optional)
+ // what[4] is the index category to place the term in (optional).
+ index_info info;
+ info.term = unquote(what.str(1));
+ std::string s = unquote(what.str(2));
+ if(s.size())
+ info.search_text = boost::regex(s, boost::regex::icase|boost::regex::perl);
+ else
+ info.search_text = boost::regex("\\<" + what.str(1) + "\\>", boost::regex::icase|boost::regex::perl);
+
+ s = unquote(what.str(3));
+ if(s.size())
+ info.search_id = s;
+ if(what[4].matched)
+ info.category = unquote(what.str(4));
+ index_terms.insert(info);
+ }
+ }
+}
+

Added: sandbox/tools/auto_index/src/index_generator.cpp
==============================================================================
--- (empty file)
+++ sandbox/tools/auto_index/src/index_generator.cpp 2008-12-09 13:00:54 EST (Tue, 09 Dec 2008)
@@ -0,0 +1,154 @@
+// Copyright 2008 John Maddock
+//
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt
+// or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include "auto_index.hpp"
+
+//
+// Hand out a fresh id string for the next item: "idx_id_0" on the
+// first call, "idx_id_1" on the second, and so on.
+//
+std::string get_next_index_id()
+{
+   static int next_id = 0;   // persists across calls
+   std::ostringstream formatter;
+   formatter << "idx_id_" << next_id++;
+   return formatter.str();
+}
+//
+// Helpers for generate_indexes(): build a named element, build an
+// attribute, and attach an entry's key text to a parent element.
+//
+static boost::tiny_xml::element_ptr new_index_element(const char* name)
+{
+   boost::tiny_xml::element_ptr result(new boost::tiny_xml::element());
+   result->name = name;
+   return result;
+}
+static boost::tiny_xml::attribute new_index_attribute(const char* name, const std::string& value)
+{
+   boost::tiny_xml::attribute result;
+   result.name = name;
+   result.value = value;
+   return result;
+}
+//
+// Stores key as parent's own content when id is empty; otherwise appends
+// a child "link" element with content key and attribute linkend=id.
+//
+static void add_index_key(const boost::tiny_xml::element_ptr& parent, const std::string& key, const std::string& id)
+{
+   if(id.empty())
+   {
+      parent->content = key;
+      return;
+   }
+   boost::tiny_xml::element_ptr link = new_index_element("link");
+   link->content = key;
+   link->attributes.push_back(new_index_attribute("linkend", id));
+   parent->elements.push_back(link);
+}
+//
+// Generate indexes using our own internal method:
+//
+// Each element in "indexes" is renamed to "section" and has its attributes
+// cleared.  It gains a "para" navigation bar linking to every initial-letter
+// heading, then a "variablelist" with one "varlistentry" per initial letter.
+// Under each heading every matching entry gets its own "varlistentry", with
+// its sub_keys listed as "member" items of a "simplelist".  An entry matches
+// when find_attr() yields no "type" attribute, an empty one, or one equal to
+// the entry's category.  A "title" of "Index" is prepended if none exists.
+//
+void generate_indexes()
+{
+   for(boost::tiny_xml::element_list::const_iterator i = indexes.begin(); i != indexes.end(); ++i)
+   {
+      boost::tiny_xml::element_ptr node = *i;
+      const std::string* category = find_attr(node, "type");
+      bool has_title = false;
+
+      for(boost::tiny_xml::element_list::const_iterator k = node->elements.begin(); k != node->elements.end(); ++k)
+      {
+         if((**k).name == "title")
+         {
+            has_title = true;
+            break;
+         }
+      }
+
+      boost::tiny_xml::element_ptr navbar = new_index_element("para");
+      node->elements.push_back(navbar);
+
+      char last_c = 0;
+      boost::tiny_xml::element_ptr list = new_index_element("variablelist");
+      boost::tiny_xml::element_ptr sublist;   // entries of the current letter group
+      node->elements.push_back(list);
+      const bool match_all = (0 == category) || category->empty();
+
+      for(index_entry_set::const_iterator entry = index_entries.begin(); entry != index_entries.end(); ++entry)
+      {
+         if(!match_all && !((**entry).category == *category))
+            continue;
+         const std::string& key = (**entry).key;   // const: key[0] is then defined even if key is empty
+         // Convert through unsigned char: std::toupper is undefined for negative
+         // values, which a signed plain char yields for non-ASCII bytes.
+         const char first_c = static_cast<char>(std::toupper(static_cast<unsigned char>(key[0])));
+         // The !sublist test guarantees a group exists before the first entry is filed.
+         if(!sublist || (first_c != last_c))
+         {
+            const std::string id_name = get_next_index_id();
+            last_c = first_c;
+            boost::tiny_xml::element_ptr listentry = new_index_element("varlistentry");
+            listentry->attributes.push_back(new_index_attribute("id", id_name));
+            boost::tiny_xml::element_ptr term = new_index_element("term");
+            term->content.assign(&last_c, 1);
+            listentry->elements.push_front(term);
+            list->elements.push_back(listentry);
+            boost::tiny_xml::element_ptr listitem = new_index_element("listitem");
+            sublist = new_index_element("variablelist");
+            listitem->elements.push_back(sublist);
+            listentry->elements.push_back(listitem);
+
+            // Navigation bar: a link to the new heading, then a nameless " " separator.
+            boost::tiny_xml::element_ptr navlink = new_index_element("link");
+            navlink->content = term->content;
+            navlink->attributes.push_back(new_index_attribute("linkend", id_name));
+            navbar->elements.push_back(navlink);
+            boost::tiny_xml::element_ptr nav = new_index_element("");
+            nav->content = " ";
+            navbar->elements.push_back(nav);
+         }
+         boost::tiny_xml::element_ptr subentry = new_index_element("varlistentry");
+         boost::tiny_xml::element_ptr subterm = new_index_element("term");
+         add_index_key(subterm, key, (**entry).id);
+         subentry->elements.push_back(subterm);
+         boost::tiny_xml::element_ptr subitem = new_index_element("listitem");
+         subentry->elements.push_back(subitem);
+         sublist->elements.push_back(subentry);
+
+         boost::tiny_xml::element_ptr secondary_list = new_index_element("simplelist");
+         subitem->elements.push_back(secondary_list);
+
+         for(index_entry_set::const_iterator k = (**entry).sub_keys.begin(); k != (**entry).sub_keys.end(); ++k)
+         {
+            boost::tiny_xml::element_ptr member = new_index_element("member");
+            boost::tiny_xml::element_ptr para = new_index_element("para");
+            add_index_key(para, (**k).key, (**k).id);
+            member->elements.push_back(para);
+            secondary_list->elements.push_back(member);
+         }
+      }
+      node->name = "section";
+      node->attributes.clear();
+      if(!has_title)
+      {
+         boost::tiny_xml::element_ptr t = new_index_element("title");
+         t->content = "Index";
+         node->elements.push_front(t);
+      }
+   }
+}
+


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk