Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r59892 - website/public_html/beta/common/code
From: daniel_james_at_[hidden]
Date: 2010-02-25 03:42:44


Author: danieljames
Date: 2010-02-25 03:42:42 EST (Thu, 25 Feb 2010)
New Revision: 59892
URL: http://svn.boost.org/trac/boost/changeset/59892

Log:
Extract some html preparation methods.
Text files modified:
   website/public_html/beta/common/code/boost_archive.php | 339 ++++++++++++++++++++-------------------
   1 files changed, 172 insertions(+), 167 deletions(-)

Modified: website/public_html/beta/common/code/boost_archive.php
==============================================================================
--- website/public_html/beta/common/code/boost_archive.php (original)
+++ website/public_html/beta/common/code/boost_archive.php 2010-02-25 03:42:42 EST (Thu, 25 Feb 2010)
@@ -304,173 +304,10 @@
 
     function content($archive)
     {
- $text = prepare_html($archive->content_);
-
- preg_match('@<body[^>]*>@i',$text,$body_begin,PREG_OFFSET_CAPTURE);
- preg_match('@</body>@i',$text,$body_end,PREG_OFFSET_CAPTURE);
- if (!isset($body_begin[0]))
- {
- //~ Attempt to recover some content from illegal HTML that is missing the body tag.
- preg_match('@</head>@i',$text,$body_begin,PREG_OFFSET_CAPTURE);
- }
- if (!isset($body_begin[0]))
- {
- //~ Attempt to recover some content from illegal HTML that is missing the body tag.
- preg_match('@<html[^>]*>@i',$text,$body_begin,PREG_OFFSET_CAPTURE);
- }
- if (!isset($body_begin[0]))
- {
- //~ Attempt to recover some content from illegal HTML that is missing the body tag.
- preg_match('@<(hr|div|img|p|h1|h2|h3|h4)[^>]*>@i',$text,$body_begin,PREG_OFFSET_CAPTURE);
- }
- if (!isset($body_begin[0]))
- {
- return;
- }
- else if (!isset($body_end[0]))
- {
- $text = substr($text,
- $body_begin[0][1]+strlen($body_begin[0][0]));
- }
- else
- {
- $text = substr($text,
- $body_begin[0][1]+strlen($body_begin[0][0]),
- $body_end[0][1]-($body_begin[0][1]+strlen($body_begin[0][0])) );
- }
-
- # nasty code, because (?!fubar) causes an ICE...
- preg_match('@<table[^<>]*>?@i',$text,$table_begin,PREG_OFFSET_CAPTURE);
- preg_match('@</table>@i',$text,$table_end,PREG_OFFSET_CAPTURE);
- if (isset($table_begin[0]) && isset($table_end[0])) {
- $table_contents_start = $table_begin[0][1] + strlen($table_begin[0][0]);
- $table_contents = substr($text, $table_contents_start,
- $table_end[0][1] - $table_contents_start);
- if(strpos($table_contents, 'boost.png') !== FALSE) {
- preg_match('@<td[^<>]*>?([^<]*<(h[12]|p).*?)</td>@is', $table_contents,
- $table_contents_header, PREG_OFFSET_CAPTURE);
- $text = (isset($table_contents_header[1]) ? $table_contents_header[1][0] : '').
- substr($text, $table_end[0][1] + 8);
- }
- }
- #else
- #{
- # $text = substr($text,$h1_begin[0][1]);
- #}
- #if (isset($title[1]))
- #{
- # $text = "<h1>${title[1]}</h1>\n" . $text;
- #}
- $text = preg_replace(
- '@(<a[^>]+>[\s]*)?<img.*boost\.png[^>]*>([\s]*</a>)?@i',
- '',
- $text );
- $text = preg_replace(
- '@<img(.*)align="?right"?[^>]*>@i',
- '<img${1} class="right-inset" />',
- $text );
- $text = preg_replace(
- '@<img(.*)align="?absmiddle"?[^>]*>@i',
- '<img${1} class="inline" />',
- $text );
- /* Remove certain attributes */
- $text = preg_replace(
- '@[\s]+(border|cellpadding|cellspacing|width|height|valign|align|frame|rules|naturalsizeflag|background)=("[^"]*"?|\'[^\']*\'?|[^\s/>]+)@i',
- '',
- $text );
- $text = preg_replace(
- '@<table[\s]+(border)[^\s>]*@i',
- '<table',
- $text );
- $text = preg_replace(
- '@<[/]?(font|hr)[^>]*>@i',
- '',
- $text );
- $text = preg_replace(
- '@<([^\s]+)[\s]+>@i',
- '<${1}>',
- $text );
- $text = _preg_replace_bounds(
- '@<blockquote>[\s]*(<pre>)@i','@(</pre>)[\s]*</blockquote>@i',
- '${1}','${1}',
- $text );
- $text = _preg_replace_bounds(
- '@<blockquote>[\s]*(<p>)@i','@(</p>)[\s]*</blockquote>@i',
- '${1}','${1}',
- $text );
- $text = _preg_replace_bounds(
- '@<blockquote>[\s]*(<table>)@i','@(</table>)[\s]*</blockquote>@i',
- '${1}','${1}',
- $text );
- $text = _preg_replace_bounds(
- '@<blockquote>[\s]*<li>@i','@</li>[\s]*</blockquote>@i',
- '<ul><li>','</li></ul>',
- $text );
- $text = _preg_replace_bounds(
- '@(?:<blockquote>[\s]*)+<h2>@i','@</h2>(?:[\s]*</blockquote>)+@i',
- '<h2>','</h2>',
- $text );
- $text = preg_replace(
- '@(<a name=[^\s>]+[\s]*>)[\s]*(</?[^a])@i',
- '${1}</a>${2}',
- $text );
- $text = preg_replace(
- '@<table>([\s]+<tr>[\s]+<td>.*_arr.*</td>[\s]+<td>.*</td>[\s]+<td>.*</td>[\s]+</tr>[\s]+)</table>@i',
- '<table class="pyste-nav">${1}</table>',
- $text );
- $text = preg_replace(
- '@<table>([\s]+<tr>[\s]+<td)[\s]+class="note_box">@i',
- '<table class="note_box">${1}>',
- $text );
- $text = preg_replace(
- '@<table>([\s]+<tr>[\s]+<td[\s]+class="table_title">)@i',
- '<table class="toc">${1}',
- $text );
- $text = preg_replace(
- '@src=".*theme/u_arr\.gif"@i',
- 'src="/gfx/space.png" class="up_image"',
- $text );
- $text = preg_replace(
- '@src=".*theme/l_arr\.gif"@i',
- 'src="/gfx/space.png" class="prev_image"',
- $text );
- $text = preg_replace(
- '@src=".*theme/r_arr\.gif"@i',
- 'src="/gfx/space.png" class="next_image"',
- $text );
- $text = preg_replace(
- '@src=".*theme/u_arr_disabled\.gif"@i',
- 'src="/gfx/space.png" class="up_image_disabled"',
- $text );
- $text = preg_replace(
- '@src=".*theme/l_arr_disabled\.gif"@i',
- 'src="/gfx/space.png" class="prev_image_disabled"',
- $text );
- $text = preg_replace(
- '@src=".*theme/r_arr_disabled\.gif"@i',
- 'src="/gfx/space.png" class="next_image_disabled"',
- $text );
- $text = preg_replace(
- '@src=".*theme/note\.gif"@i',
- 'src="/gfx/space.png" class="note_image"',
- $text );
- $text = preg_replace(
- '@src=".*theme/alert\.gif"@i',
- 'src="/gfx/space.png" class="caution_image"',
- $text );
- $text = preg_replace(
- '@src=".*theme/bulb\.gif"@i',
- 'src="/gfx/space.png" class="tip_image"',
- $text );
- $text = preg_replace(
- '@<img src=".*theme/(?:bullet|lens)\.gif">@i',
- '',
- $text );
- $text = preg_replace(
- '@(<img src=".*theme/(?:arrow)\.gif")>@i',
- '${1} class="inline">',
- $text );
-
+ $text = extract_html_body($archive->content_);
+ $text = prepare_html($text);
+ $text = remove_html_banner($text);
+ $text = prepare_themed_html($text);
         print $text;
     }
 
@@ -604,6 +441,43 @@
     }
 }
 
+function extract_html_body($text) {
+ preg_match('@<body[^>]*>@i',$text,$body_begin,PREG_OFFSET_CAPTURE);
+ preg_match('@</body>@i',$text,$body_end,PREG_OFFSET_CAPTURE);
+ if (!isset($body_begin[0]))
+ {
+ //~ Attempt to recover some content from illegal HTML that is missing the body tag.
+ preg_match('@</head>@i',$text,$body_begin,PREG_OFFSET_CAPTURE);
+ }
+ if (!isset($body_begin[0]))
+ {
+ //~ Attempt to recover some content from illegal HTML that is missing the body tag.
+ preg_match('@<html[^>]*>@i',$text,$body_begin,PREG_OFFSET_CAPTURE);
+ }
+ if (!isset($body_begin[0]))
+ {
+ //~ Attempt to recover some content from illegal HTML that is missing the body tag.
+ preg_match('@<(hr|div|img|p|h1|h2|h3|h4)[^>]*>@i',$text,$body_begin,PREG_OFFSET_CAPTURE);
+ }
+ if (!isset($body_begin[0]))
+ {
+ return;
+ }
+ else if (!isset($body_end[0]))
+ {
+ $text = substr($text,
+ $body_begin[0][1]+strlen($body_begin[0][0]));
+ }
+ else
+ {
+ $text = substr($text,
+ $body_begin[0][1]+strlen($body_begin[0][0]),
+ $body_end[0][1]-($body_begin[0][1]+strlen($body_begin[0][0])) );
+ }
+
+ return $text;
+}
+
 function prepare_html($text) {
     $text = preg_replace(
         '@href="?http://www.boost.org/?([^"\s]*)"?@i',
@@ -629,6 +503,137 @@
     return $text;
 }
 
+function remove_html_banner($text) {
+ # nasty code, because (?!fubar) causes an ICE...
+ preg_match('@<table[^<>]*>?@i',$text,$table_begin,PREG_OFFSET_CAPTURE);
+ preg_match('@</table>@i',$text,$table_end,PREG_OFFSET_CAPTURE);
+ if (isset($table_begin[0]) && isset($table_end[0])) {
+ $table_contents_start = $table_begin[0][1] + strlen($table_begin[0][0]);
+ $table_contents = substr($text, $table_contents_start,
+ $table_end[0][1] - $table_contents_start);
+ if(strpos($table_contents, 'boost.png') !== FALSE) {
+ preg_match('@<td[^<>]*>?([^<]*<(h[12]|p).*?)</td>@is', $table_contents,
+ $table_contents_header, PREG_OFFSET_CAPTURE);
+ $text = (isset($table_contents_header[1]) ? $table_contents_header[1][0] : '').
+ substr($text, $table_end[0][1] + 8);
+ }
+ }
+ return $text;
+}
+
+function prepare_themed_html($text) {
+ $text = preg_replace(
+ '@(<a[^>]+>[\s]*)?<img.*boost\.png[^>]*>([\s]*</a>)?@i',
+ '',
+ $text );
+ $text = preg_replace(
+ '@<img(.*)align="?right"?[^>]*>@i',
+ '<img${1} class="right-inset" />',
+ $text );
+ $text = preg_replace(
+ '@<img(.*)align="?absmiddle"?[^>]*>@i',
+ '<img${1} class="inline" />',
+ $text );
+ /* Remove certain attributes */
+ $text = preg_replace(
+ '@[\s]+(border|cellpadding|cellspacing|width|height|valign|align|frame|rules|naturalsizeflag|background)=("[^"]*"?|\'[^\']*\'?|[^\s/>]+)@i',
+ '',
+ $text );
+ $text = preg_replace(
+ '@<table[\s]+(border)[^\s>]*@i',
+ '<table',
+ $text );
+ $text = preg_replace(
+ '@<[/]?(font|hr)[^>]*>@i',
+ '',
+ $text );
+ $text = preg_replace(
+ '@<([^\s]+)[\s]+>@i',
+ '<${1}>',
+ $text );
+ $text = _preg_replace_bounds(
+ '@<blockquote>[\s]*(<pre>)@i','@(</pre>)[\s]*</blockquote>@i',
+ '${1}','${1}',
+ $text );
+ $text = _preg_replace_bounds(
+ '@<blockquote>[\s]*(<p>)@i','@(</p>)[\s]*</blockquote>@i',
+ '${1}','${1}',
+ $text );
+ $text = _preg_replace_bounds(
+ '@<blockquote>[\s]*(<table>)@i','@(</table>)[\s]*</blockquote>@i',
+ '${1}','${1}',
+ $text );
+ $text = _preg_replace_bounds(
+ '@<blockquote>[\s]*<li>@i','@</li>[\s]*</blockquote>@i',
+ '<ul><li>','</li></ul>',
+ $text );
+ $text = _preg_replace_bounds(
+ '@(?:<blockquote>[\s]*)+<h2>@i','@</h2>(?:[\s]*</blockquote>)+@i',
+ '<h2>','</h2>',
+ $text );
+ $text = preg_replace(
+ '@(<a name=[^\s>]+[\s]*>)[\s]*(</?[^a])@i',
+ '${1}</a>${2}',
+ $text );
+ $text = preg_replace(
+ '@<table>([\s]+<tr>[\s]+<td>.*_arr.*</td>[\s]+<td>.*</td>[\s]+<td>.*</td>[\s]+</tr>[\s]+)</table>@i',
+ '<table class="pyste-nav">${1}</table>',
+ $text );
+ $text = preg_replace(
+ '@<table>([\s]+<tr>[\s]+<td)[\s]+class="note_box">@i',
+ '<table class="note_box">${1}>',
+ $text );
+ $text = preg_replace(
+ '@<table>([\s]+<tr>[\s]+<td[\s]+class="table_title">)@i',
+ '<table class="toc">${1}',
+ $text );
+ $text = preg_replace(
+ '@src=".*theme/u_arr\.gif"@i',
+ 'src="/gfx/space.png" class="up_image"',
+ $text );
+ $text = preg_replace(
+ '@src=".*theme/l_arr\.gif"@i',
+ 'src="/gfx/space.png" class="prev_image"',
+ $text );
+ $text = preg_replace(
+ '@src=".*theme/r_arr\.gif"@i',
+ 'src="/gfx/space.png" class="next_image"',
+ $text );
+ $text = preg_replace(
+ '@src=".*theme/u_arr_disabled\.gif"@i',
+ 'src="/gfx/space.png" class="up_image_disabled"',
+ $text );
+ $text = preg_replace(
+ '@src=".*theme/l_arr_disabled\.gif"@i',
+ 'src="/gfx/space.png" class="prev_image_disabled"',
+ $text );
+ $text = preg_replace(
+ '@src=".*theme/r_arr_disabled\.gif"@i',
+ 'src="/gfx/space.png" class="next_image_disabled"',
+ $text );
+ $text = preg_replace(
+ '@src=".*theme/note\.gif"@i',
+ 'src="/gfx/space.png" class="note_image"',
+ $text );
+ $text = preg_replace(
+ '@src=".*theme/alert\.gif"@i',
+ 'src="/gfx/space.png" class="caution_image"',
+ $text );
+ $text = preg_replace(
+ '@src=".*theme/bulb\.gif"@i',
+ 'src="/gfx/space.png" class="tip_image"',
+ $text );
+ $text = preg_replace(
+ '@<img src=".*theme/(?:bullet|lens)\.gif">@i',
+ '',
+ $text );
+ $text = preg_replace(
+ '@(<img src=".*theme/(?:arrow)\.gif")>@i',
+ '${1} class="inline">',
+ $text );
+ return $text;
+}
+
 // Return a readable error message for unzip exit state.
 
 function unzip_error($exit_status) {


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk