Source for file _class_htmlchecker.php
Documentation is available at _class_htmlchecker.php
* checks HTML against a subset of elements to ensure safety and XHTML validation.
* b2evolution - {@link http://b2evolution.net/}
* Released under GNU GPL License - {@link http://b2evolution.net/about/license.html}
* @copyright (c)2003-2005 by Francois PLANQUE - {@link http://fplanque.net/}
* @author Simon Willison, 23rd Feb 2003, modified by fplanque, sakichan
if( !defined('DB_USER') ) die( 'Please, do not access this page directly.' );
* checks HTML against a subset of elements to ensure safety and XHTML validation.
var $tags; // Array showing allowed attributes for tags
var $tagattrs; // Array showing URI attributes
* {@internal This gets tested in _libs.misc.simpletest.php}}
* @param string Input encoding to use ('ISO-8859-1', 'UTF-8', 'US-ASCII' or '' for auto-detect)
function SafeHtmlChecker( & $allowed_tags, & $allowed_attributes, & $uri_attrs, & $allowed_uri_scheme, $encoding =
'' )
// Constructor: remembers the whitelist arrays and works out which encoding
// to hand to the XML parser.
// NOTE(review): $uri_attrs and $allowed_uri_scheme are not used in the lines
// visible in this extract -- presumably they are stored further down; confirm.

// Keep references (not copies) to the caller's allowed-tag and allowed-attribute arrays.
$this->tags =
& $allowed_tags;
$this->tagattrs =
& $allowed_attributes;
// Normalise the encoding name to upper case so it can be compared with the list below.
$encoding =
strtoupper($encoding); // we might get 'iso-8859-1' for example
$this->encoding =
$encoding;
// Only these three encoding names are accepted as-is for the XML parser.
if( ! in_array( $encoding, array( 'ISO-8859-1', 'UTF-8', 'US-ASCII' ) ) )
{ // passed encoding not supported by xml_parser_create()
$this->xml_parser_encoding =
''; // auto-detect (in PHP4, in PHP5 anyway)
// NOTE(review): the branch separating the previous assignment from the next one
// is not visible in this extract; presumably the following is the "supported
// encoding" (else) case -- confirm against the full file.
$this->xml_parser_encoding =
$this->encoding;
// set functions to call when a start or end tag is encountered
// set function to call for the actual data
// echo 'default handler: '.$data.'<br />';
function external_entity( $parser, $open_entity_names, $base, $system_id, $public_id)
// echo 'external_entity<br />';
function unparsed_entity( $parser, $entity_name, $base, $system_id, $public_id, $notation_name)
// echo 'unparsed_entity<br />';
// Decide whether the input has to be transcoded before parsing: either no
// parser encoding was selected (empty, i.e. auto-detect) or the declared
// encoding differs from the one stored for the XML parser.
if( empty($this->xml_parser_encoding) ||
$this->encoding !=
$this->xml_parser_encoding )
{ // we need to convert encoding:
// NOTE(review): the condition guarding the next block is not visible in this extract.
{ // we can convert encoding to UTF-8
$this->encoding =
'UTF-8';
// No source encoding is passed, so mb_convert_encoding() falls back to its
// default source encoding -- TODO confirm that this matches the real encoding
// of the input.
$xhtml =
mb_convert_encoding( $xhtml, 'UTF-8' );
// Open comments or '<![CDATA[' are dangerous
// Convert isolated & chars (an ampersand with whitespace on both sides) into
// the &amp; entity, so that the XML parser does not treat them as the start of
// a malformed entity reference.
// The surrounding whitespace is matched with lookarounds rather than captured:
// it is left untouched, and ampersands that share a single whitespace char
// ("a & & b") are all converted instead of only every other one.
$xhtml = preg_replace( '#(?<=\s)&(?=\s)#', '&amp;', $xhtml );
// Build the XML prolog; the encoding pseudo-attribute is only emitted when
// an encoding is set on this checker.
$encoding_decl = '';
if( ! empty($this->encoding) )
{
	$encoding_decl = ' encoding="'.$this->encoding.'"';
}
$xhtml_head = '<?xml version="1.0"'.$encoding_decl
	.'?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">';

// Put the prolog in front of the fragment to check, wrapped in a <body> element.
$xhtml = $xhtml_head.'<body>'.$xhtml.'</body>';
// Refine the parser's error message for specific error codes before reporting it.
switch( $xml_error_code )
case XML_ERROR_TAG_MISMATCH:
// Append the last entry of $this->stack to the message
// (presumably the tag that is still open -- confirm).
$xml_error_string .=
': <code>'.
$this->stack[count($this->stack)-
1].
'</code>';
// Report the (possibly extended) message through html_error(), prefixed with
// the translated 'Parser error: ' label.
$this->html_error( T_('Parser error: ').
$xml_error_string );
* Called when the parser finds an opening tag
function tag_open($parser, $tag, $attrs)
// echo "processing tag: $tag <br />\n";
$this->html_error( T_('Tag <code>body</code> can only be used once!') );
// If previous tag is illegal, no point in running tests
$this->html_error( T_('Illegal tag').
": <code>$tag</code>" );
// Is tag allowed in the current context?
if ($previous ==
'body') {
$this->html_error( sprintf( T_('Tag %s must occur inside another tag'), '<code>'.
$tag.
'</code>' ) );
$this->html_error( sprintf( T_('Tag %s is not allowed within tag %s'), '<code>'.
$tag.
'</code>', '<code>'.
$previous.
'</code>') );
// Are tag attributes valid?
foreach( $attrs as $attr =>
$value )
$this->html_error( sprintf( T_('Tag %s may not have attribute %s'), '<code>'.
$tag.
'</code>', '<code>'.
$attr.
'</code>' ) );
{ // Must this attribute be checked for URIs
// Set previous, used for checking nesting context rules
function cdata($parser, $cdata)
// Simply check that the 'previous' tag allows CDATA
// If previous tag is illegal, no point in running test
if (trim($cdata) !=
'') {
$this->html_error( sprintf( T_('Tag %s may not contain raw character data'), '<code>'.
$previous.
'</code>' ) );
// Move back one up the stack