Source for file _xhtml_validator.class.php
Documentation is available at _xhtml_validator.class.php
* This file implements the XHTML_Validator class.
* Checks HTML against a subset of elements to ensure safety and XHTML validation.
* This file is part of the b2evolution/evocms project - {@link http://b2evolution.net/}.
* See also {@link http://sourceforge.net/projects/evocms/}.
* @copyright (c)2003-2008 by Francois PLANQUE - {@link http://fplanque.net/}.
* Parts of this file are copyright (c)2003 by Nobuo SAKIYAMA - {@link http://www.sakichan.org/}
* Parts of this file are copyright (c)2004-2005 by Daniel HAHLER - {@link http://thequod.de/contact}.
* @license http://b2evolution.net/about/license.html GNU General Public License (GPL)
* {@internal Open Source relicensing agreement:
* Daniel HAHLER grants Francois PLANQUE the right to license
* Daniel HAHLER's contributions to this file and the b2evolution project
* under any OSI approved OSS license (http://www.opensource.org/licenses/).
* This file was inspired by Simon Willison's SafeHtmlChecker released in
* the public domain on 23rd Feb 2003.
* {@link http://simon.incutio.com/code/php/SafeHtmlChecker.class.php.txt}
* {@internal Below is a list of authors who have contributed to design/coding of this file: }}
* @author blueyed: Daniel HAHLER.
* @author fplanque: Francois PLANQUE.
* @author sakichan: Nobuo SAKIYAMA.
* @author Simon Willison.
* @version $Id: _xhtml_validator.class.php,v 1.9 2008/01/21 09:35:37 fplanque Exp $
if( !defined('EVO_MAIN_INIT') ) die( 'Please, do not access this page directly.' );
* XHTML_Validator: checks HTML against a subset of allowed elements and attributes to ensure safety and XHTML validity.
var $tags; // Array showing allowed attributes for tags
var $tagattrs; // Array showing URI attributes
* {@internal This gets tested in _libs.misc.simpletest.php}}
* @param string $encoding Input encoding to use ('ISO-8859-1', 'UTF-8', 'US-ASCII' or '' for auto-detect); this is the sixth constructor argument and defaults to NULL
function XHTML_Validator( $context =
'posting', $allow_css_tweaks =
false, $allow_iframes =
false, $allow_javascript =
false, $allow_objects =
false, $encoding =
NULL, $msg_type =
'error' )
require
$inc_path.
'xhtml_validator/_xhtml_dtd.inc.php';
$this->context =
$context;
$this->tags =
& $allowed_tags;
$this->tagattrs =
& $allowed_attributes;
$this->tags =
& $comments_allowed_tags;
$this->tagattrs =
& $comments_allowed_attributes;
// Attributes that need to be checked for a valid URI:
$this->msg_type =
$msg_type;
$encoding =
strtoupper($encoding); // we might get 'iso-8859-1' for example
$this->encoding =
$encoding;
if( ! in_array( $encoding, array( 'ISO-8859-1', 'UTF-8', 'US-ASCII' ) ) )
{ // passed encoding not supported by xml_parser_create()
$this->xml_parser_encoding =
''; // auto-detect (in PHP4, in PHP5 anyway)
$this->xml_parser_encoding =
$this->encoding;
// set functions to call when a start or end tag is encountered
// set function to call for the actual data
// TODO: use convert_encoding()
if( empty($this->xml_parser_encoding) ||
$this->encoding !=
$this->xml_parser_encoding )
{ // we need to convert encoding:
{ // we can convert encoding to UTF-8
$this->encoding =
'UTF-8';
elseif( ($this->encoding ==
'ISO-8859-1' ||
empty($this->encoding)) &&
function_exists('utf8_encode') )
$this->encoding =
'UTF-8';
// Open comments or '<![CDATA[' are dangerous
// Convert isolated & chars (an ampersand with whitespace on both sides) into
// the &amp; entity. A bare "&" is not well-formed XML, so it has to be escaped
// before the text is wrapped in the XML document that gets parsed.
// The replacement must emit "&amp;": re-emitting a plain "&" would leave the input unchanged.
$xhtml =
preg_replace( '#(\s)&(\s)#', '\\1&amp;\\2', $xhtml );
$xhtml_head =
'<?xml version="1.0"';
if( ! empty($this->encoding) )
$xhtml_head .=
' encoding="'.
$this->encoding.
'"';
$xhtml_head .=
'?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"';
// Include latin1 entities (http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent):
// Include symbol entities (http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent):
// Include special entities (http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent):
$xhtml =
$xhtml_head.
'<body>'.
$xhtml.
'</body>';
switch( $xml_error_code )
case XML_ERROR_TAG_MISMATCH:
$xml_error_string .=
': <code>'.
$this->stack[count($this->stack)-
1].
'</code>';
$this->html_error( T_('Parser error: ').
$xml_error_string );
* Called when the parser finds an opening tag
function tag_open($parser, $tag, $attrs)
// echo "processing tag: $tag <br />\n";
$this->html_error( T_('Tag <code>body</code> can only be used once!') );
// If previous tag is illegal, no point in running tests
$this->html_error( T_('Illegal tag').
": <code>$tag</code>" );
// Is tag allowed in the current context?
if ($previous ==
'body') {
$this->html_error( sprintf( T_('Tag <%s> must occur inside another tag'), '<code>'.
$tag.
'</code>' ) );
$this->html_error( sprintf( T_('Tag <%s> is not allowed within tag <%s>'), '<code>'.
$tag.
'</code>', '<code>'.
$previous.
'</code>') );
// Are tag attributes valid?
foreach( $attrs as $attr =>
$value )
$this->html_error( sprintf( T_('Tag <%s> may not have attribute %s="..."'), '<code>'.
$tag.
'</code>', '<code>'.
$attr.
'</code>' ) );
{ // This attribute must be checked for URIs
if( $error =
validate_url( $value, $this->context, false ) ) //Note: We do not check for spam here, should be done on whole message in check_html_sanity()
$this->html_error( T_('Found invalid URL: ').
$error );
// Set previous, used for checking nesting context rules
function cdata($parser, $cdata)
// Simply check that the 'previous' tag allows CDATA
// If previous tag is illegal, no point in running test
if (trim($cdata) !=
'') {
$this->html_error( sprintf( T_('Tag <%s> may not contain raw character data'), '<code>'.
$previous.
'</code>' ) );
// Move back one up the stack
$Messages->add( $string, $this->msg_type );
* $Log: _xhtml_validator.class.php,v $
* Revision 1.9 2008/01/21 09:35:37 fplanque
* Revision 1.8 2008/01/20 18:20:27 fplanque
* Antispam per group setting
* Revision 1.7 2008/01/20 15:31:12 fplanque
* configurable validation/security rules
* Revision 1.6 2008/01/19 18:24:25 fplanque
* antispam checking refactored
* Revision 1.5 2008/01/19 15:45:28 fplanque
* Revision 1.4 2008/01/19 10:57:11 fplanque
* Splitting XHTML checking by group and interface
* Revision 1.3 2008/01/18 15:53:42 fplanque
* Revision 1.2 2007/09/13 02:37:22 fplanque
* Revision 1.1 2007/06/25 11:02:27 fplanque
* MODULES (refactored MVC)
* Revision 1.13 2007/04/26 00:11:07 fplanque
* Revision 1.12 2006/11/27 02:29:53 blueyed
* Committed test changes by accident. Test added for it as an exercise.
* Revision 1.11 2006/11/26 02:30:39 fplanque
* Revision 1.10 2006/11/06 22:56:53 blueyed
* Added full(?) XHTML entities support to the html checker
* Revision 1.9 2006/11/04 21:44:59 blueyed
* Include latin1 entities to let xml_parse() not choke on those