You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
444 lines
11 KiB
444 lines
11 KiB
<?php // $Id$ |
|
|
|
// vim: expandtab sw=4 ts=4 sts=4: |
|
|
|
# ***** BEGIN LICENSE BLOCK ***** |
|
# This file is part of HTML Sanitizer. |
|
# Copyright (c) 2005-2011 Frederic Minne <zefredz@gmail.com>. |
|
# All rights reserved. |
|
# |
|
# HTML Sanitizer is free software; you can redistribute it and/or modify |
|
# it under the terms of the GNU Lesser General Public License as published by |
|
# the Free Software Foundation; either version 3 of the License, or |
|
# (at your option) any later version. |
|
# |
|
# HTML Sanitizer is distributed in the hope that it will be useful, |
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
# GNU General Public License for more details. |
|
# |
|
# You should have received a copy of the GNU Lesser General Public License |
|
# along with HTML Sanitizer; if not, see <http://www.gnu.org/licenses/>. |
|
# |
|
# ***** END LICENSE BLOCK ***** |
|
|
|
/**
 * Sanitize HTML contents :
 * Remove dangerous tags and attributes that can lead to security issues like
 * XSS or HTTP response splitting
 *
 * How it works (see sanitize()):
 *  1. tags that are not white-listed are removed with strip_tags()
 *  2. unless allowed, DOM event attributes (on*) and style attributes found
 *     inside the remaining tags are renamed to "forbidden"
 *  3. the values of href and src attributes are filtered as URLs
 *
 * NOTE: the filtering is based on regular expressions, not on an HTML parser.
 * Only href and src are treated as URL attributes, and HTML entities inside
 * URLs are not decoded before the javascript: check.
 *
 * @author Frederic Minne <zefredz@gmail.com>
 * @copyright Copyright © 2005-2011, Frederic Minne
 * @license http://www.gnu.org/licenses/lgpl.txt GNU Lesser General Public License version 3 or later
 * @version 1.1
 */
class HTML_Sanitizer
{
    // Private fields

    /** @var string white-list of tags in strip_tags() format */
    private $_allowedTags;

    /** @var bool keep on* attributes when true */
    private $_allowDOMEvents;

    /** @var bool keep javascript: urls when true */
    private $_allowJavascriptInUrls;

    /** @var bool allow object, embed, applet and param tags when true */
    private $_allowObjects;

    /** @var bool allow script tags when true */
    private $_allowScript;

    /** @var bool allow style tags and attributes when true */
    private $_allowStyle;

    /** @var string additional white-listed tags in strip_tags() format */
    private $_additionalTags;

    /**
     * Constructor : starts with every option set to its default value
     */
    public function __construct()
    {
        $this->resetAll();
    }

    /**
     * (re)set all options to default value : every "allow" flag is turned
     * off, the tag white-list is restored and the additional tags are cleared
     * @access public
     */
    public function resetAll()
    {
        $this->_allowDOMEvents = false;
        $this->_allowJavascriptInUrls = false;
        $this->_allowStyle = false;
        $this->_allowScript = false;
        $this->_allowObjects = false;

        $this->_allowedTags = '<a><br><b><h1><h2><h3><h4><h5><h6>'
            . '<img><li><ol><p><strong><table><tr><td><th><u><ul><thead>'
            . '<tbody><tfoot><em><dd><dt><dl><span><div><del><add><i><hr>'
            . '<pre><br><blockquote><address><code><caption><abbr><acronym>'
            . '<cite><dfn><q><ins><sup><sub><kbd><samp><var><tt><small><big>'
            ;

        $this->_additionalTags = '';
    }

    /**
     * Add additional tags to allowed tags
     * @access public
     * @param string tags in strip_tags() format, for example '<iframe><video>'
     */
    public function addAdditionalTags( $tags )
    {
        $this->_additionalTags .= $tags;
    }

    /**
     * Allow iframes
     * @access public
     */
    public function allowIframes()
    {
        $this->addAdditionalTags( '<iframe>' );
    }

    /**
     * Allow HTML5 media tags (canvas, video and audio)
     * @access public
     */
    public function allowHtml5Media()
    {
        $this->addAdditionalTags( '<canvas><video><audio>' );
    }

    /**
     * Allow object, embed, applet and param tags in html
     * @access public
     */
    public function allowObjects()
    {
        $this->_allowObjects = true;
    }

    /**
     * Allow DOM event on DOM elements
     * @access public
     */
    public function allowDOMEvents()
    {
        $this->_allowDOMEvents = true;
    }

    /**
     * Allow script tags
     * @access public
     */
    public function allowScript()
    {
        $this->_allowScript = true;
    }

    /**
     * Allow the use of javascript: in urls
     * @access public
     */
    public function allowJavascriptInUrls()
    {
        $this->_allowJavascriptInUrls = true;
    }

    /**
     * Allow style tags and attributes
     * @access public
     */
    public function allowStyle()
    {
        $this->_allowStyle = true;
    }

    /**
     * Helper to allow all javascript related tags and attributes
     * @access public
     */
    public function allowAllJavascript()
    {
        $this->allowDOMEvents();
        $this->allowScript();
        $this->allowJavascriptInUrls();
    }

    /**
     * Allow all tags and attributes
     * @access public
     */
    public function allowAll()
    {
        $this->allowAllJavascript();
        $this->allowObjects();
        $this->allowStyle();
        $this->allowIframes();
        $this->allowHtml5Media();
    }

    /**
     * Filter URLs to avoid HTTP response splitting attacks : removes raw
     * CR and LF characters and their url-encoded forms (%0d, %0a)
     * @access public
     * @param string url
     * @return string filtered url
     */
    public function filterHTTPResponseSplitting( $url )
    {
        $dangerousCharactersPattern = '~\r|\n|%0a|%0d~i';

        // Repeat until nothing changes : removing one sequence can splice a
        // new one together ("%0%0aa" would otherwise leave "%0a" behind).
        do
        {
            $previous = $url;
            $url = preg_replace( $dangerousCharactersPattern, '', $previous );
        }
        while ( $url !== null && $url !== $previous );

        return $url;
    }

    /**
     * Remove potential javascript in urls : "javascript:" and everything
     * following it up to the next double quote is replaced by __forbidden__
     * @access public
     * @param string url
     * @return string filtered url
     */
    public function removeJavascriptURL( $str )
    {
        // Browsers ignore tabs and line breaks inside a URL, so whitespace and
        // control characters are tolerated between the letters of the scheme.
        $gap = '[\x00-\x20]*';
        $scheme = implode( $gap, str_split( 'javascript' ) ) . $gap . ':';

        return preg_replace( '/' . $scheme . '[^"]+/i', '__forbidden__', $str );
    }

    /**
     * Remove potential flaws in urls
     * @access private
     * @param string url
     * @return string filtered url
     */
    private function sanitizeURL( $url )
    {
        // Line break sequences are removed first : doing it after the
        // javascript check would let "java%0ascript:" be reassembled into
        // "javascript:" once the check is over.
        $url = $this->filterHTTPResponseSplitting( $url );

        if ( ! $this->_allowJavascriptInUrls )
        {
            $url = $this->removeJavascriptURL( $url );
        }

        return $url;
    }

    /**
     * Build the PCRE pattern matching an attribute and its value.
     * Capture 1 is a double-quoted value, capture 2 a single-quoted value
     * and capture 3 an unquoted value.
     * @access private
     * @param string attribute name (must be regexp safe)
     * @return string PCRE pattern
     */
    private function _urlAttributePattern( $name )
    {
        return '/' . $name . '\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s"\'<>]+))/i';
    }

    /**
     * Rebuild an attribute matched by _urlAttributePattern() with a
     * sanitized value, keeping the quoting style of the original
     * @access private
     * @param string attribute name
     * @param matches array
     * @return string
     * @see sanitizeURL
     */
    private function _rebuildUrlAttribute( $name, $matches )
    {
        // PCRE drops trailing unmatched groups, so the highest index that is
        // set tells which alternative matched.
        if ( isset( $matches[3] ) )
        {
            // An unquoted value stays unquoted so that no quote is injected
            // when the match lies inside another quoted attribute value.
            $quote = '';
            $value = $matches[3];
        }
        elseif ( isset( $matches[2] ) )
        {
            $quote = "'";
            $value = $matches[2];
        }
        else
        {
            $quote = '"';
            $value = $matches[1];
        }

        return $name . '=' . $quote . $this->sanitizeURL( $value ) . $quote;
    }

    /**
     * Callback for PCRE
     * @access private
     * @param matches array
     * @return string
     * @see sanitizeURL
     */
    private function _sanitizeURLCallback( $matches )
    {
        return $this->_rebuildUrlAttribute( 'href', $matches );
    }

    /**
     * Remove potential flaws in href attributes, whether the value is
     * double-quoted, single-quoted or unquoted
     * @access private
     * @param string html tag
     * @return string filtered html tag
     */
    private function sanitizeHref( $str )
    {
        return preg_replace_callback( $this->_urlAttributePattern( 'href' )
            , array( $this, '_sanitizeURLCallback' )
            , $str );
    }

    /**
     * Callback for PCRE
     * @access private
     * @param matches array
     * @return string
     * @see sanitizeURL
     */
    private function _sanitizeSrcCallback( $matches )
    {
        return $this->_rebuildUrlAttribute( 'src', $matches );
    }

    /**
     * Remove potential flaws in src attributes, whether the value is
     * double-quoted, single-quoted or unquoted
     * @access private
     * @param string html tag
     * @return string filtered html tag
     */
    private function sanitizeSrc( $str )
    {
        return preg_replace_callback( $this->_urlAttributePattern( 'src' )
            , array( $this, '_sanitizeSrcCallback' )
            , $str );
    }

    /**
     * Remove dangerous attributes from html tags
     * @access private
     * @param string html code
     * @return string filtered html code
     */
    private function removeEvilAttributes( $str )
    {
        // "s" modifier : a tag can span several lines, without it the
        // attributes of such a tag would never be filtered.
        $tagPattern = '/<(.*?)>/s';

        if ( ! $this->_allowDOMEvents )
        {
            $str = preg_replace_callback( $tagPattern
                , array( $this, '_removeDOMEventsCallback' )
                , $str );
        }

        if ( ! $this->_allowStyle )
        {
            $str = preg_replace_callback( $tagPattern
                , array( $this, '_removeStyleCallback' )
                , $str );
        }

        return $str;
    }

    /**
     * Remove DOM events attributes from html tags : any attribute whose
     * name is "on" followed by letters is renamed to "forbidden"
     * @access private
     * @param string html tag (without the surrounding < and >)
     * @return string filtered html tag
     */
    private function removeDOMEvents( $str )
    {
        // A name is only matched where an attribute can start (beginning of
        // the string or after whitespace, "/" or a quote) and when it is
        // followed by "=". This covers every on* handler instead of a fixed
        // list and leaves values such as href="onload.html" untouched.
        // The result is deliberately NOT passed through stripslashes() :
        // doing so would turn the harmless "on\error=" into "onerror=".
        return preg_replace( '/(?<![^\s\/"\'])on[a-z]+(?=\s*=)/i'
            , 'forbidden'
            , $str );
    }

    /**
     * Callback for PCRE
     * @access private
     * @param matches array
     * @return string
     * @see removeDOMEvents
     */
    private function _removeDOMEventsCallback( $matches )
    {
        return '<' . $this->removeDOMEvents( $matches[1] ) . '>';
    }

    /**
     * Remove style attributes from html tags : the attribute is renamed
     * to "forbidden"
     * @access private
     * @param string html tag (without the surrounding < and >)
     * @return string filtered html tag
     */
    private function removeStyle( $str )
    {
        // Same matching rules as removeDOMEvents() : only an attribute named
        // style is renamed, class="lifestyle" is left alone.
        return preg_replace( '/(?<![^\s\/"\'])style(?=\s*=)/i'
            , 'forbidden'
            , $str );
    }

    /**
     * Callback for PCRE
     * @access private
     * @param matches array
     * @return string
     * @see removeStyle
     */
    private function _removeStyleCallback( $matches )
    {
        return '<' . $this->removeStyle( $matches[1] ) . '>';
    }

    /**
     * Remove dangerous HTML tags : every tag that is neither in the default
     * white-list, nor enabled by an option, nor added through
     * addAdditionalTags() is removed by strip_tags()
     * @access private
     * @param string html code
     * @return string filtered html code
     */
    private function removeEvilTags( $str )
    {
        $allowedTags = $this->_allowedTags;

        if ( $this->_allowScript )
        {
            $allowedTags .= '<script>';
        }

        if ( $this->_allowStyle )
        {
            $allowedTags .= '<style>';
        }

        if ( $this->_allowObjects )
        {
            $allowedTags .= '<object><embed><applet><param>';
        }

        $allowedTags .= $this->_additionalTags;

        return strip_tags( $str, $allowedTags );
    }

    /**
     * Sanitize HTML
     * remove dangerous tags and attributes
     * clean urls
     * @access public
     * @param string html code
     * @return string sanitized html code
     */
    public function sanitize( $html )
    {
        $html = $this->removeEvilTags( $html );

        $html = $this->removeEvilAttributes( $html );

        $html = $this->sanitizeHref( $html );

        $html = $this->sanitizeSrc( $html );

        return $html;
    }
}
|
|
|
/**
 * Sanitize html code with the default (strict) settings.
 * The HTML_Sanitizer instance is created on the first call and reused by
 * the following ones.
 * @param string html code
 * @return string sanitized html code
 */
function html_sanitize( $str )
{
    static $sanitizer = null;

    if ( ! $sanitizer )
    {
        $sanitizer = new HTML_Sanitizer();
    }

    $clean = $sanitizer->sanitize( $str );

    return $clean;
}
|
|
|
/**
 * Sanitize html code with every option of the sanitizer allowed
 * (allowAll() is called once, when the shared instance is created).
 * The HTML_Sanitizer instance is reused by the following calls.
 * @param string html code
 * @return string sanitized html code
 */
function html_loose_sanitize( $str )
{
    static $looseSanitizer = null;

    if ( ! $looseSanitizer )
    {
        $instance = new HTML_Sanitizer();
        $instance->allowAll();

        $looseSanitizer = $instance;
    }

    $clean = $looseSanitizer->sanitize( $str );

    return $clean;
}
|
|
|
|