You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
444 lines
9.5 KiB
444 lines
9.5 KiB
<?php |
|
|
|
/** |
|
* @file |
|
* $Id$. |
|
*/ |
|
|
|
// vim: expandtab sw=4 ts=4 sts=4: |
|
// ***** BEGIN LICENSE BLOCK ***** |
|
// This file is part of HTML Sanitizer. |
|
// Copyright (c) 2005-2011 Frederic Minne <zefredz@gmail.com>. |
|
// All rights reserved. |
|
// |
|
// HTML Sanitizer is free software; you can redistribute it and/or modify |
|
// it under the terms of the GNU Lesser General Public License as published by |
|
// the Free Software Foundation; either version 3 of the License, or |
|
// (at your option) any later version. |
|
// |
|
// HTML Sanitizer is distributed in the hope that it will be useful, |
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
// GNU General Public License for more details. |
|
// |
|
// You should have received a copy of the GNU Lesser General Public License |
|
// along with HTML Sanitizer; if not, see <http://www.gnu.org/licenses/>. |
|
// |
|
// ***** END LICENSE BLOCK *****. |
|
/**
 * Sanitize HTML contents.
 *
 * Removes dangerous tags and attributes that can lead to security issues like
 * XSS or HTTP response splitting.
 *
 * NOTE(review): the filtering relies on strip_tags() and regular expressions,
 * not on an HTML parser. It does not decode HTML entities or strip tabs inside
 * URL schemes, only looks at "javascript:" (not "data:" or "vbscript:"), and
 * only inspects href/src attributes. Treat it as a best-effort filter.
 *
 * @author Frederic Minne <zefredz@gmail.com>
 * @copyright Copyright © 2005-2011, Frederic Minne
 * @license http://www.gnu.org/licenses/lgpl.txt GNU Lesser General Public License version 3 or later
 * @version 1.1
 */
class HTML_Sanitizer {

  /**
   * Tags that are always kept, in the format expected by strip_tags().
   *
   * @var string
   */
  private $_allowedTags;

  /**
   * Whether DOM event handler attributes (onclick, onload, ...) are kept.
   *
   * @var bool
   */
  private $_allowDOMEvents;

  /**
   * Whether "javascript:" is left untouched in href/src values.
   *
   * @var bool
   */
  private $_allowJavascriptInUrls;

  /**
   * Whether object, embed, applet and param tags are kept.
   *
   * @var bool
   */
  private $_allowObjects;

  /**
   * Whether script tags are kept.
   *
   * @var bool
   */
  private $_allowScript;

  /**
   * Whether style tags and style attributes are kept.
   *
   * @var bool
   */
  private $_allowStyle;

  /**
   * Extra allowed tags registered by the caller, in strip_tags() format.
   *
   * @var string
   */
  private $_additionalTags;

  /**
   * Constructor: starts from the default (strict) configuration.
   */
  public function __construct() {
    $this->resetAll();
  }

  /**
   * (Re)set all options to their default value.
   *
   * Every "allow" flag is switched off, the built-in tag whitelist is restored
   * and the list of additional tags is emptied.
   */
  public function resetAll() {
    $this->_allowDOMEvents = FALSE;
    $this->_allowJavascriptInUrls = FALSE;
    $this->_allowStyle = FALSE;
    $this->_allowScript = FALSE;
    $this->_allowObjects = FALSE;

    $this->_allowedTags = '<a><br><b><h1><h2><h3><h4><h5><h6>'
      . '<img><li><ol><p><strong><table><tr><td><th><u><ul><thead>'
      . '<tbody><tfoot><em><dd><dt><dl><span><div><del><add><i><hr>'
      . '<pre><br><blockquote><address><code><caption><abbr><acronym>'
      . '<cite><dfn><q><ins><sup><sub><kbd><samp><var><tt><small><big>';

    $this->_additionalTags = '';
  }

  /**
   * Add additional tags to allowed tags.
   *
   * @param string $tags
   *   Tags in strip_tags() format, e.g. '<iframe><video>'. They are appended
   *   to the tags registered by earlier calls.
   *
   * @access public
   */
  public function addAdditionalTags($tags) {
    $this->_additionalTags .= $tags;
  }

  /**
   * Allow iframes.
   *
   * @access public
   */
  public function allowIframes() {
    $this->addAdditionalTags('<iframe>');
  }

  /**
   * Allow HTML5 media tags (canvas, video, audio).
   *
   * @access public
   */
  public function allowHtml5Media() {
    $this->addAdditionalTags('<canvas><video><audio>');
  }

  /**
   * Allow object, embed, applet and param tags in html.
   *
   * @access public
   */
  public function allowObjects() {
    $this->_allowObjects = TRUE;
  }

  /**
   * Allow DOM event on DOM elements.
   *
   * @access public
   */
  public function allowDOMEvents() {
    $this->_allowDOMEvents = TRUE;
  }

  /**
   * Allow script tags.
   *
   * @access public
   */
  public function allowScript() {
    $this->_allowScript = TRUE;
  }

  /**
   * Allow the use of javascript: in urls.
   *
   * @access public
   */
  public function allowJavascriptInUrls() {
    $this->_allowJavascriptInUrls = TRUE;
  }

  /**
   * Allow style tags and attributes.
   *
   * @access public
   */
  public function allowStyle() {
    $this->_allowStyle = TRUE;
  }

  /**
   * Helper to allow all javascript related tags and attributes.
   *
   * @access public
   */
  public function allowAllJavascript() {
    $this->allowDOMEvents();
    $this->allowScript();
    $this->allowJavascriptInUrls();
  }

  /**
   * Allow all tags and attributes.
   *
   * @access public
   */
  public function allowAll() {
    $this->allowAllJavascript();
    $this->allowObjects();
    $this->allowStyle();
    $this->allowIframes();
    $this->allowHtml5Media();
  }

  /**
   * Filter URLs to avoid HTTP response splitting attacks.
   *
   * Removes raw CR/LF characters and their percent-encoded forms
   * (%0a / %0d, any case).
   *
   * @access public
   *
   * @param string $url
   *   The URL to filter.
   *
   * @return string
   *   The filtered URL.
   */
  public function filterHTTPResponseSplitting($url) {
    // Repeat until nothing changes: removing one sequence can splice a new
    // one together (e.g. "%%0a0a" becomes "%0a" after a single pass).
    do {
      $previous = $url;
      $url = preg_replace('~\r|\n|%0[ad]~i', '', $previous);
    } while ($url !== $previous);

    return $url;
  }

  /**
   * Remove potential javascript in urls.
   *
   * Everything from "javascript:" up to the next double quote (or the end of
   * the string) is replaced by the marker "__forbidden__".
   *
   * @access public
   *
   * @param string $str
   *   The URL to filter.
   *
   * @return string
   *   The filtered URL.
   */
  public function removeJavascriptURL($str) {
    return preg_replace('/javascript:[^"]+/i', '__forbidden__', $str);
  }

  /**
   * Remove potential flaws in urls.
   *
   * @access private
   *
   * @param string $url
   *   The URL to filter.
   *
   * @return string
   *   The filtered URL.
   */
  private function sanitizeURL($url) {
    // CR/LF must be stripped *before* looking for "javascript:"; otherwise
    // "java\nscript:" passes the check and is then glued back together.
    $url = $this->filterHTTPResponseSplitting($url);

    if (!$this->_allowJavascriptInUrls) {
      $url = $this->removeJavascriptURL($url);
    }

    return $url;
  }

  /**
   * Callback for PCRE: rebuilds one href/src attribute with a clean URL.
   *
   * @access private
   *
   * @param array $matches
   *   $matches[1] is the attribute name including "=" and any surrounding
   *   whitespace; $matches[2] is the raw value, quotes included.
   *
   * @return string
   *   The attribute with its original name, spacing and quoting, and the
   *   sanitized URL.
   *
   * @see sanitizeURL
   */
  private function _sanitizeUrlAttributeCallback($matches) {
    $value = $matches[2];
    $quote = '';

    $first = substr($value, 0, 1);
    if ($first === '"' || $first === "'") {
      $quote = $first;
      // Cast: substr() returns FALSE for an empty result before PHP 8.
      $value = (string) substr($value, 1, -1);
    }

    return $matches[1] . $quote . $this->sanitizeURL($value) . $quote;
  }

  /**
   * Remove potential flaws in href and src attributes.
   *
   * Handles double-quoted, single-quoted and unquoted values, with optional
   * whitespace around the equals sign. Only the URL itself is rewritten.
   *
   * @access private
   *
   * @param string $str
   *   The HTML code.
   *
   * @return string
   *   The filtered HTML code.
   */
  private function sanitizeUrlAttributes($str) {
    // An unquoted value may not start with a quote, so an unterminated quoted
    // value is never mistaken for an unquoted one.
    $pattern = '/((?:href|src)\s*=\s*)("[^"]*"|\'[^\']*\'|[^\s"\'>][^\s>]*)/i';

    return preg_replace_callback($pattern, array($this, '_sanitizeUrlAttributeCallback'), $str);
  }

  /**
   * Remove dangerous attributes from html tags.
   *
   * @access private
   *
   * @param string $str
   *   The HTML code.
   *
   * @return string
   *   The filtered HTML code.
   */
  private function removeEvilAttributes($str) {
    // [^>]* also spans line breaks, so attributes placed on a new line inside
    // a tag are inspected as well.
    $tagPattern = '/<([^>]*)>/';

    if (!$this->_allowDOMEvents) {
      $str = preg_replace_callback($tagPattern, array($this, '_removeDOMEventsCallback'), $str);
    }

    if (!$this->_allowStyle) {
      $str = preg_replace_callback($tagPattern, array($this, '_removeStyleCallback'), $str);
    }

    return $str;
  }

  /**
   * Rename matching attributes of a tag to "forbidden".
   *
   * A name only matches where an attribute can start (beginning of the tag,
   * after whitespace, a quote or a slash) and when it is followed by "=", so
   * text inside values such as href="/styles/onload.html" is left alone.
   *
   * @access private
   *
   * @param string $str
   *   The content of one tag, without the angle brackets.
   * @param string $namePattern
   *   PCRE fragment matching the attribute names to neutralize.
   *
   * @return string
   *   The filtered tag content.
   */
  private function neutralizeAttribute($str, $namePattern) {
    $pattern = '/(?<![^\s"\'\/])(?:' . $namePattern . ')(?=\s*=)/i';

    // No stripslashes() here: it would turn a harmless "on\error=" into a
    // live "onerror=" after the check has already run.
    return preg_replace($pattern, 'forbidden', $str);
  }

  /**
   * Remove DOM events attributes from html tags.
   *
   * Every attribute whose name is "on" followed by letters is neutralized, not
   * only a fixed list of handlers.
   *
   * @access private
   *
   * @param string $str
   *   The content of one tag, without the angle brackets.
   *
   * @return string
   *   The filtered tag content.
   */
  private function removeDOMEvents($str) {
    return $this->neutralizeAttribute($str, 'on[a-z]+');
  }

  /**
   * Callback for PCRE.
   *
   * @access private
   *
   * @param array $matches
   *   $matches[1] is the content of the tag.
   *
   * @return string
   *   The rebuilt tag.
   *
   * @see removeDOMEvents
   */
  private function _removeDOMEventsCallback($matches) {
    return '<' . $this->removeDOMEvents($matches[1]) . '>';
  }

  /**
   * Remove style attributes from html tags.
   *
   * @access private
   *
   * @param string $str
   *   The content of one tag, without the angle brackets.
   *
   * @return string
   *   The filtered tag content.
   */
  private function removeStyle($str) {
    return $this->neutralizeAttribute($str, 'style');
  }

  /**
   * Callback for PCRE.
   *
   * @access private
   *
   * @param array $matches
   *   $matches[1] is the content of the tag.
   *
   * @return string
   *   The rebuilt tag.
   *
   * @see removeStyle
   */
  private function _removeStyleCallback($matches) {
    return '<' . $this->removeStyle($matches[1]) . '>';
  }

  /**
   * Remove dangerous HTML tags.
   *
   * Builds the whitelist from the default tags, the enabled options and the
   * additional tags, then hands it to strip_tags().
   *
   * @access private
   *
   * @param string $str
   *   The HTML code.
   *
   * @return string
   *   The HTML code without the disallowed tags.
   */
  private function removeEvilTags($str) {
    $allowedTags = $this->_allowedTags;

    if ($this->_allowScript) {
      $allowedTags .= '<script>';
    }

    if ($this->_allowStyle) {
      $allowedTags .= '<style>';
    }

    if ($this->_allowObjects) {
      $allowedTags .= '<object><embed><applet><param>';
    }

    $allowedTags .= $this->_additionalTags;

    return strip_tags($str, $allowedTags);
  }

  /**
   * Sanitize HTML.
   *
   * Removes disallowed tags, neutralizes disallowed attributes and cleans the
   * URLs found in href and src attributes, in that order.
   *
   * @access public
   *
   * @param string $html
   *   The HTML code.
   *
   * @return string
   *   The sanitized HTML code.
   */
  public function sanitize($html) {
    $html = $this->removeEvilTags($html);
    $html = $this->removeEvilAttributes($html);

    return $this->sanitizeUrlAttributes($html);
  }

}
|
|
|
/**
 * Sanitize HTML with a strict, default-configured HTML_Sanitizer.
 *
 * The sanitizer is created on the first call and reused by later calls.
 *
 * @param string $str
 *   The HTML code.
 *
 * @return string
 *   The sanitized HTML code.
 */
function html_sanitize($str) {
  static $sanitizer = NULL;

  // Build the shared instance lazily, on first use only.
  if ($sanitizer === NULL) {
    $sanitizer = new HTML_Sanitizer();
  }

  $clean = $sanitizer->sanitize($str);

  return $clean;
}
|
|
|
/**
 * Sanitize HTML with an HTML_Sanitizer on which allowAll() has been called.
 *
 * The sanitizer is created and configured on the first call and reused by
 * later calls.
 *
 * @param string $str
 *   The HTML code.
 *
 * @return string
 *   The sanitized HTML code.
 */
function html_loose_sanitize($str) {
  static $sanitizer = NULL;

  // Build and configure the shared instance lazily, on first use only.
  if ($sanitizer === NULL) {
    $sanitizer = new HTML_Sanitizer();
    $sanitizer->allowAll();
  }

  $clean = $sanitizer->sanitize($str);

  return $clean;
}
|
|
|