<?php

/**
 * @file
 * $Id$.
 */

// vim: expandtab sw=4 ts=4 sts=4:

// ***** BEGIN LICENSE BLOCK *****
// This file is part of HTML Sanitizer.
// Copyright (c) 2005-2011 Frederic Minne <zefredz@gmail.com>.
// All rights reserved.
//
// HTML Sanitizer is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation; either version 3 of the License, or
// (at your option) any later version.
//
// HTML Sanitizer is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with HTML Sanitizer; if not, see <http://www.gnu.org/licenses/>.
//
// ***** END LICENSE BLOCK *****.

/**
 * Sanitize HTML contents.
 *
 * Removes dangerous tags and attributes that can lead to security issues like
 * XSS or HTTP response splitting. The sanitizer works in four passes (see
 * sanitize()): tag whitelist, attribute neutralisation, href clean-up and src
 * clean-up. Everything is denied by default; the allow*() methods relax
 * individual restrictions.
 *
 * NOTE(review): filtering is regex based and does not decode HTML entities or
 * strip tab characters inside URLs, so obfuscated schemes such as
 * "java&#115;cript:" are presumably not caught here - confirm whether callers
 * rely on this class as their only XSS defence.
 *
 * @author Frederic Minne <zefredz@gmail.com>
 * @copyright Copyright © 2005-2011, Frederic Minne
 * @license http://www.gnu.org/licenses/lgpl.txt GNU Lesser General Public License version 3 or later
 * @version 1.1
 */
class HTML_Sanitizer {

    /**
     * Tags that are always kept, in the format expected by strip_tags().
     *
     * @var string
     */
    private $_allowedTags;

    /**
     * Whether on* event handler attributes are left untouched.
     *
     * @var bool
     */
    private $_allowDOMEvents;

    /**
     * Whether "javascript:" URLs are left untouched in href/src attributes.
     *
     * @var bool
     */
    private $_allowJavascriptInUrls;

    /**
     * Whether object, embed, applet and param tags are kept.
     *
     * @var bool
     */
    private $_allowObjects;

    /**
     * Whether script tags are kept.
     *
     * @var bool
     */
    private $_allowScript;

    /**
     * Whether style tags and style attributes are kept.
     *
     * @var bool
     */
    private $_allowStyle;

    /**
     * Extra tags appended to the whitelist, in strip_tags() format.
     *
     * @var string
     */
    private $_additionalTags;

    /**
     * Constructor: starts from the most restrictive configuration.
     */
    public function __construct() {
        $this->resetAll();
    }

    /**
     * (Re)set all options to their default (most restrictive) value.
     */
    public function resetAll() {
        $this->_allowDOMEvents = FALSE;
        $this->_allowJavascriptInUrls = FALSE;
        $this->_allowStyle = FALSE;
        $this->_allowScript = FALSE;
        $this->_allowObjects = FALSE;

        $this->_allowedTags = '<a><br><b><h1><h2><h3><h4><h5><h6>'
            . '<img><li><ol><p><strong><table><tr><td><th><u><ul><thead>'
            . '<tbody><tfoot><em><dd><dt><dl><span><div><del><add><i><hr>'
            . '<pre><br><blockquote><address><code><caption><abbr><acronym>'
            . '<cite><dfn><q><ins><sup><sub><kbd><samp><var><tt><small><big>';

        $this->_additionalTags = '';
    }

    /**
     * Add additional tags to the allowed tags.
     *
     * @param string $tags
     *   Tags in strip_tags() format, e.g. '<iframe><video>'. The value is
     *   appended to the tags added by previous calls.
     */
    public function addAdditionalTags($tags) {
        $this->_additionalTags .= $tags;
    }

    /**
     * Allow iframes.
     */
    public function allowIframes() {
        $this->addAdditionalTags('<iframe>');
    }

    /**
     * Allow HTML5 media tags (canvas, video and audio).
     */
    public function allowHtml5Media() {
        $this->addAdditionalTags('<canvas><video><audio>');
    }

    /**
     * Allow object, embed, applet and param tags in html.
     */
    public function allowObjects() {
        $this->_allowObjects = TRUE;
    }

    /**
     * Allow DOM event attributes (onclick, onload, ...) on DOM elements.
     */
    public function allowDOMEvents() {
        $this->_allowDOMEvents = TRUE;
    }

    /**
     * Allow script tags.
     */
    public function allowScript() {
        $this->_allowScript = TRUE;
    }

    /**
     * Allow the use of javascript: in urls.
     */
    public function allowJavascriptInUrls() {
        $this->_allowJavascriptInUrls = TRUE;
    }

    /**
     * Allow style tags and attributes.
     */
    public function allowStyle() {
        $this->_allowStyle = TRUE;
    }

    /**
     * Helper to allow all javascript related tags and attributes.
     */
    public function allowAllJavascript() {
        $this->allowDOMEvents();
        $this->allowScript();
        $this->allowJavascriptInUrls();
    }

    /**
     * Allow all optional tags and attributes.
     *
     * Tags outside the base whitelist and the optional groups are still
     * removed by sanitize().
     */
    public function allowAll() {
        $this->allowAllJavascript();
        $this->allowObjects();
        $this->allowStyle();
        $this->allowIframes();
        $this->allowHtml5Media();
    }

    /**
     * Filter URLs to avoid HTTP response splitting attacks.
     *
     * Removes raw CR/LF characters and their percent-encoded forms. The
     * removal is repeated until the value is stable, because deleting one
     * sequence can splice a new one together ("%0%0aa" would otherwise
     * collapse to "%0a").
     *
     * @param string $url
     *   The url to filter.
     *
     * @return string
     *   The filtered url (NULL if the PCRE engine reports an error).
     */
    public function filterHTTPResponseSplitting($url) {
        $dangerousCharactersPattern = '~(\r\n|\r|\n|%0a|%0d|%0D|%0A)~';

        do {
            $before = $url;
            $url = preg_replace($dangerousCharactersPattern, '', $before);
        } while ($url !== NULL && $url !== $before);

        return $url;
    }

    /**
     * Remove potential javascript in urls.
     *
     * Every "javascript:" occurrence (case-insensitive), together with all
     * following characters up to the next double quote, is replaced by the
     * literal "__forbidden__".
     *
     * @param string $str
     *   The url to filter.
     *
     * @return string
     *   The filtered url.
     */
    public function removeJavascriptURL($str) {
        $javascriptUrlPattern = 'javascript:[^"]+';

        return preg_replace("/$javascriptUrlPattern/i", '__forbidden__', $str);
    }

    /**
     * Remove potential flaws in urls.
     *
     * CR/LF sequences are stripped first so that a scheme broken up by a line
     * break ("java\nscript:") is reassembled before the javascript: check
     * instead of after it.
     *
     * @param string $url
     *   The url to sanitize.
     *
     * @return string
     *   The filtered url.
     */
    private function sanitizeURL($url) {
        $url = $this->filterHTTPResponseSplitting($url);

        if (!$this->_allowJavascriptInUrls) {
            $url = $this->removeJavascriptURL($url);
        }

        return $url;
    }

    /**
     * Build the PCRE pattern matching one URL attribute and its value.
     *
     * Capture groups: 1 = the "=" separator with surrounding whitespace,
     * 2 = the quote character, 3 = the quoted value, 4 = an unquoted value.
     *
     * @param string $name
     *   The attribute name ('href' or 'src').
     *
     * @return string
     *   The delimited pattern.
     */
    private function _urlAttributePattern($name) {
        return '/' . $name . '(\s*=\s*)(?:(["\'])(.*?)\2|([^\s"\'>]+))/is';
    }

    /**
     * Rebuild a URL attribute from a _urlAttributePattern() match.
     *
     * The separator and the quoting style of the input are kept; only the
     * value goes through sanitizeURL().
     *
     * @param string $name
     *   The attribute name to emit.
     * @param array $matches
     *   The PCRE match.
     *
     * @return string
     *   The attribute with a sanitized value.
     */
    private function _rebuildUrlAttribute($name, $matches) {
        // Group 4 only takes part in the match for unquoted values.
        if (isset($matches[4]) && $matches[4] !== '') {
            return $name . $matches[1] . $this->sanitizeURL($matches[4]);
        }

        return $name . $matches[1] . $matches[2]
            . $this->sanitizeURL($matches[3]) . $matches[2];
    }

    /**
     * Callback for PCRE.
     *
     * @param array $matches
     *   The match produced by sanitizeHref().
     *
     * @return string
     *   The rewritten href attribute.
     *
     * @see sanitizeURL
     */
    private function _sanitizeURLCallback($matches) {
        return $this->_rebuildUrlAttribute('href', $matches);
    }

    /**
     * Remove potential flaws in href attributes.
     *
     * Double-quoted, single-quoted and unquoted values are all handled.
     *
     * @param string $str
     *   The html code.
     *
     * @return string
     *   The html code with filtered href values.
     */
    private function sanitizeHref($str) {
        return preg_replace_callback(
            $this->_urlAttributePattern('href'),
            array($this, '_sanitizeURLCallback'),
            $str
        );
    }

    /**
     * Callback for PCRE.
     *
     * @param array $matches
     *   The match produced by sanitizeSrc().
     *
     * @return string
     *   The rewritten src attribute.
     *
     * @see sanitizeURL
     */
    private function _sanitizeSrcCallback($matches) {
        return $this->_rebuildUrlAttribute('src', $matches);
    }

    /**
     * Remove potential flaws in src attributes.
     *
     * Double-quoted, single-quoted and unquoted values are all handled.
     *
     * @param string $str
     *   The html code.
     *
     * @return string
     *   The html code with filtered src values.
     */
    private function sanitizeSrc($str) {
        return preg_replace_callback(
            $this->_urlAttributePattern('src'),
            array($this, '_sanitizeSrcCallback'),
            $str
        );
    }

    /**
     * Remove dangerous attributes from html tags.
     *
     * Each "<...>" span is passed to the event and/or style filter, depending
     * on the configuration. The "s" modifier lets the span cross line breaks,
     * so tags written over several lines are filtered too.
     *
     * @param string $str
     *   The html code.
     *
     * @return string
     *   The html code with neutralised attributes.
     */
    private function removeEvilAttributes($str) {
        if (!$this->_allowDOMEvents) {
            $str = preg_replace_callback('/<(.*?)>/is', array($this, '_removeDOMEventsCallback'), $str);
        }

        if (!$this->_allowStyle) {
            $str = preg_replace_callback('/<(.*?)>/is', array($this, '_removeStyleCallback'), $str);
        }

        return $str;
    }

    /**
     * Remove DOM events attributes from html tags.
     *
     * Whitespace around "=" is collapsed first. The historical handler names
     * are then renamed to "forbidden" wherever they occur in the tag, and any
     * other on<name>= attribute found at an attribute boundary is renamed as
     * well. No stripslashes() is applied afterwards: unescaping after the
     * filter could rebuild a handler name such as "on\error".
     *
     * @param string $str
     *   The content of one tag, without the angle brackets.
     *
     * @return string
     *   The filtered tag content.
     */
    private function removeDOMEvents($str) {
        $str = preg_replace('/\s*=\s*/', '=', $str);

        $knownHandlers = '(onclick|ondblclick|onmousedown|'
            . 'onmouseup|onmouseover|onmousemove|onmouseout|onkeypress|onkeydown|'
            . 'onkeyup|onfocus|onblur|onabort|onerror|onload)';

        $str = preg_replace("/$knownHandlers/i", 'forbidden', $str);

        // Catch handlers missing from the list above (onsubmit, onchange, ...).
        // The lookbehind limits the match to names that start an attribute:
        // after whitespace, a slash or the closing quote of a previous value.
        $str = preg_replace('/(?<=[\s\/"\'])on[a-z]+(?==)/i', 'forbidden', $str);

        return $str;
    }

    /**
     * Callback for PCRE.
     *
     * @param array $matches
     *   The match produced by removeEvilAttributes().
     *
     * @return string
     *   The rebuilt tag.
     *
     * @see removeDOMEvents
     */
    private function _removeDOMEventsCallback($matches) {
        return '<' . $this->removeDOMEvents($matches[1]) . '>';
    }

    /**
     * Remove style attributes from html tags.
     *
     * Whitespace around "=" is collapsed, then every occurrence of "style"
     * inside the tag is replaced by "forbidden". As in removeDOMEvents(), no
     * stripslashes() is applied after filtering.
     *
     * @param string $str
     *   The content of one tag, without the angle brackets.
     *
     * @return string
     *   The filtered tag content.
     */
    private function removeStyle($str) {
        $str = preg_replace('/\s*=\s*/', '=', $str);

        $forbiddenAttributes = '(style)';

        return preg_replace("/$forbiddenAttributes/i", 'forbidden', $str);
    }

    /**
     * Callback for PCRE.
     *
     * @param array $matches
     *   The match produced by removeEvilAttributes().
     *
     * @return string
     *   The rebuilt tag.
     *
     * @see removeStyle
     */
    private function _removeStyleCallback($matches) {
        return '<' . $this->removeStyle($matches[1]) . '>';
    }

    /**
     * Remove dangerous HTML tags.
     *
     * The whitelist is the base tag list, plus the optional groups enabled
     * through allowScript(), allowStyle() and allowObjects(), plus the tags
     * registered with addAdditionalTags(). Everything else is removed with
     * strip_tags().
     *
     * @param string $str
     *   The html code.
     *
     * @return string
     *   The html code containing only whitelisted tags.
     */
    private function removeEvilTags($str) {
        $allowedTags = $this->_allowedTags;

        if ($this->_allowScript) {
            $allowedTags .= '<script>';
        }

        if ($this->_allowStyle) {
            $allowedTags .= '<style>';
        }

        if ($this->_allowObjects) {
            $allowedTags .= '<object><embed><applet><param>';
        }

        $allowedTags .= $this->_additionalTags;

        return strip_tags($str, $allowedTags);
    }

    /**
     * Sanitize HTML: remove dangerous tags and attributes, clean urls.
     *
     * @param string $html
     *   The html code.
     *
     * @return string
     *   The sanitized html code.
     */
    public function sanitize($html) {
        $html = $this->removeEvilTags($html);
        $html = $this->removeEvilAttributes($html);
        $html = $this->sanitizeHref($html);
        $html = $this->sanitizeSrc($html);

        return $html;
    }

}
/**
 * Sanitize HTML with the default (most restrictive) HTML_Sanitizer settings.
 *
 * A single HTML_Sanitizer instance is created on the first call and reused by
 * every later call.
 *
 * @param string $str
 *   The html code.
 *
 * @return string
 *   The sanitized html code.
 */
function html_sanitize($str) {
    static $sanitizer = NULL;

    if (!($sanitizer instanceof HTML_Sanitizer)) {
        $sanitizer = new HTML_Sanitizer();
    }

    return $sanitizer->sanitize($str);
}
/**
 * Sanitize HTML with every optional HTML_Sanitizer permission enabled.
 *
 * The shared instance is configured once with allowAll() and then reused by
 * every later call.
 *
 * @param string $str
 *   The html code.
 *
 * @return string
 *   The sanitized html code.
 */
function html_loose_sanitize($str) {
    static $permissive = NULL;

    if (NULL === $permissive) {
        $instance = new HTML_Sanitizer();
        $instance->allowAll();
        $permissive = $instance;
    }

    return $permissive->sanitize($str);
}