<?php

/**
 * @file
 * $Id$.
 */

// vim: expandtab sw=4 ts=4 sts=4:
// ***** BEGIN LICENSE BLOCK *****
// This file is part of HTML Sanitizer.
// Copyright (c) 2005-2011 Frederic Minne <zefredz@gmail.com>.
// All rights reserved.
//
// HTML Sanitizer is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation; either version 3 of the License, or
// (at your option) any later version.
//
// HTML Sanitizer is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with HTML Sanitizer; if not, see <http://www.gnu.org/licenses/>.
//
// ***** END LICENSE BLOCK *****.

/**
 * Sanitize HTML contents :
 * Remove dangerous tags and attributes that can lead to security issues like
 * XSS or HTTP response splitting.
 *
 * The filtering is regex based: tags are whitelisted with strip_tags(), then
 * event-handler/style attributes and href/src URLs are neutralised in place.
 *
 * @author Frederic Minne <zefredz@gmail.com>
 * @copyright Copyright © 2005-2011, Frederic Minne
 * @license http://www.gnu.org/licenses/lgpl.txt GNU Lesser General Public License version 3 or later
 * @version 1.1
 */
class HTML_Sanitizer {

    /**
     * Tags that are always allowed, in strip_tags() "<a><b>" notation.
     *
     * @var string
     */
    private $_allowedTags;

    /**
     * Whether on* event-handler attributes are left untouched.
     *
     * @var bool
     */
    private $_allowDOMEvents;

    /**
     * Whether "javascript:" is left untouched in href/src values.
     *
     * @var bool
     */
    private $_allowJavascriptInUrls;

    /**
     * Whether object, embed, applet and param tags are allowed.
     *
     * @var bool
     */
    private $_allowObjects;

    /**
     * Whether script tags are allowed.
     *
     * @var bool
     */
    private $_allowScript;

    /**
     * Whether style tags and style attributes are allowed.
     *
     * @var bool
     */
    private $_allowStyle;

    /**
     * Extra allowed tags registered through addAdditionalTags().
     *
     * @var string
     */
    private $_additionalTags;

    /**
     * Constructor: starts with every option at its (strict) default.
     */
    public function __construct() {
        $this->resetAll();
    }

    /**
     * (re)set all options to default value.
     *
     * Every "allow" flag is switched off, the additional tag list is emptied
     * and the base tag whitelist is restored.
     */
    public function resetAll() {
        $this->_allowDOMEvents = FALSE;
        $this->_allowJavascriptInUrls = FALSE;
        $this->_allowScript = FALSE;
        $this->_allowObjects = FALSE;
        $this->_allowStyle = FALSE;

        // NOTE(review): '<add>' is not a standard HTML element; it is kept so
        // the whitelist stays backward compatible - confirm whether it was
        // intended.
        $this->_allowedTags = '<a><br><b><h1><h2><h3><h4><h5><h6>'
            . '<img><li><ol><p><strong><table><tr><td><th><u><ul><thead>'
            . '<tbody><tfoot><em><dd><dt><dl><span><div><del><add><i><hr>'
            . '<pre><blockquote><address><code><caption><abbr><acronym>'
            . '<cite><dfn><q><ins><sup><sub><kbd><samp><var><tt><small><big>';

        $this->_additionalTags = '';
    }

    /**
     * Add additional tags to allowed tags.
     *
     * @param string $tags
     *   Tags in strip_tags() notation, e.g. '<iframe><video>'. They are
     *   appended to any tags added previously.
     *
     * @access public
     */
    public function addAdditionalTags($tags) {
        $this->_additionalTags .= $tags;
    }

    /**
     * Allow iframes.
     *
     * @access public
     */
    public function allowIframes() {
        $this->addAdditionalTags('<iframe>');
    }

    /**
     * Allow HTML5 media tags (canvas, video, audio).
     *
     * @access public
     */
    public function allowHtml5Media() {
        $this->addAdditionalTags('<canvas><video><audio>');
    }

    /**
     * Allow object, embed, applet and param tags in html.
     *
     * @access public
     */
    public function allowObjects() {
        $this->_allowObjects = TRUE;
    }

    /**
     * Allow DOM event on DOM elements.
     *
     * @access public
     */
    public function allowDOMEvents() {
        $this->_allowDOMEvents = TRUE;
    }

    /**
     * Allow script tags.
     *
     * @access public
     */
    public function allowScript() {
        $this->_allowScript = TRUE;
    }

    /**
     * Allow the use of javascript: in urls.
     *
     * @access public
     */
    public function allowJavascriptInUrls() {
        $this->_allowJavascriptInUrls = TRUE;
    }

    /**
     * Allow style tags and attributes.
     *
     * @access public
     */
    public function allowStyle() {
        $this->_allowStyle = TRUE;
    }

    /**
     * Helper to allow all javascript related tags and attributes.
     *
     * @access public
     */
    public function allowAllJavascript() {
        $this->allowDOMEvents();
        $this->allowScript();
        $this->allowJavascriptInUrls();
    }

    /**
     * Allow all tags and attributes.
     *
     * @access public
     */
    public function allowAll() {
        $this->allowAllJavascript();
        $this->allowObjects();
        $this->allowStyle();
        $this->allowIframes();
        $this->allowHtml5Media();
    }

    /**
     * Filter URLs to avoid HTTP response splitting attacks.
     *
     * Removes raw CR/LF characters and their URL-encoded forms (%0a, %0d, in
     * any letter case).
     *
     * @access public
     * @param string $url
     *   URL to filter.
     *
     * @return string
     *   Filtered url.
     */
    public function filterHTTPResponseSplitting($url) {
        $dangerousCharactersPattern = '~\r\n|\r|\n|%0a|%0d~i';

        // A single pass is not enough: removing the inner sequence of
        // "%0%0aa" would leave a fresh "%0a" behind, so repeat until the
        // string no longer changes.
        do {
            $previous = $url;
            $url = preg_replace($dangerousCharactersPattern, '', $previous);
        } while ($url !== $previous);

        return $url;
    }

    /**
     * Remove potential javascript in urls.
     *
     * Every "javascript:" occurrence, together with the text following it up
     * to the next double quote, is replaced by "__forbidden__".
     *
     * NOTE(review): only the literal text "javascript:" is matched; spellings
     * obfuscated with HTML entities or embedded whitespace are not detected
     * here.
     *
     * @access public
     * @param string $str
     *   URL (or text containing URLs) to filter.
     *
     * @return string
     *   Filtered url.
     */
    public function removeJavascriptURL($str) {
        return preg_replace('/javascript:[^"]+/i', '__forbidden__', $str);
    }

    /**
     * Remove potential flaws in urls.
     *
     * @access private
     * @param string $url
     *   Raw attribute value.
     *
     * @return string
     *   Filtered url.
     */
    private function sanitizeURL($url) {
        if (!$this->_allowJavascriptInUrls) {
            $url = $this->removeJavascriptURL($url);
        }

        return $this->filterHTTPResponseSplitting($url);
    }

    /**
     * Sanitize the value of every occurrence of one URL attribute.
     *
     * Double-quoted, single-quoted and unquoted values are all handled, with
     * optional whitespace around the "=" sign.
     *
     * @access private
     * @param string $str
     *   Html code.
     * @param string $attribute
     *   Attribute name ('href' or 'src'); must be regex-safe.
     * @param string $callback
     *   Name of the method of this class rebuilding the attribute.
     *
     * @return string
     *   Html code with the attribute values sanitized.
     */
    private function sanitizeUrlAttribute($str, $attribute, $callback) {
        $pattern = '/' . $attribute
            . '\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s"\'<>]+))/i';

        return preg_replace_callback($pattern, array($this, $callback), $str);
    }

    /**
     * Rebuild a URL attribute from the groups of sanitizeUrlAttribute().
     *
     * Group 1 holds a double-quoted value, group 2 a single-quoted one and
     * group 3 an unquoted one; the original quoting style is preserved.
     *
     * @access private
     * @param string $name
     *   Attribute name to emit.
     * @param array $matches
     *   PCRE match groups.
     *
     * @return string
     *   The attribute with its value sanitized.
     */
    private function rebuildUrlAttribute($name, $matches) {
        // Trailing unmatched groups are absent from $matches, so the highest
        // index present tells which alternative matched.
        if (isset($matches[3])) {
            $quote = '';
            $url = $matches[3];
        }
        elseif (isset($matches[2])) {
            $quote = "'";
            $url = $matches[2];
        }
        else {
            $quote = '"';
            $url = $matches[1];
        }

        return $name . '=' . $quote . $this->sanitizeURL($url) . $quote;
    }

    /**
     * Callback for PCRE (href attributes).
     *
     * @access private
     * @param array $matches
     *   PCRE match groups.
     *
     * @return string
     *
     * @see sanitizeURL
     */
    private function _sanitizeURLCallback($matches) {
        return $this->rebuildUrlAttribute('href', $matches);
    }

    /**
     * Remove potential flaws in href attributes.
     *
     * @access private
     * @param string $str
     *   Html code.
     *
     * @return string
     *   Filtered html code.
     */
    private function sanitizeHref($str) {
        return $this->sanitizeUrlAttribute($str, 'href', '_sanitizeURLCallback');
    }

    /**
     * Callback for PCRE (src attributes).
     *
     * @access private
     * @param array $matches
     *   PCRE match groups.
     *
     * @return string
     *
     * @see sanitizeURL
     */
    private function _sanitizeSrcCallback($matches) {
        return $this->rebuildUrlAttribute('src', $matches);
    }

    /**
     * Remove potential flaws in src attributes.
     *
     * @access private
     * @param string $str
     *   Html code.
     *
     * @return string
     *   Filtered html code.
     */
    private function sanitizeSrc($str) {
        return $this->sanitizeUrlAttribute($str, 'src', '_sanitizeSrcCallback');
    }

    /**
     * Remove dangerous attributes from html tags.
     *
     * @access private
     * @param string $str
     *   Html code.
     *
     * @return string
     *   Filtered html code.
     */
    private function removeEvilAttributes($str) {
        // The "s" modifier lets "." match newlines, so a tag whose attributes
        // are spread over several lines is filtered as well.
        $tagPattern = '/<(.*?)>/is';

        if (!$this->_allowDOMEvents) {
            $str = preg_replace_callback($tagPattern, array($this, '_removeDOMEventsCallback'), $str);
        }

        if (!$this->_allowStyle) {
            $str = preg_replace_callback($tagPattern, array($this, '_removeStyleCallback'), $str);
        }

        return $str;
    }

    /**
     * Remove DOM events attributes from html tags.
     *
     * Any attribute name of the form on<letters> directly followed by "=" is
     * renamed to "forbidden", which covers every event handler rather than a
     * fixed list and leaves attribute values such as "/onloadpage" alone.
     *
     * @access private
     * @param string $str
     *   Content of an html tag (without the angle brackets).
     *
     * @return string
     *   Filtered tag content.
     */
    private function removeDOMEvents($str) {
        // Collapse whitespace around "=" so "onclick = ..." is caught too.
        $str = preg_replace('/\s*=\s*/', '=', $str);

        // No stripslashes() here: it would turn "on\error=" into a working
        // "onerror=" after the check and damage legitimate backslashes.
        return preg_replace('/\bon[a-z]+(?==)/i', 'forbidden', $str);
    }

    /**
     * Callback for PCRE.
     *
     * @access private
     * @param array $matches
     *   PCRE match groups; group 1 is the tag content.
     *
     * @return string
     *
     * @see removeDOMEvents
     */
    private function _removeDOMEventsCallback($matches) {
        return '<' . $this->removeDOMEvents($matches[1]) . '>';
    }

    /**
     * Remove style attributes from html tags.
     *
     * Every occurrence of "style" inside the tag is replaced by "forbidden".
     *
     * @access private
     * @param string $str
     *   Content of an html tag (without the angle brackets).
     *
     * @return string
     *   Filtered tag content.
     */
    private function removeStyle($str) {
        $str = preg_replace('/\s*=\s*/', '=', $str);

        // No stripslashes() here: it would turn "sty\le=" into a working
        // "style=" after the check and damage legitimate backslashes.
        return preg_replace('/(style)/i', 'forbidden', $str);
    }

    /**
     * Callback for PCRE.
     *
     * @access private
     * @param array $matches
     *   PCRE match groups; group 1 is the tag content.
     *
     * @return string
     *
     * @see removeStyle
     */
    private function _removeStyleCallback($matches) {
        return '<' . $this->removeStyle($matches[1]) . '>';
    }

    /**
     * Remove dangerous HTML tags.
     *
     * Builds the whitelist from the base tags, the enabled options and the
     * additional tags, then strips everything else with strip_tags().
     *
     * @access private
     * @param string $str
     *   Html code.
     *
     * @return string
     *   Html code containing only allowed tags.
     */
    private function removeEvilTags($str) {
        $allowedTags = $this->_allowedTags;

        if ($this->_allowScript) {
            $allowedTags .= '<script>';
        }

        if ($this->_allowStyle) {
            $allowedTags .= '<style>';
        }

        if ($this->_allowObjects) {
            $allowedTags .= '<object><embed><applet><param>';
        }

        $allowedTags .= $this->_additionalTags;

        return strip_tags($str, $allowedTags);
    }

    /**
     * Sanitize HTML
     *  remove dangerous tags and attributes
     *  clean urls.
     *
     * @access public
     * @param string $html
     *   Html code.
     *
     * @return string
     *   Sanitized html code.
     */
    public function sanitize($html) {
        $html = $this->removeEvilTags($html);
        $html = $this->removeEvilAttributes($html);
        $html = $this->sanitizeHref($html);
        $html = $this->sanitizeSrc($html);

        return $html;
    }

}

/**
 * Sanitize html with the default (strict) options.
 *
 * A single HTML_Sanitizer instance is created on first use and reused.
 *
 * @param string $str
 *   Html code.
 *
 * @return string
 *   Sanitized html code.
 */
function html_sanitize($str) {
    static $san = NULL;

    if ($san === NULL) {
        $san = new HTML_Sanitizer();
    }

    return $san->sanitize($str);
}

/**
 * Sanitize html with every option allowed (see HTML_Sanitizer::allowAll()).
 *
 * A single HTML_Sanitizer instance is created on first use and reused.
 *
 * @param string $str
 *   Html code.
 *
 * @return string
 *   Sanitized html code.
 */
function html_loose_sanitize($str) {
    static $san = NULL;

    if ($san === NULL) {
        $san = new HTML_Sanitizer();
        $san->allowAll();
    }

    return $san->sanitize($str);
}