<?php

// $Id$

// vim: expandtab sw=4 ts=4 sts=4:

# ***** BEGIN LICENSE BLOCK *****
# This file is part of HTML Sanitizer.
# Copyright (c) 2005-2011 Frederic Minne <zefredz@gmail.com>.
# All rights reserved.
#
# HTML Sanitizer is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# HTML Sanitizer is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with HTML Sanitizer; if not, see <http://www.gnu.org/licenses/>.
#
# ***** END LICENSE BLOCK *****

/**
 * Sanitize HTML contents :
 * Remove dangerous tags and attributes that can lead to security issues like
 * XSS or HTTP response splitting
 *
 * The filtering is pattern based (strip_tags() plus regular expressions), it
 * is not an HTML parser. Known limitations of the current implementation:
 *  - the legacy DOM event names and the word "style" are replaced wherever
 *    they occur inside a tag, including inside attribute values;
 *  - "javascript:" is only detected when written literally; HTML entities or
 *    whitespace inside the scheme are not decoded before the check.
 *
 * @author Frederic Minne <zefredz@gmail.com>
 * @copyright Copyright © 2005-2011, Frederic Minne
 * @license http://www.gnu.org/licenses/lgpl.txt GNU Lesser General Public License version 3 or later
 * @version 1.1
 */
class HTML_Sanitizer
{
    // Private fields

    /** @var string tags always accepted, in strip_tags() notation */
    private $_allowedTags;

    /** @var bool keep DOM event attributes (on*) when true */
    private $_allowDOMEvents;

    /** @var bool keep javascript: urls in href/src when true */
    private $_allowJavascriptInUrls;

    /** @var bool accept object, embed, applet and param tags when true */
    private $_allowObjects;

    /** @var bool accept script tags when true */
    private $_allowScript;

    /** @var bool accept style tags and attributes when true */
    private $_allowStyle;

    /** @var string extra accepted tags, in strip_tags() notation */
    private $_additionalTags;

    /**
     * Constructor
     */
    public function __construct()
    {
        $this->resetAll();
    }

    /**
     * (re)set all options to default value
     *
     * Every "allow" switch is turned off, the additional tags are cleared
     * and the allowed tags list is restored to the built-in whitelist.
     *
     * @access public
     */
    public function resetAll()
    {
        $this->_allowDOMEvents = false;
        $this->_allowJavascriptInUrls = false;
        $this->_allowObjects = false;
        $this->_allowScript = false;
        $this->_allowStyle = false;

        // NOTE(review): '<add>' is not a standard HTML element ('<ins>' is
        // listed further down); it is kept so that the set of accepted tags
        // does not change.
        $this->_allowedTags = '<a><br><b><h1><h2><h3><h4><h5><h6>'
            . '<img><li><ol><p><strong><table><tr><td><th><u><ul><thead>'
            . '<tbody><tfoot><em><dd><dt><dl><span><div><del><add><i><hr>'
            . '<pre><blockquote><address><code><caption><abbr><acronym>'
            . '<cite><dfn><q><ins><sup><sub><kbd><samp><var><tt><small><big>'
            ;

        $this->_additionalTags = '';
    }

    /**
     * Add additional tags to allowed tags
     * @access public
     * @param string $tags tags in strip_tags() notation, e.g. '<iframe><video>'
     */
    public function addAdditionalTags( $tags )
    {
        $this->_additionalTags .= $tags;
    }

    /**
     * Allow iframes
     * @access public
     */
    public function allowIframes()
    {
        $this->addAdditionalTags( '<iframe>' );
    }

    /**
     * Allow HTML5 media tags
     * @access public
     */
    public function allowHtml5Media()
    {
        $this->addAdditionalTags( '<canvas><video><audio>' );
    }

    /**
     * Allow object, embed, applet and param tags in html
     * @access public
     */
    public function allowObjects()
    {
        $this->_allowObjects = true;
    }

    /**
     * Allow DOM event on DOM elements
     * @access public
     */
    public function allowDOMEvents()
    {
        $this->_allowDOMEvents = true;
    }

    /**
     * Allow script tags
     * @access public
     */
    public function allowScript()
    {
        $this->_allowScript = true;
    }

    /**
     * Allow the use of javascript: in urls
     * @access public
     */
    public function allowJavascriptInUrls()
    {
        $this->_allowJavascriptInUrls = true;
    }

    /**
     * Allow style tags and attributes
     * @access public
     */
    public function allowStyle()
    {
        $this->_allowStyle = true;
    }

    /**
     * Helper to allow all javascript related tags and attributes
     * @access public
     */
    public function allowAllJavascript()
    {
        $this->allowDOMEvents();
        $this->allowScript();
        $this->allowJavascriptInUrls();
    }

    /**
     * Allow all tags and attributes
     * @access public
     */
    public function allowAll()
    {
        $this->allowAllJavascript();
        $this->allowObjects();
        $this->allowStyle();
        $this->allowIframes();
        $this->allowHtml5Media();
    }

    /**
     * Filter URLs to avoid HTTP response splitting attacks
     *
     * Removes raw CR/LF characters and their percent-encoded forms
     * (%0a, %0d, in any letter case).
     *
     * @access public
     * @param string $url
     * @return string filtered url
     */
    public function filterHTTPResponseSplitting( $url )
    {
        $dangerousCharactersPattern = '~(\r\n|\r|\n|%0a|%0d)~i';

        return preg_replace( $dangerousCharactersPattern, '', $url );
    }

    /**
     * Remove potential javascript in urls
     *
     * Every "javascript:" occurrence (case-insensitive), together with all
     * the characters following it up to the next double quote, is replaced
     * by the marker "__forbidden__".
     *
     * @access public
     * @param string $str url
     * @return string filtered url
     */
    public function removeJavascriptURL( $str )
    {
        // "*" rather than "+": a scheme directly followed by a double quote
        // (possible inside a single-quoted attribute) must be neutralised too.
        $HTML_Sanitizer_stripJavascriptURL = 'javascript:[^"]*';

        return preg_replace(
            "/$HTML_Sanitizer_stripJavascriptURL/i",
            '__forbidden__',
            $str
        );
    }

    /**
     * Remove potential flaws in urls
     * @access private
     * @param string $url
     * @return string filtered url
     */
    private function sanitizeURL( $url )
    {
        if ( ! $this->_allowJavascriptInUrls )
        {
            $url = $this->removeJavascriptURL( $url );
        }

        return $this->filterHTTPResponseSplitting( $url );
    }

    /**
     * Build the PCRE pattern matching one url attribute and its value
     *
     * Capture groups: 1 = quote character (empty when the value is not
     * quoted), 2 = quoted value, 3 = unquoted value.
     *
     * @access private
     * @param string $name attribute name, must be regex-safe
     * @return string pattern
     */
    private function _urlAttributePattern( $name )
    {
        return '/' . $name . '\s*=\s*(?:(["\'])(.*?)\1|([^\s"\'>]+))/is';
    }

    /**
     * Rebuild a url attribute from the groups of _urlAttributePattern()
     * with its value sanitized; the original quoting style is preserved.
     *
     * @access private
     * @param string $name attribute name to emit
     * @param array $matches
     * @return string
     * @see sanitizeURL
     */
    private function _rebuildURLAttribute( $name, $matches )
    {
        $quote = $matches[1];
        $url = ( $quote === '' ) ? $matches[3] : $matches[2];

        return $name . '=' . $quote . $this->sanitizeURL( $url ) . $quote;
    }

    /**
     * Callback for PCRE
     * @access private
     * @param array $matches
     * @return string
     * @see sanitizeURL
     */
    private function _sanitizeURLCallback( $matches )
    {
        return $this->_rebuildURLAttribute( 'href', $matches );
    }

    /**
     * Remove potential flaws in href attributes
     *
     * Double-quoted, single-quoted and unquoted values are all handled.
     *
     * @access private
     * @param string $str html
     * @return string filtered html
     */
    private function sanitizeHref( $str )
    {
        return preg_replace_callback(
            $this->_urlAttributePattern( 'href' ),
            array( $this, '_sanitizeURLCallback' ),
            $str
        );
    }

    /**
     * Callback for PCRE
     * @access private
     * @param array $matches
     * @return string
     * @see sanitizeURL
     */
    private function _sanitizeSrcCallback( $matches )
    {
        return $this->_rebuildURLAttribute( 'src', $matches );
    }

    /**
     * Remove potential flaws in src attributes
     *
     * Double-quoted, single-quoted and unquoted values are all handled.
     *
     * @access private
     * @param string $str html
     * @return string filtered html
     */
    private function sanitizeSrc( $str )
    {
        return preg_replace_callback(
            $this->_urlAttributePattern( 'src' ),
            array( $this, '_sanitizeSrcCallback' ),
            $str
        );
    }

    /**
     * Remove dangerous attributes from html tags
     * @access private
     * @param string $str html
     * @return string filtered html
     */
    private function removeEvilAttributes( $str )
    {
        // The "s" modifier lets "." match newlines: without it a tag written
        // on several lines would not be matched and would skip the filters.
        $tagPattern = '/<(.*?)>/s';

        if ( ! $this->_allowDOMEvents )
        {
            $str = preg_replace_callback(
                $tagPattern,
                array( $this, '_removeDOMEventsCallback' ),
                $str
            );
        }

        if ( ! $this->_allowStyle )
        {
            $str = preg_replace_callback(
                $tagPattern,
                array( $this, '_removeStyleCallback' ),
                $str
            );
        }

        return $str;
    }

    /**
     * Remove DOM events attributes from html tags
     *
     * The attribute name is replaced by "forbidden", its value is kept.
     *
     * @access private
     * @param string $str contents of one tag, without the surrounding < >
     * @return string filtered tag contents
     */
    private function removeDOMEvents( $str )
    {
        // Collapse whitespace around "=" so `onclick = "..."` is caught by
        // the patterns below.
        $str = preg_replace( '/\s*=\s*/', '=', $str );

        // Legacy list: replaced wherever it appears inside the tag.
        $HTML_Sanitizer_stripAttrib = '(onclick|ondblclick|onmousedown|'
            . 'onmouseup|onmouseover|onmousemove|onmouseout|onkeypress|onkeydown|'
            . 'onkeyup|onfocus|onblur|onabort|onerror|onload)'
            ;

        $str = preg_replace( "/$HTML_Sanitizer_stripAttrib/i", 'forbidden', $str );

        // Any other on*= attribute (onmouseenter, onwheel, ...). Only names
        // at the start of the tag contents or right after whitespace, "/" or
        // a quote are matched, so url parameters such as "?online=1" are left
        // alone.
        $str = preg_replace(
            '~(?<![^\s/"\'])on[a-z][a-z0-9_:.-]*(?==)~i',
            'forbidden',
            $str
        );

        // No stripslashes() here: it would turn "on\error" back into a live
        // "onerror" after the filter has run.
        return $str;
    }

    /**
     * Callback for PCRE
     * @access private
     * @param array $matches
     * @return string
     * @see removeDOMEvents
     */
    private function _removeDOMEventsCallback( $matches )
    {
        return '<' . $this->removeDOMEvents( $matches[1] ) . '>';
    }

    /**
     * Remove style attributes from html tags
     *
     * Every occurrence of "style" inside the tag is replaced by "forbidden".
     *
     * @access private
     * @param string $str contents of one tag, without the surrounding < >
     * @return string filtered tag contents
     */
    private function removeStyle( $str )
    {
        $str = preg_replace( '/\s*=\s*/', '=', $str );

        $HTML_Sanitizer_stripAttrib = '(style)';

        // No stripslashes() here either: "st\yle" must not become "style".
        return preg_replace( "/$HTML_Sanitizer_stripAttrib/i", 'forbidden', $str );
    }

    /**
     * Callback for PCRE
     * @access private
     * @param array $matches
     * @return string
     * @see removeStyle
     */
    private function _removeStyleCallback( $matches )
    {
        return '<' . $this->removeStyle( $matches[1] ) . '>';
    }

    /**
     * Remove dangerous HTML tags
     *
     * Builds the whitelist from the default tags, the enabled options and
     * the additional tags, then delegates to strip_tags().
     *
     * @access private
     * @param string $str html code
     * @return string filtered html code
     */
    private function removeEvilTags( $str )
    {
        $allowedTags = $this->_allowedTags;

        if ( $this->_allowScript )
        {
            $allowedTags .= '<script>';
        }

        if ( $this->_allowStyle )
        {
            $allowedTags .= '<style>';
        }

        if ( $this->_allowObjects )
        {
            $allowedTags .= '<object><embed><applet><param>';
        }

        $allowedTags .= $this->_additionalTags;

        return strip_tags( $str, $allowedTags );
    }

    /**
     * Sanitize HTML
     *  remove dangerous tags and attributes
     *  clean urls
     *
     * @access public
     * @param string $html html code
     * @return string sanitized html code
     */
    public function sanitize( $html )
    {
        $html = $this->removeEvilTags( $html );
        $html = $this->removeEvilAttributes( $html );
        $html = $this->sanitizeHref( $html );
        $html = $this->sanitizeSrc( $html );

        return $html;
    }
}

/**
 * Sanitize html with the default (strict) options.
 *
 * One HTML_Sanitizer instance is created on first call and reused afterwards.
 *
 * @param string $str html code
 * @return string sanitized html code
 */
function html_sanitize( $str )
{
    static $san = null;

    if ( null === $san )
    {
        $san = new HTML_Sanitizer;
    }

    return $san->sanitize( $str );
}

/**
 * Sanitize html with every option allowed (see HTML_Sanitizer::allowAll()).
 *
 * One HTML_Sanitizer instance is created on first call and reused afterwards.
 *
 * @param string $str html code
 * @return string sanitized html code
 */
function html_loose_sanitize( $str )
{
    static $san = null;

    if ( null === $san )
    {
        $san = new HTML_Sanitizer;
        $san->allowAll();
    }

    return $san->sanitize( $str );
}