EBSCO Discovery module. Used on the library.upei.ca website. The bento box modules leverages the auth parts of this module.

445 lines
11 KiB

<?php // $Id$
// vim: expandtab sw=4 ts=4 sts=4:
# ***** BEGIN LICENSE BLOCK *****
# This file is part of HTML Sanitizer.
# Copyright (c) 2005-2011 Frederic Minne <zefredz@gmail.com>.
# All rights reserved.
#
# HTML Sanitizer is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# HTML Sanitizer is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with HTML Sanitizer; if not, see <http://www.gnu.org/licenses/>.
#
# ***** END LICENSE BLOCK *****
/**
* Sanitize HTML contents :
* Remove dangerous tags and attributes that can lead to security issues like
* XSS or HTTP response splitting
*
* @author Frederic Minne <zefredz@gmail.com>
* @copyright Copyright &copy; 2005-2011, Frederic Minne
* @license http://www.gnu.org/licenses/lgpl.txt GNU Lesser General Public License version 3 or later
* @version 1.1
*/
class HTML_Sanitizer
{
// Private fields
private $_allowedTags;
private $_allowJavascriptEvents;
private $_allowJavascriptInUrls;
private $_allowObjects;
private $_allowScript;
private $_allowStyle;
private $_additionalTags;
/**
* Constructor
*/
public function __construct()
{
$this->resetAll();
}
/**
* (re)set all options to default value
*/
public function resetAll()
{
$this->_allowDOMEvents = false;
$this->_allowJavascriptInUrls = false;
$this->_allowStyle = false;
$this->_allowScript = false;
$this->_allowObjects = false;
$this->_allowStyle = false;
$this->_allowedTags = '<a><br><b><h1><h2><h3><h4><h5><h6>'
. '<img><li><ol><p><strong><table><tr><td><th><u><ul><thead>'
. '<tbody><tfoot><em><dd><dt><dl><span><div><del><add><i><hr>'
. '<pre><br><blockquote><address><code><caption><abbr><acronym>'
. '<cite><dfn><q><ins><sup><sub><kbd><samp><var><tt><small><big>'
;
$this->_additionalTags = '';
}
/**
* Add additional tags to allowed tags
* @param string
* @access public
*/
public function addAdditionalTags( $tags )
{
$this->_additionalTags .= $tags;
}
/**
* Allow iframes
* @access public
*/
public function allowIframes()
{
$this->addAdditionalTags( '<iframe>' );
}
/**
* Allow HTML5 media tags
* @access public
*/
public function allowHtml5Media()
{
$this->addAdditionalTags( '<canvas><video><audio>' );
}
/**
* Allow object, embed, applet and param tags in html
* @access public
*/
public function allowObjects()
{
$this->_allowObjects = true;
}
/**
* Allow DOM event on DOM elements
* @access public
*/
public function allowDOMEvents()
{
$this->_allowDOMEvents = true;
}
/**
* Allow script tags
* @access public
*/
public function allowScript()
{
$this->_allowScript = true;
}
/**
* Allow the use of javascript: in urls
* @access public
*/
public function allowJavascriptInUrls()
{
$this->_allowJavascriptInUrls = true;
}
/**
* Allow style tags and attributes
* @access public
*/
public function allowStyle()
{
$this->_allowStyle = true;
}
/**
* Helper to allow all javascript related tags and attributes
* @access public
*/
public function allowAllJavascript()
{
$this->allowDOMEvents();
$this->allowScript();
$this->allowJavascriptInUrls();
}
/**
* Allow all tags and attributes
* @access public
*/
public function allowAll()
{
$this->allowAllJavascript();
$this->allowObjects();
$this->allowStyle();
$this->allowIframes();
$this->allowHtml5Media();
}
/**
* Filter URLs to avoid HTTP response splitting attacks
* @access public
* @param string url
* @return string filtered url
*/
public function filterHTTPResponseSplitting( $url )
{
$dangerousCharactersPattern = '~(\r\n|\r|\n|%0a|%0d|%0D|%0A)~';
return preg_replace( $dangerousCharactersPattern, '', $url );
}
/**
* Remove potential javascript in urls
* @access public
* @param string url
* @return string filtered url
*/
public function removeJavascriptURL( $str )
{
$HTML_Sanitizer_stripJavascriptURL = 'javascript:[^"]+';
$str = preg_replace("/$HTML_Sanitizer_stripJavascriptURL/i"
, '__forbidden__'
, $str );
return $str;
}
/**
* Remove potential flaws in urls
* @access private
* @param string url
* @return string filtered url
*/
private function sanitizeURL( $url )
{
if ( ! $this->_allowJavascriptInUrls )
{
$url = $this->removeJavascriptURL( $url );
}
$url = $this->filterHTTPResponseSplitting( $url );
return $url;
}
/**
* Callback for PCRE
* @access private
* @param matches array
* @return string
* @see sanitizeURL
*/
private function _sanitizeURLCallback( $matches )
{
return 'href="'.$this->sanitizeURL( $matches[1] ).'"';
}
/**
* Remove potential flaws in href attributes
* @access private
* @param string html tag
* @return string filtered html tag
*/
private function sanitizeHref( $str )
{
$HTML_Sanitizer_URL = 'href="([^"]+)"';
return preg_replace_callback("/$HTML_Sanitizer_URL/i"
, array( &$this, '_sanitizeURLCallback' )
, $str );
}
/**
* Callback for PCRE
* @access private
* @param matches array
* @return string
* @see sanitizeURL
*/
private function _sanitizeSrcCallback( $matches )
{
return 'src="'.$this->sanitizeURL( $matches[1] ).'"';
}
/**
* Remove potential flaws in href attributes
* @access private
* @param string html tag
* @return string filtered html tag
*/
private function sanitizeSrc( $str )
{
$HTML_Sanitizer_URL = 'src="([^"]+)"';
return preg_replace_callback("/$HTML_Sanitizer_URL/i"
, array( &$this, '_sanitizeSrcCallback' )
, $str );
}
/**
* Remove dangerous attributes from html tags
* @access private
* @param string html tag
* @return string filtered html tag
*/
private function removeEvilAttributes( $str )
{
if ( ! $this->_allowDOMEvents )
{
$str = preg_replace_callback('/<(.*?)>/i'
, array( &$this, '_removeDOMEventsCallback' )
, $str );
}
if ( ! $this->_allowStyle )
{
$str = preg_replace_callback('/<(.*?)>/i'
, array( &$this, '_removeStyleCallback' )
, $str );
}
return $str;
}
/**
* Remove DOM events attributes from html tags
* @access private
* @param string html tag
* @return string filtered html tag
*/
private function removeDOMEvents( $str )
{
$str = preg_replace ( '/\s*=\s*/', '=', $str );
$HTML_Sanitizer_stripAttrib = '(onclick|ondblclick|onmousedown|'
. 'onmouseup|onmouseover|onmousemove|onmouseout|onkeypress|onkeydown|'
. 'onkeyup|onfocus|onblur|onabort|onerror|onload)'
;
$str = stripslashes( preg_replace("/$HTML_Sanitizer_stripAttrib/i"
, 'forbidden'
, $str ) );
return $str;
}
/**
* Callback for PCRE
* @access private
* @param matches array
* @return string
* @see removeDOMEvents
*/
private function _removeDOMEventsCallback( $matches )
{
return '<' . $this->removeDOMEvents( $matches[1] ) . '>';
}
/**
* Remove style attributes from html tags
* @access private
* @param string html tag
* @return string filtered html tag
*/
private function removeStyle( $str )
{
$str = preg_replace ( '/\s*=\s*/', '=', $str );
$HTML_Sanitizer_stripAttrib = '(style)'
;
$str = stripslashes( preg_replace("/$HTML_Sanitizer_stripAttrib/i"
, 'forbidden'
, $str ) );
return $str;
}
/**
* Callback for PCRE
* @access private
* @param matches array
* @return string
* @see removeStyle
*/
private function _removeStyleCallback( $matches )
{
return '<' . $this->removeStyle( $matches[1] ) . '>';
}
/**
* Remove dangerous HTML tags
* @access private
* @param string html code
* @return string filtered url
*/
private function removeEvilTags( $str )
{
$allowedTags = $this->_allowedTags;
if ( $this->_allowScript )
{
$allowedTags .= '<script>';
}
if ( $this->_allowStyle )
{
$allowedTags .= '<style>';
}
if ( $this->_allowObjects )
{
$allowedTags .= '<object><embed><applet><param>';
}
$allowedTags .= $this->_additionalTags;
$str = strip_tags($str, $allowedTags );
return $str;
}
/**
* Sanitize HTML
* remove dangerous tags and attributes
* clean urls
* @access public
* @param string html code
* @return string sanitized html code
*/
public function sanitize( $html )
{
$html = $this->removeEvilTags( $html );
$html = $this->removeEvilAttributes( $html );
$html = $this->sanitizeHref( $html );
$html = $this->sanitizeSrc( $html );
return $html;
}
}
function html_sanitize( $str )
{
static $san = null;
if ( empty( $san ) )
{
$san = new HTML_Sanitizer;
}
return $san->sanitize( $str );
}
function html_loose_sanitize( $str )
{
static $san = null;
if ( empty( $san ) )
{
$san = new HTML_Sanitizer;
$san->allowAll();
}
return $san->sanitize( $str );
}