EBSCO Discovery module. Used on the library.upei.ca website. The bento box modules leverages the auth parts of this module.

445 lines
9.5 KiB

<?php
/**
* @file
* $Id$.
*/
// vim: expandtab sw=4 ts=4 sts=4:
// ***** BEGIN LICENSE BLOCK *****
// This file is part of HTML Sanitizer.
// Copyright (c) 2005-2011 Frederic Minne <zefredz@gmail.com>.
// All rights reserved.
//
// HTML Sanitizer is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation; either version 3 of the License, or
// (at your option) any later version.
//
// HTML Sanitizer is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with HTML Sanitizer; if not, see <http://www.gnu.org/licenses/>.
//
// ***** END LICENSE BLOCK *****.
/**
* Sanitize HTML contents :
* Remove dangerous tags and attributes that can lead to security issues like
* XSS or HTTP response splitting.
*
* @author Frederic Minne <zefredz@gmail.com>
* @copyright Copyright &copy; 2005-2011, Frederic Minne
* @license http://www.gnu.org/licenses/lgpl.txt GNU Lesser General Public License version 3 or later
* @version 1.1
*/
class HTML_Sanitizer {
/**
* Private fields.
*/
private $_allowedTags;
private $_allowJavascriptEvents;
private $_allowJavascriptInUrls;
private $_allowObjects;
private $_allowScript;
private $_allowStyle;
private $_additionalTags;
/**
* Constructor.
*/
public function __construct() {
$this->resetAll();
}
/**
* (re)set all options to default value.
*/
public function resetAll() {
$this->_allowDOMEvents = FALSE;
$this->_allowJavascriptInUrls = FALSE;
$this->_allowStyle = FALSE;
$this->_allowScript = FALSE;
$this->_allowObjects = FALSE;
$this->_allowStyle = FALSE;
$this->_allowedTags = '<a><br><b><h1><h2><h3><h4><h5><h6>'
. '<img><li><ol><p><strong><table><tr><td><th><u><ul><thead>'
. '<tbody><tfoot><em><dd><dt><dl><span><div><del><add><i><hr>'
. '<pre><br><blockquote><address><code><caption><abbr><acronym>'
. '<cite><dfn><q><ins><sup><sub><kbd><samp><var><tt><small><big>';
$this->_additionalTags = '';
}
/**
* Add additional tags to allowed tags.
*
* @param string
*
* @access public
*/
public function addAdditionalTags($tags) {
$this->_additionalTags .= $tags;
}
/**
* Allow iframes.
*
* @access public
*/
public function allowIframes() {
$this->addAdditionalTags('<iframe>');
}
/**
* Allow HTML5 media tags.
*
* @access public
*/
public function allowHtml5Media() {
$this->addAdditionalTags('<canvas><video><audio>');
}
/**
* Allow object, embed, applet and param tags in html.
*
* @access public
*/
public function allowObjects() {
$this->_allowObjects = TRUE;
}
/**
* Allow DOM event on DOM elements.
*
* @access public
*/
public function allowDOMEvents() {
$this->_allowDOMEvents = TRUE;
}
/**
* Allow script tags.
*
* @access public
*/
public function allowScript() {
$this->_allowScript = TRUE;
}
/**
* Allow the use of javascript: in urls.
*
* @access public
*/
public function allowJavascriptInUrls() {
$this->_allowJavascriptInUrls = TRUE;
}
/**
* Allow style tags and attributes.
*
* @access public
*/
public function allowStyle() {
$this->_allowStyle = TRUE;
}
/**
* Helper to allow all javascript related tags and attributes.
*
* @access public
*/
public function allowAllJavascript() {
$this->allowDOMEvents();
$this->allowScript();
$this->allowJavascriptInUrls();
}
/**
* Allow all tags and attributes.
*
* @access public
*/
public function allowAll() {
$this->allowAllJavascript();
$this->allowObjects();
$this->allowStyle();
$this->allowIframes();
$this->allowHtml5Media();
}
/**
* Filter URLs to avoid HTTP response splitting attacks.
*
* @access public
* @param string url
*
* @return string filtered url
*/
public function filterHTTPResponseSplitting($url) {
$dangerousCharactersPattern = '~(\r\n|\r|\n|%0a|%0d|%0D|%0A)~';
return preg_replace($dangerousCharactersPattern, '', $url);
}
/**
* Remove potential javascript in urls.
*
* @access public
* @param string url
*
* @return string filtered url
*/
public function removeJavascriptURL($str) {
$HTML_Sanitizer_stripJavascriptURL = 'javascript:[^"]+';
$str = preg_replace("/$HTML_Sanitizer_stripJavascriptURL/i", '__forbidden__', $str);
return $str;
}
/**
* Remove potential flaws in urls.
*
* @access private
* @param string url
*
* @return string filtered url
*/
private function sanitizeURL($url) {
if (!$this->_allowJavascriptInUrls) {
$url = $this->removeJavascriptURL($url);
}
$url = $this->filterHTTPResponseSplitting($url);
return $url;
}
/**
* Callback for PCRE.
*
* @access private
* @param matches array
*
* @return string
*
* @see sanitizeURL
*/
private function _sanitizeURLCallback($matches) {
return 'href="' . $this->sanitizeURL($matches[1]) . '"';
}
/**
* Remove potential flaws in href attributes.
*
* @access private
* @param string html tag
*
* @return string filtered html tag
*/
private function sanitizeHref($str) {
$HTML_Sanitizer_URL = 'href="([^"]+)"';
return preg_replace_callback("/$HTML_Sanitizer_URL/i", array(&$this, '_sanitizeURLCallback'), $str);
}
/**
* Callback for PCRE.
*
* @access private
* @param matches array
*
* @return string
*
* @see sanitizeURL
*/
private function _sanitizeSrcCallback($matches) {
return 'src="' . $this->sanitizeURL($matches[1]) . '"';
}
/**
* Remove potential flaws in href attributes.
*
* @access private
* @param string html tag
*
* @return string filtered html tag
*/
private function sanitizeSrc($str) {
$HTML_Sanitizer_URL = 'src="([^"]+)"';
return preg_replace_callback("/$HTML_Sanitizer_URL/i", array(&$this, '_sanitizeSrcCallback'), $str);
}
/**
* Remove dangerous attributes from html tags.
*
* @access private
* @param string html tag
*
* @return string filtered html tag
*/
private function removeEvilAttributes($str) {
if (!$this->_allowDOMEvents) {
$str = preg_replace_callback('/<(.*?)>/i', array(&$this, '_removeDOMEventsCallback'), $str);
}
if (!$this->_allowStyle) {
$str = preg_replace_callback('/<(.*?)>/i', array(&$this, '_removeStyleCallback'), $str);
}
return $str;
}
/**
* Remove DOM events attributes from html tags.
*
* @access private
* @param string html tag
*
* @return string filtered html tag
*/
private function removeDOMEvents($str) {
$str = preg_replace('/\s*=\s*/', '=', $str);
$HTML_Sanitizer_stripAttrib = '(onclick|ondblclick|onmousedown|'
. 'onmouseup|onmouseover|onmousemove|onmouseout|onkeypress|onkeydown|'
. 'onkeyup|onfocus|onblur|onabort|onerror|onload)';
$str = stripslashes(preg_replace("/$HTML_Sanitizer_stripAttrib/i", 'forbidden', $str));
return $str;
}
/**
* Callback for PCRE.
*
* @access private
* @param matches array
*
* @return string
*
* @see removeDOMEvents
*/
private function _removeDOMEventsCallback($matches) {
return '<' . $this->removeDOMEvents($matches[1]) . '>';
}
/**
* Remove style attributes from html tags.
*
* @access private
* @param string html tag
*
* @return string filtered html tag
*/
private function removeStyle($str) {
$str = preg_replace('/\s*=\s*/', '=', $str);
$HTML_Sanitizer_stripAttrib = '(style)';
$str = stripslashes(preg_replace("/$HTML_Sanitizer_stripAttrib/i", 'forbidden', $str));
return $str;
}
/**
* Callback for PCRE.
*
* @access private
* @param matches array
*
* @return string
*
* @see removeStyle
*/
private function _removeStyleCallback($matches) {
return '<' . $this->removeStyle($matches[1]) . '>';
}
/**
* Remove dangerous HTML tags.
*
* @access private
* @param string html code
*
* @return string filtered url
*/
private function removeEvilTags($str) {
$allowedTags = $this->_allowedTags;
if ($this->_allowScript) {
$allowedTags .= '<script>';
}
if ($this->_allowStyle) {
$allowedTags .= '<style>';
}
if ($this->_allowObjects) {
$allowedTags .= '<object><embed><applet><param>';
}
$allowedTags .= $this->_additionalTags;
$str = strip_tags($str, $allowedTags);
return $str;
}
/**
* Sanitize HTML
* remove dangerous tags and attributes
* clean urls.
*
* @access public
* @param string html code
*
* @return string sanitized html code
*/
public function sanitize($html) {
$html = $this->removeEvilTags($html);
$html = $this->removeEvilAttributes($html);
$html = $this->sanitizeHref($html);
$html = $this->sanitizeSrc($html);
return $html;
}
}
/**
*
*/
function html_sanitize($str) {
static $san = NULL;
if (empty($san)) {
$san = new HTML_Sanitizer();
}
return $san->sanitize($str);
}
/**
*
*/
function html_loose_sanitize($str) {
static $san = NULL;
if (empty($san)) {
$san = new HTML_Sanitizer();
$san->allowAll();
}
return $san->sanitize($str);
}