EBSCO Discovery module. Used on the library.upei.ca website. The bento box module leverages the authentication parts of this module.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

444 lines
9.5 KiB

* @file
* $Id$.
// vim: expandtab sw=4 ts=4 sts=4:
// ***** BEGIN LICENSE BLOCK *****
// This file is part of HTML Sanitizer.
// Copyright (c) 2005-2011 Frederic Minne <zefredz@gmail.com>.
// All rights reserved.
// HTML Sanitizer is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation; either version 3 of the License, or
// (at your option) any later version.
// HTML Sanitizer is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU Lesser General Public License
// along with HTML Sanitizer; if not, see <http://www.gnu.org/licenses/>.
// ***** END LICENSE BLOCK *****.
/**
 * Sanitize HTML contents.
 *
 * Removes dangerous tags and attributes that can lead to security issues like
 * XSS or HTTP response splitting. Filtering is regex based: disallowed tags
 * are dropped with strip_tags(), event-handler and style attribute names are
 * renamed to "forbidden", and href/src values are scrubbed.
 *
 * @author Frederic Minne <zefredz@gmail.com>
 * @copyright Copyright &copy; 2005-2011, Frederic Minne
 * @license http://www.gnu.org/licenses/lgpl.txt GNU Lesser General Public License version 3 or later
 * @version 1.1
 */
class HTML_Sanitizer {

  /**
   * Base list of allowed tags, in strip_tags() "<a><b>" notation.
   */
  private $_allowedTags;

  /**
   * Whether on* event-handler attributes are left untouched.
   */
  private $_allowDOMEvents;

  /**
   * Whether "javascript:" is left untouched in href/src values.
   */
  private $_allowJavascriptInUrls;

  /**
   * Whether object, embed, applet and param tags are allowed.
   */
  private $_allowObjects;

  /**
   * Whether script tags are allowed.
   */
  private $_allowScript;

  /**
   * Whether style tags and style attributes are allowed.
   */
  private $_allowStyle;

  /**
   * Extra allowed tags appended by addAdditionalTags().
   */
  private $_additionalTags;

  /**
   * Constructor.
   */
  public function __construct() {
    // NOTE(review): the constructor body was not visible in the reviewed
    // source. Without this call every option stays NULL and strip_tags()
    // would remove all tags, so initialising the defaults is assumed.
    $this->resetAll();
  }

  /**
   * (Re)set all options to their default, most restrictive value.
   */
  public function resetAll() {
    $this->_allowDOMEvents = FALSE;
    $this->_allowJavascriptInUrls = FALSE;
    $this->_allowStyle = FALSE;
    $this->_allowScript = FALSE;
    $this->_allowObjects = FALSE;
    // NOTE(review): '<add>' is not a standard HTML element and '<br>' is
    // listed twice; the list is kept unchanged to preserve behaviour.
    $this->_allowedTags = '<a><br><b><h1><h2><h3><h4><h5><h6>'
      . '<img><li><ol><p><strong><table><tr><td><th><u><ul><thead>'
      . '<tbody><tfoot><em><dd><dt><dl><span><div><del><add><i><hr>'
      . '<pre><br><blockquote><address><code><caption><abbr><acronym>'
      . '<cite><dfn><q><ins><sup><sub><kbd><samp><var><tt><small><big>';
    $this->_additionalTags = '';
  }

  /**
   * Add additional tags to the allowed tags.
   *
   * @param string $tags
   *   Tags in strip_tags() notation, e.g. '<iframe><video>'.
   */
  public function addAdditionalTags($tags) {
    $this->_additionalTags .= $tags;
  }

  /**
   * Allow iframes.
   */
  public function allowIframes() {
    // NOTE(review): body not visible in the reviewed source; reconstructed
    // from the method's documented purpose. Confirm against the original.
    $this->addAdditionalTags('<iframe>');
  }

  /**
   * Allow HTML5 media tags.
   */
  public function allowHtml5Media() {
    // NOTE(review): body not visible in the reviewed source; the exact tag
    // list is an assumption. Confirm against the original.
    $this->addAdditionalTags('<canvas><video><audio>');
  }

  /**
   * Allow object, embed, applet and param tags in html.
   */
  public function allowObjects() {
    $this->_allowObjects = TRUE;
  }

  /**
   * Allow DOM event attributes on DOM elements.
   */
  public function allowDOMEvents() {
    $this->_allowDOMEvents = TRUE;
  }

  /**
   * Allow script tags.
   */
  public function allowScript() {
    $this->_allowScript = TRUE;
  }

  /**
   * Allow the use of javascript: in urls.
   */
  public function allowJavascriptInUrls() {
    $this->_allowJavascriptInUrls = TRUE;
  }

  /**
   * Allow style tags and attributes.
   */
  public function allowStyle() {
    $this->_allowStyle = TRUE;
  }

  /**
   * Helper to allow all javascript related tags and attributes.
   */
  public function allowAllJavascript() {
    // NOTE(review): body not visible in the reviewed source; reconstructed
    // as the three javascript-related switches this class exposes.
    $this->allowDOMEvents();
    $this->allowScript();
    $this->allowJavascriptInUrls();
  }

  /**
   * Allow all tags and attributes.
   */
  public function allowAll() {
    // NOTE(review): body not visible in the reviewed source; reconstructed
    // as "every allow*() switch of this class".
    $this->allowAllJavascript();
    $this->allowObjects();
    $this->allowStyle();
    $this->allowIframes();
    $this->allowHtml5Media();
  }

  /**
   * Filter URLs to avoid HTTP response splitting attacks.
   *
   * Removes raw CR/LF characters and their percent-encoded forms.
   *
   * @param string $url
   *   The url to filter.
   *
   * @return string
   *   The filtered url.
   */
  public function filterHTTPResponseSplitting($url) {
    $dangerousCharactersPattern = '~\r|\n|%0[ad]~i';
    // A single pass can splice a new sequence together ('%0%0aa' collapses
    // to '%0a'), so repeat until nothing is left to remove.
    do {
      $previous = $url;
      $url = preg_replace($dangerousCharactersPattern, '', $previous);
    } while ($url !== $previous);
    return $url;
  }

  /**
   * Remove potential javascript in urls.
   *
   * Every "javascript:" occurrence, up to the next double quote or the end of
   * the string, is replaced with "__forbidden__".
   *
   * @param string $str
   *   The url (or markup fragment) to filter.
   *
   * @return string
   *   The filtered string.
   */
  public function removeJavascriptURL($str) {
    return preg_replace('/javascript:[^"]+/i', '__forbidden__', $str);
  }

  /**
   * Remove potential flaws in urls.
   *
   * @param string $url
   *   The raw attribute value.
   *
   * @return string
   *   The filtered url.
   */
  private function sanitizeURL($url) {
    // Strip line breaks BEFORE looking for "javascript:"; doing it afterwards
    // lets "java\nscript:" or "java%0ascript:" slip past the check and then
    // be glued back together into a working javascript: url.
    $url = $this->filterHTTPResponseSplitting($url);
    if (!$this->_allowJavascriptInUrls) {
      // NOTE(review): only the literal text "javascript:" is detected;
      // entity-encoded or tab-split schemes are not handled here.
      $url = $this->removeJavascriptURL($url);
    }
    return $url;
  }

  /**
   * Sanitize the value of every occurrence of one url attribute.
   *
   * Handles double-quoted, single-quoted and unquoted values and keeps the
   * quoting style that was found.
   *
   * @param string $str
   *   The html code.
   * @param string $attribute
   *   The attribute name, e.g. 'href'.
   *
   * @return string
   *   The html code with sanitized attribute values.
   */
  private function sanitizeUrlAttribute($str, $attribute) {
    $pattern = '/(' . preg_quote($attribute, '/') . ')\s*=\s*'
      . '(?:"([^"]*)"|\'([^\']*)\'|([^\s"\'>][^\s>]*))/i';
    $filtered = preg_replace_callback($pattern, array($this, '_sanitizeUrlAttributeCallback'), $str);
    // preg_replace_callback() returns NULL on a PCRE error; fail closed.
    return $filtered === NULL ? '' : $filtered;
  }

  /**
   * Callback for PCRE.
   *
   * @param array $matches
   *   1: attribute name, 2: double-quoted value, 3: single-quoted value,
   *   4: unquoted value. Groups after the one that matched are absent.
   *
   * @return string
   *   The rebuilt attribute.
   *
   * @see sanitizeUrlAttribute
   */
  private function _sanitizeUrlAttributeCallback($matches) {
    if (isset($matches[4])) {
      $quote = '';
      $value = $matches[4];
    }
    elseif (isset($matches[3])) {
      $quote = "'";
      $value = $matches[3];
    }
    else {
      $quote = '"';
      $value = $matches[2];
    }
    return strtolower($matches[1]) . '=' . $quote . $this->sanitizeURL($value) . $quote;
  }

  /**
   * Remove potential flaws in href attributes.
   *
   * @param string $str
   *   The html code.
   *
   * @return string
   *   The filtered html code.
   */
  private function sanitizeHref($str) {
    return $this->sanitizeUrlAttribute($str, 'href');
  }

  /**
   * Remove potential flaws in src attributes.
   *
   * @param string $str
   *   The html code.
   *
   * @return string
   *   The filtered html code.
   */
  private function sanitizeSrc($str) {
    return $this->sanitizeUrlAttribute($str, 'src');
  }

  /**
   * Remove dangerous attributes from html tags.
   *
   * @param string $str
   *   The html code.
   *
   * @return string
   *   The filtered html code.
   */
  private function removeEvilAttributes($str) {
    if ($this->_allowDOMEvents && $this->_allowStyle) {
      return $str;
    }
    // Quote-aware tag matcher: the character classes also match newlines, and
    // a '>' inside a quoted attribute value does not end the tag, so
    // multi-line tags and values containing '>' are filtered as a whole.
    $tagPattern = '/<((?:[^>"\']++|"[^"]*"|\'[^\']*\'|["\'])*+)>/';
    $filtered = preg_replace_callback($tagPattern, array($this, '_filterTagCallback'), $str);
    // preg_replace_callback() returns NULL on a PCRE error; fail closed.
    return $filtered === NULL ? '' : $filtered;
  }

  /**
   * Callback for PCRE: filter the attributes of one tag.
   *
   * @param array $matches
   *   1: everything between '<' and '>'.
   *
   * @return string
   *   The rebuilt tag.
   *
   * @see removeEvilAttributes
   */
  private function _filterTagCallback($matches) {
    // Walk the tag as name[=value] tokens so that only names are inspected;
    // quoted values are consumed whole and never rewritten.
    $attributePattern = '/([^\s"\'<>\/=]+)(\s*=\s*(?:"[^"]*"|\'[^\']*\'|[^\s>]*))?/';
    return '<' . preg_replace_callback($attributePattern, array($this, '_filterAttributeCallback'), $matches[1]) . '>';
  }

  /**
   * Callback for PCRE: rename one forbidden attribute.
   *
   * Any name starting with "on" counts as a DOM event handler; "style" is
   * matched case-insensitively. The tag name itself is the first token, so a
   * "<style>" tag let through by addAdditionalTags() is renamed as well.
   *
   * @param array $matches
   *   1: attribute (or tag) name, 2: '=value' part when present.
   *
   * @return string
   *   The token, with its name replaced by 'forbidden' when not allowed.
   *
   * @see _filterTagCallback
   */
  private function _filterAttributeCallback($matches) {
    $name = $matches[1];
    $isEvent = !$this->_allowDOMEvents && preg_match('/^on\w/i', $name) === 1;
    $isStyle = !$this->_allowStyle && strcasecmp($name, 'style') === 0;
    if ($isEvent || $isStyle) {
      return 'forbidden' . (isset($matches[2]) ? $matches[2] : '');
    }
    return $matches[0];
  }

  /**
   * Remove dangerous HTML tags.
   *
   * @param string $str
   *   The html code.
   *
   * @return string
   *   The html code with only the allowed tags left.
   */
  private function removeEvilTags($str) {
    $allowedTags = $this->_allowedTags;
    if ($this->_allowScript) {
      $allowedTags .= '<script>';
    }
    if ($this->_allowStyle) {
      $allowedTags .= '<style>';
    }
    if ($this->_allowObjects) {
      $allowedTags .= '<object><embed><applet><param>';
    }
    $allowedTags .= $this->_additionalTags;
    return strip_tags($str, $allowedTags);
  }

  /**
   * Sanitize HTML.
   *
   * Removes dangerous tags and attributes, then cleans href and src urls.
   *
   * @param string $html
   *   The html code.
   *
   * @return string
   *   The sanitized html code.
   */
  public function sanitize($html) {
    $html = $this->removeEvilTags($html);
    $html = $this->removeEvilAttributes($html);
    $html = $this->sanitizeHref($html);
    $html = $this->sanitizeSrc($html);
    return $html;
  }

}
/**
 * Sanitize html with a shared HTML_Sanitizer in its default configuration.
 *
 * The sanitizer is created on the first call and reused afterwards.
 *
 * @param string $str
 *   The html code.
 *
 * @return string
 *   The sanitized html code.
 */
function html_sanitize($str) {
  static $sanitizer = NULL;
  if (!$sanitizer) {
    $sanitizer = new HTML_Sanitizer();
  }
  $clean = $sanitizer->sanitize($str);
  return $clean;
}
/**
 * Sanitize html with a shared, permissive HTML_Sanitizer.
 *
 * The sanitizer is created on the first call, switched to allow everything,
 * and reused afterwards.
 *
 * @param string $str
 *   The html code.
 *
 * @return string
 *   The sanitized html code.
 */
function html_loose_sanitize($str) {
  static $san = NULL;
  if (empty($san)) {
    $san = new HTML_Sanitizer();
    // NOTE(review): as it stood in the reviewed source this function was
    // identical to html_sanitize(), so "loose" had no effect. Enabling every
    // option on the shared instance is assumed to be the intent; confirm.
    $san->allowAll();
  }
  return $san->sanitize($str);
}