From 6cc205ec1a390b7eca63a6983778e2c8a9c21caa Mon Sep 17 00:00:00 2001 From: Nigel Banks Date: Mon, 22 Jul 2013 12:54:55 +0200 Subject: [PATCH 1/2] Attempt to normalize XML data-streams when comparing for equality. Used during solution install/reinstall to determine if changes have been made. Fedora selectively strips newline characters within actual content. Now we normalize the two data-streams such that all newlines are made spaces and any preceding/trailing white-space within the document is removed. For Issue: https://dgi.ontimenow.com/viewitem.aspx?id=1989&type=features&force_use_number=false --- includes/solution_packs.inc | 6 ++++++ xml/strip_newlines_and_whitespace.xsl | 13 +++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 xml/strip_newlines_and_whitespace.xsl diff --git a/includes/solution_packs.inc b/includes/solution_packs.inc index 26bd77b5..18169562 100644 --- a/includes/solution_packs.inc +++ b/includes/solution_packs.inc @@ -431,12 +431,18 @@ function islandora_check_object_status(AbstractObject $object_definition) { // we need to replace the info:fedora namespace, as C14N hates it. // C14N also doesn't normalize whitespace at the end of lines and Fedora // may add some whitespace on some lines. + $xsl = new DOMDocument(); + $xsl->load(drupal_get_path('module', 'islandora') . 
'/xml/strip_newlines_and_whitespace.xsl'); + $xslt = new XSLTProcessor(); + $xslt->importStyleSheet($xsl); $object_definition_dom = new DOMDocument(); $object_definition_dom->preserveWhiteSpace = FALSE; $object_definition_dom->loadXML(str_replace('info:', 'http://', $ds->content)); + $object_definition_dom = $xslt->transformToDoc($object_definition_dom); $object_actual_dom = new DOMDocument(); $object_actual_dom->preserveWhiteSpace = FALSE; $object_actual_dom->loadXML(str_replace('info:', 'http://', $existing_object[$ds->id]->content)); + $object_actual_dom = $xslt->transformToDoc($object_actual_dom); // Fedora changes the xml structure so we need to cannonize it. if ($object_actual_dom->C14N() != $object_definition_dom->C14N()) { diff --git a/xml/strip_newlines_and_whitespace.xsl b/xml/strip_newlines_and_whitespace.xsl new file mode 100644 index 00000000..4d4e1057 --- /dev/null +++ b/xml/strip_newlines_and_whitespace.xsl @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file From c21ec669f66ffe57dbfa7f0b45bb9780f3cd22f1 Mon Sep 17 00:00:00 2001 From: Nigel Banks Date: Tue, 23 Jul 2013 00:59:32 +0200 Subject: [PATCH 2/2] Updated comment --- includes/solution_packs.inc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/includes/solution_packs.inc b/includes/solution_packs.inc index 18169562..72749d0d 100644 --- a/includes/solution_packs.inc +++ b/includes/solution_packs.inc @@ -430,7 +430,9 @@ function islandora_check_object_status(AbstractObject $object_definition) { // be equal as Fedora does some XML mangling. In order for C14N to work // we need to replace the info:fedora namespace, as C14N hates it. // C14N also doesn't normalize whitespace at the end of lines and Fedora - // may add some whitespace on some lines. + // will sometimes replace new-lines with white-space. So first we strip + // leading/trailing white-space and replace all new-lines within the xml + // document to account for Fedora's weird formatting. 
$xsl = new DOMDocument(); $xsl->load(drupal_get_path('module', 'islandora') . '/xml/strip_newlines_and_whitespace.xsl'); $xslt = new XSLTProcessor();