Updates page MODS when a parent book's MODS is updated. Currently it will only update the page MODS if the book title or accessCondition changes.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

342 lines
12 KiB

5 years ago
<?php
/**
* @file
* Module file for roblib_update_pmods.
*/
/**
 * Implements hook_CMODEL_PID_DSID_islandora_datastream_modified().
 *
 * Collects the pages of the object whose datastream was modified and hands
 * them, together with the modified datastream's content, to
 * roblib_update_pmods_batch().
 *
 * @param AbstractObject $object
 *   The object the modified datastream belongs to; its pages are looked up.
 * @param AbstractDatastream $datastream
 *   The modified datastream; its content is used as the parent MODS.
 * @param array $params
 *   Hook parameters; not used by this implementation.
 */
function roblib_update_pmods_islandora_bookcmodel_mods_islandora_datastream_modified(AbstractObject $object, AbstractDatastream $datastream, array $params) {
  // islandora_paged_content_get_pages() lives in the utilities include.
  module_load_include('inc', 'islandora_paged_content', 'utilities');
  $book_pages = islandora_paged_content_get_pages($object);
  $book_mods = $datastream->content;
  $book_pid = $datastream->parent->id;
  roblib_update_pmods_batch($book_mods, $book_pages, $book_pid);
}
/**
 * Builds and starts the batch that updates the MODS of each page of a book.
 *
 * @param string $parent_mods
 *   The contents of the MODS datastream of the parent book.
 * @param array $pages
 *   A list of pages related to the book.
 * @param string $pid
 *   The PID of the book; used to build the path handed to batch_process().
 */
function roblib_update_pmods_batch($parent_mods, $pages, $pid) {
  // One operation per page. Each operation receives the page entry as-is and
  // the parent MODS as a plain string.
  $operations = [];
  foreach ($pages as $page) {
    $operations[] = [
      '_roblib_update_pmods_update_child',
      [$page, $parent_mods],
    ];
  }

  $batch_definition = [
    'title' => t('Updating child page MODS records ...'),
    'operations' => $operations,
    'init_message' => t('starting'),
    'progress_message' => t('Processed @current out of @total.'),
    'error_message' => t('An error occurred during processing'),
    'finished' => 'roblib_update_pmods_batch_finished',
  ];

  batch_set($batch_definition);
  batch_process('islandora/object/' . $pid);
}
/**
 * Batch operation: updates the MODS of a single page.
 *
 * Keeps a running count of processed pages in the batch results and passes
 * that count on as the page's image number. Exceptions raised while updating
 * the page are logged with watchdog() and not rethrown.
 *
 * @param array $page
 *   A page from a book; its 'pid' key is read here.
 * @param string $parent_mods
 *   The contents of the MODS datastream of the parent book.
 * @param array $context
 *   The batch context.
 */
function _roblib_update_pmods_update_child($page, $parent_mods, &$context) {
  // Advance the running page counter, starting from zero on the first call.
  $processed = isset($context['results']['pages_processed']) ? $context['results']['pages_processed'] : 0;
  $processed++;
  $context['results']['pages_processed'] = $processed;

  $placeholders = [
    '!p' => $processed,
    '%pid' => $page['pid'],
  ];
  $context['message'] = t('Processing MODS for page !p %pid', $placeholders);

  try {
    roblib_update_pmods_update_child($page, $parent_mods, $processed);
  }
  catch (Exception $e) {
    // TODO: decide how failures should be reported beyond the log entry.
    watchdog('roblib_update_pmods', $e->getMessage(), NULL, WATCHDOG_ERROR, NULL);
  }
}
/**
 * Loads a page object and brings its MODS datastream in line with the book.
 *
 * If the page has no MODS datastream yet, one is constructed from an empty
 * MODS template, updated, and then ingested. Otherwise the existing
 * datastream is updated in place.
 *
 * @param array $page
 *   A page from a book; its 'pid' key identifies the object to load.
 * @param string $parent_mods
 *   The contents of the MODS datastream of the parent book.
 * @param int $image_number
 *   The image number of the page within the book.
 *
 * @throws \Exception
 *   If the page object cannot be loaded, or if comparing/updating the MODS
 *   fails.
 */
function roblib_update_pmods_update_child($page, $parent_mods, $image_number) {
  $dsid = 'MODS';
  $mods_template = <<<EOT
<mods xmlns="http://www.loc.gov/mods/v3" xmlns:mods="http://www.loc.gov/mods/v3" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsisnippet="http://www.w3.org/2001/XMLSchema-instance" version="3.2" xsisnippet:schemaLocation="http://www.loc.gov/mods/ http://www.loc.gov/standards/mods/mods.xsd">
</mods>
EOT;

  $page_object = islandora_object_load($page['pid']);
  if (!$page_object) {
    // Without this guard a failed load ends in a fatal "member function on
    // a non-object" error, which the batch operation's catch (Exception)
    // cannot intercept.
    throw new Exception('Could not load page object ' . $page['pid']);
  }

  // Decide once whether the datastream has to be created; the answer does
  // not change until the datastream is ingested below.
  $is_new = !isset($page_object[$dsid]);
  if ($is_new) {
    $datastream = $page_object->constructDatastream($dsid);
    $datastream->label = 'MODS';
    $datastream->mimeType = 'application/xml';
    $datastream->content = $mods_template;
  }
  else {
    $datastream = $page_object[$dsid];
  }

  roblib_update_pmods_compare_and_update($datastream, $parent_mods, $image_number);

  if ($is_new) {
    // A constructed datastream only becomes part of the object once ingested.
    $page_object->ingestDatastream($datastream);
  }
}
/**
 * Removes XML nodes from the DOM document they belong to.
 *
 * Nodes that are already detached (no parent node) are skipped, so the same
 * node may safely appear more than once in the list.
 *
 * @param array $nodes_to_delete
 *   The DOM nodes to delete.
 *
 * @throws \Exception
 *   If a node could not be removed from its parent.
 */
function roblib_update_pmods_delete_nodes($nodes_to_delete){
  foreach ($nodes_to_delete as $node) {
    $parent = $node->parentNode;
    if ($parent === NULL) {
      // Already removed earlier in this loop (duplicate entry) or never
      // attached; calling removeChild() on NULL would be a fatal error.
      continue;
    }
    $old_node = $parent->removeChild($node);
    if (empty($old_node)) {
      throw new Exception('Could not delete Node ' . $node->nodeValue);
    }
  }
}
/**
 * Updates a page's MODS datastream if the parent book's MODS differs.
 *
 * Four groups of values are compared between the book MODS and the page MODS:
 * - titleInfo[not(@type)]/title: the page should carry each book title
 *   prefixed with "Image N - ".
 * - relatedItem[@type='host']/titleInfo/title: the page should carry each
 *   book title unchanged.
 * - accessCondition[@type='use and reproduction'] linked to the
 *   rightsstatements.org InC 1.0 URI through xlink:href.
 * - accessCondition[@type='use and reproduction'] without an xlink:href.
 *
 * When a group is out of date, the page's existing nodes for that group are
 * removed and the full set of book values is written back. The datastream
 * content is only replaced if at least one group actually changed.
 *
 * @param AbstractDatastream $datastream
 *   A page object's MODS datastream.
 * @param string $parent_mods
 *   A book object's MODS XML.
 * @param int $image_number
 *   The image number of the page within the book.
 *
 * @throws \Exception
 *   If either MODS document is empty or cannot be parsed, or if a node
 *   cannot be deleted.
 */
function roblib_update_pmods_compare_and_update(&$datastream, $parent_mods, $image_number) {
  $child_mods = $datastream->content;
  if (empty($child_mods)) {
    throw new Exception('Error loading Child MODS');
  }
  if (empty($parent_mods)) {
    // DOMDocument::loadXML() rejects an empty string outright (a ValueError
    // on PHP 8), so report it as an ordinary exception instead.
    throw new Exception('Error loading Parent MODS');
  }

  $child_xml_doc = new DOMDocument();
  if ($child_xml_doc->loadXML($child_mods) === FALSE) {
    throw new Exception('Error processing Child MODS ' . $child_mods);
  }
  $parent_xml_doc = new DOMDocument();
  if ($parent_xml_doc->loadXML($parent_mods) === FALSE) {
    throw new Exception('Error processing Parent MODS ' . $parent_mods);
  }

  $child_xpath = _roblib_update_pmods_mods_xpath($child_xml_doc);
  $parent_xpath = _roblib_update_pmods_mods_xpath($parent_xml_doc);

  // Book titles, and the same titles as they should appear on this page.
  $title_query = "/mods:mods/mods:titleInfo[not(@type)]/mods:title";
  $book_titles = _roblib_update_pmods_node_values($parent_xpath->query($title_query), TRUE);
  $page_titles = [];
  foreach ($book_titles as $book_title) {
    $page_titles[] = 'Image ' . $image_number . ' - ' . $book_title;
  }

  // Page title: the whole titleInfo element (one level up) is replaced.
  $titles_changed = _roblib_update_pmods_sync_values(
    $child_xpath->query($title_query),
    1,
    $page_titles,
    FALSE,
    function ($value) use ($child_xml_doc) {
      roblib_update_pmods_update_title($child_xml_doc, $value);
    }
  );

  // Host (book) title: the whole relatedItem element (two levels up) is
  // replaced.
  $book_title_query = "/mods:mods/mods:relatedItem[@type='host']/mods:titleInfo/mods:title";
  $host_titles_changed = _roblib_update_pmods_sync_values(
    $child_xpath->query($book_title_query),
    2,
    $book_titles,
    FALSE,
    function ($value) use ($child_xml_doc) {
      roblib_update_pmods_update_book_title($child_xml_doc, $value);
    }
  );

  // Access condition that links to the InC rights statement.
  $linked_query = "/mods:mods/mods:accessCondition[@type='use and reproduction' and @xlink:href='http://rightsstatements.org/vocab/InC/1.0/']";
  $linked_changed = _roblib_update_pmods_sync_values(
    $child_xpath->query($linked_query),
    0,
    _roblib_update_pmods_node_values($parent_xpath->query($linked_query)),
    TRUE,
    function ($value) use ($child_xml_doc) {
      roblib_update_pmods_update_conditions($child_xml_doc, $value, TRUE);
    }
  );

  // A book may have a custom accessCondition that doesn't link to a license.
  // This query runs after the linked pass so it sees the document as that
  // pass left it.
  $plain_query = "/mods:mods/mods:accessCondition[@type='use and reproduction' and not(@xlink:href)]";
  $plain_changed = _roblib_update_pmods_sync_values(
    $child_xpath->query($plain_query),
    0,
    _roblib_update_pmods_node_values($parent_xpath->query($plain_query)),
    TRUE,
    function ($value) use ($child_xml_doc) {
      roblib_update_pmods_update_conditions($child_xml_doc, $value, FALSE);
    }
  );

  if ($titles_changed || $host_titles_changed || $linked_changed || $plain_changed) {
    $datastream->setContentFromString($child_xml_doc->saveXML());
  }
}

/**
 * Creates an XPath evaluator with the mods and xlink prefixes registered.
 *
 * @param DOMDocument $doc
 *   The MODS document to query.
 *
 * @return DOMXPath
 *   The evaluator bound to $doc.
 */
function _roblib_update_pmods_mods_xpath(DOMDocument $doc) {
  $xpath = new DOMXPath($doc);
  $xpath->registerNamespace('mods', 'http://www.loc.gov/mods/v3');
  $xpath->registerNamespace('xlink', 'http://www.w3.org/1999/xlink');
  return $xpath;
}

/**
 * Collects the text values of a list of DOM nodes.
 *
 * @param DOMNodeList $nodes
 *   The nodes to read.
 * @param bool $trim
 *   Whether surrounding whitespace is stripped from each value.
 *
 * @return array
 *   The node values, in document order.
 */
function _roblib_update_pmods_node_values($nodes, $trim = FALSE) {
  $values = [];
  foreach ($nodes as $node) {
    $values[] = $trim ? trim($node->nodeValue) : $node->nodeValue;
  }
  return $values;
}

/**
 * Replaces one group of page MODS nodes with the wanted values if they differ.
 *
 * @param DOMNodeList $matches
 *   The value-carrying nodes currently present in the page MODS.
 * @param int $levels_up
 *   How many parentNode steps lead from a matched node to the element that
 *   has to be removed (0 removes the matched node itself).
 * @param array $wanted
 *   The values the page MODS should carry for this group.
 * @param bool $purge_when_empty
 *   If TRUE, existing page nodes are also removed when $wanted is empty.
 * @param callable $append
 *   Called once per wanted value to add it to the page MODS.
 *
 * @return bool
 *   TRUE if the page MODS document was modified, FALSE otherwise.
 *
 * @throws \Exception
 *   If a node cannot be deleted.
 */
function _roblib_update_pmods_sync_values($matches, $levels_up, array $wanted, $purge_when_empty, $append) {
  $current = [];
  $stale = [];
  foreach ($matches as $match) {
    $current[] = $match->nodeValue;
    $owner = $match;
    for ($i = 0; $i < $levels_up; $i++) {
      $owner = $owner->parentNode;
    }
    // Keyed by node path so an element that holds several matches (e.g. a
    // titleInfo with two titles) is queued for deletion only once.
    $stale[$owner->getNodePath()] = $owner;
  }

  $has_missing = count(array_diff($wanted, $current)) > 0;
  // Only counts as a change if there is actually something to remove.
  $has_leftovers = $purge_when_empty && count($wanted) < 1 && count($stale) > 0;
  if (!$has_missing && !$has_leftovers) {
    return FALSE;
  }

  roblib_update_pmods_delete_nodes($stale);
  // Every existing node was removed above, so write back ALL wanted values,
  // not just the ones that were missing.
  foreach ($wanted as $value) {
    call_user_func($append, $value);
  }
  return TRUE;
}
/**
 * Appends an accessCondition element to a page's MODS document.
 *
 * The element is created in the MODS namespace with
 * type="use and reproduction". Its text is added as a text node so that
 * characters such as "&" are stored literally.
 *
 * @param \DOMDocument $mods_doc
 *   A page's MODS XML document.
 * @param string $condition
 *   The text of the new accessCondition node.
 * @param bool $use_xlink
 *   If TRUE, an xlink:href attribute pointing at the rightsstatements.org
 *   InC 1.0 URI is added.
 */
function roblib_update_pmods_update_conditions($mods_doc, $condition, $use_xlink = FALSE){
  $access_condition = $mods_doc->createElementNS('http://www.loc.gov/mods/v3', 'accessCondition');
  // createElement()'s value argument does not escape "&"; a text node does.
  $access_condition->appendChild($mods_doc->createTextNode((string) $condition));
  // Attach to the root first so namespace declarations on the root are in
  // scope when the xlink attribute is added.
  $mods_doc->documentElement->appendChild($access_condition);
  $access_condition->setAttribute('type', 'use and reproduction');
  if ($use_xlink) {
    $access_condition->setAttributeNS(
      'http://www.w3.org/1999/xlink',
      'xlink:href',
      'http://rightsstatements.org/vocab/InC/1.0/'
    );
  }
}
/**
 * Appends a relatedItem[@type='host'] carrying the book's title to a page.
 *
 * Adds relatedItem/titleInfo/title to the document root, with all three
 * elements created in the MODS namespace. The title text is added as a text
 * node so that characters such as "&" are stored literally.
 *
 * @param \DOMDocument $mods_doc
 *   A page's MODS XML document.
 * @param string $book_title
 *   The title of the book.
 */
function roblib_update_pmods_update_book_title (&$mods_doc, $book_title) {
  $mods_ns = 'http://www.loc.gov/mods/v3';

  $related_item = $mods_doc->createElementNS($mods_ns, 'relatedItem');
  $related_item->setAttribute('type', 'host');
  $mods_doc->documentElement->appendChild($related_item);

  $title_info = $mods_doc->createElementNS($mods_ns, 'titleInfo');
  $related_item->appendChild($title_info);

  $title = $mods_doc->createElementNS($mods_ns, 'title');
  // createElement()'s value argument does not escape "&"; a text node does.
  $title->appendChild($mods_doc->createTextNode((string) $book_title));
  $title_info->appendChild($title);
}
/**
 * Appends a titleInfo/title pair to the root of a page's MODS document.
 *
 * Both elements are created in the MODS namespace. The title text is added
 * as a text node so that characters such as "&" are stored literally.
 *
 * @param \DOMDocument $mods_doc
 *   A page's MODS XML document.
 * @param string $title
 *   The title to store; callers pass the book title prefixed with the page
 *   image number.
 */
function roblib_update_pmods_update_title(&$mods_doc, $title) {
  $mods_ns = 'http://www.loc.gov/mods/v3';

  $title_info = $mods_doc->createElementNS($mods_ns, 'titleInfo');
  $mods_doc->documentElement->appendChild($title_info);

  $title_node = $mods_doc->createElementNS($mods_ns, 'title');
  // createElement()'s value argument does not escape "&"; a text node does.
  $title_node->appendChild($mods_doc->createTextNode((string) $title));
  $title_info->appendChild($title_node);
}
/**
 * Batch 'finished' callback for the page MODS update batch.
 *
 * NOTE(review): the returned string is presumably not displayed by the batch
 * API on its own — confirm whether it should be shown with
 * drupal_set_message() instead.
 *
 * @param bool $success
 *   Whether the batch completed without errors; not used here.
 * @param array $results
 *   The results collected by the batch operations; not used here.
 * @param array $operations
 *   Operations that remained unprocessed; not used here.
 *
 * @return string
 *   The translated completion message.
 */
function roblib_update_pmods_batch_finished($success, $results, $operations) {
  $completion_message = t('Finished Processing Pages.');
  return $completion_message;
}