You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
342 lines
12 KiB
342 lines
12 KiB
5 years ago
|
<?php
|
||
|
|
||
|
/**
|
||
|
* @file
|
||
|
* Module file for roblib_update_pmods.
|
||
|
*/
|
||
|
|
||
|
/**
 * Implements hook_cmodel_datastream_islandora_datastream_modified().
 *
 * Reacts to a modified MODS datastream on a book content model object by
 * handing the new MODS content and the book's pages to the update batch.
 */
function roblib_update_pmods_islandora_bookcmodel_mods_islandora_datastream_modified(AbstractObject $object, AbstractDatastream $datastream, array $params) {
  // The paged content helpers live in an include file that is not autoloaded.
  module_load_include('inc', 'islandora_paged_content', 'utilities');

  $book_pages = islandora_paged_content_get_pages($object);
  $book_mods = $datastream->content;
  $book_pid = $datastream->parent->id;

  roblib_update_pmods_batch($book_mods, $book_pages, $book_pid);
}
|
||
|
|
||
|
/**
 * Builds and starts the batch that updates the MODS record of every page.
 *
 * One batch operation is queued per page. When there are no pages nothing is
 * queued and no batch is started.
 *
 * @param string $parent_mods
 *   The contents of the MODS datastream of the parent book.
 * @param array $pages
 *   A list of pages related to the book.
 * @param string $pid
 *   The PID of the book. The batch redirects to 'islandora/object/<pid>'.
 */
function roblib_update_pmods_batch($parent_mods, $pages, $pid) {
  // A book without pages has nothing to update; skip the batch and the
  // redirect that batch_process() would otherwise perform.
  if (empty($pages)) {
    return;
  }

  $operations = [];
  foreach ($pages as $page) {
    // Each operation only carries the page record and the parent MODS XML
    // string (no loaded objects), which keeps the stored batch small.
    $operations[] = [
      '_roblib_update_pmods_update_child',
      [$page, $parent_mods],
    ];
  }

  $batch = [
    'title' => t('Updating child page MODS records ...'),
    'operations' => $operations,
    'init_message' => t('starting'),
    'progress_message' => t('Processed @current out of @total.'),
    'error_message' => t('An error occurred during processing'),
    'finished' => 'roblib_update_pmods_batch_finished',
  ];

  batch_set($batch);
  batch_process('islandora/object/' . $pid);
}
|
||
|
|
||
|
/**
 * Batch operation callback: updates the MODS of a single page.
 *
 * Keeps a running count of processed pages in the batch results and passes
 * that count on as the image number of the page. Exceptions thrown while
 * updating a page are logged and do not abort the remaining operations.
 *
 * @param array $page
 *   A page from a book; its 'pid' key is read here.
 * @param string $parent_mods
 *   The contents of the MODS datastream of the parent book.
 * @param array $context
 *   The batch context.
 */
function _roblib_update_pmods_update_child($page, $parent_mods, &$context) {
  if (!isset($context['results']['pages_processed'])) {
    $context['results']['pages_processed'] = 0;
  }
  $context['results']['pages_processed']++;
  $pages_processed = $context['results']['pages_processed'];

  $context['message'] = t('Processing MODS for page !p %pid', [
    '!p' => $pages_processed,
    '%pid' => $page['pid'],
  ]);

  try {
    roblib_update_pmods_update_child($page, $parent_mods, $pages_processed);
  }
  catch (Exception $e) {
    // Record which page failed so the log entry can be acted upon; the batch
    // carries on with the remaining pages.
    watchdog('roblib_update_pmods', 'Could not update MODS for page @pid: @message', [
      '@pid' => $page['pid'],
      '@message' => $e->getMessage(),
    ], WATCHDOG_ERROR);
  }
}
|
||
|
|
||
|
/**
 * Prepare a page object for update.
 *
 * Loads the page object, makes sure it has a MODS datastream (a new one is
 * constructed from an empty MODS template when missing), synchronises it with
 * the parent MODS and ingests the datastream if it was newly constructed.
 *
 * @param array $page
 *   A page from a book; its 'pid' key is used to load the page object.
 * @param string $parent_mods
 *   The contents of the MODS datastream of the parent book.
 * @param int $image_number
 *   The image number of the page within the book.
 *
 * @throws \Exception
 *   When the page object cannot be loaded, or when comparing/updating the
 *   MODS fails.
 */
function roblib_update_pmods_update_child($page, $parent_mods, $image_number) {
  $dsid = 'MODS';
  $mods_template = <<<EOT
<mods xmlns="http://www.loc.gov/mods/v3" xmlns:mods="http://www.loc.gov/mods/v3" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsisnippet="http://www.w3.org/2001/XMLSchema-instance" version="3.2" xsisnippet:schemaLocation="http://www.loc.gov/mods/ http://www.loc.gov/standards/mods/mods.xsd">
</mods>
EOT;

  $page_object = islandora_object_load($page['pid']);
  if (!$page_object) {
    // Fail with a catchable exception instead of a fatal error on the method
    // calls below, so the batch can log the problem and continue.
    throw new Exception('Could not load page object ' . $page['pid']);
  }

  // Remember whether the datastream has to be ingested after the update.
  $is_new_datastream = !isset($page_object[$dsid]);
  if ($is_new_datastream) {
    $datastream = $page_object->constructDatastream($dsid);
    $datastream->label = 'MODS';
    $datastream->mimeType = 'application/xml';
    $datastream->content = $mods_template;
  }
  else {
    $datastream = $page_object[$dsid];
  }

  roblib_update_pmods_compare_and_update($datastream, $parent_mods, $image_number);

  // A constructed datastream only exists in memory until it is ingested.
  if ($is_new_datastream) {
    $page_object->ingestDatastream($datastream);
  }
}
|
||
|
|
||
|
/**
 * Remove XML nodes from the DOM document they belong to.
 *
 * Nodes that are already detached (no parent node), for example because the
 * same node is listed more than once, are skipped.
 *
 * @param array $nodes_to_delete
 *   The DOM nodes to delete.
 *
 * @throws \Exception
 *   When a node could not be removed from its parent.
 */
function roblib_update_pmods_delete_nodes($nodes_to_delete) {
  foreach ($nodes_to_delete as $node) {
    $parent = $node->parentNode;
    if ($parent === NULL) {
      // Already removed earlier in this loop (duplicate entry); calling
      // removeChild() on NULL would be a fatal error.
      continue;
    }
    $removed = $parent->removeChild($node);
    if (empty($removed)) {
      throw new Exception('Could not delete Node ' . $node->nodeValue);
    }
  }
}
|
||
|
|
||
|
/**
 * Update the page object MODS datastream if the parent MODS has been updated.
 *
 * Compares three groups of values between the book MODS and the page MODS:
 * - titleInfo[not(@type)]/title: the page is expected to hold each book title
 *   prefixed with "Image <n> - ";
 * - relatedItem[@type='host']/titleInfo/title: expected to hold each book
 *   title;
 * - accessCondition[@type='use and reproduction'], once for the variant linked
 *   to http://rightsstatements.org/vocab/InC/1.0/ and once for the variant
 *   without an xlink:href.
 *
 * When a group is missing an expected value, the page's existing nodes of
 * that group are removed and all expected values are written again. The
 * datastream content is only replaced when the document was actually changed.
 *
 * @param AbstractDatastream $datastream
 *   A page objects MODS datastream.
 * @param string $parent_mods
 *   A book objects MODS xml.
 * @param int $image_number
 *   The image number of the page within the book.
 *
 * @throws \Exception
 *   When either MODS document is empty or cannot be parsed, or when a node
 *   cannot be deleted.
 */
function roblib_update_pmods_compare_and_update(&$datastream, $parent_mods, $image_number) {
  $child_mods = $datastream->content;
  if (empty($child_mods)) {
    throw new Exception('Error loading Child MODS');
  }
  $child_xml_doc = new DOMDocument();
  if ($child_xml_doc->loadXML($child_mods) === FALSE) {
    throw new Exception('Error processing Child MODS ' . $child_mods);
  }

  // DOMDocument::loadXML() throws a ValueError (not an Exception) for an
  // empty string on PHP 8, so reject empty parent MODS explicitly.
  if (empty($parent_mods)) {
    throw new Exception('Error loading Parent MODS');
  }
  $parent_xml_doc = new DOMDocument();
  if ($parent_xml_doc->loadXML($parent_mods) === FALSE) {
    throw new Exception('Error processing Parent MODS ' . $parent_mods);
  }

  $child_xpath = _roblib_update_pmods_mods_xpath($child_xml_doc);
  $parent_xpath = _roblib_update_pmods_mods_xpath($parent_xml_doc);
  $updated = FALSE;

  // Collect the book titles and derive the titles the page should carry.
  $title_query = "/mods:mods/mods:titleInfo[not(@type)]/mods:title";
  list($raw_book_titles) = _roblib_update_pmods_collect_matches($parent_xpath, $title_query);
  $book_titles = array_values(array_unique(array_map('trim', $raw_book_titles)));
  $image_number_string = 'Image ' . $image_number . ' - ';
  $page_titles = [];
  foreach ($book_titles as $book_title) {
    $page_titles[] = $image_number_string . $book_title;
  }

  // Page title: the whole titleInfo element (one level up) is replaced.
  list($current_titles, $stale_nodes) = _roblib_update_pmods_collect_matches($child_xpath, $title_query, 1);
  if (count(array_diff($page_titles, $current_titles)) > 0) {
    roblib_update_pmods_delete_nodes($stale_nodes);
    // Write every expected title, not only the missing ones, because the
    // matching ones were just deleted together with the stale ones.
    foreach ($page_titles as $page_title) {
      roblib_update_pmods_update_title($child_xml_doc, $page_title);
    }
    $updated = TRUE;
  }

  // Host (book) title: the whole relatedItem element (two levels up) is
  // replaced.
  $book_title_query = "/mods:mods/mods:relatedItem[@type='host']/mods:titleInfo/mods:title";
  list($current_titles, $stale_nodes) = _roblib_update_pmods_collect_matches($child_xpath, $book_title_query, 2);
  if (count(array_diff($book_titles, $current_titles)) > 0) {
    roblib_update_pmods_delete_nodes($stale_nodes);
    foreach ($book_titles as $book_title) {
      roblib_update_pmods_update_book_title($child_xml_doc, $book_title);
    }
    $updated = TRUE;
  }

  // accessCondition linked to the rights statement.
  $linked_query = "/mods:mods/mods:accessCondition[@type='use and reproduction' and @xlink:href='http://rightsstatements.org/vocab/InC/1.0/']";
  if (_roblib_update_pmods_sync_conditions($child_xml_doc, $child_xpath, $parent_xpath, $linked_query, TRUE)) {
    $updated = TRUE;
  }

  // A book may have a custom accessCondition that doesn't link to a license.
  $custom_query = "/mods:mods/mods:accessCondition[@type='use and reproduction' and not(@xlink:href)]";
  if (_roblib_update_pmods_sync_conditions($child_xml_doc, $child_xpath, $parent_xpath, $custom_query, FALSE)) {
    $updated = TRUE;
  }

  if ($updated) {
    $datastream->setContentFromString($child_xml_doc->saveXML());
  }
}

/**
 * Creates an XPath object with the 'mods' and 'xlink' prefixes registered.
 *
 * @param \DOMDocument $document
 *   The document to query.
 *
 * @return \DOMXPath
 *   The XPath object for the document.
 */
function _roblib_update_pmods_mods_xpath(DOMDocument $document) {
  $xpath = new DOMXPath($document);
  $xpath->registerNamespace('mods', 'http://www.loc.gov/mods/v3');
  $xpath->registerNamespace('xlink', 'http://www.w3.org/1999/xlink');
  return $xpath;
}

/**
 * Runs a query and returns the matched values and the nodes to replace.
 *
 * @param \DOMXPath $xpath
 *   The XPath object to query.
 * @param string $query
 *   The XPath expression.
 * @param int $levels_up
 *   How many ancestors to climb from each match to find the node that should
 *   be deleted when the value is replaced (0 is the match itself).
 *
 * @return array
 *   A two element array: the node values of all matches, and the de-duplicated
 *   list of nodes to delete.
 */
function _roblib_update_pmods_collect_matches(DOMXPath $xpath, $query, $levels_up = 0) {
  $values = [];
  $nodes = [];
  foreach ($xpath->query($query) as $match) {
    $values[] = $match->nodeValue;
    $target = $match;
    for ($i = 0; $i < $levels_up; $i++) {
      $target = $target->parentNode;
    }
    // Several matches can share one ancestor; list it only once.
    if (!in_array($target, $nodes, TRUE)) {
      $nodes[] = $target;
    }
  }
  return [$values, $nodes];
}

/**
 * Makes the page accessCondition nodes matching a query mirror the book's.
 *
 * @param \DOMDocument $child_doc
 *   The page MODS document, modified in place.
 * @param \DOMXPath $child_xpath
 *   XPath object for the page MODS document.
 * @param \DOMXPath $parent_xpath
 *   XPath object for the book MODS document.
 * @param string $query
 *   The XPath expression selecting the accessCondition variant.
 * @param bool $use_xlink
 *   Passed on to roblib_update_pmods_update_conditions().
 *
 * @return bool
 *   TRUE if the page document was changed, FALSE otherwise.
 *
 * @throws \Exception
 *   When a node cannot be deleted.
 */
function _roblib_update_pmods_sync_conditions(DOMDocument $child_doc, DOMXPath $child_xpath, DOMXPath $parent_xpath, $query, $use_xlink) {
  list($parent_values) = _roblib_update_pmods_collect_matches($parent_xpath, $query);
  list($child_values, $child_nodes) = _roblib_update_pmods_collect_matches($child_xpath, $query);
  $parent_values = array_values(array_unique($parent_values));

  if (count(array_diff($parent_values, $child_values)) > 0) {
    roblib_update_pmods_delete_nodes($child_nodes);
    foreach ($parent_values as $condition) {
      roblib_update_pmods_update_conditions($child_doc, $condition, $use_xlink);
    }
    return TRUE;
  }

  // The book no longer has this kind of accessCondition: drop it from the
  // page too, but only report a change when something was actually removed.
  if (empty($parent_values) && !empty($child_nodes)) {
    roblib_update_pmods_delete_nodes($child_nodes);
    return TRUE;
  }

  return FALSE;
}
|
||
|
|
||
|
/**
 * Append an accessCondition node to the pages MODS xml.
 *
 * The element is created in the namespace (and with the prefix) of the
 * document's root element, always gets type="use and reproduction", and its
 * text is added as a text node so special characters such as '&' are escaped.
 *
 * @param \DOMDocument $mods_doc
 *   A pages MODS xml.
 * @param string $condition
 *   The new accessCondition nodes value.
 * @param bool $use_xlink
 *   If true it will add the xlink:href attribute pointing at
 *   http://rightsstatements.org/vocab/InC/1.0/.
 */
function roblib_update_pmods_update_conditions($mods_doc, $condition, $use_xlink = FALSE) {
  $root = $mods_doc->documentElement;
  $prefix = $root->prefix ? $root->prefix . ':' : '';

  $access_condition = $mods_doc->createElementNS($root->namespaceURI, $prefix . 'accessCondition');
  // createElement()'s value argument is not escaped; a text node is.
  $access_condition->appendChild($mods_doc->createTextNode((string) $condition));
  $root->appendChild($access_condition);

  $access_condition->setAttribute('type', 'use and reproduction');
  if ($use_xlink) {
    // setAttributeNS() binds the xlink prefix, declaring it when the document
    // does not already do so.
    $access_condition->setAttributeNS('http://www.w3.org/1999/xlink', 'xlink:href', 'http://rightsstatements.org/vocab/InC/1.0/');
  }
}
|
||
|
|
||
|
/**
 * Append a relatedItem[@type='host'] holding the books title to the page MODS.
 *
 * Builds relatedItem/titleInfo/title under the document's root element. The
 * elements are created in the namespace (and with the prefix) of the root
 * element, and the title is added as a text node so special characters such
 * as '&' are escaped.
 *
 * @param \DOMDocument $mods_doc
 *   A page MODS xml.
 * @param string $book_title
 *   The title of the book.
 */
function roblib_update_pmods_update_book_title(&$mods_doc, $book_title) {
  $root = $mods_doc->documentElement;
  $namespace = $root->namespaceURI;
  $prefix = $root->prefix ? $root->prefix . ':' : '';

  $related_item = $mods_doc->createElementNS($namespace, $prefix . 'relatedItem');
  $root->appendChild($related_item);
  $related_item->setAttribute('type', 'host');

  $title_info = $mods_doc->createElementNS($namespace, $prefix . 'titleInfo');
  $related_item->appendChild($title_info);

  $title_element = $mods_doc->createElementNS($namespace, $prefix . 'title');
  // createElement()'s value argument is not escaped; a text node is.
  $title_element->appendChild($mods_doc->createTextNode((string) $book_title));
  $title_info->appendChild($title_element);
}
|
||
|
|
||
|
/**
 * Append a titleInfo/title to the page MODS xml.
 *
 * The elements are created in the namespace (and with the prefix) of the
 * document's root element, and the title is added as a text node so special
 * characters such as '&' are escaped.
 *
 * @param \DOMDocument $mods_doc
 *   A page MODS xml.
 * @param string $title
 *   The title to store; the caller in this file passes the books title
 *   prefixed with the page image #.
 */
function roblib_update_pmods_update_title(&$mods_doc, $title) {
  $root = $mods_doc->documentElement;
  $namespace = $root->namespaceURI;
  $prefix = $root->prefix ? $root->prefix . ':' : '';

  $title_info = $mods_doc->createElementNS($namespace, $prefix . 'titleInfo');
  $root->appendChild($title_info);

  $title_element = $mods_doc->createElementNS($namespace, $prefix . 'title');
  // createElement()'s value argument is not escaped; a text node is.
  $title_element->appendChild($mods_doc->createTextNode((string) $title));
  $title_info->appendChild($title_element);
}
|
||
|
|
||
|
/**
 * Batch 'finished' callback: reports the outcome of the page MODS update.
 *
 * Drupal does not use the return value of a batch finished callback, so the
 * outcome is shown with drupal_set_message(). The message is still returned
 * for any code that calls this function directly.
 *
 * @param bool $success
 *   Whether the batch completed without errors, as reported by the batch API.
 * @param array $results
 *   The results collected by the batch operations (not used here).
 * @param array $operations
 *   The operations that remained unprocessed (not used here).
 *
 * @return string
 *   The translated status message.
 */
function roblib_update_pmods_batch_finished($success, $results, $operations) {
  if ($success) {
    $message = t('Finished Processing Pages.');
    drupal_set_message($message);
  }
  else {
    $message = t('An error occurred while updating the page MODS records.');
    drupal_set_message($message, 'error');
  }
  return $message;
}
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|