You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
341 lines
12 KiB
341 lines
12 KiB
<?php |
|
|
|
/** |
|
* @file |
|
* Module file for roblib_update_pmods. |
|
*/ |
|
|
|
/**
 * Implements hook_CMODEL_PID_DSID_islandora_datastream_modified().
 *
 * Reacts to a modified MODS datastream on a book object (as encoded in this
 * function's name) by queueing a batch that pushes the new book MODS down to
 * every page of the book.
 *
 * @param AbstractObject $object
 *   The object whose datastream was modified; its pages are looked up with
 *   islandora_paged_content_get_pages().
 * @param AbstractDatastream $datastream
 *   The modified datastream. Its content is used as the parent MODS and its
 *   parent's id as the PID the batch redirects to.
 * @param array $params
 *   Unused here.
 */
function roblib_update_pmods_islandora_bookcmodel_mods_islandora_datastream_modified(AbstractObject $object, AbstractDatastream $datastream, array $params) {
  // The page lookup helper lives in islandora_paged_content's utilities.inc.
  module_load_include('inc', 'islandora_paged_content', 'utilities');
  $book_pages = islandora_paged_content_get_pages($object);

  roblib_update_pmods_batch(
    $datastream->content,
    $book_pages,
    $datastream->parent->id
  );
}
|
|
|
/**
 * Build and start the batch that updates the MODS of every page of a book.
 *
 * One batch operation is queued per page. Each operation calls
 * _roblib_update_pmods_update_child() with the page entry and the complete
 * parent MODS string as its arguments.
 *
 * @param string $parent_mods
 *   The contents of the MODS datastream of the parent book.
 * @param array $pages
 *   A list of pages related to the book.
 * @param string $pid
 *   The PID of the book; the batch redirects to islandora/object/<pid>.
 */
function roblib_update_pmods_batch($parent_mods, $pages, $pid) {
  // Collect the per-page operations first, then assemble the batch definition.
  $operations = [];
  foreach ($pages as $page) {
    $operations[] = [
      '_roblib_update_pmods_update_child',
      [$page, $parent_mods],
    ];
  }

  $batch = [
    'title' => t('Updating child page MODS records ...'),
    'operations' => $operations,
    'init_message' => t('starting'),
    'progress_message' => t('Processed @current out of @total.'),
    'error_message' => t('An error occurred during processing'),
    'finished' => 'roblib_update_pmods_batch_finished',
  ];

  batch_set($batch);
  // NOTE(review): batch_process() is invoked directly from a datastream hook;
  // confirm this is safe for every way the hook can be triggered (forms, CLI).
  batch_process('islandora/object/' . $pid);
}
|
|
|
/**
 * Batch operation callback: update the MODS of a single page.
 *
 * Keeps a running count of processed pages in the batch results, sets the
 * progress message and delegates the real work to
 * roblib_update_pmods_update_child(). Exceptions raised by that call are
 * logged to watchdog and do not abort the batch.
 *
 * @param array $page
 *   A page from a book; its 'pid' key is read here.
 * @param string $parent_mods
 *   The contents of the MODS datastream of the parent book.
 * @param array $context
 *   The batch context.
 */
function _roblib_update_pmods_update_child($page, $parent_mods, &$context) {
  // The running count doubles as the image number handed to the page update.
  $page_count = isset($context['results']['pages_processed'])
    ? $context['results']['pages_processed']
    : 0;
  $page_count++;
  $context['results']['pages_processed'] = $page_count;

  $message_args = [
    '!p' => $page_count,
    '%pid' => $page['pid'],
  ];
  $context['message'] = t('Processing MODS for page !p %pid', $message_args);

  try {
    roblib_update_pmods_update_child($page, $parent_mods, $page_count);
  }
  catch (Exception $e) {
    // TODO: decide how a failed page should be reported beyond the log entry.
    watchdog('roblib_update_pmods', $e->getMessage(), NULL, WATCHDOG_ERROR, NULL);
  }
}
|
|
|
/**
 * Prepare a page object for update.
 *
 * Loads the page object, takes its MODS datastream (or constructs a new one
 * from an empty MODS template when the page has none), lets
 * roblib_update_pmods_compare_and_update() bring it in line with the book
 * MODS, and ingests the datastream if it was newly constructed.
 *
 * @param array $page
 *   A page from a book; its 'pid' key identifies the object to load.
 * @param string $parent_mods
 *   The contents of the MODS datastream of the parent book.
 * @param int|string $image_number
 *   The image number of the page within the book.
 *
 * @throws \Exception
 *   If the page object cannot be loaded, or if comparing/updating the MODS
 *   fails.
 */
function roblib_update_pmods_update_child($page, $parent_mods, $image_number) {
  $dsid = 'MODS';
  $mods_template = <<<EOT
<mods xmlns="http://www.loc.gov/mods/v3" xmlns:mods="http://www.loc.gov/mods/v3" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsisnippet="http://www.w3.org/2001/XMLSchema-instance" version="3.2" xsisnippet:schemaLocation="http://www.loc.gov/mods/ http://www.loc.gov/standards/mods/mods.xsd">
</mods>
EOT;

  $page_object = islandora_object_load($page['pid']);
  // A failed load must surface as an Exception: calling a method on the
  // failure value would be a fatal Error that the batch wrapper's
  // catch (Exception) cannot handle.
  if (!$page_object) {
    throw new Exception('Could not load page object ' . $page['pid']);
  }

  // Remember whether the datastream already exists; a constructed datastream
  // is only attached to the object by the ingest at the end.
  $needs_ingest = !isset($page_object[$dsid]);
  if ($needs_ingest) {
    $datastream = $page_object->constructDatastream($dsid);
    $datastream->label = 'MODS';
    $datastream->mimeType = 'application/xml';
    $datastream->content = $mods_template;
  }
  else {
    $datastream = $page_object[$dsid];
  }

  roblib_update_pmods_compare_and_update($datastream, $parent_mods, $image_number);

  // Ingest the datastream if it isn't yet.
  if ($needs_ingest) {
    $page_object->ingestDatastream($datastream);
  }
  unset($page_object);
}
|
|
|
/**
 * Remove XML nodes from their DOM document.
 *
 * Nodes that are no longer attached to a parent are skipped. This happens
 * when the same node is listed more than once (for example when the list was
 * built from the parents of several sibling nodes); the first pass removes
 * it and later passes have nothing left to do.
 *
 * @param array $nodes_to_delete
 *   The DOMNode objects to delete.
 *
 * @throws \Exception
 *   If a node could not be removed from its parent.
 */
function roblib_update_pmods_delete_nodes($nodes_to_delete) {
  foreach ($nodes_to_delete as $node) {
    $parent = $node->parentNode;
    if ($parent === NULL) {
      // Already detached: nothing to remove.
      continue;
    }
    $old_node = $parent->removeChild($node);
    if (empty($old_node)) {
      throw new Exception('Could not delete Node ' . $node->nodeValue);
    }
  }
}
|
|
|
/**
 * Update the page object MODS datastream if the parent MODS has been updated.
 *
 * Four parts of the page MODS are compared against the book MODS:
 * - the untyped titleInfo/title values, each expected to be a book title
 *   prefixed with "Image <image number> - ";
 * - the relatedItem[@type='host']/titleInfo/title values, expected to be the
 *   plain (trimmed) book titles;
 * - the "use and reproduction" accessCondition that links to the
 *   rightsstatements.org InC statement;
 * - the "use and reproduction" accessCondition without an xlink:href.
 *
 * When a part is out of date, all of its existing nodes are removed from the
 * page MODS and every value found in the book MODS is written again, so
 * values that were already present are kept. The datastream content is only
 * replaced when at least one part actually changed.
 *
 * @param AbstractDatastream $datastream
 *   A page object's MODS datastream.
 * @param string $parent_mods
 *   A book object's MODS xml.
 * @param int|string $image_number
 *   The image number of the page within the book.
 *
 * @throws \Exception
 *   If either MODS document is empty or cannot be parsed, or if a stale node
 *   cannot be removed.
 */
function roblib_update_pmods_compare_and_update(&$datastream, $parent_mods, $image_number) {
  $child_mods = $datastream->content;
  if (empty($child_mods)) {
    throw new Exception('Error loading Child MODS');
  }
  $child_xml_doc = new DOMDocument();
  if ($child_xml_doc->loadXML($child_mods) === FALSE) {
    throw new Exception('Error processing Child MODS ' . $child_mods);
  }

  // Guard the empty case explicitly so that callers always get an Exception
  // rather than whatever loadXML() does with an empty string.
  $parent_xml_doc = new DOMDocument();
  if (empty($parent_mods) || $parent_xml_doc->loadXML($parent_mods) === FALSE) {
    throw new Exception('Error processing Parent MODS ' . $parent_mods);
  }

  $child_xpath = _roblib_update_pmods_mods_xpath($child_xml_doc);
  $parent_xpath = _roblib_update_pmods_mods_xpath($parent_xml_doc);
  $updated = FALSE;

  // Page titles: the book titles prefixed with the image number.
  $title_query = "/mods:mods/mods:titleInfo[not(@type)]/mods:title";
  $image_number_string = 'Image ' . $image_number . ' - ';
  $expected_page_titles = [];
  $parent_book_title_arr = [];
  foreach ($parent_xpath->query($title_query, $parent_xml_doc) as $p_node) {
    $book_title = trim($p_node->nodeValue);
    $parent_book_title_arr[] = $book_title;
    $expected_page_titles[] = $image_number_string . $book_title;
  }
  $current_values = [];
  $nodes_to_delete = [];
  foreach ($child_xpath->query($title_query, $child_xml_doc) as $c_node) {
    $current_values[] = $c_node->nodeValue;
    // The whole titleInfo is replaced; list each one only once even if it
    // holds several title elements.
    $title_info = $c_node->parentNode;
    if (!in_array($title_info, $nodes_to_delete, TRUE)) {
      $nodes_to_delete[] = $title_info;
    }
  }
  if (count(array_diff($expected_page_titles, $current_values)) > 0) {
    $updated = TRUE;
    roblib_update_pmods_delete_nodes($nodes_to_delete);
    // Everything was removed, so write back every expected title, not just
    // the ones that were missing.
    foreach ($expected_page_titles as $title) {
      roblib_update_pmods_update_title($child_xml_doc, $title);
    }
  }

  // Host relatedItem titles: the plain book titles.
  $book_title_query = "/mods:mods/mods:relatedItem[@type='host']/mods:titleInfo/mods:title";
  $current_values = [];
  $nodes_to_delete = [];
  foreach ($child_xpath->query($book_title_query, $child_xml_doc) as $c_node) {
    $current_values[] = $c_node->nodeValue;
    // The enclosing relatedItem is what gets replaced.
    $related_item = $c_node->parentNode->parentNode;
    if (!in_array($related_item, $nodes_to_delete, TRUE)) {
      $nodes_to_delete[] = $related_item;
    }
  }
  if (count(array_diff($parent_book_title_arr, $current_values)) > 0) {
    $updated = TRUE;
    roblib_update_pmods_delete_nodes($nodes_to_delete);
    foreach ($parent_book_title_arr as $title) {
      roblib_update_pmods_update_book_title($child_xml_doc, $title);
    }
  }

  // Access condition that links to the rights statement.
  $linked_query = "/mods:mods/mods:accessCondition[@type='use and reproduction' and @xlink:href='http://rightsstatements.org/vocab/InC/1.0/']";
  if (_roblib_update_pmods_sync_conditions($child_xml_doc, $child_xpath, $parent_xpath, $linked_query, TRUE)) {
    $updated = TRUE;
  }

  // A book may have a custom accessCondition that doesn't link to a license.
  $custom_query = "/mods:mods/mods:accessCondition[@type='use and reproduction' and not(@xlink:href)]";
  if (_roblib_update_pmods_sync_conditions($child_xml_doc, $child_xpath, $parent_xpath, $custom_query, FALSE)) {
    $updated = TRUE;
  }

  if ($updated) {
    $datastream->setContentFromString($child_xml_doc->saveXML());
  }
}

/**
 * Create an XPath evaluator with the mods and xlink prefixes registered.
 *
 * @param \DOMDocument $doc
 *   The MODS document to query.
 *
 * @return \DOMXPath
 *   The evaluator bound to $doc.
 */
function _roblib_update_pmods_mods_xpath(DOMDocument $doc) {
  $xpath = new DOMXPath($doc);
  $xpath->registerNamespace('mods', 'http://www.loc.gov/mods/v3');
  $xpath->registerNamespace('xlink', 'http://www.w3.org/1999/xlink');
  return $xpath;
}

/**
 * Bring one flavour of accessCondition in the page MODS in line with the book.
 *
 * @param \DOMDocument $child_xml_doc
 *   The page MODS document, modified in place.
 * @param \DOMXPath $child_xpath
 *   XPath evaluator for the page MODS.
 * @param \DOMXPath $parent_xpath
 *   XPath evaluator for the book MODS.
 * @param string $query
 *   XPath selecting the accessCondition nodes to compare in both documents.
 * @param bool $use_xlink
 *   Passed on to roblib_update_pmods_update_conditions().
 *
 * @return bool
 *   TRUE if the page document was modified.
 *
 * @throws \Exception
 *   If a stale node cannot be removed.
 */
function _roblib_update_pmods_sync_conditions(DOMDocument $child_xml_doc, DOMXPath $child_xpath, DOMXPath $parent_xpath, $query, $use_xlink) {
  $expected = [];
  foreach ($parent_xpath->query($query) as $p_node) {
    $expected[] = $p_node->nodeValue;
  }
  $current = [];
  $stale_nodes = [];
  foreach ($child_xpath->query($query) as $c_node) {
    $current[] = $c_node->nodeValue;
    $stale_nodes[] = $c_node;
  }

  $has_missing = count(array_diff($expected, $current)) > 0;
  // The book dropped this kind of condition but the page still carries it.
  $has_orphans = count($expected) === 0 && count($stale_nodes) > 0;
  if (!$has_missing && !$has_orphans) {
    return FALSE;
  }

  roblib_update_pmods_delete_nodes($stale_nodes);
  foreach ($expected as $condition) {
    roblib_update_pmods_update_conditions($child_xml_doc, $condition, $use_xlink);
  }
  return TRUE;
}
|
|
|
/**
 * Append an accessCondition node to the page's MODS xml.
 *
 * The new element gets type="use and reproduction" and is appended to the
 * document element. Its text is added as a text node so that characters such
 * as "&" in the condition are escaped on output instead of being parsed as
 * the start of an entity reference.
 *
 * @param \DOMDocument $mods_doc
 *   A page's MODS xml.
 * @param string $condition
 *   The new accessCondition node's value.
 * @param bool $use_xlink
 *   If true it will add the xlink:href attribute pointing at the
 *   rightsstatements.org InC statement.
 */
function roblib_update_pmods_update_conditions($mods_doc, $condition, $use_xlink = FALSE) {
  $root = $mods_doc->documentElement;
  $access_condition = $mods_doc->createElement('accessCondition');
  $access_condition->appendChild($mods_doc->createTextNode((string) $condition));
  $root->appendChild($access_condition);
  $access_condition->setAttribute('type', 'use and reproduction');

  if ($use_xlink) {
    // NOTE(review): the attribute is created by qualified name only, so the
    // serialized document relies on the xlink prefix being declared on an
    // ancestor element - confirm for page MODS not built from the template.
    $href_attribute = $mods_doc->createAttribute('xlink:href');
    $href_attribute->value = 'http://rightsstatements.org/vocab/InC/1.0/';
    $access_condition->appendChild($href_attribute);
  }
}
|
|
|
/**
 * Append a relatedItem[@type='host'] carrying the book's title to a page MODS.
 *
 * Builds relatedItem/titleInfo/title under the document element. The title
 * text is added as a text node so that characters such as "&" are escaped on
 * output instead of being parsed as the start of an entity reference.
 *
 * @param \DOMDocument $mods_doc
 *   A page MODS xml, modified in place.
 * @param string $book_title
 *   The title of the book.
 */
function roblib_update_pmods_update_book_title(&$mods_doc, $book_title) {
  $root = $mods_doc->documentElement;

  $related_item = $mods_doc->createElement('relatedItem');
  $related_item->setAttribute('type', 'host');
  $root->appendChild($related_item);

  $title_info = $mods_doc->createElement('titleInfo');
  $related_item->appendChild($title_info);

  $title = $mods_doc->createElement('title');
  $title->appendChild($mods_doc->createTextNode((string) $book_title));
  $title_info->appendChild($title);
}
|
|
|
/**
 * Append a titleInfo/title to the page MODS xml.
 *
 * The title text is added as a text node so that characters such as "&" are
 * escaped on output instead of being parsed as the start of an entity
 * reference.
 *
 * @param \DOMDocument $mods_doc
 *   A page MODS xml, modified in place.
 * @param string $title
 *   The title to write; callers pass the book title prefixed with the page
 *   image number.
 */
function roblib_update_pmods_update_title(&$mods_doc, $title) {
  $root = $mods_doc->documentElement;

  $title_info = $mods_doc->createElement('titleInfo');
  $root->appendChild($title_info);

  $title_element = $mods_doc->createElement('title');
  $title_element->appendChild($mods_doc->createTextNode((string) $title));
  $title_info->appendChild($title_element);
}
|
|
|
/**
 * Batch 'finished' callback for the page MODS update batch.
 *
 * None of the arguments are inspected; the function only builds and returns
 * a translated completion message.
 *
 * @param bool $success
 *   Unused.
 * @param array $results
 *   Unused.
 * @param array $operations
 *   Unused.
 *
 * @return array|mixed|string|null
 *   The result of t('Finished Processing Pages.').
 */
function roblib_update_pmods_batch_finished($success, $results, $operations) {
  $completion_message = t('Finished Processing Pages.');
  return $completion_message;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|