Updates page MODS when a parent book's MODS is updated. Currently it will only update the page MODS if the book title or accessCondition changes.
* @file
* Module file for roblib_update_pmods.
/**
 * Implements hook_cmodel_datastream_islandora_datastream_modified().
 *
 * Fires when the MODS datastream of a book object changes and hands the new
 * MODS, the book's pages and the book PID to the batch that refreshes the
 * page-level MODS records.
 *
 * @param AbstractObject $object
 *   The book object whose datastream was modified.
 * @param AbstractDatastream $datastream
 *   The modified MODS datastream.
 * @param array $params
 *   Modification parameters supplied by the hook; not used here.
 */
function roblib_update_pmods_islandora_bookcmodel_mods_islandora_datastream_modified(AbstractObject $object, AbstractDatastream $datastream, array $params) {
  // The page lookup helper lives in islandora_paged_content's utilities file.
  module_load_include('inc', 'islandora_paged_content', 'utilities');

  $book_pages = islandora_paged_content_get_pages($object);
  $book_mods = $datastream->content;
  $book_pid = $datastream->parent->id;

  roblib_update_pmods_batch($book_mods, $book_pages, $book_pid);
}
/**
 * Setup the batch.
 *
 * Queues one batch operation per page and starts processing; when the batch
 * completes the user is redirected to the book object.
 *
 * @param string $parent_mods
 *   The contents of the MODS datastream of the parent book.
 * @param array $pages
 *   A list of pages related to the book.
 * @param string $pid
 *   The PID of the book.
 */
function roblib_update_pmods_batch($parent_mods, $pages, $pid) {
  $batch = [
    'title' => t('Updating child page MODS records ...'),
    'operations' => [],
    'init_message' => t('starting'),
    'progress_message' => t('Processed @current out of @total.'),
    'error_message' => t('An error occurred during processing'),
    'finished' => 'roblib_update_pmods_batch_finished',
  ];

  foreach ($pages as $page) {
    // Only the page array and the MODS string are queued, not loaded objects,
    // so the operation arguments stay small and serializable.
    // NOTE(review): the callback name is inferred from the batch worker
    // declared in this file whose signature matches these arguments.
    $batch['operations'][] = [
      '_roblib_update_pmods_update_child',
      [$page, $parent_mods],
    ];
  }

  // A batch has to be registered before batch_process() can run it.
  batch_set($batch);
  // NOTE(review): batch_process() redirects immediately; if this hook also
  // fires inside a form submission or from the command line that redirect may
  // be unwanted - confirm the contexts in which MODS is saved.
  batch_process('islandora/object/' . $pid);
}
/**
 * Handles individual requests from the batch.
 *
 * Keeps a running count of processed pages in the batch results and uses that
 * count as the image number written into the page title. Failures are logged
 * and do not abort the remaining operations.
 *
 * @param array $page
 *   A page from a book; its 'pid' key identifies the page object.
 * @param string $parent_mods
 *   The contents of the MODS datastream of the parent book.
 * @param array $context
 *   The batch context.
 */
function _roblib_update_pmods_update_child($page, $parent_mods, &$context) {
  if (!isset($context['results']['pages_processed'])) {
    $context['results']['pages_processed'] = 0;
  }
  // Advance the counter so each page gets its own image number.
  // NOTE(review): numbering is assumed to be 1-based and follows processing
  // order; confirm this matches the titles already stored on existing pages.
  $context['results']['pages_processed']++;

  $context['message'] = t('Processing MODS for page !p %pid', [
    '!p' => $context['results']['pages_processed'],
    '%pid' => $page['pid'],
  ]);

  try {
    roblib_update_pmods_update_child($page, $parent_mods, $context['results']['pages_processed']);
  }
  catch (Exception $e) {
    // Record which page failed; the exception text alone does not say.
    watchdog('roblib_update_pmods', 'Unable to update MODS for page @pid: @error', [
      '@pid' => $page['pid'],
      '@error' => $e->getMessage(),
    ], WATCHDOG_ERROR);
  }
}
/**
 * Prepare a page object for update.
 *
 * Loads the page, makes sure a MODS datastream is available (seeding a new
 * one from an empty MODS template when the page has none), synchronises it
 * with the book MODS and ingests the datastream if it was newly constructed.
 *
 * @param array $page
 *   A page from a book; its 'pid' key identifies the page object.
 * @param string $parent_mods
 *   The contents of the MODS datastream of the parent book.
 * @param int $image_number
 *   The image number of the page within the book.
 *
 * @throws \Exception
 *   When the page object cannot be loaded or its MODS cannot be processed.
 */
function roblib_update_pmods_update_child($page, $parent_mods, $image_number) {
  $dsid = 'MODS';
  // NOTE(review): only the opening tag of the template is visible in the
  // reviewed source; the closing tag is added so the template is well formed.
  $mods_template = <<<EOT
<mods xmlns="http://www.loc.gov/mods/v3" xmlns:mods="http://www.loc.gov/mods/v3" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsisnippet="http://www.w3.org/2001/XMLSchema-instance" version="3.2" xsisnippet:schemaLocation="http://www.loc.gov/mods/ http://www.loc.gov/standards/mods/mods.xsd">
</mods>
EOT;

  $page_object = islandora_object_load($page['pid']);
  if (!$page_object) {
    // Fail with an Exception the batch worker can catch instead of calling a
    // method on a non-object further down.
    throw new Exception('Could not load page object ' . $page['pid']);
  }

  $is_new_datastream = !isset($page_object[$dsid]);
  if ($is_new_datastream) {
    $datastream = $page_object->constructDatastream($dsid);
    $datastream->label = 'MODS';
    $datastream->mimeType = 'application/xml';
    $datastream->content = $mods_template;
  }
  else {
    $datastream = $page_object[$dsid];
  }

  roblib_update_pmods_compare_and_update($datastream, $parent_mods, $image_number);

  // Ingest the datastream if it isn't yet.
  if ($is_new_datastream) {
    $page_object->ingestDatastream($datastream);
  }
}
/**
 * Remove XML nodes from a XML DOMDocument.
 *
 * Nodes that are no longer attached to a parent are skipped. Callers collect
 * a shared parent node once per matching child, so the same node can appear
 * in the list more than once.
 *
 * @param array $nodes_to_delete
 *   The DOM nodes to delete.
 *
 * @throws \Exception
 *   When a node cannot be removed from its parent.
 */
function roblib_update_pmods_delete_nodes($nodes_to_delete){
  foreach ($nodes_to_delete as $node) {
    $parent = $node->parentNode;
    if ($parent === NULL) {
      // Already detached (for example a duplicate list entry).
      continue;
    }
    $old_node = $parent->removeChild($node);
    if (!$old_node) {
      throw new Exception ('Could not delete Node ' . $node->nodeValue);
    }
  }
}
/**
 * Update the page object MODS datastream if the parent MODS has been updated.
 *
 * Only updates if the book's accessCondition or its titleInfo/title has been
 * modified. Four groups of values are compared between book and page:
 * - the untyped titleInfo/title (stored on the page as "Image N - <title>"),
 * - the book title kept in relatedItem[@type='host'] on the page,
 * - the accessCondition that links to the InC rights statement,
 * - the accessCondition without an xlink:href (custom statement).
 *
 * @param AbstractDatastream $datastream
 *   A page object's MODS datastream.
 * @param string $parent_mods
 *   A book object's MODS xml.
 * @param int $image_number
 *   The image number of the page within the book.
 *
 * @throws \Exception
 *   When either MODS document is empty or cannot be parsed, or when a stale
 *   node cannot be removed.
 */
function roblib_update_pmods_compare_and_update(&$datastream, $parent_mods, $image_number) {
  $child_mods = $datastream->content;
  if (empty($child_mods)) {
    throw new Exception('Error loading Child MODS');
  }
  $child_xml_doc = new DOMDocument();
  if ($child_xml_doc->loadXML($child_mods) === FALSE) {
    throw new Exception('Error processing Child MODS ' . $child_mods);
  }
  // An empty string never parses; reject it up front so the failure is an
  // Exception the batch worker can log.
  $parent_xml_doc = new DOMDocument();
  if (empty($parent_mods) || $parent_xml_doc->loadXML($parent_mods) === FALSE) {
    throw new Exception('Error processing Parent MODS ' . $parent_mods);
  }

  $child_xpath = _roblib_update_pmods_mods_xpath($child_xml_doc);
  $parent_xpath = _roblib_update_pmods_mods_xpath($parent_xml_doc);
  $updated = FALSE;

  // Page title: the book title prefixed with the image number.
  $title_query = "/mods:mods/mods:titleInfo[not(@type)]/mods:title";
  $image_number_string = 'Image ' . $image_number . ' - ';
  $book_titles = [];
  $page_titles = [];
  foreach ($parent_xpath->query($title_query) as $p_node) {
    $book_titles[] = trim($p_node->nodeValue);
    $page_titles[] = $image_number_string . trim($p_node->nodeValue);
  }
  // The stale unit is the titleInfo wrapping each title (one level up).
  $to_add = _roblib_update_pmods_reset_group($page_titles, $child_xpath->query($title_query), 1, FALSE);
  if ($to_add !== NULL) {
    $updated = TRUE;
    foreach ($to_add as $title) {
      roblib_update_pmods_update_title($child_xml_doc, $title);
    }
  }

  // Book title recorded on the page as its host item.
  $book_title_query = "/mods:mods/mods:relatedItem[@type='host']/mods:titleInfo/mods:title";
  // The stale unit is the whole relatedItem (two levels up from the title).
  $to_add = _roblib_update_pmods_reset_group($book_titles, $child_xpath->query($book_title_query), 2, FALSE);
  if ($to_add !== NULL) {
    $updated = TRUE;
    foreach ($to_add as $title) {
      roblib_update_pmods_update_book_title($child_xml_doc, $title);
    }
  }

  // Access conditions: first the one linking to the rights statement, then a
  // custom one - a book may have a custom accessCondition that doesn't link
  // to a license. Unlike titles, these are also cleared from the page when
  // the book no longer carries them.
  $access_condition_queries = [
    [
      "/mods:mods/mods:accessCondition[@type='use and reproduction' and @xlink:href='http://rightsstatements.org/vocab/InC/1.0/']",
      TRUE,
    ],
    [
      "/mods:mods/mods:accessCondition[@type='use and reproduction' and not(@xlink:href)]",
      FALSE,
    ],
  ];
  foreach ($access_condition_queries as $access_condition_query) {
    list($query, $use_xlink) = $access_condition_query;
    $book_conditions = [];
    foreach ($parent_xpath->query($query) as $p_node) {
      $book_conditions[] = $p_node->nodeValue;
    }
    $to_add = _roblib_update_pmods_reset_group($book_conditions, $child_xpath->query($query), 0, TRUE);
    if ($to_add !== NULL) {
      $updated = TRUE;
      foreach ($to_add as $condition) {
        roblib_update_pmods_update_conditions($child_xml_doc, $condition, $use_xlink);
      }
    }
  }

  if ($updated) {
    $updated_xml = $child_xml_doc->saveXML();
    // NOTE(review): the statement that persists the XML is not visible in the
    // reviewed source; assigning to ->content mirrors how a new datastream is
    // seeded elsewhere in this file - confirm.
    $datastream->content = $updated_xml;
  }
}

/**
 * Builds an XPath evaluator with the mods and xlink prefixes registered.
 *
 * @param \DOMDocument $doc
 *   A parsed MODS document.
 *
 * @return \DOMXPath
 *   The evaluator bound to $doc.
 */
function _roblib_update_pmods_mods_xpath(DOMDocument $doc) {
  $xpath = new DOMXPath($doc);
  $xpath->registerNamespace('mods', 'http://www.loc.gov/mods/v3');
  $xpath->registerNamespace('xlink', 'http://www.w3.org/1999/xlink');
  return $xpath;
}

/**
 * Clears one group of page values when it is out of step with the book.
 *
 * The group is out of step when a wanted value is absent from the page, or -
 * if $clear_when_unwanted is set - when the book has no value at all while
 * the page still has some. In that case every matched unit is removed from
 * the page document and the full list of wanted values is returned so the
 * caller can re-create them; otherwise nothing is touched.
 *
 * @param array $wanted
 *   Values the page should carry, taken from the book MODS.
 * @param \DOMNodeList $found
 *   Nodes currently matching the group in the page MODS.
 * @param int $levels_up
 *   How many ancestors above each matched node form the unit to remove.
 * @param bool $clear_when_unwanted
 *   Whether leftover page values are removed when the book has none.
 *
 * @return array|null
 *   The values to add, or NULL when the page needs no change.
 *
 * @throws \Exception
 *   When a stale node cannot be removed.
 */
function _roblib_update_pmods_reset_group(array $wanted, DOMNodeList $found, $levels_up, $clear_when_unwanted) {
  $present = [];
  $stale_units = [];
  foreach ($found as $node) {
    $present[] = $node->nodeValue;
    $unit = $node;
    for ($i = 0; $i < $levels_up; $i++) {
      $unit = $unit->parentNode;
    }
    // Keyed by node path so a unit shared by several matches is listed once.
    $stale_units[$unit->getNodePath()] = $unit;
  }

  $has_missing = count(array_diff($wanted, $present)) > 0;
  $has_leftover = $clear_when_unwanted && count($wanted) === 0 && count($stale_units) > 0;
  if (!$has_missing && !$has_leftover) {
    return NULL;
  }

  // NOTE(review): every matched unit is dropped and all wanted values are
  // re-added, so a value that was already correct is not lost when another
  // one is missing - confirm this matches the intended replacement policy.
  roblib_update_pmods_delete_nodes(array_values($stale_units));
  return $wanted;
}
/**
 * Update the accessCondition node of the page's MODS xml.
 *
 * Appends a new accessCondition[@type='use and reproduction'] element to the
 * document root. The text is added as a text node so that characters such as
 * "&" are escaped on serialisation.
 *
 * @param \DOMDocument $mods_doc
 *   A page's MODS xml.
 * @param string $condition
 *   The new accessCondition node's value.
 * @param bool $use_xlink
 *   If true it will add the xlink:href attribute.
 */
function roblib_update_pmods_update_conditions($mods_doc, $condition, $use_xlink = FALSE){
  // NOTE(review): the element is created without an explicit namespace, so it
  // only lands in the MODS namespace after serialisation when the root
  // declares MODS as the default namespace - confirm for prefixed documents.
  $access_condition = $mods_doc->createElement('accessCondition');
  $access_condition->appendChild($mods_doc->createTextNode((string) $condition));
  $access_condition->setAttribute('type', 'use and reproduction');
  if ($use_xlink) {
    $access_condition->setAttribute('xlink:href', 'http://rightsstatements.org/vocab/InC/1.0/');
  }
  $mods_doc->documentElement->appendChild($access_condition);
}
/**
 * Update the page MODS xml with a relatedItem[@type='host'] title with the book's title.
 *
 * Appends relatedItem[@type='host']/titleInfo/title to the document root.
 * The title is added as a text node so that characters such as "&" are
 * escaped on serialisation.
 *
 * @param \DOMDocument $mods_doc
 *   A page MODS xml.
 * @param string $book_title
 *   The title of the book.
 */
function roblib_update_pmods_update_book_title (&$mods_doc, $book_title) {
  $title = $mods_doc->createElement('title');
  $title->appendChild($mods_doc->createTextNode((string) $book_title));

  $title_info = $mods_doc->createElement('titleInfo');
  $title_info->appendChild($title);

  $related_item = $mods_doc->createElement('relatedItem');
  $related_item->setAttribute('type', 'host');
  $related_item->appendChild($title_info);

  $mods_doc->documentElement->appendChild($related_item);
}
/**
 * Update the page MODS xml titleInfo/title with the book's title prefixed by image #.
 *
 * Appends titleInfo/title to the document root. The title is added as a text
 * node so that characters such as "&" are escaped on serialisation.
 *
 * @param \DOMDocument $mods_doc
 *   A page MODS xml.
 * @param string $title
 *   The title of the book prefixed with the page image #.
 */
function roblib_update_pmods_update_title(&$mods_doc, $title) {
  $title_node = $mods_doc->createElement('title');
  $title_node->appendChild($mods_doc->createTextNode((string) $title));

  $title_info = $mods_doc->createElement('titleInfo');
  $title_info->appendChild($title_node);

  $mods_doc->documentElement->appendChild($title_info);
}
/**
 * Batch 'finished' callback: reports the outcome of the page MODS update.
 *
 * The outcome is shown with drupal_set_message() because a value that is
 * only returned from this callback never reaches the user.
 *
 * @param bool $success
 *   TRUE when every queued operation completed.
 * @param array $results
 *   The results collected by the batch operations.
 * @param array $operations
 *   Operations that were left unprocessed, if any.
 *
 * @return string
 *   The translated completion message (kept for backward compatibility).
 */
function roblib_update_pmods_batch_finished($success, $results, $operations) {
  $finished_message = t('Finished Processing Pages.');
  if ($success) {
    drupal_set_message($finished_message);
  }
  else {
    drupal_set_message(t('An error occurred during processing'), 'error');
  }
  return $finished_message;
}