content;
  $pid = $datastream->parent->id;
  roblib_update_pmods_batch($parent_mods, $pages, $pid);
}

/**
 * Setup the batch.
 *
 * Queues one operation per page and then starts batch processing.
 *
 * @param string $parent_mods
 *   The contents of the MODS datastream of the parent book.
 * @param array $pages
 *   A list of pages related to the book.
 * @param string $pid
 *   The PID of the book. Passed to batch_process() as part of the
 *   'islandora/object/<pid>' path.
 */
function roblib_update_pmods_batch($parent_mods, $pages, $pid) {
  $batch = [
    'title' => t('Updating child page MODS records ...'),
    'operations' => [],
    'init_message' => t('starting'),
    'progress_message' => t('Processed @current out of @total.'),
    'error_message' => t('An error occurred during processing'),
    'finished' => 'roblib_update_pmods_batch_finished',
  ];
  foreach ($pages as $page) {
    // Each operation receives only the page record and the parent MODS
    // string; the page object itself is loaded inside the operation.
    $batch['operations'][] = [
      '_roblib_update_pmods_update_child',
      [$page, $parent_mods],
    ];
  }
  batch_set($batch);
  batch_process('islandora/object/' . $pid);
}

/**
 * Handles individual requests from the batch.
 *
 * Keeps a running count of processed pages in the batch results; that count
 * is also used as the page's image number.
 *
 * @param array $page
 *   A page from a book. Must contain a 'pid' key.
 * @param string $parent_mods
 *   The contents of the MODS datastream of the parent book.
 * @param array $context
 *   The batch context.
 */
function _roblib_update_pmods_update_child($page, $parent_mods, &$context) {
  if (!isset($context['results']['pages_processed'])) {
    $context['results']['pages_processed'] = 0;
  }
  $context['results']['pages_processed']++;
  $context['message'] = t('Processing MODS for page !p %pid', [
    '!p' => $context['results']['pages_processed'],
    '%pid' => $page['pid'],
  ]);
  try {
    roblib_update_pmods_update_child($page, $parent_mods, $context['results']['pages_processed']);
  }
  catch (Exception $e) {
    // TODO: report the failure to the user; for now it is only logged so
    // that one bad page does not abort the rest of the batch.
    watchdog('roblib_update_pmods', $e->getMessage(), NULL, WATCHDOG_ERROR, NULL);
  }
}

/**
 * Prepare a page object for update.
 *
 * Loads the page, uses its existing MODS datastream (or constructs a new one
 * from a template), synchronises it with the parent MODS and ingests the
 * datastream if it was newly constructed.
 *
 * @param array $page
 *   A page from a book. Must contain a 'pid' key.
 * @param string $parent_mods
 *   The contents of the MODS datastream of the parent book.
 * @param int $image_number
 *   The image number of the page within the book.
 *
 * @throws \Exception
 *   If the page object cannot be loaded or its MODS cannot be processed.
 */
function roblib_update_pmods_update_child($page, $parent_mods, $image_number) {
  $dsid = 'MODS';
  // NOTE(review): the original template body was lost (the heredoc had been
  // reduced to "<< EOT;"). This is a minimal reconstruction that declares the
  // MODS and xlink namespaces the XPath queries rely on - confirm against the
  // intended template.
  $mods_template = <<<EOT
<mods xmlns="http://www.loc.gov/mods/v3" xmlns:xlink="http://www.w3.org/1999/xlink"/>
EOT;

  $page_object = islandora_object_load($page['pid']);
  if (empty($page_object)) {
    // Fail with a catchable exception instead of a fatal method call on a
    // non-object further down.
    throw new Exception('Could not load page object ' . $page['pid']);
  }

  $is_new = !isset($page_object[$dsid]);
  if ($is_new) {
    $datastream = $page_object->constructDatastream($dsid);
    $datastream->label = 'MODS';
    $datastream->mimeType = 'application/xml';
    $datastream->content = $mods_template;
  }
  else {
    $datastream = $page_object[$dsid];
  }

  roblib_update_pmods_compare_and_update($datastream, $parent_mods, $image_number);

  // Ingest the datastream if it isn't yet part of the object.
  if ($is_new) {
    $page_object->ingestDatastream($datastream);
  }
  unset($page_object);
}

/**
 * Remove XML nodes from a XML DOMDocument.
 *
 * @param array $nodes_to_delete
 *   The nodes to delete. The same node may appear more than once.
 *
 * @throws \Exception
 *   If a node could not be removed from its parent.
 */
function roblib_update_pmods_delete_nodes($nodes_to_delete) {
  foreach ($nodes_to_delete as $node) {
    $parent = $node->parentNode;
    if ($parent === NULL) {
      // Already detached, e.g. the same parent element was queued twice
      // because it held two matching children.
      continue;
    }
    $old_node = $parent->removeChild($node);
    if (empty($old_node)) {
      throw new Exception('Could not delete Node ' . $node->nodeValue);
    }
  }
}

/**
 * Update the page object MODS datastream if the parent MODS has been updated.
 *
 * Compares the untyped titleInfo/title, the relatedItem[@type='host'] title
 * and the 'use and reproduction' accessCondition elements of the page with
 * those of the book. The datastream content is only rewritten when one of
 * these differs.
 *
 * @param AbstractDatastream $datastream
 *   A page objects MODS datastream.
 * @param string $parent_mods
 *   A book objects MODS xml.
 * @param int $image_number
 *   The image number of the page within the book; used to prefix the page
 *   title ("Image <n> - <book title>").
 *
 * @throws \Exception
 *   If either MODS document is empty or cannot be parsed, or a stale node
 *   cannot be removed.
 */
function roblib_update_pmods_compare_and_update(&$datastream, $parent_mods, $image_number) {
  $updated = FALSE;

  $child_mods = $datastream->content;
  if (empty($child_mods)) {
    throw new Exception('Error loading Child MODS');
  }
  $child_xml_doc = new DOMDocument();
  if ($child_xml_doc->loadXML($child_mods) === FALSE) {
    throw new Exception('Error processing Child MODS ' . $child_mods);
  }
  $child_xpath = _roblib_update_pmods_xpath($child_xml_doc);

  if (empty($parent_mods)) {
    // loadXML() does not accept an empty string; fail with a catchable
    // exception instead.
    throw new Exception('Error loading Parent MODS');
  }
  $parent_xml_doc = new DOMDocument();
  if ($parent_xml_doc->loadXML($parent_mods) === FALSE) {
    throw new Exception('Error processing Parent MODS ' . $parent_mods);
  }
  $parent_xpath = _roblib_update_pmods_xpath($parent_xml_doc);

  // Page title: every untyped book title, prefixed with the image number.
  $title_query = "/mods:mods/mods:titleInfo[not(@type)]/mods:title";
  $image_number_string = 'Image ' . $image_number . ' - ';
  $book_titles = [];
  $expected_page_titles = [];
  foreach ($parent_xpath->query($title_query) as $p_node) {
    $book_title = trim($p_node->nodeValue);
    $book_titles[] = $book_title;
    $expected_page_titles[] = $image_number_string . $book_title;
  }
  $current_page_titles = [];
  $nodes_to_delete = [];
  foreach ($child_xpath->query($title_query) as $child_node) {
    $current_page_titles[] = $child_node->nodeValue;
    // The whole titleInfo wrapper is replaced, not just the title.
    $nodes_to_delete[] = $child_node->parentNode;
  }
  if (count(array_diff($expected_page_titles, $current_page_titles)) > 0) {
    $updated = TRUE;
    roblib_update_pmods_delete_nodes($nodes_to_delete);
    // Every existing titleInfo was just removed, so re-add the full set and
    // not only the titles that were missing.
    foreach ($expected_page_titles as $title) {
      roblib_update_pmods_update_title($child_xml_doc, $title);
    }
  }

  // Host (book) title stored on the page as relatedItem[@type='host'].
  $book_title_query = "/mods:mods/mods:relatedItem[@type='host']/mods:titleInfo/mods:title";
  $current_book_titles = [];
  $nodes_to_delete = [];
  foreach ($child_xpath->query($book_title_query) as $c_node) {
    $current_book_titles[] = $c_node->nodeValue;
    // Remove the enclosing relatedItem element.
    $nodes_to_delete[] = $c_node->parentNode->parentNode;
  }
  if (count(array_diff($book_titles, $current_book_titles)) > 0) {
    $updated = TRUE;
    roblib_update_pmods_delete_nodes($nodes_to_delete);
    foreach ($book_titles as $title) {
      roblib_update_pmods_update_book_title($child_xml_doc, $title);
    }
  }

  // accessCondition that links to the rightsstatements.org InC statement.
  $access_condition_query = "/mods:mods/mods:accessCondition[@type='use and reproduction' and @xlink:href='http://rightsstatements.org/vocab/InC/1.0/']";
  if (_roblib_update_pmods_sync_conditions($child_xpath, $parent_xpath, $child_xml_doc, $access_condition_query, TRUE)) {
    $updated = TRUE;
  }

  // A book may have a custom accessCondition that doesn't link to a license.
  $access_condition_query = "/mods:mods/mods:accessCondition[@type='use and reproduction' and not(@xlink:href)]";
  if (_roblib_update_pmods_sync_conditions($child_xpath, $parent_xpath, $child_xml_doc, $access_condition_query, FALSE)) {
    $updated = TRUE;
  }

  if ($updated) {
    $datastream->setContentFromString($child_xml_doc->saveXML());
  }
}

/**
 * Build an XPath evaluator with the mods and xlink prefixes registered.
 *
 * @param \DOMDocument $doc
 *   The document to query.
 *
 * @return \DOMXPath
 *   The configured XPath object.
 */
function _roblib_update_pmods_xpath(DOMDocument $doc) {
  $xpath = new DOMXPath($doc);
  $xpath->registerNamespace('mods', 'http://www.loc.gov/mods/v3');
  $xpath->registerNamespace('xlink', 'http://www.w3.org/1999/xlink');
  return $xpath;
}

/**
 * Synchronise the accessCondition nodes matched by a query from book to page.
 *
 * @param \DOMXPath $child_xpath
 *   XPath evaluator for the page MODS.
 * @param \DOMXPath $parent_xpath
 *   XPath evaluator for the book MODS.
 * @param \DOMDocument $child_xml_doc
 *   The page MODS document that is modified.
 * @param string $query
 *   The XPath query selecting the accessCondition nodes to compare.
 * @param bool $use_xlink
 *   Whether re-created nodes get the xlink:href attribute.
 *
 * @return bool
 *   TRUE if the page document was changed.
 *
 * @throws \Exception
 *   If a stale node cannot be removed.
 */
function _roblib_update_pmods_sync_conditions(DOMXPath $child_xpath, DOMXPath $parent_xpath, DOMDocument $child_xml_doc, $query, $use_xlink) {
  $child_values = [];
  $child_nodes = [];
  foreach ($child_xpath->query($query) as $c_node) {
    $child_values[] = $c_node->nodeValue;
    $child_nodes[] = $c_node;
  }
  $parent_values = [];
  foreach ($parent_xpath->query($query) as $p_node) {
    $parent_values[] = $p_node->nodeValue;
  }

  $has_missing = count(array_diff($parent_values, $child_values)) > 0;
  // The book has no such condition but the page still carries one.
  $has_orphans = count($parent_values) < 1 && count($child_nodes) > 0;
  if (!$has_missing && !$has_orphans) {
    // Nothing to do; in particular do not report a change (and trigger a
    // datastream rewrite) when neither book nor page has the condition.
    return FALSE;
  }

  roblib_update_pmods_delete_nodes($child_nodes);
  // All matching page nodes were removed, so re-add every book value.
  foreach ($parent_values as $condition) {
    roblib_update_pmods_update_conditions($child_xml_doc, $condition, $use_xlink);
  }
  return TRUE;
}

/**
 * Create an element in the namespace of the document root and append it.
 *
 * The text is added as a text node so that characters such as '&' are
 * escaped on serialisation; the $value argument of
 * DOMDocument::createElement() is not escaped.
 *
 * @param \DOMDocument $mods_doc
 *   The document that owns the new element.
 * @param \DOMNode $parent
 *   The node the new element is appended to.
 * @param string $name
 *   The local name of the new element.
 * @param string|null $text
 *   Optional text content.
 *
 * @return \DOMElement
 *   The appended element.
 */
function _roblib_update_pmods_append_element(DOMDocument $mods_doc, DOMNode $parent, $name, $text = NULL) {
  $root = $mods_doc->documentElement;
  if (!empty($root->namespaceURI)) {
    // Reuse the root's prefix (if any) so the new element serialises like
    // its siblings.
    $qualified_name = empty($root->prefix) ? $name : $root->prefix . ':' . $name;
    $element = $mods_doc->createElementNS($root->namespaceURI, $qualified_name);
  }
  else {
    $element = $mods_doc->createElement($name);
  }
  if ($text !== NULL && $text !== '') {
    $element->appendChild($mods_doc->createTextNode((string) $text));
  }
  $parent->appendChild($element);
  return $element;
}

/**
 * Update the accessCondition node of the pages MODS xml.
 *
 * Appends a new accessCondition[@type='use and reproduction'] element to the
 * document root.
 *
 * @param \DOMDocument $mods_doc
 *   A pages MODS xml.
 * @param string $condition
 *   The new accessCondition nodes value.
 * @param bool $use_xlink
 *   If true it will add the xlink:href attribute.
 */
function roblib_update_pmods_update_conditions($mods_doc, $condition, $use_xlink = FALSE) {
  $access_condition = _roblib_update_pmods_append_element($mods_doc, $mods_doc->documentElement, 'accessCondition', $condition);
  $access_condition->setAttribute('type', 'use and reproduction');
  if ($use_xlink) {
    // Bind the attribute to the xlink namespace so the prefix is declared
    // even when the document root does not declare it.
    $access_condition->setAttributeNS('http://www.w3.org/1999/xlink', 'xlink:href', 'http://rightsstatements.org/vocab/InC/1.0/');
  }
}

/**
 * Update the page MODS xml with a relatedItem[@type='host'] title.
 *
 * Appends relatedItem[@type='host']/titleInfo/title holding the books title
 * to the document root.
 *
 * @param \DOMDocument $mods_doc
 *   A page MODS xml.
 * @param string $book_title
 *   The title of the book.
 */
function roblib_update_pmods_update_book_title(&$mods_doc, $book_title) {
  $related_item = _roblib_update_pmods_append_element($mods_doc, $mods_doc->documentElement, 'relatedItem');
  $related_item->setAttribute('type', 'host');
  $title_info = _roblib_update_pmods_append_element($mods_doc, $related_item, 'titleInfo');
  _roblib_update_pmods_append_element($mods_doc, $title_info, 'title', (string) $book_title);
}

/**
 * Update the page MODS xml titleInfo/title.
 *
 * Appends a titleInfo/title pair to the document root.
 *
 * @param \DOMDocument $mods_doc
 *   A page MODS xml.
 * @param string $title
 *   The title of the book prefixed with the page image #.
 */
function roblib_update_pmods_update_title(&$mods_doc, $title) {
  $title_info = _roblib_update_pmods_append_element($mods_doc, $mods_doc->documentElement, 'titleInfo');
  _roblib_update_pmods_append_element($mods_doc, $title_info, 'title', (string) $title);
}

/**
 * Batch 'finished' callback.
 *
 * NOTE(review): the returned string is presumably not displayed by the batch
 * API; confirm whether a user-facing message should be set here instead.
 *
 * @param bool $success
 *   Whether the batch completed without errors (unused).
 * @param array $results
 *   The batch results, including 'pages_processed' (unused).
 * @param array $operations
 *   Any operations that remained unprocessed (unused).
 *
 * @return string
 *   A translated completion message.
 */
function roblib_update_pmods_batch_finished($success, $results, $operations) {
  return t('Finished Processing Pages.');
}