diff --git a/config/install/field.field.node.fits_technical_metadata.field_data_of.yml b/config/install/field.field.node.fits_technical_metadata.field_data_of.yml new file mode 100644 index 00000000..47b4daee --- /dev/null +++ b/config/install/field.field.node.fits_technical_metadata.field_data_of.yml @@ -0,0 +1,20 @@ +langcode: en +status: true +dependencies: + config: + - field.storage.node.field_data_of + - node.type.fits_technical_metadata +id: node.fits_technical_metadata.field_data_of +field_name: field_data_of +entity_type: node +bundle: fits_technical_metadata +label: 'Data of' +description: '' +required: false +translatable: false +default_value: { } +default_value_callback: '' +settings: + handler: 'default:node' + handler_settings: { } +field_type: entity_reference diff --git a/config/install/ield.field.media.fits_technical_metadata.field_media_file.yml b/config/install/ield.field.media.fits_technical_metadata.field_media_file.yml new file mode 100644 index 00000000..ea1d5f1d --- /dev/null +++ b/config/install/ield.field.media.fits_technical_metadata.field_media_file.yml @@ -0,0 +1,26 @@ +langcode: en +status: true +dependencies: + config: + - field.storage.media.field_media_file + - media.type.fits_technical_metadata + module: + - file +id: media.fits_technical_metadata.field_media_file +field_name: field_media_file +entity_type: media +bundle: fits_technical_metadata +label: File +description: '' +required: true +translatable: true +default_value: { } +default_value_callback: '' +settings: + file_directory: '[date:custom:Y]-[date:custom:m]' + file_extensions: xml + max_filesize: '' + description_field: false + handler: 'default:file' + handler_settings: { } +field_type: file diff --git a/config/install/ield.storage.node.field_data_of.yml b/config/install/ield.storage.node.field_data_of.yml new file mode 100644 index 00000000..02baf482 --- /dev/null +++ b/config/install/ield.storage.node.field_data_of.yml @@ -0,0 +1,22 @@ +langcode: en +status: true +dependencies: + module: + - field_permissions + - node +third_party_settings: + field_permissions: + permission_type: public +id: node.field_data_of +field_name: field_data_of +entity_type: node +type: entity_reference +settings: + target_type: node +module: core +locked: false +cardinality: 1 +translatable: true +indexes: { } +persist_with_no_fields: false +custom_storage: false diff --git a/config/install/node.type.fits_technical_metadata.yml b/config/install/node.type.fits_technical_metadata.yml new file mode 100644 index 00000000..a53652d2 --- /dev/null +++ b/config/install/node.type.fits_technical_metadata.yml @@ -0,0 +1,17 @@ +langcode: en +status: true +dependencies: + module: + - menu_ui +third_party_settings: + menu_ui: + available_menus: + - main + parent: 'main:' +name: 'Fits Technical Metadata' +type: fits_technical_metadata +description: 'Convenience node to hold indexable FITS values' +help: '' +new_revision: true +preview_mode: 1 +display_submitted: true diff --git a/islandora_fits.module b/islandora_fits.module index b9b55621..b75e57fe 100644 --- a/islandora_fits.module +++ b/islandora_fits.module @@ -6,24 +6,26 @@ */ use Drupal\Core\Routing\RouteMatchInterface; +use Drupal\media\MediaInterface; use Drupal\taxonomy\Entity\Term; -use Drupal\taxonomy\Entity\Vocabulary; +use Drupal\node\Entity\Node; +use Drupal\file\Entity\File; /** * Implements hook_help(). */ function islandora_fits_help($route_name, RouteMatchInterface $route_match) { - switch ($route_name) { - // Main module help for the islandora_fits module. - case 'help.page.islandora_fits': - $output = ''; - $output .= '

' . t('About') . '

'; - $output .= '

' . t('Enables Technical Metadata derivative generation') . '

'; - return $output; - - default: - } + switch ($route_name) { + // Main module help for the islandora_fits module. + case 'help.page.islandora_fits': + $output = ''; + $output .= '

' . t('About') . '

'; + $output .= '

' . t('Enables Technical Metadata derivative generation') . '

'; + return $output; + + default: + } } /** @@ -57,3 +59,31 @@ function islandora_fits_theme($existing, $type, $theme, $path) { ], ]; } + +/** + * Implements hook_ENTITY_TYPE_presave(). + */ +function islandora_fits_media_presave(MediaInterface $media) { + $transformer = \Drupal::getContainer()->get('islandora_fits.transformxml'); + if ($media->bundle() != 'fits_technical_metadata') { + return; + } + $attached = $media->get('field_attached_fits_node')->referencedEntities()[0]; + $data_of = $media->get('field_media_of')->referencedEntities()[0]; + $title = $data_of->getTitle(); + if (!$attached) { + $node = Node::create(['type' => 'fits_technical_metadata']); + $node->setTitle("Fits Metadata of $title"); + $node->save(); + $media->field_attached_fits_node->target_id = $node->id(); + $attached = $node; + } + $file_id = $media->get('field_media_file')->getValue()[0]['target_id']; + $file = File::load($file_id); + $data = file_get_contents($file->getFileUri()); + $transformer->add_node_fields($data); + $transformer->populate_node($data, $attached); + $attached->save(); +} + + diff --git a/islandora_fits.services.yml b/islandora_fits.services.yml new file mode 100644 index 00000000..6649fca1 --- /dev/null +++ b/islandora_fits.services.yml @@ -0,0 +1,4 @@ +services: + islandora_fits.transformxml: + class: Drupal\islandora_fits\Services\XMLTransform + arguments: ['@renderer','@entity_field.manager'] diff --git a/src/Plugin/Field/FieldFormatter/FitsFormatter.php b/src/Plugin/Field/FieldFormatter/FitsFormatter.php index 05aa0c86..5a83053e 100644 --- a/src/Plugin/Field/FieldFormatter/FitsFormatter.php +++ b/src/Plugin/Field/FieldFormatter/FitsFormatter.php @@ -2,13 +2,11 @@ namespace Drupal\islandora_fits\Plugin\Field\FieldFormatter; -use Drupal\Component\Utility\Html; use Drupal\Core\Field\FieldItemInterface; use Drupal\Core\Field\FieldItemListInterface; use Drupal\Core\Field\FormatterBase; use Drupal\Core\Form\FormStateInterface; use Drupal\file\Entity\File; -Use Drupal\Component\Utility\Xss; use Drupal\Core\Link; use Drupal\Core\Url; @@ -77,6 +75,7 @@ class FitsFormatter extends FormatterBase { * The textual output generated. */ protected function viewValue(FieldItemInterface $item) { + $transformer = \Drupal::getContainer()->get('islandora_fits.transformxml'); $fileItem = $item->getValue(); $file = File::load($fileItem['target_id']); $url = Url::fromUri($file->url()); @@ -86,203 +85,9 @@ class FitsFormatter extends FormatterBase { if (mb_detect_encoding($contents) != 'UTF-8') { $contents = utf8_encode($contents); } - $xml = new \SimpleXMLElement($contents); - $xml->registerXPathNamespace('fits', 'http://hul.harvard.edu/ois/xml/ns/fits/fits_output'); - $fits_metadata = $this->islandora_fits_child_xpath($xml); - $headers = array( - 'label' => t('Field'), - 'value' => t('Value'), - ); - if (count($fits_metadata) == 0) { - $variables['islandora_fits_table']['empty'] = ''; - $variables['islandora_fits_fieldsets']['empty'] = array( - '#type' => 'markup', - '#markup' => t('No technical metadata found.'), - ); - } else { - foreach ($fits_metadata as $tool_name => $vals_array) { - $variables['islandora_fits_data'][$tool_name] = array(); - $rows = &$variables['islandora_fits_data'][$tool_name]; - foreach ($vals_array as $field => $val_array) { - if (!array_key_exists($field, $rows)) { - $rows[$field] = array( - array('data' => Xss::filter($field), 'class' => 'islandora_fits_table_labels'), - ); - foreach ($val_array as $value) { - if (!isset($rows[$field]['value'])) { - $rows[$field]['value'] = array('data' => Xss::filter($value), 'class' => 'islandora_fits_table_values'); - } else { - $data = $rows[$field]['value']['data'] .= ' - ' . Xss::filter($value); - $rows[$field]['value'] = array('data' => $data, 'class' => 'islandora_fits_table_values'); - } - } - } - $table_attributes = array('class' => array('islandora_fits_table')); - - $table = array( - 'header' => $headers, - 'rows' => $rows, - 'attributes' => $table_attributes, - ); - - $variables['islandora_fits_table'][$tool_name] = $table; - $variables['islandora_fits_fieldsets'][$tool_name] = [ - '#theme' => 'table', - '#header' => $headers, - '#rows' => $rows, - '#attributes' => $table_attributes, - '#header_columns' => 4, - ]; - } - } - } - $fieldsets = $variables['islandora_fits_fieldsets']; - $output = []; - foreach ($fieldsets as $title => $fieldset) { - $output[] = [ - 'title' => $title, - 'data' => $fieldset, - ]; - - } - - $renderable = [ - '#theme' => 'fits', - '#title' => $this->t("FITS metadata"), - '#link' => $link, - '#output' => $output, - '#attached' => [ - 'library' => [ - 'islandora_fits/islandora_fits', - ] - ] - - ]; - return \Drupal::service('renderer')->render($renderable); + $output = $transformer->transformFits($contents); + $output['#link'] = $link; + $output['#title'] = $this->t("FITS Metadata"); + return \Drupal::service('renderer')->render($output); } - - /** - * Finds the the first set of children from the FITS xml. - * - * Once it has these it passes them off recursively. - * - * @param SimpleXMLElement $xml - * The SimpleXMLElement to parse. - * - * @return array - * An array containing key/value pairs of fields and data. - */ - public function islandora_fits_child_xpath($xml) { - $results = $xml->xpath('/*|/*/fits:metadata'); - $output = array(); - foreach ($results as $result) { - $this->islandora_fits_children($result, $output); - } - return $output; - } - - /** - * Finds children for fits module. - * - * Recursive function that searches continuously until - * we grab the node's text value and add to - * the output array. - * - * @param SimpleXMLElement $child - * The current child that we are searching through. - * - * @param array $output - * An array containing key/value pairs of fields and data. - */ - public function islandora_fits_children($child, &$output) { - $grandchildren = $child->xpath('*/*'); - - if (count($grandchildren) > 0) { - foreach ($grandchildren as $grandchild) { - $this->islandora_fits_children($grandchild, $output); - } - } else { - $text_results = $child->xpath('text()'); - $tool_name = FALSE; - if ($text_results) { - foreach ($text_results as $text) { - foreach ($text->attributes() as $key => $value) { - if ($key === 'toolname') { - $tool_name = trim((string)$value); - } - } - $output_text = trim((string)$text); - if (!empty($output_text)) { - $fits_out = $this->islandora_fits_construct_output($child->getName(), $tool_name); - $tool_label = $fits_out['tool']; - $field_label = $fits_out['name']; - // Need to check if the label already exists in our output - // such that we do not duplicate entries. - if ($tool_label) { - if (isset($output[$tool_label])) { - if (!array_key_exists($field_label, $output[$tool_label])) { - $output[$tool_label][$field_label][] = $output_text; - } else { - if (!in_array($output_text, $output[$tool_label][$field_label])) { - $output[$tool_label][$field_label][] = $output_text; - } - } - } else { - $output[$tool_label][$field_label][] = $output_text; - } - } // No tool attribute. - else { - if (isset($output['Unknown'][$field_label])) { - if (!in_array($output_text, $output['Unknown'][$field_label])) { - $output['Unknown'][$field_label][] = $output_text; - } - } else { - $output['Unknown'][$field_label][] = $output_text; - } - } - } - } - } - } - - } - - /** - * Builds display by parsing strings. - * - * @param string $node_name - * Name of the current node that we will display. - * @param string $tool_name - * Name of the tool used to generate the metadata. - * - * @return array - * Constructed node name for output. - */ - public function islandora_fits_construct_output($node_name, $tool_name) { - // Construct an arbitrary string with all capitals in it. - $capitals = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'; - $name_array = str_split($node_name); - $space_position = array(); - - // Check to see which characters are capitals so we can split - // them up for cleaner display. - foreach ($name_array as $key => $value) { - if (strpos($capitals, $value) !== FALSE && $key !== 0) { - $space_position[] = $key; - } - } - if (count($space_position)) { - // Needed in event we add multiple spaces so need to keep track. - $pos_offset = 0; - foreach ($space_position as $pos) { - $node_name = substr_replace($node_name, ' ', $pos + $pos_offset, 0); - $pos_offset++; - } - } - $node_name = ucwords($node_name); - - return array('name' => $node_name, 'tool' => ucwords($tool_name)); - } - - } diff --git a/src/Services/XMLTransform.php b/src/Services/XMLTransform.php new file mode 100644 index 00000000..e200ee72 --- /dev/null +++ b/src/Services/XMLTransform.php @@ -0,0 +1,326 @@ +renderer = $renderer; + $this->entityManager = $entityManager; + $this->forbidden = ['-', ' ']; + } + + /** + * Transforms FITS xml into renderable array. + * + * @param $input_xml + * @return array + */ + public function transformFits($input_xml) { + $xml = new \SimpleXMLElement($input_xml); + $xml->registerXPathNamespace('fits', 'http://hul.harvard.edu/ois/xml/ns/fits/fits_output'); + $fits_metadata = $this->islandora_fits_child_xpath($xml); + $headers = array( + 'label' => t('Field'), + 'value' => t('Value'), + ); + if (count($fits_metadata) == 0) { + $variables['islandora_fits_table']['empty'] = ''; + $variables['islandora_fits_fieldsets']['empty'] = array( + '#type' => 'markup', + '#markup' => t('No technical metadata found.'), + ); + } else { + foreach ($fits_metadata as $tool_name => $vals_array) { + $variables['islandora_fits_data'][$tool_name] = array(); + $rows = &$variables['islandora_fits_data'][$tool_name]; + foreach ($vals_array as $field => $val_array) { + if (!array_key_exists($field, $rows)) { + $rows[$field] = array( + array('data' => Xss::filter($field), 'class' => 'islandora_fits_table_labels'), + ); + foreach ($val_array as $value) { + if (!isset($rows[$field]['value'])) { + $rows[$field]['value'] = array('data' => Xss::filter($value), 'class' => 'islandora_fits_table_values'); + } else { + $data = $rows[$field]['value']['data'] .= ' - ' . Xss::filter($value); + $rows[$field]['value'] = array('data' => $data, 'class' => 'islandora_fits_table_values'); + } + } + } + $table_attributes = array('class' => array('islandora_fits_table')); + + $table = array( + 'header' => $headers, + 'rows' => $rows, + 'attributes' => $table_attributes, + ); + + $variables['islandora_fits_table'][$tool_name] = $table; + $variables['islandora_fits_fieldsets'][$tool_name] = [ + '#theme' => 'table', + '#header' => $headers, + '#rows' => $rows, + '#attributes' => $table_attributes, + '#header_columns' => 4, + ]; + } + } + } + $fieldsets = $variables['islandora_fits_fieldsets']; + $output = []; + foreach ($fieldsets as $title => $fieldset) { + $output[] = [ + 'title' => $title, + 'data' => $fieldset, + ]; + + } + + $renderable = [ + '#theme' => 'fits', + '#output' => $output, + '#attached' => [ + 'library' => [ + 'islandora_fits/islandora_fits', + ] + ] + + ]; + return $renderable; + } + + /** + * Finds the the first set of children from the FITS xml. + * + * Once it has these it passes them off recursively. + * + * @param SimpleXMLElement $xml + * The SimpleXMLElement to parse. + * + * @return array + * An array containing key/value pairs of fields and data. + */ + public function islandora_fits_child_xpath($xml) { + $results = $xml->xpath('/*|/*/fits:metadata'); + $output = array(); + foreach ($results as $result) { + $this->islandora_fits_children($result, $output); + } + return $output; + } + + /** + * Finds children for fits module. + * + * Recursive function that searches continuously until + * we grab the node's text value and add to + * the output array. + * + * @param SimpleXMLElement $child + * The current child that we are searching through. + * + * @param array $output + * An array containing key/value pairs of fields and data. + */ + public function islandora_fits_children($child, &$output) { + $grandchildren = $child->xpath('*/*'); + + if (count($grandchildren) > 0) { + foreach ($grandchildren as $grandchild) { + $this->islandora_fits_children($grandchild, $output); + } + } else { + $text_results = $child->xpath('text()'); + $tool_name = FALSE; + if ($text_results) { + foreach ($text_results as $text) { + foreach ($text->attributes() as $key => $value) { + if ($key === 'toolname') { + $tool_name = trim((string)$value); + } + } + $output_text = trim((string)$text); + if (!empty($output_text)) { + $fits_out = $this->islandora_fits_construct_output($child->getName(), $tool_name); + $tool_label = $fits_out['tool']; + $field_label = $fits_out['name']; + // Need to check if the label already exists in our output + // such that we do not duplicate entries. + if ($tool_label) { + if (isset($output[$tool_label])) { + if (!array_key_exists($field_label, $output[$tool_label])) { + $output[$tool_label][$field_label][] = $output_text; + } else { + if (!in_array($output_text, $output[$tool_label][$field_label])) { + $output[$tool_label][$field_label][] = $output_text; + } + } + } else { + $output[$tool_label][$field_label][] = $output_text; + } + } // No tool attribute. + else { + if (isset($output['Unknown'][$field_label])) { + if (!in_array($output_text, $output['Unknown'][$field_label])) { + $output['Unknown'][$field_label][] = $output_text; + } + } else { + $output['Unknown'][$field_label][] = $output_text; + } + } + } + } + } + } + + } + + /** + * Builds display by parsing strings. + * + * @param string $node_name + * Name of the current node that we will display. + * @param string $tool_name + * Name of the tool used to generate the metadata. + * + * @return array + * Constructed node name for output. + */ + public function islandora_fits_construct_output($node_name, $tool_name) { + // Construct an arbitrary string with all capitals in it. + $capitals = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'; + $name_array = str_split($node_name); + $space_position = array(); + + // Check to see which characters are capitals so we can split + // them up for cleaner display. + foreach ($name_array as $key => $value) { + if (strpos($capitals, $value) !== FALSE && $key !== 0) { + $space_position[] = $key; + } + } + if (count($space_position)) { + // Needed in event we add multiple spaces so need to keep track. + $pos_offset = 0; + foreach ($space_position as $pos) { + $node_name = substr_replace($node_name, ' ', $pos + $pos_offset, 0); + $pos_offset++; + } + } + $node_name = ucwords($node_name); + + return array('name' => $node_name, 'tool' => ucwords($tool_name)); + } + + /** + * Adds fields to content type. + * + * @param $input_xml + * @throws \Drupal\Core\Entity\EntityStorageException + */ + public function add_node_fields($input_xml) { + $data = $this->transformFits($input_xml); + $all_fields = []; + foreach ($data['#output'] as $datum) { + $all_fields = array_merge($all_fields, $this->harvest_values($datum)); + } + $to_process = $this->normalize_names($all_fields); + foreach ($to_process as $field) { + $exists = FieldStorageConfig::loadByName('node', $field['field_name']); + if (!$exists) { + $field_storage = FieldStorageConfig::create([ + 'entity_type' => 'node', + 'field_name' => $field['field_name'], + 'type' => 'text', + ]); + $field_storage->save(); + FieldConfig::create([ + 'field_storage' => $field_storage, + 'bundle' => 'fits_technical_metadata', + 'label' => $field['field_name'], + ])->save(); + } + } + } + + /** + * Populates associated node. + * + * @param $input_xml + * @param $node + */ + public function populate_node($input_xml, &$node) { + $data = $this->transformFits($input_xml); + $all_fields = []; + foreach ($data['#output'] as $datum) { + $all_fields = array_merge($all_fields, $this->harvest_values($datum)); + } + $to_add = []; + foreach ($all_fields as $label => $field_value) { + $lower = strtolower($label); + $normalized = str_replace($this->forbidden, '_', $lower); + $field_name = substr("field_$normalized", 0, 32); + $to_add[$field_name] = $field_value; + } + + foreach ($to_add as $field_name => $field_value) { + $node->set($field_name, $field_value); + } + } + + /** + * Extracts and labels content. + * + * @param $input + * @return array + */ + private function harvest_values($input) { + $fields = []; + $label = str_replace(' ', '_', $input['title']); + $rows = $input['data']['#rows']; + foreach ($rows as $key => $value) { + $fields["{$label}_{$key}"] = $value['value']['data']; + } + return $fields; + + } + + /** + * Create standardized machine name fields. + * + * @param array $names + * @return array + */ + private function normalize_names(array $names) { + $normalized_names = []; + foreach ($names as $label => $field_value) { + $lower = strtolower($label); + $normalized = str_replace($this->forbidden, '_', $lower); + $field_name = substr("field_$normalized", 0, 32); + + $normalized_names[] = [ + 'field_label' => $label, + 'field_name' => $field_name, + 'field_value' => $field_value, + ]; + } + return $normalized_names; + } +}