diff --git a/modules/islandora_iiif/src/Plugin/views/style/IIIFManifest.php b/modules/islandora_iiif/src/Plugin/views/style/IIIFManifest.php index 19467001..cc4b5e94 100644 --- a/modules/islandora_iiif/src/Plugin/views/style/IIIFManifest.php +++ b/modules/islandora_iiif/src/Plugin/views/style/IIIFManifest.php @@ -189,21 +189,27 @@ class IIIFManifest extends StylePluginBase { */ protected function getTileSourceFromRow(ResultRow $row, $iiif_address, $iiif_base_id) { $canvases = []; - foreach ($this->options['iiif_tile_field'] as $iiif_tile_field) { + foreach (array_filter(array_values($this->options['iiif_tile_field'])) as $iiif_tile_field) { $viewsField = $this->view->field[$iiif_tile_field]; + $iiif_ocr_file_field = !empty($this->options['iiif_ocr_file_field']) ? array_values(array_filter($this->options['iiif_ocr_file_field'])) : []; + $ocrField = count($iiif_ocr_file_field) > 0 ? $this->view->field[$iiif_ocr_file_field[0]] : NULL; $entity = $viewsField->getEntity($row); if (isset($entity->{$viewsField->definition['field_name']})) { /** @var \Drupal\Core\Field\FieldItemListInterface $images */ $images = $entity->{$viewsField->definition['field_name']}; - foreach ($images as $image) { + foreach ($images as $i => $image) { if (!$image->entity->access('view')) { // If the user does not have permission to view the file, skip it. continue; } + + $ocrs = $ocrField !== NULL ? $entity->{$ocrField->definition['field_name']} : NULL; + // Create the IIIF URL for this file // Visiting $iiif_url will resolve to the info.json for the image. + $ocr = isset($ocrs[$i]) ? $ocrs[$i] : FALSE; $file_url = $image->entity->createFileUrl(FALSE); $mime_type = $image->entity->getMimeType(); $iiif_url = rtrim($iiif_address, '/') . '/' . 
urlencode($file_url); @@ -241,8 +247,7 @@ class IIIFManifest extends StylePluginBase { } } } - - $canvases[] = [ + $tmp_canvas = [ // @see https://iiif.io/api/presentation/2.1/#canvas '@id' => $canvas_id, '@type' => 'sc:Canvas', @@ -271,6 +276,17 @@ class IIIFManifest extends StylePluginBase { ], ], ]; + + if (isset($ocr) && $ocr != FALSE) { + $tmp_canvas['seeAlso'] = [ + '@id' => $ocr->entity->createFileUrl(FALSE), + 'format' => 'text/vnd.hocr+html', + 'profile' => 'http://kba.cloud/hocr-spec', + 'label' => 'hOCR embedded text', + ]; + } + + $canvases[] = $tmp_canvas; } } } @@ -313,6 +329,7 @@ class IIIFManifest extends StylePluginBase { $options = parent::defineOptions(); $options['iiif_tile_field'] = ['default' => '']; + $options['iiif_ocr_file_field'] = ['default' => '']; return $options; } @@ -368,6 +385,15 @@ class IIIFManifest extends StylePluginBase { // otherwise could lock up the form when setting up a View. '#required' => count($field_options) > 0, ]; + + $form['iiif_ocr_file_field'] = [ + '#title' => $this->t('Structured OCR data file field'), + '#type' => 'checkboxes', + '#default_value' => $this->options['iiif_ocr_file_field'], + '#description' => $this->t('The source of structured OCR text for each entity.'), + '#options' => $field_options, + '#required' => FALSE, + ]; } /** diff --git a/modules/islandora_text_extraction/src/Plugin/Action/GenerateOCRDerivativeFile.php b/modules/islandora_text_extraction/src/Plugin/Action/GenerateOCRDerivativeFile.php index f6b8034a..4ff0d93f 100644 --- a/modules/islandora_text_extraction/src/Plugin/Action/GenerateOCRDerivativeFile.php +++ b/modules/islandora_text_extraction/src/Plugin/Action/GenerateOCRDerivativeFile.php @@ -8,7 +8,7 @@ use Drupal\Core\Url; use Drupal\islandora\Plugin\Action\AbstractGenerateDerivativeMediaFile; /** - * Emits a Node for generating fits derivatives event. + * Generates OCR derivatives event. 
* * @Action( * id = "generate_extracted_text_file", @@ -29,6 +29,7 @@ class GenerateOCRDerivativeFile extends AbstractGenerateDerivativeMediaFile { $config['destination_media_type'] = 'file'; $config['scheme'] = $this->config->get('default_scheme'); $config['destination_text_field_name'] = ''; + $config['text_format'] = 'plain_text'; return $config; } @@ -38,7 +39,7 @@ class GenerateOCRDerivativeFile extends AbstractGenerateDerivativeMediaFile { public function buildConfigurationForm(array $form, FormStateInterface $form_state) { $map = $this->entityFieldManager->getFieldMapByFieldType('text_long'); $file_fields = $map['media']; - $field_options = array_combine(array_keys($file_fields), array_keys($file_fields)); + $field_options = ['none' => $this->t('None')] + array_combine(array_keys($file_fields), array_keys($file_fields)); $form = parent::buildConfigurationForm($form, $form_state); $form['mimetype']['#description'] = $this->t('Mimetype to convert to (e.g. application/xml, etc...)'); $form['mimetype']['#value'] = 'text/plain'; @@ -48,13 +49,23 @@ class GenerateOCRDerivativeFile extends AbstractGenerateDerivativeMediaFile { $last = array_slice($form, count($form) - $position + 1); $middle['destination_text_field_name'] = [ - '#required' => TRUE, + '#required' => FALSE, '#type' => 'select', '#options' => $field_options, '#title' => $this->t('Destination Text field Name'), '#default_value' => $this->configuration['destination_text_field_name'], '#description' => $this->t('Text field on Media Type to hold extracted text.'), ]; + $middle['text_format'] = [ + '#type' => 'select', + '#title' => $this->t('Format'), + '#options' => [ + 'plain_text' => $this->t('Plain text'), + 'hocr' => $this->t('hOCR text with positional data'), + ], + '#default_value' => $this->configuration['text_format'], + '#description' => $this->t("The type of text to be returned."), + ]; $form = array_merge($first, $middle, $last); unset($form['args']); @@ -81,17 +92,29 @@ class 
GenerateOCRDerivativeFile extends AbstractGenerateDerivativeMediaFile { public function submitConfigurationForm(array &$form, FormStateInterface $form_state) { parent::submitConfigurationForm($form, $form_state); $this->configuration['destination_text_field_name'] = $form_state->getValue('destination_text_field_name'); + $this->configuration['text_format'] = $form_state->getValue('text_format'); + switch ($form_state->getValue('text_format')) { + case 'hocr': + $this->configuration['args'] = '-c tessedit_create_hocr=1 -c hocr_font_info=0'; + break; + + case 'plain_text': + $this->configuration['args'] = ''; + break; + } } /** * Override this to return arbitrary data as an array to be json encoded. */ protected function generateData(EntityInterface $entity) { + $data = parent::generateData($entity); $route_params = [ 'media' => $entity->id(), 'destination_field' => $this->configuration['destination_field_name'], 'destination_text_field' => $this->configuration['destination_text_field_name'], + 'text_format' => $this->configuration['text_format'], ]; $data['destination_uri'] = Url::fromRoute('islandora_text_extraction.attach_file_to_media', $route_params) ->setAbsolute()