diff --git a/modules/islandora_text_extraction/src/Plugin/Action/GenerateOCRDerivativeFile.php b/modules/islandora_text_extraction/src/Plugin/Action/GenerateOCRDerivativeFile.php index f6b8034a..73318e35 100644 --- a/modules/islandora_text_extraction/src/Plugin/Action/GenerateOCRDerivativeFile.php +++ b/modules/islandora_text_extraction/src/Plugin/Action/GenerateOCRDerivativeFile.php @@ -8,7 +8,7 @@ use Drupal\Core\Url; use Drupal\islandora\Plugin\Action\AbstractGenerateDerivativeMediaFile; /** - * Emits a Node for generating fits derivatives event. + * Generates OCR derivatives event. * * @Action( * id = "generate_extracted_text_file", @@ -29,6 +29,7 @@ class GenerateOCRDerivativeFile extends AbstractGenerateDerivativeMediaFile { $config['destination_media_type'] = 'file'; $config['scheme'] = $this->config->get('default_scheme'); $config['destination_text_field_name'] = ''; + $config['text_format'] = 'plain_text'; return $config; } @@ -38,7 +39,7 @@ class GenerateOCRDerivativeFile extends AbstractGenerateDerivativeMediaFile { public function buildConfigurationForm(array $form, FormStateInterface $form_state) { $map = $this->entityFieldManager->getFieldMapByFieldType('text_long'); $file_fields = $map['media']; - $field_options = array_combine(array_keys($file_fields), array_keys($file_fields)); + $field_options = ['none' => $this->t('None')] + array_combine(array_keys($file_fields), array_keys($file_fields)); $form = parent::buildConfigurationForm($form, $form_state); $form['mimetype']['#description'] = $this->t('Mimetype to convert to (e.g. application/xml, etc...)'); $form['mimetype']['#value'] = 'text/plain'; @@ -48,13 +49,23 @@ class GenerateOCRDerivativeFile extends AbstractGenerateDerivativeMediaFile { $last = array_slice($form, count($form) - $position + 1); $middle['destination_text_field_name'] = [ - '#required' => TRUE, + '#required' => FALSE, '#type' => 'select', '#options' => $field_options, '#title' => $this->t('Destination Text field Name'), '#default_value' => $this->configuration['destination_text_field_name'], '#description' => $this->t('Text field on Media Type to hold extracted text.'), ]; + $middle['text_format'] = [ + '#type' => 'select', + '#title' => $this->t('Format'), + '#options' => [ + 'plain_text' => $this->t('Plain text'), + 'hocr' => $this->t('hOCR text with positional data'), + ], + '#default_value' => $this->configuration['text_format'], + '#description' => $this->t("The type of text to be returned."), + ]; $form = array_merge($first, $middle, $last); unset($form['args']); @@ -81,17 +92,28 @@ class GenerateOCRDerivativeFile extends AbstractGenerateDerivativeMediaFile { public function submitConfigurationForm(array &$form, FormStateInterface $form_state) { parent::submitConfigurationForm($form, $form_state); $this->configuration['destination_text_field_name'] = $form_state->getValue('destination_text_field_name'); + $this->configuration['text_format'] = $form_state->getValue('text_format'); + switch ($form_state->getValue('text_format')) { + case 'hocr': + $this->configuration['args'] = '-c tessedit_create_hocr=1 -c hocr_font_info=0'; + break; + case 'plain_text': + $his->configuration['args'] = ''; + break; + } } /** * Override this to return arbitrary data as an array to be json encoded. */ protected function generateData(EntityInterface $entity) { + $data = parent::generateData($entity); $route_params = [ 'media' => $entity->id(), 'destination_field' => $this->configuration['destination_field_name'], 'destination_text_field' => $this->configuration['destination_text_field_name'], + 'text_format' => $this->configuration['text_format'], ]; $data['destination_uri'] = Url::fromRoute('islandora_text_extraction.attach_file_to_media', $route_params) ->setAbsolute()