Browse Source

WIP Modify GenerateOCRDerivativeFile to support hOCR

pull/897/head
Alexander O'Neill 2 years ago committed by Alexander O'Neill
parent
commit
0bea8da572
  1. 28
      modules/islandora_text_extraction/src/Plugin/Action/GenerateOCRDerivativeFile.php

28
modules/islandora_text_extraction/src/Plugin/Action/GenerateOCRDerivativeFile.php

@ -8,7 +8,7 @@ use Drupal\Core\Url;
use Drupal\islandora\Plugin\Action\AbstractGenerateDerivativeMediaFile; use Drupal\islandora\Plugin\Action\AbstractGenerateDerivativeMediaFile;
/** /**
* Emits a Node for generating fits derivatives event. * Generates OCR derivatives event.
* *
* @Action( * @Action(
* id = "generate_extracted_text_file", * id = "generate_extracted_text_file",
@ -29,6 +29,7 @@ class GenerateOCRDerivativeFile extends AbstractGenerateDerivativeMediaFile {
$config['destination_media_type'] = 'file'; $config['destination_media_type'] = 'file';
$config['scheme'] = $this->config->get('default_scheme'); $config['scheme'] = $this->config->get('default_scheme');
$config['destination_text_field_name'] = ''; $config['destination_text_field_name'] = '';
$config['text_format'] = 'plain_text';
return $config; return $config;
} }
@ -38,7 +39,7 @@ class GenerateOCRDerivativeFile extends AbstractGenerateDerivativeMediaFile {
public function buildConfigurationForm(array $form, FormStateInterface $form_state) { public function buildConfigurationForm(array $form, FormStateInterface $form_state) {
$map = $this->entityFieldManager->getFieldMapByFieldType('text_long'); $map = $this->entityFieldManager->getFieldMapByFieldType('text_long');
$file_fields = $map['media']; $file_fields = $map['media'];
$field_options = array_combine(array_keys($file_fields), array_keys($file_fields)); $field_options = ['none' => $this->t('None')] + array_combine(array_keys($file_fields), array_keys($file_fields));
$form = parent::buildConfigurationForm($form, $form_state); $form = parent::buildConfigurationForm($form, $form_state);
$form['mimetype']['#description'] = $this->t('Mimetype to convert to (e.g. application/xml, etc...)'); $form['mimetype']['#description'] = $this->t('Mimetype to convert to (e.g. application/xml, etc...)');
$form['mimetype']['#value'] = 'text/plain'; $form['mimetype']['#value'] = 'text/plain';
@ -48,13 +49,23 @@ class GenerateOCRDerivativeFile extends AbstractGenerateDerivativeMediaFile {
$last = array_slice($form, count($form) - $position + 1); $last = array_slice($form, count($form) - $position + 1);
$middle['destination_text_field_name'] = [ $middle['destination_text_field_name'] = [
'#required' => TRUE, '#required' => FALSE,
'#type' => 'select', '#type' => 'select',
'#options' => $field_options, '#options' => $field_options,
'#title' => $this->t('Destination Text field Name'), '#title' => $this->t('Destination Text field Name'),
'#default_value' => $this->configuration['destination_text_field_name'], '#default_value' => $this->configuration['destination_text_field_name'],
'#description' => $this->t('Text field on Media Type to hold extracted text.'), '#description' => $this->t('Text field on Media Type to hold extracted text.'),
]; ];
$middle['text_format'] = [
'#type' => 'select',
'#title' => $this->t('Format'),
'#options' => [
'plain_text' => $this->t('Plain text'),
'hocr' => $this->t('hOCR text with positional data'),
],
'#default_value' => $this->configuration['text_format'],
'#description' => $this->t("The type of text to be returned."),
];
$form = array_merge($first, $middle, $last); $form = array_merge($first, $middle, $last);
unset($form['args']); unset($form['args']);
@ -81,17 +92,28 @@ class GenerateOCRDerivativeFile extends AbstractGenerateDerivativeMediaFile {
public function submitConfigurationForm(array &$form, FormStateInterface $form_state) { public function submitConfigurationForm(array &$form, FormStateInterface $form_state) {
parent::submitConfigurationForm($form, $form_state); parent::submitConfigurationForm($form, $form_state);
$this->configuration['destination_text_field_name'] = $form_state->getValue('destination_text_field_name'); $this->configuration['destination_text_field_name'] = $form_state->getValue('destination_text_field_name');
$this->configuration['text_format'] = $form_state->getValue('text_format');
switch ($form_state->getValue('text_format')) {
case 'hocr':
$this->configuration['args'] = '-c tessedit_create_hocr=1 -c hocr_font_info=0';
break;
case 'plain_text':
$this->configuration['args'] = '';
break;
}
} }
/** /**
* Override this to return arbitrary data as an array to be json encoded. * Override this to return arbitrary data as an array to be json encoded.
*/ */
protected function generateData(EntityInterface $entity) { protected function generateData(EntityInterface $entity) {
$data = parent::generateData($entity); $data = parent::generateData($entity);
$route_params = [ $route_params = [
'media' => $entity->id(), 'media' => $entity->id(),
'destination_field' => $this->configuration['destination_field_name'], 'destination_field' => $this->configuration['destination_field_name'],
'destination_text_field' => $this->configuration['destination_text_field_name'], 'destination_text_field' => $this->configuration['destination_text_field_name'],
'text_format' => $this->configuration['text_format'],
]; ];
$data['destination_uri'] = Url::fromRoute('islandora_text_extraction.attach_file_to_media', $route_params) $data['destination_uri'] = Url::fromRoute('islandora_text_extraction.attach_file_to_media', $route_params)
->setAbsolute() ->setAbsolute()

Loading…
Cancel
Save