Browse Source

WIP Modify GenerateOCRDerivativeFile to support hOCR

pull/897/head
Alexander O'Neill 2 years ago committed by Alexander O'Neill
parent
commit
0bea8da572
  1. 28
      modules/islandora_text_extraction/src/Plugin/Action/GenerateOCRDerivativeFile.php

28
modules/islandora_text_extraction/src/Plugin/Action/GenerateOCRDerivativeFile.php

@ -8,7 +8,7 @@ use Drupal\Core\Url;
use Drupal\islandora\Plugin\Action\AbstractGenerateDerivativeMediaFile;
/**
* Emits a Node for generating fits derivatives event.
* Generates OCR derivatives event.
*
* @Action(
* id = "generate_extracted_text_file",
@ -29,6 +29,7 @@ class GenerateOCRDerivativeFile extends AbstractGenerateDerivativeMediaFile {
$config['destination_media_type'] = 'file';
$config['scheme'] = $this->config->get('default_scheme');
$config['destination_text_field_name'] = '';
$config['text_format'] = 'plain_text';
return $config;
}
@ -38,7 +39,7 @@ class GenerateOCRDerivativeFile extends AbstractGenerateDerivativeMediaFile {
public function buildConfigurationForm(array $form, FormStateInterface $form_state) {
$map = $this->entityFieldManager->getFieldMapByFieldType('text_long');
$file_fields = $map['media'];
$field_options = array_combine(array_keys($file_fields), array_keys($file_fields));
$field_options = ['none' => $this->t('None')] + array_combine(array_keys($file_fields), array_keys($file_fields));
$form = parent::buildConfigurationForm($form, $form_state);
$form['mimetype']['#description'] = $this->t('Mimetype to convert to (e.g. application/xml, etc...)');
$form['mimetype']['#value'] = 'text/plain';
@ -48,13 +49,23 @@ class GenerateOCRDerivativeFile extends AbstractGenerateDerivativeMediaFile {
$last = array_slice($form, count($form) - $position + 1);
$middle['destination_text_field_name'] = [
'#required' => TRUE,
'#required' => FALSE,
'#type' => 'select',
'#options' => $field_options,
'#title' => $this->t('Destination Text field Name'),
'#default_value' => $this->configuration['destination_text_field_name'],
'#description' => $this->t('Text field on Media Type to hold extracted text.'),
];
$middle['text_format'] = [
'#type' => 'select',
'#title' => $this->t('Format'),
'#options' => [
'plain_text' => $this->t('Plain text'),
'hocr' => $this->t('hOCR text with positional data'),
],
'#default_value' => $this->configuration['text_format'],
'#description' => $this->t("The type of text to be returned."),
];
$form = array_merge($first, $middle, $last);
unset($form['args']);
@ -81,17 +92,28 @@ class GenerateOCRDerivativeFile extends AbstractGenerateDerivativeMediaFile {
public function submitConfigurationForm(array &$form, FormStateInterface $form_state) {
parent::submitConfigurationForm($form, $form_state);
$this->configuration['destination_text_field_name'] = $form_state->getValue('destination_text_field_name');
$this->configuration['text_format'] = $form_state->getValue('text_format');
switch ($form_state->getValue('text_format')) {
case 'hocr':
$this->configuration['args'] = '-c tessedit_create_hocr=1 -c hocr_font_info=0';
break;
case 'plain_text':
$his->configuration['args'] = '';
break;
}
}
/**
* Override this to return arbitrary data as an array to be json encoded.
*/
protected function generateData(EntityInterface $entity) {
$data = parent::generateData($entity);
$route_params = [
'media' => $entity->id(),
'destination_field' => $this->configuration['destination_field_name'],
'destination_text_field' => $this->configuration['destination_text_field_name'],
'text_format' => $this->configuration['text_format'],
];
$data['destination_uri'] = Url::fromRoute('islandora_text_extraction.attach_file_to_media', $route_params)
->setAbsolute()

Loading…
Cancel
Save