|
|
@ -8,7 +8,7 @@ use Drupal\Core\Url; |
|
|
|
use Drupal\islandora\Plugin\Action\AbstractGenerateDerivativeMediaFile; |
|
|
|
use Drupal\islandora\Plugin\Action\AbstractGenerateDerivativeMediaFile; |
|
|
|
|
|
|
|
|
|
|
|
/** |
|
|
|
/** |
|
|
|
* Emits a Node for generating fits derivatives event. |
|
|
|
* Generates OCR derivatives event. |
|
|
|
* |
|
|
|
* |
|
|
|
* @Action( |
|
|
|
* @Action( |
|
|
|
* id = "generate_extracted_text_file", |
|
|
|
* id = "generate_extracted_text_file", |
|
|
@ -29,6 +29,7 @@ class GenerateOCRDerivativeFile extends AbstractGenerateDerivativeMediaFile { |
|
|
|
$config['destination_media_type'] = 'file'; |
|
|
|
$config['destination_media_type'] = 'file'; |
|
|
|
$config['scheme'] = $this->config->get('default_scheme'); |
|
|
|
$config['scheme'] = $this->config->get('default_scheme'); |
|
|
|
$config['destination_text_field_name'] = ''; |
|
|
|
$config['destination_text_field_name'] = ''; |
|
|
|
|
|
|
|
$config['text_format'] = 'plain_text'; |
|
|
|
return $config; |
|
|
|
return $config; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
@ -38,7 +39,7 @@ class GenerateOCRDerivativeFile extends AbstractGenerateDerivativeMediaFile { |
|
|
|
public function buildConfigurationForm(array $form, FormStateInterface $form_state) { |
|
|
|
public function buildConfigurationForm(array $form, FormStateInterface $form_state) { |
|
|
|
$map = $this->entityFieldManager->getFieldMapByFieldType('text_long'); |
|
|
|
$map = $this->entityFieldManager->getFieldMapByFieldType('text_long'); |
|
|
|
$file_fields = $map['media']; |
|
|
|
$file_fields = $map['media']; |
|
|
|
$field_options = array_combine(array_keys($file_fields), array_keys($file_fields)); |
|
|
|
$field_options = ['none' => $this->t('None')] + array_combine(array_keys($file_fields), array_keys($file_fields)); |
|
|
|
$form = parent::buildConfigurationForm($form, $form_state); |
|
|
|
$form = parent::buildConfigurationForm($form, $form_state); |
|
|
|
$form['mimetype']['#description'] = $this->t('Mimetype to convert to (e.g. application/xml, etc...)'); |
|
|
|
$form['mimetype']['#description'] = $this->t('Mimetype to convert to (e.g. application/xml, etc...)'); |
|
|
|
$form['mimetype']['#value'] = 'text/plain'; |
|
|
|
$form['mimetype']['#value'] = 'text/plain'; |
|
|
@ -48,13 +49,23 @@ class GenerateOCRDerivativeFile extends AbstractGenerateDerivativeMediaFile { |
|
|
|
$last = array_slice($form, count($form) - $position + 1); |
|
|
|
$last = array_slice($form, count($form) - $position + 1); |
|
|
|
|
|
|
|
|
|
|
|
$middle['destination_text_field_name'] = [ |
|
|
|
$middle['destination_text_field_name'] = [ |
|
|
|
'#required' => TRUE, |
|
|
|
'#required' => FALSE, |
|
|
|
'#type' => 'select', |
|
|
|
'#type' => 'select', |
|
|
|
'#options' => $field_options, |
|
|
|
'#options' => $field_options, |
|
|
|
'#title' => $this->t('Destination Text field Name'), |
|
|
|
'#title' => $this->t('Destination Text field Name'), |
|
|
|
'#default_value' => $this->configuration['destination_text_field_name'], |
|
|
|
'#default_value' => $this->configuration['destination_text_field_name'], |
|
|
|
'#description' => $this->t('Text field on Media Type to hold extracted text.'), |
|
|
|
'#description' => $this->t('Text field on Media Type to hold extracted text.'), |
|
|
|
]; |
|
|
|
]; |
|
|
|
|
|
|
|
$middle['text_format'] = [ |
|
|
|
|
|
|
|
'#type' => 'select', |
|
|
|
|
|
|
|
'#title' => $this->t('Format'), |
|
|
|
|
|
|
|
'#options' => [ |
|
|
|
|
|
|
|
'plain_text' => $this->t('Plain text'), |
|
|
|
|
|
|
|
'hocr' => $this->t('hOCR text with positional data'), |
|
|
|
|
|
|
|
], |
|
|
|
|
|
|
|
'#default_value' => $this->configuration['text_format'], |
|
|
|
|
|
|
|
'#description' => $this->t("The type of text to be returned."), |
|
|
|
|
|
|
|
]; |
|
|
|
$form = array_merge($first, $middle, $last); |
|
|
|
$form = array_merge($first, $middle, $last); |
|
|
|
|
|
|
|
|
|
|
|
unset($form['args']); |
|
|
|
unset($form['args']); |
|
|
@ -81,17 +92,28 @@ class GenerateOCRDerivativeFile extends AbstractGenerateDerivativeMediaFile { |
|
|
|
public function submitConfigurationForm(array &$form, FormStateInterface $form_state) {
  // Let the parent class store the settings it owns before the OCR-specific
  // values are copied from the submitted form into this plugin's
  // configuration.
  parent::submitConfigurationForm($form, $form_state);
  $this->configuration['destination_text_field_name'] = $form_state->getValue('destination_text_field_name');

  // Read the selected format once; it is both stored and used to derive the
  // command-line arguments below.
  $text_format = $form_state->getValue('text_format');
  $this->configuration['text_format'] = $text_format;

  // The 'args' form element is removed in buildConfigurationForm(), so the
  // arguments are derived from the chosen text format instead.
  switch ($text_format) {
    case 'hocr':
      $this->configuration['args'] = '-c tessedit_create_hocr=1 -c hocr_font_info=0';
      break;

    case 'plain_text':
      // Must be written to $this: the previous code assigned to an undefined
      // variable, so hOCR arguments saved earlier were never cleared when
      // switching back to plain text.
      $this->configuration['args'] = '';
      break;
  }
}
|
|
|
|
|
|
|
|
|
|
|
/** |
|
|
|
/** |
|
|
|
* Override this to return arbitrary data as an array to be json encoded. |
|
|
|
* Override this to return arbitrary data as an array to be json encoded. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
protected function generateData(EntityInterface $entity) { |
|
|
|
protected function generateData(EntityInterface $entity) { |
|
|
|
|
|
|
|
|
|
|
|
$data = parent::generateData($entity); |
|
|
|
$data = parent::generateData($entity); |
|
|
|
$route_params = [ |
|
|
|
$route_params = [ |
|
|
|
'media' => $entity->id(), |
|
|
|
'media' => $entity->id(), |
|
|
|
'destination_field' => $this->configuration['destination_field_name'], |
|
|
|
'destination_field' => $this->configuration['destination_field_name'], |
|
|
|
'destination_text_field' => $this->configuration['destination_text_field_name'], |
|
|
|
'destination_text_field' => $this->configuration['destination_text_field_name'], |
|
|
|
|
|
|
|
'text_format' => $this->configuration['text_format'], |
|
|
|
]; |
|
|
|
]; |
|
|
|
$data['destination_uri'] = Url::fromRoute('islandora_text_extraction.attach_file_to_media', $route_params) |
|
|
|
$data['destination_uri'] = Url::fromRoute('islandora_text_extraction.attach_file_to_media', $route_params) |
|
|
|
->setAbsolute() |
|
|
|
->setAbsolute() |
|
|
|