From 138eab201673bdeed163dcbe2a1db7476c3f5fb6 Mon Sep 17 00:00:00 2001 From: Alexander O'Neill Date: Wed, 3 May 2023 16:42:03 -0300 Subject: [PATCH] Issue #941: Only add
tags to plain text extracted text fields. (#942) * Issue #941: Only add
tags to plain text extracted text fields. * Fix PHPCS errors. * Don't add
tags to edited OCR text field if it looks like hOCR. * Respond to PHPCS errors. --- .../islandora_text_extraction.module | 4 ++++ .../src/Controller/MediaSourceController.php | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/modules/islandora_text_extraction/islandora_text_extraction.module b/modules/islandora_text_extraction/islandora_text_extraction.module index 5d6f6437..ca330dd4 100644 --- a/modules/islandora_text_extraction/islandora_text_extraction.module +++ b/modules/islandora_text_extraction/islandora_text_extraction.module @@ -40,6 +40,10 @@ function islandora_text_extraction_media_presave(MediaInterface $media) { $file = File::load($file_id); if ($file) { $data = file_get_contents($file->getFileUri()); + // Check if it's already markup like hOCR. + if (substr($data, 0, 5) == 'set('field_edited_text', $data); $media->field_edited_text->format = 'basic_html'; diff --git a/modules/islandora_text_extraction/src/Controller/MediaSourceController.php b/modules/islandora_text_extraction/src/Controller/MediaSourceController.php index 14c36ebd..f15e42d5 100644 --- a/modules/islandora_text_extraction/src/Controller/MediaSourceController.php +++ b/modules/islandora_text_extraction/src/Controller/MediaSourceController.php @@ -108,7 +108,11 @@ class MediaSourceController extends ControllerBase { $this->getLogger('islandora')->warning("Field $destination_field is not defined in Media Type {$media->bundle()}"); } if ($media->hasField($destination_text_field)) { - $media->{$destination_text_field}->setValue(nl2br($contents)); + // @todo The request actually has a malformed parameter string, ?text_format=plain_text?connection_close=true. + if (substr($request->query->get('text_format'), 0, 10) == 'plain_text') { + $contents = nl2br($contents); + } + $media->{$destination_text_field}->setValue($contents); } else { $this->getLogger('islandora')->warning("Field $destination_text_field is not defined in Media Type {$media->bundle()}");