From 4a618fdb5d54ae5ad0b66458e5381173589155c9 Mon Sep 17 00:00:00 2001
From: Alexander O'Neill <alexander@born-digital.com>
Date: Tue, 2 May 2023 17:07:06 -0300
Subject: [PATCH] Issue #941: Only add <br/> tags to plain text extracted text
 fields.

---
 .../src/Controller/MediaSourceController.php               | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/modules/islandora_text_extraction/src/Controller/MediaSourceController.php b/modules/islandora_text_extraction/src/Controller/MediaSourceController.php
index 14c36ebd..2909f68d 100644
--- a/modules/islandora_text_extraction/src/Controller/MediaSourceController.php
+++ b/modules/islandora_text_extraction/src/Controller/MediaSourceController.php
@@ -108,7 +108,12 @@ class MediaSourceController extends ControllerBase {
         $this->getLogger('islandora')->warning("Field $destination_field is not defined in  Media Type {$media->bundle()}");
       }
       if ($media->hasField($destination_text_field)) {
-        $media->{$destination_text_field}->setValue(nl2br($contents));
+        // TODO: The request's query string is malformed (?text_format=plain_text?connection_close=true),
+        // so only the first 10 characters of text_format are compared below. The malformed URL is a Tesseract issue.
+        if (substr($request->query->get('text_format'), 0, 10) == 'plain_text' ) {
+          $contents = nl2br($contents);
+        }
+        $media->{$destination_text_field}->setValue($contents);
       }
       else {
         $this->getLogger('islandora')->warning("Field $destination_text_field is not defined in Media Type {$media->bundle()}");