From 4052cef24c5230852ce6b5750044d4cc999b2adc Mon Sep 17 00:00:00 2001
From: Rosie Le Faive <lefaive@gmail.com>
Date: Wed, 25 Oct 2023 11:23:21 -0300
Subject: [PATCH] Update OcrTextFormatter.php

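Addresses #988

Replace utf8_encode() with mb_convert_encoding() in OcrTextFormatter.
The result of mb_detect_encoding() is now stored and, when it is not
'UTF-8', passed as the source encoding for the conversion to UTF-8.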
---
 .../src/Plugin/Field/FieldFormatter/OcrTextFormatter.php     | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/modules/islandora_text_extraction/src/Plugin/Field/FieldFormatter/OcrTextFormatter.php b/modules/islandora_text_extraction/src/Plugin/Field/FieldFormatter/OcrTextFormatter.php
index 2e066943..055e76d9 100644
--- a/modules/islandora_text_extraction/src/Plugin/Field/FieldFormatter/OcrTextFormatter.php
+++ b/modules/islandora_text_extraction/src/Plugin/Field/FieldFormatter/OcrTextFormatter.php
@@ -132,8 +132,9 @@ class OcrTextFormatter extends FormatterBase implements ContainerFactoryPluginIn
     $fileItem = $item->getValue();
     $file = $this->entityTypeManager->getStorage('file')->load($fileItem['target_id']);
     $contents = file_get_contents($file->getFileUri());
-    if (mb_detect_encoding($contents) != 'UTF-8') {
-      $contents = utf8_encode($contents);
+    $detected_encoding = mb_detect_encoding($contents);
+    if ($detected_encoding != 'UTF-8') {
+      $contents = mb_convert_encoding($contents, 'UTF-8', $detected_encoding);
     }
     $contents = nl2br($contents);
     return $contents;