From 4052cef24c5230852ce6b5750044d4cc999b2adc Mon Sep 17 00:00:00 2001 From: Rosie Le Faive <lefaive@gmail.com> Date: Wed, 25 Oct 2023 11:23:21 -0300 Subject: [PATCH] Update OcrTextFormatter.php Addresses #988 --- .../src/Plugin/Field/FieldFormatter/OcrTextFormatter.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/islandora_text_extraction/src/Plugin/Field/FieldFormatter/OcrTextFormatter.php b/modules/islandora_text_extraction/src/Plugin/Field/FieldFormatter/OcrTextFormatter.php index 2e066943..055e76d9 100644 --- a/modules/islandora_text_extraction/src/Plugin/Field/FieldFormatter/OcrTextFormatter.php +++ b/modules/islandora_text_extraction/src/Plugin/Field/FieldFormatter/OcrTextFormatter.php @@ -132,8 +132,9 @@ class OcrTextFormatter extends FormatterBase implements ContainerFactoryPluginIn $fileItem = $item->getValue(); $file = $this->entityTypeManager->getStorage('file')->load($fileItem['target_id']); $contents = file_get_contents($file->getFileUri()); - if (mb_detect_encoding($contents) != 'UTF-8') { - $contents = utf8_encode($contents); + $detected_encoding = mb_detect_encoding($contents); + if ($detected_encoding != 'UTF-8') { + $contents = mb_convert_encoding($contents, 'UTF-8', $detected_encoding); } $contents = nl2br($contents); return $contents;