diff --git a/modules/islandora_text_extraction/islandora_text_extraction.module b/modules/islandora_text_extraction/islandora_text_extraction.module
index 9bff85f0..ca330dd4 100644
--- a/modules/islandora_text_extraction/islandora_text_extraction.module
+++ b/modules/islandora_text_extraction/islandora_text_extraction.module
@@ -40,8 +40,8 @@ function islandora_text_extraction_media_presave(MediaInterface $media) {
       $file = File::load($file_id);
       if ($file) {
         $data = file_get_contents($file->getFileUri());
-        // Check if it's already markup like hOCR
-        if (substr($data, 0, 4) == '<xml') {
+        // Check if it's already markup like hOCR.
+        if (substr($data, 0, 5) == '<?xml') {
           return;
         }
         $data = nl2br($data);