diff --git a/modules/islandora_text_extraction/islandora_text_extraction.module b/modules/islandora_text_extraction/islandora_text_extraction.module index 9bff85f0..ca330dd4 100644 --- a/modules/islandora_text_extraction/islandora_text_extraction.module +++ b/modules/islandora_text_extraction/islandora_text_extraction.module @@ -40,8 +40,8 @@ function islandora_text_extraction_media_presave(MediaInterface $media) { $file = File::load($file_id); if ($file) { $data = file_get_contents($file->getFileUri()); - // Check if it's already markup like hOCR - if (substr($data, 0, 4) == '<xml') { + // Check if it's already markup like hOCR. + if (substr($data, 0, 5) == '<?xml') { return; } $data = nl2br($data);