You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
781 lines
28 KiB
781 lines
28 KiB
<?php |
|
/** |
|
* @file |
|
* Classes and functions for datastream validation. |
|
*/ |
|
|
|
/**
 * Converts a little-endian hexadecimal string to an integer.
 *
 * This is useful for running checks on values that appear in the binary of a
 * datastream. The string is read two hex characters (one byte) at a time and
 * the byte order is reversed before conversion, so '24000000' is interpreted
 * as 0x00000024 (36). The value is whatever hexdec() yields for the reversed
 * string, so it is never negative.
 *
 * @param string $hex
 *   The hexadecimal string. It must consist solely of hex digits and be
 *   exactly 4 or 8 characters long (a 16- or 32-bit value).
 *
 * @throws Exception
 *   If an exception is raised during the actual conversion; the original
 *   exception is attached as the previous exception.
 *
 * @return bool|int
 *   FALSE (after a Drupal error message has been set) if $hex contains
 *   non-hex characters or has an unsupported length; the integer otherwise.
 */
function islandora_hex2int($hex) {

  // ctype_xdigit() also rejects the empty string, so an out-of-range substr()
  // from a truncated datastream ends up here.
  if (!is_string($hex) || !ctype_xdigit($hex)) {
    drupal_set_message(t('String passed to islandora_hex2int() contains non-hexidecimal characters.'), 'error');
    return FALSE;
  }

  // Only 16-bit (4 hex characters) and 32-bit (8 hex characters) values are
  // supported. The previous check negated strlen() before comparing it, which
  // made the condition always FALSE and let every length through.
  if (!in_array(strlen($hex), array(4, 8), TRUE)) {
    drupal_set_message(t('String passed to islandora_hex2int() cannot create a 16- or 32-bit little-endian signed integer'), 'error');
    return FALSE;
  }

  // The actual conversion: swap the byte order, then read the result as a
  // plain hexadecimal number.
  try {
    $reverse_hex = implode('', array_reverse(str_split($hex, 2)));
    return hexdec($reverse_hex);
  }
  catch (Exception $e) {
    throw new Exception('An error occurred during the conversion of hexidecimal to integer.', 0, $e);
  }
}
|
|
|
/**
 * Immutable record of the outcome of a single datastream assertion.
 *
 * The $type flag carries the verdict: TRUE means pass, FALSE means fail.
 */
class DatastreamValidatorResult {

  /**
   * The verdict - TRUE for a pass, FALSE for a fail.
   *
   * @var bool
   */
  protected $type;

  /**
   * The human-readable message attached to this result.
   *
   * @var string
   */
  protected $message;

  /**
   * Information about the code that produced this result.
   *
   * @var array
   */
  protected $caller;

  /**
   * Builds a result from its three parts.
   *
   * @param bool $type
   *   TRUE if this result represents a pass, FALSE if it represents a fail.
   * @param string $message
   *   The message describing the outcome.
   * @param array $caller
   *   The caller information to store with the result.
   */
  public function __construct($type, $message, array $caller) {
    $this->type = $type;
    $this->caller = $caller;
    $this->message = $message;
  }

  /**
   * Returns the verdict of this result.
   *
   * @return bool
   *   TRUE for a pass, FALSE for a fail.
   */
  public function getType() {
    return $this->type;
  }

  /**
   * Returns the message stored with this result.
   *
   * @return string
   *   The message given at construction time.
   */
  public function getMessage() {
    return $this->message;
  }

  /**
   * Returns the caller information stored with this result.
   *
   * @return array
   *   The caller array given at construction time.
   */
  public function getCaller() {
    return $this->caller;
  }

}
|
|
|
/**
 * Abstraction for datastream validators.
 *
 * A concrete validator extends this class and adds one or more methods whose
 * names begin with "assert" (for example assertThing()). Each such method
 * should ideally check one thing only and report the outcome by calling
 * $this->addResult() with TRUE (pass) or FALSE (fail) and a message; it does
 * not need to return anything.
 *
 * The assertions are not run by the constructor. They run the first time
 * getResults() is called, or when runValidators() is called directly.
 *
 * NOTE(review): earlier documentation named
 * IslandoraWebTestCase::assertDatastreams() as the consumer of these results.
 * That code is not part of this file, so confirm before relying on it.
 */
abstract class DatastreamValidator {

  /**
   * The IslandoraFedoraObject containing the datastream to test.
   *
   * @var IslandoraFedoraObject
   */
  public $object;

  /**
   * The DSID of the datastream to test.
   *
   * @var string
   */
  public $datastream;

  /**
   * The content of the datastream.
   *
   * Stays NULL when the datastream could not be found on the object.
   *
   * @var string|null
   */
  public $datastreamContent;

  /**
   * An array of DatastreamValidatorResults.
   *
   * These should be generated using $this->addResult().
   *
   * @var DatastreamValidatorResult[]
   */
  public $results = array();

  /**
   * An array of additional required parameters.
   *
   * @var array
   */
  public $params = array();

  /**
   * Whether runValidators() has already been executed on this instance.
   *
   * Lets getResults() tell "not run yet" apart from "ran and produced no
   * results", so that the assertions are not executed again on every call.
   *
   * @var bool
   */
  protected $validatorsRun = FALSE;

  /**
   * Constructs a DatastreamValidator.
   *
   * Stores the arguments and copies the datastream content into
   * $this->datastreamContent. If the datastream is not available on the
   * object, a Drupal error message is set and the content is left unset.
   *
   * @param IslandoraFedoraObject $object
   *   The object to grab the datastream from.
   * @param string $datastream
   *   The DSID of the datastream itself.
   * @param array $params
   *   An extra array of parameters the validator might need.
   */
  public function __construct(IslandoraFedoraObject $object, $datastream, array $params = array()) {
    $this->object = $object;
    $this->datastream = $datastream;
    $this->params = $params;
    if ($object[$datastream]) {
      $this->datastreamContent = $object[$datastream]->content;
    }
    else {
      drupal_set_message(t("Error grabbing content from datastream %datastream in object %id", array(
        '%datastream' => $datastream,
        '%id' => $object->id,
      )), 'error');
    }
  }

  /**
   * Helper function to run all the validators in a class.
   *
   * Calls, with no arguments, every method of the concrete class whose name
   * begins with "assert". Those methods are expected to record their outcome
   * through $this->addResult().
   */
  public function runValidators() {
    $this->validatorsRun = TRUE;
    // get_class_methods() is evaluated from inside the class, so protected
    // assert*() methods are part of the list as well.
    foreach (get_class_methods($this) as $method) {
      if (substr($method, 0, 6) === 'assert') {
        $this->$method();
      }
    }
  }

  /**
   * Returns an array of DatastreamValidatorResults.
   *
   * Runs the validators first if there are no results yet and they have not
   * been run before. Without the second condition, a run that produced no
   * results would be repeated on every call.
   *
   * @return DatastreamValidatorResult[]
   *   The results.
   */
  public function getResults() {
    if (empty($this->results) && !$this->validatorsRun) {
      $this->runValidators();
    }
    return $this->results;
  }

  /**
   * Adds a result to $this->results.
   *
   * @param bool $type
   *   The type of result (TRUE for pass, FALSE for fail).
   * @param string $message
   *   The message to put in the result.
   */
  public function addResult($type, $message) {
    $this->results[] = new DatastreamValidatorResult($type, $message, $this->getAssertionCall());
  }

  /**
   * Finds the assert*() method that a result originated from.
   *
   * This is a manipulated version of DrupalWebTestCase::getAssertionCall().
   * We use it here so that we can pass back assertion calls from
   * DatastreamValidator assertions instead of less useful TestCase functions.
   *
   * @return array
   *   Array representing the true caller, as produced by
   *   _drupal_get_last_caller() for the trimmed backtrace.
   */
  protected function getAssertionCall() {
    $backtrace = debug_backtrace();

    // Element 0 is the current call and element 1 is its caller. Drop frames
    // from the front until the caller is a function whose name starts with
    // 'assert', or until no further frame exists behind it.
    while (substr($backtrace[1]['function'], 0, 6) !== 'assert' && isset($backtrace[2])) {
      array_shift($backtrace);
    }

    return _drupal_get_last_caller($backtrace);
  }

}
|
|
|
/**
 * Validates that a datastream holds an image PHP's GD extension can decode.
 *
 * The check is delegated to imagecreatefromstring(), so only the image
 * formats supported by that function (and the local GD build) can pass.
 */
class ImageDatastreamValidator extends DatastreamValidator {

  /**
   * Asserts that GD can build an image from the datastream content.
   *
   * Records a pass when imagecreatefromstring() returns anything other than
   * FALSE, and a fail otherwise.
   */
  protected function assertImageGeneration() {
    $image = imagecreatefromstring($this->datastreamContent);
    if ($image !== FALSE) {
      $this->addResult(TRUE, "Image datastream {$this->datastream} is valid.");
    }
    else {
      $this->addResult(FALSE, "Image datastream {$this->datastream} is either invalid or corrupt.");
    }
  }

}
|
|
|
/**
 * Asserts the validity of any .tif/.tiff datastream.
 */
class TIFFDatastreamValidator extends DatastreamValidator {

  /**
   * Asserts that the TIFF contains an appropriate header.
   *
   * Compares the first four bytes of the datastream (as hex) against the two
   * known TIFF signatures and records a pass naming the byte order, or a fail
   * when neither matches.
   */
  public function assertTIFFHeaderHex() {
    $datastream_header_hex = $this->getTIFFHeaderHex();
    if ($datastream_header_hex === '49492a00') {
      // In this case, the ingested TIFF is designated as using the "Intel
      // byte-order" (i.e. little-endian) by starting with the characters "II"
      // (repeated so that byte order does not yet need to be significant).
      // The number that follows is 42 in little-endian hex, a number of
      // 'deep philosophical significance' to the TIFF format creators.
      $this->addResult(TRUE, "{$this->datastream} datastream asserts that it is a valid Intel-byte-ordered TIF/TIFF file.");
    }
    elseif ($datastream_header_hex === '4d4d002a') {
      // In this case, the ingested TIFF is designated as using the "Motorola
      // byte-order" (i.e. big-endian) by starting with the characters "MM"
      // instead. 42 follows once again, this time in big-endian hex.
      $this->addResult(TRUE, "{$this->datastream} datastream asserts that it is a valid Motorola-byte-ordered TIF/TIFF file.");
    }
    else {
      $this->addResult(FALSE, "{$this->datastream} datastream does not assert that it is a valid TIF/TIFF file.");
    }
  }

  /**
   * Grabs the first 8 characters from the TIFF datastream's hex.
   *
   * @return string
   *   The hex representation of the first four bytes of the datastream
   *   content (shorter if the content itself is shorter).
   */
  protected function getTIFFHeaderHex() {
    return substr(bin2hex($this->datastreamContent), 0, 8);
  }

}
|
|
|
/**
 * Validates a JP2 datastream by inspecting fixed byte positions.
 */
class JP2DatastreamValidator extends DatastreamValidator {

  /**
   * Asserts the hex values at the head of the JP2 file.
   *
   * Bytes 4 through 7 of the datastream (the second 32-bit word) must read
   * 0x6A502020. Only those four bytes are converted to hex for the
   * comparison.
   */
  protected function assertJP2Header() {
    $second_word = bin2hex(substr($this->datastreamContent, 4, 4));
    if ($second_word == '6a502020') {
      $this->addResult(TRUE, "Datastream {$this->datastream} contains the appropriate JP2 header.");
    }
    else {
      $this->addResult(FALSE, "Datastream {$this->datastream} does not contain the appropriate JP2 header.");
    }
  }

  /**
   * Asserts the marker at the end of the JP2 file.
   *
   * The last two bytes of the datastream must be 0xFFD9. A missing marker
   * is reported as a sign that the encoder may not have finished.
   */
  protected function assertJP2Marker() {
    $trailing_bytes = bin2hex(substr($this->datastreamContent, -2));
    if ($trailing_bytes == 'ffd9') {
      $this->addResult(TRUE, "Datastream {$this->datastream} contains the appropriate JP2 ending marker.");
    }
    else {
      $this->addResult(FALSE, "Datastream {$this->datastream} does not contain the appropriate JP2 ending marker. If this is the only JP2 validator that failed, it is likely that derivative generation was interrupted.");
    }
  }

}
|
|
|
/**
 * Asserts the validity of a PDF datastream.
 *
 * All checks search the raw datastream bytes directly. Searching a hex dump
 * of the content instead can match a pattern that starts in the middle of a
 * byte, and needs twice the memory.
 */
class PDFDatastreamValidator extends DatastreamValidator {

  /**
   * Validates the PDF signature.
   *
   * The datastream must start with '%PDF-'. The three characters that follow
   * are reported as the PDF version in the pass message.
   */
  protected function assertPDFSignature() {
    $content = (string) $this->datastreamContent;
    $assertion = substr($content, 0, 5) === '%PDF-';
    $pdf_version = substr($content, 5, 3);
    $pass = "{$this->datastream} datastream asserts that it is a valid PDF file using PDF version {$pdf_version}";
    $fail = "{$this->datastream} datastream binary header appears to be corrupt and missing a valid PDF signature.";
    $message = $assertion ? $pass : $fail;
    $this->addResult($assertion, $message);
  }

  /**
   * Counts the number of streams in this PDF file and asserts there are any.
   *
   * A stream is recognised by the keyword 'stream' on a line of its own,
   * i.e. the byte sequence "\nstream\n".
   */
  protected function assertPDFStreamCount() {
    $pdf_stream_count = substr_count((string) $this->datastreamContent, "\nstream\n");
    $assertion = $pdf_stream_count !== 0;
    $pass = "{$this->datastream} datastream reports the existence of {$pdf_stream_count} PDF streams. Note that an extremely low number could still indicate corruption.";
    $fail = "{$this->datastream} datastream contains zero PDF streams, and is likely not a PDF file.";
    $message = $assertion ? $pass : $fail;
    $this->addResult($assertion, $message);
  }

  /**
   * Validates the PDF closing tag.
   *
   * Looks for the byte sequence "\n%%EOF\n" anywhere in the datastream. The
   * position returned by strpos() is compared strictly against FALSE so that
   * "found" never depends on the truthiness of an offset.
   */
  protected function assertPDFClosingTag() {
    $assertion = strpos((string) $this->datastreamContent, "\n%%EOF\n") !== FALSE;
    $pass = "{$this->datastream} datastream reports the existence of the closing 'EOF' tag required at the end of PDFs";
    $fail = "{$this->datastream} datastream does not contain the closing 'EOF' tag. If this is the only PDF validation that failed, it is likely that derivative generation was interrupted.";
    $message = $assertion ? $pass : $fail;
    $this->addResult($assertion, $message);
  }

}
|
|
|
/**
 * Checks how often a given string occurs in a datastream.
 *
 * $this->params must hold at least two values: element 0 is the string to
 * search for and element 1 is the number of occurrences expected. The count
 * is compared strictly, so the expected value has to be an integer.
 */
class TextDatastreamValidator extends DatastreamValidator {

  /**
   * Constructor override that insists on the two required parameters.
   *
   * @param IslandoraFedoraObject $object
   *   The object to grab the datastream from.
   * @param string $datastream
   *   The DSID of the datastream itself.
   * @param array $params
   *   The search string followed by the expected number of occurrences.
   *
   * @throws InvalidArgumentException
   *   If $params holds fewer than two values.
   */
  public function __construct(IslandoraFedoraObject $object, $datastream, array $params = array()) {
    $has_enough_params = count($params) >= 2;
    if (!$has_enough_params) {
      throw new InvalidArgumentException('$params must contain at least two values to instantiate a TextDatastreamValidator.');
    }
    parent::__construct($object, $datastream, $params);
  }

  /**
   * Asserts that the search string occurs exactly as often as expected.
   *
   * The same message, stating both the actual and the expected count, is
   * recorded for a pass and for a fail.
   */
  protected function assertTextStringCount() {
    $found = self::getTextStringCount();
    list($needle, $wanted) = $this->params;
    $this->addResult(
      $found === $wanted,
      "{$this->datastream} datastream contains the word(s) '{$needle}' repeated {$found} time(s) (expected: {$wanted})."
    );
  }

  /**
   * Counts the occurrences of $this->params[0] in the datastream content.
   *
   * @return int
   *   The number of (non-overlapping) occurrences reported by substr_count().
   */
  protected function getTextStringCount() {
    $needle = $this->params[0];
    return substr_count($this->datastreamContent, $needle);
  }

}
|
|
|
/**
 * Asserts the validity of a WAV datastream.
 *
 * WAV files contain a rigidly detailed header, so we check that certain
 * values sit at their expected offsets and that the sizes declared in the
 * header agree with each other and with the amount of data present.
 *
 * The constructor replaces $this->datastreamContent with its hex
 * representation, so every offset used in this class counts hex characters,
 * i.e. twice the byte offset in the file.
 */
class WAVDatastreamValidator extends DatastreamValidator {

  /**
   * We need a special constructor here to get the hex datastream content.
   *
   * @param IslandoraFedoraObject $object
   *   The object to grab the datastream from.
   * @param string $datastream
   *   The DSID of the datastream itself.
   * @param array $params
   *   An extra array of parameters the validator might need.
   */
  public function __construct(IslandoraFedoraObject $object, $datastream, array $params = array()) {
    parent::__construct($object, $datastream, $params);
    // The content is unset when the datastream is missing; cast it so that
    // bin2hex() always receives a string.
    $this->datastreamContent = bin2hex((string) $this->datastreamContent);
  }

  /**
   * Asserts that the datastream contains a valid WAV signature.
   *
   * Bytes 0-3 must read 'RIFF' (52494646) and bytes 8-11 'WAVE' (57415645).
   */
  protected function assertWAVSignature() {
    // Pad to three entries so a truncated header fails cleanly instead of
    // raising an undefined-offset notice.
    $signatures = array_pad(str_split(substr($this->datastreamContent, 0, 24), 8), 3, '');
    $assertion = $signatures[0] === '52494646' && $signatures[2] === '57415645';
    $pass = "Header of the {$this->datastream} datastream contains a valid file signature.";
    $fail = "Header of the {$this->datastream} datastream contains corrupt file signature.";
    $message = $assertion ? $pass : $fail;
    $this->addResult($assertion, $message);
  }

  /**
   * Asserts that the chunksize in the header is correct.
   *
   * The 32-bit value at byte 4 must equal 36 plus the declared size of the
   * 'data' subchunk.
   */
  protected function assertWAVChunkSize() {
    $assertion = islandora_hex2int(substr($this->datastreamContent, 8, 8)) === 36 + $this->getDataSubChunkSize();
    $pass = "{$this->datastream} datastream chunksize in WAV header is correct";
    $fail = "{$this->datastream} datastream chunksize in WAV header does not match actual chunksize.";
    $message = $assertion ? $pass : $fail;
    $this->addResult($assertion, $message);
  }

  /**
   * Asserts that the datastream contains a 'fmt' subchunk.
   *
   * Bytes 12-15 must read 'fmt ' (666d7420).
   */
  protected function assertWAVFmtSubChunk() {
    $assertion = substr($this->datastreamContent, 24, 8) === '666d7420';
    $pass = "{$this->datastream} datastream contains a 'fmt' subchunk.";
    $fail = "{$this->datastream} datastream is missing the required 'fmt' subchunk.";
    $message = $assertion ? $pass : $fail;
    $this->addResult($assertion, $message);
  }

  /**
   * Asserts that the byterate reported by the WAV header is valid.
   *
   * The 32-bit value at byte 28 must equal the 32-bit value at byte 24 (read
   * here as the sample rate) multiplied by the channel count and the bytes
   * per sample.
   */
  protected function assertWAVByteRate() {
    $wav_samplerate = islandora_hex2int(substr($this->datastreamContent, 48, 8));
    $assertion = islandora_hex2int(substr($this->datastreamContent, 56, 8)) === $wav_samplerate * $this->getNumChannels() * $this->getBytesPerSample();
    $pass = "{$this->datastream} datastream byterate in the WAV header is correct.";
    $fail = "{$this->datastream} datastream byterate in the WAV header does not match actual calculated byterate.";
    $message = $assertion ? $pass : $fail;
    $this->addResult($assertion, $message);
  }

  /**
   * Asserts that the block alignment is correct.
   *
   * The 16-bit value at byte 32 must equal channel count times bytes per
   * sample.
   */
  protected function assertWAVBlockAlignment() {
    $assertion = islandora_hex2int(substr($this->datastreamContent, 64, 4)) === $this->getNumChannels() * $this->getBytesPerSample();
    $pass = "{$this->datastream} datastream block alignment is set correctly.";
    $fail = "{$this->datastream} datastream block alignment is off.";
    $message = $assertion ? $pass : $fail;
    $this->addResult($assertion, $message);
  }

  /**
   * Asserts the existence of a 'data' subchunk.
   *
   * Also asserts that the subchunk size is correct: the size declared in the
   * header must match the amount of data that follows the 44-byte header,
   * expressed as whole samples.
   */
  protected function assertWAVDataSubChunk() {
    if (substr($this->datastreamContent, 72, 8) !== '64617461') {
      $this->addResult(FALSE, "{$this->datastream} datastream is missing the 'data' subchunk.");
      return;
    }
    $this->addResult(TRUE, "{$this->datastream} datastream contains 'data' subchunk.");

    $fail = "{$this->datastream} datastream 'data' chunk is sized incorrectly.";
    $channels = $this->getNumChannels();
    $bytes_per_sample = $this->getBytesPerSample();
    // A truncated or corrupt header yields 0 or FALSE here. Dividing by that
    // would raise a DivisionByZeroError on PHP 8 and abort the remaining
    // assertions, so report it as a sizing failure instead.
    if (!$channels || !$bytes_per_sample) {
      $this->addResult(FALSE, $fail);
      return;
    }

    // Two hex characters per byte, hence the final division by 2.
    $wav_numsamples = strlen((string) substr($this->datastreamContent, 88)) / $channels / $bytes_per_sample / 2;
    // The comparison is strict: if any division above is not exact the
    // product is a float and can never be identical to the integer size.
    $assertion = $this->getDataSubChunkSize() === $wav_numsamples * $channels * $bytes_per_sample;
    $pass = "{$this->datastream} datastream 'data' chunk is the correct size.";
    $message = $assertion ? $pass : $fail;
    $this->addResult($assertion, $message);
  }

  /**
   * Gets the number of channels reported by the WAV header.
   *
   * @return bool|int
   *   The 16-bit value at byte 22 of the header, or FALSE if it could not be
   *   read.
   */
  protected function getNumChannels() {
    return islandora_hex2int(substr($this->datastreamContent, 44, 4));
  }

  /**
   * Gets the reported number of bytes per sample.
   *
   * @return int|float
   *   The 16-bit value at byte 34 of the header (read here as bits per
   *   sample) divided by 8; a float when that value is not a multiple of 8.
   */
  protected function getBytesPerSample() {
    return islandora_hex2int(substr($this->datastreamContent, 68, 4)) / 8;
  }

  /**
   * Gets the size of the 'data' subchunk.
   *
   * @return bool|int
   *   The 32-bit value at byte 40 of the header, or FALSE if it could not be
   *   read.
   */
  protected function getDataSubChunkSize() {
    return islandora_hex2int(substr($this->datastreamContent, 80, 8));
  }

}
|
|
|
/**
 * Asserts the validity of any .mp3 datastream.
 *
 * Our default setup tries to create an MP3 using VBR, but we do some extra
 * checks in case someone turns that off. If the content contains the
 * characters 'Xing', it is treated as VBR, and we can do an in-depth check on
 * the VBR fields. Otherwise, we look for the basic MP3 signature 'fffa' or
 * 'fffb' at the start of the binary.
 */
class MP3DatastreamValidator extends DatastreamValidator {

  /**
   * Asserts the validity of the MP3.
   *
   * The MP3 file format is a bit of a mess; the entire makeup of the file
   * depends on whether it uses variable bit rate or static bit rate. So, this
   * breaks the one-assertion-per-method rule and uses a single assert
   * function to handle the branching.
   *
   * Depending on the header, this records zero, one or two results. A VBR
   * header that flags neither the size field nor the quality field produces
   * none.
   */
  protected function assertValidMP3() {
    // Work on a local hex copy. Overwriting $this->datastreamContent (as this
    // method used to) meant that a second run validated the hex dump of a hex
    // dump.
    $hex = bin2hex((string) $this->datastreamContent);

    // Locate the VBR marker 'Xing' (58696e67). A marker at offset 0 is
    // deliberately treated like a missing one, as it always has been: the
    // marker is expected somewhere after the start of the file.
    $vbr_position = strpos($hex, '58696e67');
    $is_vbr = $vbr_position !== FALSE && $vbr_position > 0;

    // If it's not a VBR MP3, we don't have to check much: the first two bytes
    // decide the outcome.
    if (!$is_vbr) {
      $signature = substr($hex, 0, 4);
      if ($signature === 'fffa') {
        $this->addResult(TRUE, "{$this->datastream} datastream is encoded as a valid MPEG-1 Layer 3 file with CRC protection");
      }
      elseif ($signature === 'fffb') {
        $this->addResult(TRUE, "{$this->datastream} datastream is encoded as a valid unprotected MPEG-1 Layer 3 file");
      }
      else {
        $this->addResult(FALSE, "{$this->datastream} datastream is corrupt and does not identify as a valid MP3.");
      }
      return;
    }

    // The VBR header: 8 hex characters of marker, 8 of flags, then the
    // optional fields. The 32-bit flag value combines four bits, each one
    // switching an optional field on. The modulo tests below check one bit
    // each (1, 2 and 4); every field that is present shifts the ones behind
    // it.
    $mp3_vbrheader = substr($hex, $vbr_position, 240);
    $mp3_flag_value = hexdec(substr($mp3_vbrheader, 8, 8));
    $size_field_offset = 16;
    $quality_field_offset = 16;

    // We can't use the first field (4 bytes), but we still need to skip it.
    if (($mp3_flag_value + 1) % 2 == 0) {
      $size_field_offset += 8;
      $quality_field_offset += 8;
    }

    // The second flag leads us to filesize data, which we can verify.
    if (($mp3_flag_value + 4) % 4 > 1) {
      $mp3_field_bytes = hexdec(substr($mp3_vbrheader, $size_field_offset, 8));
      $mp3_size = strlen($hex) / 2;
      $assertion = $mp3_size == $mp3_field_bytes;
      $pass = "{$this->datastream} datastream reported filesize of {$mp3_size} bytes matches size field value of {$mp3_field_bytes}";
      $fail = "{$this->datastream} datastream reported filesize of {$mp3_size} bytes does not match size field value of {$mp3_field_bytes}";
      $message = $assertion ? $pass : $fail;
      $this->addResult($assertion, $message);
      $quality_field_offset += 8;
    }

    // We can't use the third field (100 bytes) for anything, but we still
    // have to skip it.
    if (($mp3_flag_value + 8) % 8 > 3) {
      $quality_field_offset += 200;
    }

    // The fourth flag leads us to VBR quality data, which we can validate.
    // Any flag value above 7 is taken to mean the field is present.
    if ($mp3_flag_value > 7) {
      $mp3_field_quality = hexdec(substr($mp3_vbrheader, $quality_field_offset, 8));
      $assertion = $mp3_field_quality <= 100 && $mp3_field_quality >= 0;
      $pass = "{$this->datastream} datastream reports valid VBR quality of {$mp3_field_quality} (expected: between 0-100)";
      $fail = "{$this->datastream} datastream reports invalid VBR quality of {$mp3_field_quality} (expected: between 0-100)";
      $message = $assertion ? $pass : $fail;
      $this->addResult($assertion, $message);
    }
  }

}
|
|
|
/**
 * Attempts to validate an .mp4 datastream.
 *
 * MP4 files are a subset of the ISO file format specification, and as such
 * need to contain a declaration of type near the start of the file. This
 * declaration is comprised of the characters 'ftyp', followed by a
 * four-character filetype code. Here, we look for 'ftyp', and then pass the
 * filetype code to the test message.
 */
class MP4DatastreamValidator extends DatastreamValidator {

  /**
   * Asserts that the datastream is ISO-formatted video.
   *
   * Passes when the characters 'ftyp' occur in the datastream. The four
   * characters following them are reported as the filetype code.
   */
  protected function assertISOVideo() {
    $content = (string) $this->datastreamContent;
    $ftyp_position = strpos($content, 'ftyp');
    // strpos() returns FALSE when the marker is missing. Comparing against 0
    // (as this check used to) let every file without 'ftyp' pass.
    $assertion = $ftyp_position !== FALSE;
    // The filetype code is read from the content right behind the marker,
    // not from the numeric position itself.
    $mp4_ftyp = $assertion ? substr($content, $ftyp_position + 4, 4) : '';
    $pass = "{$this->datastream} datastream asserts that it is a valid ISO-formatted video file using ftyp {$mp4_ftyp}";
    $fail = "{$this->datastream} datastream is not a valid ISO-formatted video";
    $message = $assertion ? $pass : $fail;
    $this->addResult($assertion, $message);
  }

}
|
|
|
/**
 * Validates an .ogg/.ogv datastream by looking for well-known markers.
 *
 * Three independent checks are made on the raw content: that the page marker
 * 'OggS' occurs at least once, and that the codec names 'theora' and 'vorbis'
 * each occur at least once.
 */
class OGGDatastreamValidator extends DatastreamValidator {

  /**
   * Asserts that the datastream contains ogg pages.
   *
   * Counts the occurrences of 'OggS' and passes when there is at least one;
   * the count is included in the pass message.
   */
  protected function assertOGGPages() {
    $ogg_pages = substr_count($this->datastreamContent, 'OggS');
    if ($ogg_pages !== 0) {
      $this->addResult(TRUE, "{$this->datastream} datastream asserts that it contains {$ogg_pages} Ogg pages (even a very small file should contain several).");
    }
    else {
      $this->addResult(FALSE, "{$this->datastream} datastream contains no Ogg pages.");
    }
  }

  /**
   * Asserts that the datastream contains Theora-encoded video.
   *
   * Passes when the string 'theora' occurs anywhere in the content.
   */
  protected function assertTheoraVideo() {
    $theora_markers = substr_count($this->datastreamContent, 'theora');
    if ($theora_markers !== 0) {
      $this->addResult(TRUE, "{$this->datastream} datastream asserts that it contains Theora-encoded video data.");
    }
    else {
      $this->addResult(FALSE, "{$this->datastream} datastream contains no marker indicating the presence of Theora-encoded video data.");
    }
  }

  /**
   * Asserts that the datastream contains Vorbis-encoded audio.
   *
   * Passes when the string 'vorbis' occurs anywhere in the content.
   */
  protected function assertVorbisAudio() {
    $vorbis_markers = substr_count($this->datastreamContent, 'vorbis');
    if ($vorbis_markers !== 0) {
      $this->addResult(TRUE, "{$this->datastream} datastream asserts that it contains Vorbis-encoded audio data");
    }
    else {
      $this->addResult(FALSE, "{$this->datastream} datastream contains no marker indicating the presence of Vorbis-encoded audio data.");
    }
  }

}
|
|
|
/**
 * Validates an .mkv datastream with two lightweight checks.
 *
 * The format is loose, so only two things are verified: the first four bytes
 * of the content must be 0x1A45DFA3 (compared in hex because they are not
 * printable characters), and the string 'matroska' must occur exactly once
 * in the content.
 */
class MKVDatastreamValidator extends DatastreamValidator {

  /**
   * Asserts that the datastream is an EBML-format file.
   *
   * Only the leading four bytes are converted to hex for the comparison.
   */
  protected function assertEBMLFormat() {
    $leading_bytes = bin2hex(substr($this->datastreamContent, 0, 4));
    if ($leading_bytes == '1a45dfa3') {
      $this->addResult(TRUE, "{$this->datastream} datastream asserts that it is an EBML-formatted file");
    }
    else {
      $this->addResult(FALSE, "{$this->datastream} datastream is not an EBML-formatted file.");
    }
  }

  /**
   * Asserts that the datastream contains a matroska marker.
   *
   * Passes only when 'matroska' occurs exactly once; zero or several
   * occurrences are both reported as a fail.
   */
  protected function assertMatroskaMarker() {
    $marker_count = substr_count($this->datastreamContent, 'matroska');
    if ($marker_count == 1) {
      $this->addResult(TRUE, "{$this->datastream} datastream asserts that its EBML DocType is Matroska");
    }
    else {
      $this->addResult(FALSE, "{$this->datastream} datastream does not contain a Matroska EBML DocType marker.");
    }
  }

}
|
|
|