Browse Source

datastream validation objectively rather than procedurally

pull/463/head
qadan 11 years ago
parent
commit
9a7acddf2a
  1. 834
      tests/datastream_validators.inc
  2. 63
      tests/islandora_web_test_case.inc
  3. 10
      tests/scripts/travis_setup.sh

834
tests/datastream_validators.inc

@ -1,59 +1,9 @@
<?php <?php
/** /**
* @file * @file
* Assertions for various datastream types. * Classes and functions for datastream validation.
*
* For a datastream validator to work correctly with IslandoraWebTestCase::
* validateDatastreams(), it needs to return an array of results, each entry of
* which contains two values: first, TRUE or FALSE, depending on whether or not
* that particular result passed or failed, and second, a string containing a
* message to accompany the result.
*
* It also should contain three parameters, all of which may use any label, but
* must be organized in the following order:
* $object - an object that the datastream can be loaded from.
* $datastream - a DSID to pull from $object.
* $optional_params - a parameter for any data the function requires.
*
* When IslandoraWebTestCase::validateDatastreams() is called, it is passed an
* array of datastreams, each of which is itself an array containing the DSID of
* the datastream, the middle of the function name (image, pdf, tiff, etc.), and
* (optional) data to be passed to that third parameter.
*/ */
/**
* A function to pass assertions to and receive results from.
*
* @param bool $assertion
* The if/then statement to validate against.
* @param array $results
* An array of results to append the generated result to.
* @param string $pass
* A message to return if the assertion turns up true.
* @param string $fail
* An optional message to return if the assertion turns up false.
* If left empty, the $pass message will be returned.
*
* @return array
* A result that can be made useful in the validation functions below.
*/
function islandora_assert_valid($assertion, $results, $pass, $fail = NULL) {
  // Normalise the assertion to a strict boolean outcome flag.
  $passed = $assertion ? TRUE : FALSE;
  // A failing assertion only uses $fail when the caller supplied one;
  // otherwise the $pass message doubles as the failure message.
  $message = (!$passed && isset($fail)) ? $fail : $pass;
  array_push($results, array($passed, $message));
  return $results;
}
/** /**
* Converts a hexidecimal string to an integer. * Converts a hexidecimal string to an integer.
* *
@ -65,6 +15,9 @@ function islandora_assert_valid($assertion, $results, $pass, $fail = NULL) {
* @param string $hex * @param string $hex
* The hexidecimal string. * The hexidecimal string.
* *
* @throws Exception
* if something horrible happens during the actual conversion.
*
* @return bool|int * @return bool|int
* FALSE on failure, or the integer on success. * FALSE on failure, or the integer on success.
*/ */
@ -92,149 +45,370 @@ function islandora_hex2int($hex) {
} }
/** /**
* Asserts that an object's given datastreams are common-type image files. * Abstraction for datastream validators.
*
* Classes extended from DatastreamValidator don't require much to be useful.
* They accept an IslandoraFedoraObject and a DSID to perform assertions on;
* all you have to do is place a series of functions inside the extended class
* using the naming convention assertThing(); each of these functions should
* ideally assert one thing and one thing only (for simplicity's sake), and
* should generate either a pass or a fail message by calling addPass() or
* addFail(). That's it, really; they don't have to return any values, as
* addPass() and addFail() just add messages to the overall pass/fail array.
*
* As long as you use those rules and naming conventions, all the magic is done
* when you instantiate the new datastream validator object.
*
* The IslandoraWebTestCase::assertDatastreams() function accepts paired DSIDs
* and datastream validator names in order to do the rest of the work. It grabs
* all the test results using getPasses() and getFails() and transforms those
* into something that DrupalWebTestCase can use.
*/
abstract class DatastreamValidator {
/**
* This class is skipped when looking for the source of an assertion.
* *
* Uses PHPGD to run the assertion check. This means that only certain kinds * @see DrupalWebTestCase::skipClasses
* of image files can be checked. Please check the documentation for the PHPGD */
* imagecreatefromstring() function to determine what filetypes are valid. protected $skipClasses = array(__CLASS__ => TRUE);
/**
* The IslandoraFedoraObject containing the datastream to test.
*
* @var IslandoraFedoraObject
*/
public $object;
/**
* The DSID of the datastream to test.
*
* @var string
*/
public $datastream;
/**
* The content of the datastream.
*
* @var string[]
*/
public $datastreamContent;
/**
* An associative array of messages returned from passed tests, and callers.
*
* This should only be added to using $this->addPass(), so that the caller can
* be appropriately determined.
*
* @var array
*/
public $passes = array();
/**
* An associative array of messages returned from failed tests, and callers.
* *
* @param AbstractObject $object * This should only be added to using $this->addFail(), so that the caller can
* The PID of the object. * be appropriately determined.
*
* @var array
*/
public $fails = array();
/**
* An array of additional required parameters.
*
* @var array
*/
public $params = array();
/**
* Constructs a DatastreamValidator.
*
* @param IslandoraFedoraObject $object
* The object to grab the datastream from.
* @param string $datastream * @param string $datastream
* A DSID to check that corresponds to a PHPGD-valid image datastream. * The DSID of the datastream itself.
* @param array $params
* An extra array of parameters the validator might need.
*/
public function __construct($object, $datastream, array $params = array()) {
  // Record the caller-supplied context before anything reads it.
  $this->params = $params;
  $this->datastream = $datastream;
  $this->object = $object;
  // Pull the datastream content once so every assertion works from the same
  // copy.
  $content = $object[$datastream]->content;
  $this->datastreamContent = $content;
  // Instantiation is what triggers validation: run every assert* method now.
  $this->runValidators();
}
/**
* Helper function to run all the validators in a class.
* *
* @return array * On DatastreamValidator::__construct(), this looks for any functions
* A series of TRUE(pass)/FALSE(fail) results paired with result messages. * within the class beginning in "assert" and runs them. In all current cases
* (and realistically in all future cases), this adds one or more passes or
* fails to $this->passes and/or $this->fails.
*/ */
function islandora_validate_image_datastream($object, $datastream) { public function runValidators() {
$datastream_string = $object[$datastream]->content; $methods = get_class_methods($this);
$results = array(); foreach ($methods as $method) {
$pass = "Image datastream {$datastream} is valid."; if (substr($method, 0, 6) === 'assert') {
$fail = "Image datastream {$datastream} is either invalid or corrupt."; $this->$method();
$results = islandora_assert_valid(imagecreatefromstring($datastream_string), $results, $pass, $fail); }
return $results; }
} }
/** /**
* Asserts the validity of any .tif/.tiff datastream. * Returns an array of pass messages.
*
* @return string[]
* The pass messages.
*/
public function getPasses() {
  // Hand back the messages collected so far by addPass().
  $collected = $this->passes;
  return $collected;
}
/**
* Returns an array of fail messages.
* *
* Does not use the islandora_assert_valid() function, as this is not a simple * @return string[]
* true/false. * The fail messages.
*/
public function getFails() {
  // Hand back the messages collected so far by addFail().
  $collected = $this->fails;
  return $collected;
}
/**
* Adds a pass to $this->passes.
* *
* @param AbstractObject $object * Passes are an associative array of messages and callers. Callers should be
* The PID of the object. * obtained using $this->getAssertionCall().
* @param string $datastream *
* A DSID to check that corresponds to a .tif/.tiff datastream. * @param string $message
* The message to use.
*/
public function addPass($message) {
  // Passes are keyed by message; the value is the caller that produced it.
  $caller = $this->getAssertionCall();
  $this->passes[$message] = $caller;
}
/**
* Adds a fail to $this->fails.
*
* Fails are an associative array of messages and callers. Callers should be
* obtained using $this->getAssertionCall().
*
* @param string $message
* The message to use.
*/
public function addFail($message) {
  // Fails are keyed by message; the value is the caller that produced it.
  $caller = $this->getAssertionCall();
  $this->fails[$message] = $caller;
}
/**
* Cycles through the backtrace until the first assertion method is found.
*
* This is a manipulated version of DrupalWebTestCase::getAssertionCall().
* We use it here so that we can pass back assertion calls from
* DatastreamValidator assertions instead of less useful TestCase functions.
* *
* @return array * @return array
* A series of TRUE(pass)/FALSE(fail) results paired with result messages. * Array representing the true caller.
*/
protected function getAssertionCall() {
$backtrace = debug_backtrace();
// While the current caller's function does NOT start with 'assert', and
// another frame exists after it, keep poppin' em off. The loop therefore
// stops with an assert* method at index 1, or when the trace runs out.
while (substr($backtrace[1]['function'], 0, 6) !== 'assert' && isset($backtrace[2])) {
array_shift($backtrace);
}
// The trimmed trace is handed to Drupal's helper to build the caller array.
return _drupal_get_last_caller($backtrace);
}
}
/**
* Asserts that an object's given datastreams are common-type image files.
*
* Uses PHPGD to run the assertion check. This means that only certain kinds
* of image files can be checked. Please check the documentation for the PHPGD
* imagecreatefromstring() function to determine what filetypes are valid.
*/
class ImageDatastreamValidator extends DatastreamValidator {

  /**
   * Checks whether PHPGD can build an image from the datastream content.
   *
   * Records a fail when imagecreatefromstring() returns FALSE, and a pass
   * otherwise.
   */
  protected function assertImageGeneration() {
    $image = imagecreatefromstring($this->datastreamContent);
    if ($image === FALSE) {
      $this->addFail("Image datastream {$this->datastream} is either invalid or corrupt.");
      return;
    }
    $this->addPass("Image datastream {$this->datastream} is valid.");
  }

}
/**
* Asserts the validity of any .tif/.tiff datastream.
*/ */
function islandora_validate_tiff_datastream($object, $datastream) { class TIFFDatastreamValidator extends DatastreamValidator {
$datastream_string = $object[$datastream]->content;
$datastream_header_hex = substr(bin2hex($datastream_string), 0, 8); /**
$results = array(); * Asserts that the TIFF contains an appropriate header.
*/
public function assertTIFFHeaderHex() {
$datastream_header_hex = self::getTIFFHeaderHex();
if ($datastream_header_hex == "49492a00") { if ($datastream_header_hex == "49492a00") {
// In this case, the ingested TIFF is designated as using the "Intel // In this case, the ingested TIFF is designated as using the "Intel
// byte-order" (e.g. little-endian) by starting with the characters "II" // byte-order" (i.e. little-endian) by starting with the characters "II"
// (repeated so that byte order does not yet need to be significant). // (repeated so that byte order does not yet need to be significant).
// The number that follows is '42' in little-endian hex, a number of // The number that follows is '42' in little-endian hex, a number of
// 'deep philosophical significance' to the TIFF format creators. // 'deep philosophical significance' to the TIFF format creators.
array_push($results, array(TRUE, "{$datastream} datastream asserts that it is a valid Intel-byte-orderded TIF/TIFF file.")); $this->addPass("{$this->datastream} datastream asserts that it is a valid Intel-byte-orderded TIF/TIFF file.");
} }
elseif ($datastream_header_hex == "4d4d002a") { elseif ($datastream_header_hex == "4d4d002a") {
// In this case, the ingested TIFF is designated as using the "Motorola // In this case, the ingested TIFF is designated as using the "Motorola
// byte-order" (e.g. big-endian) by starting with the characters "MM" // byte-order" (i.e. big-endian) by starting with the characters "MM"
// instead. 42 follows once again, this time in big-endian hex. // instead. 42 follows once again, this time in big-endian hex.
array_push($results, array(TRUE, "{$datastream} datastream asserts that it is a valid Motorola-byte-ordered TIF/TIFF file.")); $this->addPass("{$this->datastream} datastream asserts that it is a valid Motorola-byte-ordered TIF/TIFF file.");
} }
else { else {
array_push($results, array(FALSE, "{$datastream} datastream does not assert that it is a valid TIF/TIFF file.")); $this->addFail("{$this->datastream} datastream does not assert that it is a valid TIF/TIFF file.");
} }
return $results;
} }
/** /**
* Asserts the validity of any .jp2 datastream. * Grabs the first 8 characters from the TIFF datastream's hex.
*
* @param AbstractObject $object
* The PID of the object.
* @param string $datastream
* A DSID to check that corresponds to a .jp2 datastream.
* *
* @return array * @return string
* A series of TRUE(pass)/FALSE(fail) results paired with result messages. * The ... thing I just wrote up there.
*/ */
function islandora_validate_jp2_datastream($object, $datastream) { protected function getTIFFHeaderHex() {
$datastream_hex = bin2hex($object[$datastream]->content); return substr(bin2hex($this->datastreamContent), 0, 8);
$results = array(); }
// JP2 files begin with an offset header at the second 32-bit integer,
// 0x6A502020. This header is in all .jp2s, and we check for it here.
$pass = "{$datastream} datastream begins correctly with the appropriate .jp2 header.";
$fail = "{$datastream} datastream does not begin with the appropriate .jp2 header.";
$results = islandora_assert_valid(substr($datastream_hex, 8, 8) == '6a502020', $results, $pass, $fail);
// JP2 files have their codestream capped with a marker, 0xFFD9. We're
// just checking for it here to see if the .jp2 encoder finished okay.
$pass = "{$datastream} datastream ends correctly with the appropriate .jp2 marker.";
$fail = "{$datastream} datastream does not end with a .jp2 marker; derivative generation was likely interrupted.";
$results = islandora_assert_valid(substr($datastream_hex, strlen($datastream_hex) - 4, 4) == 'ffd9', $results, $pass, $fail);
return $results;
} }
/** /**
* Asserts the validity of any .pdf datastream. * Asserts the validity of a JP2 datastream.
*/
class JP2DatastreamValidator extends DatastreamValidator {
/**
* Asserts the hex values at the head of the JP2 file.
* *
* @param AbstractObject $object * JP2 files begin with an offset header at the second 32-bit integer,
* The PID of the object. * 0x6A502020. This header is in all .jp2s, and we check for it here.
* @param string $datastream */
* A DSID to check that corresponds to a .pdf datastream. protected function assertJP2Header() {
if (substr(bin2hex($this->datastreamContent), 8, 8) == '6a502020') {
$this->addPass("Datastream {$this->datastream} contains the appropriate JP2 header.");
}
else {
$this->addFail("Datastream {$this->datastream} does not contain the appropriate JP2 header.");
}
}
/**
* Asserts the marker at the end of the JP2 file.
* *
* @return array * JP2 files have their codestream capped with a marker, 0xFFD9. We're just
* A series of TRUE(pass)/FALSE(fail) results paired with result messages. * checking for it here to see if the .jp2 encoder finished okay.
*/ */
function islandora_validate_pdf_datastream($object, $datastream) { protected function assertJP2Marker() {
$pdf = $object[$datastream]->content; if (substr(bin2hex($this->datastreamContent), strlen(bin2hex($this->datastreamContent)) - 4, 4) == 'ffd9') {
$pdf_version = substr($pdf, 5, 3); $this->addPass("Datastream {$this->datastream} contains the appropriate JP2 ending marker.");
$results = array(); }
$pass = "{$datastream} datastream asserts that it is a valid PDF file using PDF version {$pdf_version}"; else {
$fail = "{$datastream} datastream binary header appears to be corrupt and missing a valid PDF signature."; $this->addFail("Datastream {$this->datastream} does not contain the appropriate JP2 ending marker. If this is the only JP2 validator that failed, it is likely that derivative generation was interrupted.");
$results = islandora_assert_valid(substr($pdf, 0, 5) == '%PDF-', $results, $pass, $fail); }
}
}
$pdf_streams = substr_count(bin2hex($pdf), '0a73747265616d0a'); /**
$pass = "{$datastream} datastream reports the existence of {$pdf_streams} PDF streams. Note that an extremely low number could still indicate corruption."; * Asserts the validity of a PDF datastream.
$fail = "{$datastream} datastream contains zero PDF streams, and is likely not a PDF file."; */
$results = islandora_assert_valid($pdf_streams, $results, $pass, $fail); class PDFDatastreamValidator extends DatastreamValidator {
$pass = "{$datastream} datastream reports the existence of the closing 'EOF' tag required at the end of PDFs"; /**
$fail = "{$datastream} datastream does not contain the closing 'EOF' tag. If this is the only PDF validation that failed, it is likely that derivative generation was interrupted."; * Validates the PDF signature.
$results = islandora_assert_valid(strpos(bin2hex($pdf), '0a2525454f460a'), $results, $pass, $fail); */
return $results; protected function assertPDFSignature() {
if (substr($this->datastreamContent, 0, 5) == '%PDF-') {
$pdf_version = substr($this->datastreamContent, 5, 3);
$this->addPass("{$this->datastream} datastream asserts that it is a valid PDF file using PDF version {$pdf_version}");
}
else {
$this->addFail("{$this->datastream} datastream binary header appears to be corrupt and missing a valid PDF signature.");
}
} }
/** /**
* Asserts that a string of text shows up inside a datastream. * Counts the number of signatures in this PDF file and asserts there are any.
*/
protected function assertPDFStreamCount() {
  // Count occurrences of the stream keyword on its own line ("\nstream\n").
  // The search runs on the raw bytes rather than on a hex dump: in a hex
  // string the pattern '0a73747265616d0a' can also match at an odd nibble
  // offset, i.e. straddling byte boundaries, which would count "streams" that
  // are not actually present in the file.
  $pdf_stream_count = substr_count($this->datastreamContent, "\nstream\n");
  if ($pdf_stream_count === 0) {
    $this->addFail("{$this->datastream} datastream contains zero PDF streams, and is likely not a PDF file.");
    return;
  }
  $this->addPass("{$this->datastream} datastream reports the existence of {$pdf_stream_count} PDF streams. Note that an extremely low number could still indicate corruption.");
}
/**
* Validates the PDF closing tag.
* *
* @param AbstractObject $object * @return bool
* The PID of the object. * TRUE if it was present; FALSE otherwise.
* @param string $datastream */
* A DSID to check that corresponds to a datastream containing text. protected function assertPDFClosingTag() {
* @param array $text if (strpos(bin2hex($this->datastreamContent), '0a2525454f460a')) {
* An array of strings/the number of times it should appear in the datastream. $this->addPass("{$this->datastream} datastream reports the existence of the closing 'EOF' tag required at the end of PDFs");
}
else {
$this->addFail("{$this->datastream} datastream does not contain the closing 'EOF' tag. If this is the only PDF validation that failed, it is likely that derivative generation was interrupted.");
}
}
}
/**
* Validates the number of times a string occurs in a datastream.
* *
* @return array * Requires $this->params to be set to an array containing two keys - the first
* A series of TRUE(pass)/FALSE(fail) results paired with result messages. * is the string we're looking to find in the datastream, and the second is an
* integer representing the number of times it should appear in the datastream.
*/
class TextDatastreamValidator extends DatastreamValidator {
/**
* Asserts that the string given appears the correct number of times.
*/ */
function islandora_validate_text_datastream($object, $datastream, array $text) { protected function assertTextStringCount() {
$results = array(); if (!isset($this->params[1])) {
$content = $object[$datastream]->content; $this->addFail("TextDatastreamValidator cannot be instantiated without two keys in the 'params' variable.");
$string_count = substr_count($content, $text[0]); return;
$pass = "{$datastream} datastream contains the word(s) '{$text[0]}' repeated {$string_count} time(s) (expected: {$text[1]})."; }
$fail = "{$datastream} datastream contains the word(s) '{$text[0]}' repeated {$string_count} time(s) (expected: {$text[1]})."; $string_count = self::getTextStringCount();
$results = islandora_assert_valid($string_count == $text[1], $results, $pass, $fail); $expected = $this->params[1];
return $results; $function = $string_count === $expected ? 'addPass' : 'addFail';
$this->$function("{$this->datastream} datastream contains the word(s) '{$this->params[0]}' repeated {$string_count} time(s) (expected: {$expected}).");
} }
/** /**
* Asserts the validity of any .wav datastraeam. * The number of times key [0] in $this->params appears in the datastream.
*
* @return int
* That count I just mentioned up there.
*/
protected function getTextStringCount() {
  // Key [0] of the params array is the string being searched for.
  $haystack = $this->datastreamContent;
  $needle = $this->params[0];
  return substr_count($haystack, $needle);
}
}
/**
* Asserts the validity of a WAV datastream.
* *
* WAV files contain a rigidly detailed header that contains all sorts of fun * WAV files contain a rigidly detailed header that contains all sorts of fun
* information we can use to validate things against other things. So, we check * information we can use to validate things against other things. So, we check
@ -242,54 +416,140 @@ function islandora_validate_text_datastream($object, $datastream, array $text) {
* see if certain values are at their expected byte offset. We also compare * see if certain values are at their expected byte offset. We also compare
* declared chunk sizes against actual sizes. If any of these are off, WAV * declared chunk sizes against actual sizes. If any of these are off, WAV
* players will fail to function. * players will fail to function.
*/
class WAVDatastreamValidator extends DatastreamValidator {
/**
* We need a special constructor here to get the hex datastream content.
* *
* @param AbstractObject $object * @param IslandoraFedoraObject $object
* The PID of the object. * The object to grab the datastream from.
* @param string $datastream * @param string $datastream
* A DSID to check that corresponds to a datastream generated via OCR or HOCR. * The DSID of the datastream itself.
* * @param array $params
* @return array * An extra array of parameters the validator might need.
* A series of TRUE(pass)/FALSE(fail) results paired with result messages.
*/ */
function islandora_validate_wav_datastream($object, $datastream) { public function __construct($object, $datastream, array $params = array()) {
$results = array(); $this->object = $object;
$wav = bin2hex($object['OBJ']->content); $this->datastream = $datastream;
$wav_subchunk2size = islandora_hex2int(substr($wav, 80, 8)); $this->params = $params;
$wav_samplerate = islandora_hex2int(substr($wav, 48, 8)); $this->datastreamContent = bin2hex($object[$datastream]->content);
$wav_numchannels = islandora_hex2int(substr($wav, 44, 4)); $this->runValidators();
$wav_bytespersample = islandora_hex2int(substr($wav, 68, 4)) / 8; }
$wav_numsamples = strlen(substr($wav, 88)) / $wav_numchannels / $wav_bytespersample / 2;
$magic_number = str_split(substr($wav, 0, 24), 8);
$pass = "Header of the {$datastream} datastream contains correct file signature"; /**
$fail = "Header of the {$datastream} datastream contains corrupt file signature"; * Asserts that the datastream contains a valid WAV signature.
$results = islandora_assert_valid($magic_number[0] = '52494646' && $magic_number[2] = '57415645', $results, $pass, $fail); */
protected function assertWAVSignature() {
  // The datastream content is a hex string here, so every byte occupies two
  // characters: hex offset 0 holds bytes 0-3 and hex offset 16 holds bytes
  // 8-11. '52494646' is ASCII "RIFF" and '57415645' is ASCII "WAVE".
  $riff_tag = substr($this->datastreamContent, 0, 8);
  $wave_tag = substr($this->datastreamContent, 16, 8);
  // Strict comparison is required: the previous condition used '=' and so
  // assigned instead of compared, which made this assertion always pass.
  if ($riff_tag === '52494646' && $wave_tag === '57415645') {
    $this->addPass("Header of the {$this->datastream} datastream contains a valid file signature.");
  }
  else {
    $this->addFail("Header of the {$this->datastream} datastream contains corrupt file signature.");
  }
}
$pass = "{$datastream} datastream chunksize in WAV header is correct"; /**
$fail = "{$datastream} datastream chunksize in WAV header does not match actual chunksize."; * Asserts that the chunksize in the header is correct.
$results = islandora_assert_valid(islandora_hex2int(substr($wav, 8, 8)) === 36 + $wav_subchunk2size, $results, $pass, $fail); */
protected function assertWAVChunkSize() {
  // Chunk size declared in the header (hex offset 8, eight hex characters).
  $declared_size = islandora_hex2int(substr($this->datastreamContent, 8, 8));
  // The header is compared against 36 plus the declared 'data' subchunk size.
  $expected_size = 36 + self::getDataSubChunkSize();
  if ($declared_size !== $expected_size) {
    $this->addFail("{$this->datastream} datastream chunksize in WAV header does not match actual chunksize.");
    return;
  }
  $this->addPass("{$this->datastream} datastream chunksize in WAV header is correct");
}
$pass = "{$datastream} datastream contains a 'fmt' subchunk."; /**
$fail = "{$datastream} datastream is missing the required 'fmt' subchunk."; * Asserts that the datastream contains a 'fmt' subchunk.
$results = islandora_assert_valid(substr($wav, 24, 8) === '666d7420', $results, $pass, $fail); */
protected function assertWAVFmtSubChunk() {
  // '666d7420' is the hex form of the ASCII tag "fmt " (with trailing space).
  $subchunk_tag = substr($this->datastreamContent, 24, 8);
  if ($subchunk_tag !== '666d7420') {
    $this->addFail("{$this->datastream} datastream is missing the required 'fmt' subchunk.");
    return;
  }
  $this->addPass("{$this->datastream} datastream contains a 'fmt' subchunk.");
}
$pass = "{$datastream} datastream byterate in the WAV header is correct."; /**
$fail = "{$datastream} datastream byterate in the WAV header does not match actual calculated byterate."; * Asserts that the byterate reported by the WAV header is valid.
$results = islandora_assert_valid(islandora_hex2int(substr($wav, 56, 8)) === $wav_samplerate * $wav_numchannels * $wav_bytespersample, $results, $pass, $fail); */
protected function assertWAVByteRate() {
  // Sample rate and byte rate are both read straight from the hex header.
  $sample_rate = islandora_hex2int(substr($this->datastreamContent, 48, 8));
  $declared_rate = islandora_hex2int(substr($this->datastreamContent, 56, 8));
  // The declared byte rate must equal sample rate x channels x bytes/sample.
  $computed_rate = $sample_rate * self::getNumChannels() * self::getBytesPerSample();
  if ($declared_rate !== $computed_rate) {
    $this->addFail("{$this->datastream} datastream byterate in the WAV header does not match actual calculated byterate.");
    return;
  }
  $this->addPass("{$this->datastream} datastream byterate in the WAV header is correct.");
}
$pass = "{$datastream} datastream block alignment is set correctly."; /**
$fail = "{$datastream} datastream block alignment is off."; * Asserts that the block alignment is correct.
$results = islandora_assert_valid(islandora_hex2int(substr($wav, 64, 4)) === $wav_numchannels * $wav_bytespersample, $results, $pass, $fail); */
protected function assertWAVBlockAlignment() {
  // Block alignment declared in the header (hex offset 64, four characters).
  $declared_alignment = islandora_hex2int(substr($this->datastreamContent, 64, 4));
  // It must equal channels x bytes per sample.
  $computed_alignment = self::getNumChannels() * self::getBytesPerSample();
  if ($declared_alignment !== $computed_alignment) {
    $this->addFail("{$this->datastream} datastream block alignment is off.");
    return;
  }
  $this->addPass("{$this->datastream} datastream block alignment is set correctly.");
}
$pass = "{$datastream} datastream contains 'data' subchunk."; /**
$fail = "{$datastream} datastream is missing the 'data' subchunk."; * Asserts the existence of a 'data' subchunk.
$results = islandora_assert_valid(substr($wav, 72, 8) === '64617461', $results, $pass, $fail); *
* Also asserts that the subchunk size is correct.
*/
protected function assertWAVDataSubChunk() {
  // '64617461' is the hex form of the ASCII tag "data".
  if (substr($this->datastreamContent, 72, 8) !== '64617461') {
    $this->addFail("{$this->datastream} datastream is missing the 'data' subchunk.");
    return;
  }
  $this->addPass("{$this->datastream} datastream contains 'data' subchunk.");

  // Both divisors come from the file header. A corrupt header can report zero
  // (or yield FALSE from the hex conversion), and dividing by that would abort
  // the validator instead of recording a failure, so treat it as a size
  // mismatch up front.
  $num_channels = self::getNumChannels();
  $bytes_per_sample = self::getBytesPerSample();
  if (!$num_channels || !$bytes_per_sample) {
    $this->addFail("{$this->datastream} datastream 'data' chunk is sized incorrectly.");
    return;
  }

  // Everything after hex offset 88 is sample data; two hex characters make
  // one byte, hence the final division by 2.
  $wav_numsamples = strlen(substr($this->datastreamContent, 88)) / $num_channels / $bytes_per_sample / 2;
  if (self::getDataSubChunkSize() === $wav_numsamples * $num_channels * $bytes_per_sample) {
    $this->addPass("{$this->datastream} datastream 'data' chunk is the correct size.");
  }
  else {
    $this->addFail("{$this->datastream} datastream 'data' chunk is sized incorrectly.");
  }
}
$pass = "{$datastream} datastream 'data' chunk is the correct size."; /**
$fail = "{$datastream} datastream 'data' chunk is sized incorrectly."; * Gets the number of channels reported by the WAV header.
$results = islandora_assert_valid($wav_subchunk2size === $wav_numsamples * $wav_numchannels * $wav_bytespersample, $results, $pass, $fail); *
* @return int
* The number of channels reported by the datastream header.
*/
protected function getNumChannels() {
  // Channel count field: four hex characters starting at hex offset 44.
  $channels_hex = substr($this->datastreamContent, 44, 4);
  return islandora_hex2int($channels_hex);
}
return $results; /**
* Gets the reported number of bytes per sample.
*
* @return int
* The number of bytes per sample reported by the datastream header.
*/
protected function getBytesPerSample() {
  // The header field at hex offset 68 is divided by 8 to convert bits to
  // bytes.
  $bits_per_sample = islandora_hex2int(substr($this->datastreamContent, 68, 4));
  return $bits_per_sample / 8;
}
/**
* Gets the size of the 'data' subchunk.
*
* @return int
* The size of the 'data' subchunk.
*/
protected function getDataSubChunkSize() {
  // 'data' subchunk size field: eight hex characters at hex offset 80.
  $size_hex = substr($this->datastreamContent, 80, 8);
  return islandora_hex2int($size_hex);
}
} }
/** /**
@ -300,24 +560,34 @@ function islandora_validate_wav_datastream($object, $datastream) {
* 'Xing', it is flagged as VBR, and we can do an in-depth check on each of the * 'Xing', it is flagged as VBR, and we can do an in-depth check on each of the
* VBR settings. Otherwise, we look for the basic MP3 signature 'fffa' or 'fffb' * VBR settings. Otherwise, we look for the basic MP3 signature 'fffa' or 'fffb'
* at the start of the binary. * at the start of the binary.
*/
class MP3DatastreamValidator extends DatastreamValidator {
/**
* Asserts the validity of the MP3.
* *
* @param AbstractObject $object * The MP3 file format is a bit of a mess; the entire makeup of the file
* The PID of the object. * depends on whether it uses variable bit rate or static bit rate. So, I'm
* @param string $datastream * breaking my own rules here and using a single assert function so that I
* A DSID of a datastream corresponding to an mp3 file. * can handle the weird logic.
*
* @return array
* A series of TRUE(pass)/FALSE(fail) results paired with result messages.
*/ */
function islandora_validate_mp3_datastream($object, $datastream) { protected function assertValidMP3() {
$results = array(); $this->datastreamContent = bin2hex($this->datastreamContent);
$mp3 = bin2hex($object[$datastream]->content);
$mp3_size = strlen($mp3) / 2;
// Looks to see if VBR was set properly by LAME. If so, MATH TIME! // If it's not a VBR MP3, we don't have to check much, so let's get that
if (strpos($mp3, '58696e67')) { // out of the way first before we go doing a bunch of potentially pointless
$mp3_vbrheader = substr($mp3, strpos($mp3, '58696e67'), 240); // math. Check to see if the VBR flag (58696e67) isn't there.
if (strpos($this->datastreamContent, '58696e67') == FALSE && substr($this->datastreamContent, 0, 4) == 'fffa') {
$this->addPass("{$this->datastream} datastream is encoded as a valid MPEG-1 Layer 3 file with CRC protection");
return;
}
if (strpos($this->datastreamContent, '58696e67') == FALSE && substr($this->datastreamContent, 0, 4) == 'fffb') {
$this->addPass("{$this->datastream} datastream is encoded as a valid unprotected MPEG-1 Layer 3 file");
return;
}
// And what if the flag IS set?
if (strpos($this->datastreamContent, '58696e67')) {
// Check the field flags. VBR-formatted MP3 files contain a 32-bit // Check the field flags. VBR-formatted MP3 files contain a 32-bit
// integer (stored as $mp3_flag_value) that is a combination of four // integer (stored as $mp3_flag_value) that is a combination of four
// bits, each one indicating the on-off status of a VBR setting, via // bits, each one indicating the on-off status of a VBR setting, via
@ -326,6 +596,7 @@ function islandora_validate_mp3_datastream($object, $datastream) {
// is greater than or equal to bit_value, that bit is turned on" to find // is greater than or equal to bit_value, that bit is turned on" to find
// the status of each bit, so we know whether to offset the rest. // the status of each bit, so we know whether to offset the rest.
$mp3_field_offset = array(0, 0, 0); $mp3_field_offset = array(0, 0, 0);
$mp3_vbrheader = substr($this->datastreamContent, strpos($this->datastreamContent, '58696e67'), 240);
$mp3_flag_value = hexdec(substr($mp3_vbrheader, 8, 8)); $mp3_flag_value = hexdec(substr($mp3_vbrheader, 8, 8));
// We can't use the first flag, but we still need to offset the rest. // We can't use the first flag, but we still need to offset the rest.
@ -338,14 +609,18 @@ function islandora_validate_mp3_datastream($object, $datastream) {
// The second flag leads us to filesize data, which we can verify. // The second flag leads us to filesize data, which we can verify.
if (($mp3_flag_value + 4) % 4 > 1) { if (($mp3_flag_value + 4) % 4 > 1) {
$mp3_field_bytes = hexdec(substr($mp3_vbrheader, $mp3_field_offset[0] + 16, 8)); $mp3_field_bytes = hexdec(substr($mp3_vbrheader, $mp3_field_offset[0] + 16, 8));
$pass = "{$datastream} datastream reported filesize of {$mp3_size} bytes matches size field value of {$mp3_field_bytes}"; $mp3_size = strlen($this->datastreamContent) / 2;
$fail = "{$datastream} datastream reported filesize of {$mp3_size} bytes does not match size field value of {$mp3_field_bytes}"; if ($mp3_size == $mp3_field_bytes) {
$results = islandora_assert_valid($mp3_size == $mp3_field_bytes, $results, $pass, $fail); $this->addPass("{$this->datastream} datastream reported filesize of {$mp3_size} bytes matches size field value of {$mp3_field_bytes}");
}
else {
$this->addFail("{$this->datastream} datastream reported filesize of {$mp3_size} bytes does not match size field value of {$mp3_field_bytes}");
}
$mp3_field_offset[1] += 8; $mp3_field_offset[1] += 8;
$mp3_field_offset[2] += 8; $mp3_field_offset[2] += 8;
} }
// We can't use the third flag for anything either. // We can't use the third flag for anything, but we still have to offset.
if (($mp3_flag_value + 8) % 8 > 3) { if (($mp3_flag_value + 8) % 8 > 3) {
$mp3_field_offset[2] += 200; $mp3_field_offset[2] += 200;
} }
@ -353,23 +628,21 @@ function islandora_validate_mp3_datastream($object, $datastream) {
// The fourth flag leads us to VBR quality data, which we can validate. // The fourth flag leads us to VBR quality data, which we can validate.
if ($mp3_flag_value > 7) { if ($mp3_flag_value > 7) {
$mp3_field_quality = hexdec(substr($mp3_vbrheader, $mp3_field_offset[2] + 16, 8)); $mp3_field_quality = hexdec(substr($mp3_vbrheader, $mp3_field_offset[2] + 16, 8));
$pass = "{$datastream} datastream reports valid VBR quality of {$mp3_field_quality} (expected: between 0-100)"; if ($mp3_field_quality <= 100 && $mp3_field_quality >= 0) {
$fail = "{$datastream} datastream reports invalid VBR quality of {$mp3_field_quality} (expected: between 0-100)"; $this->addPass("{$this->datastream} datastream reports valid VBR quality of {$mp3_field_quality} (expected: between 0-100)");
$results = islandora_assert_valid($mp3_field_quality <= 100 && $mp3_field_quality >= 0, $results, $pass, $fail);
} }
else {
$this->addFail("{$this->datastream} datastream reports invalid VBR quality of {$mp3_field_quality} (expected: between 0-100)");
} }
// Otherwise, just forget everything and check the file signature.
elseif (strpos($mp3, '58696e67') == FALSE && substr($mp3, 0, 4) == 'fffa') {
$results = array(array(TRUE, "{$datastream} datastream is encoded as a valid MPEG-1 Layer 3 file with CRC protection"));
} }
elseif (strpos($mp3, '58696e67') == FALSE && substr($mp3, 0, 4) == 'fffb') {
$results = array(array(TRUE, "{$datastream} datastream is encoded as a valid unprotected MPEG-1 Layer 3 file"));
} }
// If none of that works out, fail.
else { else {
$results = array(array(FALSE, "{$datastream} datastream is corrupt and does not identify as a valid MP3.")); $this->addFail("{$this->datastream} datastream is corrupt and does not identify as a valid MP3.");
} }
return $results; }
} }
/** /**
@ -378,27 +651,23 @@ function islandora_validate_mp3_datastream($object, $datastream) {
* MP4 files are a subset of the ISO file format specification, and as such need * MP4 files are a subset of the ISO file format specification, and as such need
* to contain a 64-bit declaration of type within the first eight bytes of * to contain a 64-bit declaration of type within the first eight bytes of
* the file. This declaration is comprised of the characters 'ftyp', followed by * the file. This declaration is comprised of the characters 'ftyp', followed by
* a four-character filetype code. Below, we look for 'ftyp', and then pass the * a four-character filetype code. Here, we look for 'ftyp', and then pass the
* filetype code to the test message. * filetype code to the test message.
*
* @param AbstractObject $object
* The PID of the object.
* @param string $datastream
* A DSID of a datastream corresponding to an mp4 file.
*
* @return array
* A series of TRUE(pass)/FALSE(fail) results paired with result messages.
*/ */
class MP4DatastreamValidator extends DatastreamValidator {

  /**
   * Asserts that the datastream is ISO-formatted video.
   *
   * Looks for the 'ftyp' marker in the datastream content and, when it is
   * found, reports the four-character filetype code that follows it.
   */
  protected function assertISOVideo() {
    $ftyp_position = strpos($this->datastreamContent, 'ftyp');
    // A marker at offset 0 is rejected just like a missing one, which is what a
    // plain truthiness check on the strpos() result does.
    if ($ftyp_position === FALSE || $ftyp_position === 0) {
      $this->addFail("{$this->datastream} datastream is not a valid ISO-formatted video");
      return;
    }
    // The filetype code is the four bytes directly after the marker. It has to
    // be read from the content itself, not from the numeric offset that
    // strpos() returns.
    $mp4_ftyp = substr($this->datastreamContent, $ftyp_position + 4, 4);
    $this->addPass("{$this->datastream} datastream asserts that it is a valid ISO-formatted video file using ftyp {$mp4_ftyp}");
  }

}
/** /**
@ -409,33 +678,45 @@ function islandora_validate_mp4_datastream($object, $datastream) {
* what encoders were used to create the file. Here, we're looking for at least * what encoders were used to create the file. Here, we're looking for at least
* one OGG page, and confirming that the file asserts the Theora and Vorbis * one OGG page, and confirming that the file asserts the Theora and Vorbis
* codecs were used to create the file. * codecs were used to create the file.
*
* @param AbstractObject $object
* The PID of the object.
* @param string $datastream
* A DSID of a datastream corresponding to an ogg file.
*
* @return array
* A series of TRUE(pass)/FALSE(fail) results paired with result messages.
*/ */
class OGGDatastreamValidator extends DatastreamValidator {

  /**
   * Asserts that at least one 'OggS' page marker is present.
   *
   * The number of markers found is reported in the pass message.
   */
  protected function assertOGGPages() {
    $ogg_pages = substr_count($this->datastreamContent, 'OggS');
    if ($ogg_pages === 0) {
      $this->addFail("{$this->datastream} datastream contains no Ogg pages.");
      return;
    }
    $this->addPass("{$this->datastream} datastream asserts that it contains {$ogg_pages} Ogg pages (even a very small file should contain several).");
  }

  /**
   * Asserts that the 'theora' codec marker appears in the datastream.
   */
  protected function assertTheoraVideo() {
    $theora_markers = substr_count($this->datastreamContent, 'theora');
    if ($theora_markers === 0) {
      $this->addFail("{$this->datastream} datastream contains no marker indicating the presence of Theora-encoded video data.");
      return;
    }
    $this->addPass("{$this->datastream} datastream asserts that it contains Theora-encoded video data.");
  }

  /**
   * Asserts that the 'vorbis' codec marker appears in the datastream.
   */
  protected function assertVorbisAudio() {
    $vorbis_markers = substr_count($this->datastreamContent, 'vorbis');
    if ($vorbis_markers === 0) {
      $this->addFail("{$this->datastream} datastream contains no marker indicating the presence of Vorbis-encoded audio data.");
      return;
    }
    $this->addPass("{$this->datastream} datastream asserts that it contains Vorbis-encoded audio data");
  }

}
/** /**
@ -446,28 +727,31 @@ function islandora_validate_ogg_datastream($object, $datastream) {
* EBML format, the first four characters will always be the same. Since they're * EBML format, the first four characters will always be the same. Since they're
* non-standard characters, we're looking at their hex values instead. And * non-standard characters, we're looking at their hex values instead. And
* second, we know that the file will contain the declaration 'matroska' soon * second, we know that the file will contain the declaration 'matroska' soon
* after. We could look for this in the binary, but we already have the hex- * after.
* translated version, so we just look for 'matroska' in hex.
*
* @param AbstractObject $object
* The PID of the object.
* @param string $datastream
* A DSID of a datastream corresponding to an MKV file.
*
* @return array
* A series of TRUE(pass)/FALSE(fail) results paired with result messages.
*/ */
class MKVDatastreamValidator extends DatastreamValidator {

  /**
   * Asserts that the datastream is an EBML-format file.
   *
   * Compares the first four bytes of the content, as hex, against the
   * signature '1a45dfa3'.
   */
  protected function assertEBMLFormat() {
    // Only the leading four bytes are hex-encoded; encoding the whole
    // datastream just to inspect its first eight hex digits would build a
    // throwaway string twice the size of the file.
    $signature = bin2hex(substr($this->datastreamContent, 0, 4));
    if ($signature === '1a45dfa3') {
      $this->addPass("{$this->datastream} datastream asserts that it is an EBML-formatted file");
    }
    else {
      $this->addFail("{$this->datastream} datastream is not an EBML-formatted file.");
    }
  }

  /**
   * Asserts that the datastream contains a matroska marker.
   *
   * Passes only when the string 'matroska' occurs exactly once in the content.
   */
  protected function assertMatroskaMarker() {
    if (substr_count($this->datastreamContent, 'matroska') == 1) {
      $this->addPass("{$this->datastream} datastream asserts that its EBML DocType is Matroska");
    }
    else {
      $this->addFail("{$this->datastream} datastream does not contain a Matroska EBML DocType marker.");
    }
  }

}

63
tests/islandora_web_test_case.inc

@ -216,37 +216,72 @@ class IslandoraWebTestCase extends DrupalWebTestCase {
/** /**
* Attempts to validate an array of datastreams, generally via binary checks. * Attempts to validate an array of datastreams, generally via binary checks.
* *
* These functions exist in, and can be added to, datastream_validators.inc, * Datastream validation classes exist in, and can be added to, the file
* which is found in this folder. * 'datastream_validators.inc', which is found in this folder. Datastream
* validator classes use the naming convention 'PrefixDatastreamValidator',
* and that 'Prefix' is what this function uses to determine what class to
* instantiate.
* *
* @param AbstractObject $object * @param IslandoraFedoraObject $object
* The object to load datastreams from. * The object to load datastreams from.
* @param array $datastreams * @param array $datastreams
* An array of paired DSIDs, validate function names, and optional params. * An array of arrays that pair DSIDs, DatastreamValidator class prefixes,
* and optional params - e.g. array(
* array('DSID', 'TIFF'),
* array('DSID2', 'Text', array('param 1', 'param 2')),
* ) and so on.
*/ */
public function validateDatastreams($object, array $datastreams) {
  // Nothing can be validated without an object to pull datastreams from.
  if (!is_object($object)) {
    $this->fail("Datastream validation failed; Object passed in is invalid.", 'Islandora');
    return;
  }
  module_load_include('inc', 'islandora', 'tests/datastream_validators');

  // Lower-case legacy prefixes that map to a CamelCase class prefix; any other
  // lower-case prefix is simply upper-cased.
  $legacy_prefixes = array(
    'image' => 'Image',
    'text' => 'Text',
  );

  foreach ($datastreams as $datastream) {
    $dsid = $datastream[0];
    $prefix = $datastream[1];
    $params = isset($datastream[2]) ? $datastream[2] : array();

    // Tests written before the class-based validators pass prefixes that start
    // with a lower-case letter; convert those to the class naming format.
    if (ctype_lower(substr($prefix, 0, 1))) {
      $prefix = isset($legacy_prefixes[$prefix]) ? $legacy_prefixes[$prefix] : strtoupper($prefix);
    }

    $class_name = "{$prefix}DatastreamValidator";
    if (!class_exists($class_name)) {
      $this->fail("No DatastreamValidator class was found with the name '$class_name'; are you sure the prefix given to IslandoraWebTestCase->validateDatastreams() was entered correctly, or that such a validator exists?", 'Islandora');
      continue;
    }

    // Instantiate the validator and relay its passes, then its fails, as
    // assertions.
    $validator = new $class_name($object, $dsid, $params);
    foreach ($validator->getPasses() as $message => $caller) {
      $this->assert(TRUE, $message, 'Islandora', $caller);
    }
    foreach ($validator->getFails() as $message => $caller) {
      $this->assert(FALSE, $message, 'Islandora', $caller);
    }
  }
}

10
tests/scripts/travis_setup.sh

@ -21,15 +21,7 @@ pear channel-discover pear.phpqatools.org
pear channel-discover pear.netpirates.net pear channel-discover pear.netpirates.net
pear install pear/PHP_CodeSniffer-1.4.8 pear install pear/PHP_CodeSniffer-1.4.8
pear install pear.phpunit.de/phpcpd pear install pear.phpunit.de/phpcpd
pear install drush/drush-5.9.0
# Install Drush
git clone https://github.com/drush-ops/drush.git
pushd drush
git checkout 5.9.0
chmod +x drush
popd
sudo ln -s $HOME/drush/drush /usr/local/sbin
phpenv rehash phpenv rehash
drush dl --yes drupal drush dl --yes drupal
cd drupal-* cd drupal-*

Loading…
Cancel
Save