diff --git a/includes/admin.form.inc b/includes/admin.form.inc
index d34bd5d5..b0bb0642 100644
--- a/includes/admin.form.inc
+++ b/includes/admin.form.inc
@@ -55,6 +55,12 @@ function islandora_repository_admin(array $form, array &$form_state) {
         '#description' => t('The PID of the Root Collection Object'),
         '#required' => TRUE,
       ),
+      'islandora_use_datastream_cache_headers' => array(
+        '#type' => 'checkbox',
+        '#title' => t('Generate/parse datastream HTTP cache headers'),
+        '#description' => t('HTTP caching can reduce network traffic, by allowing clients to use cached copies.'),
+        '#default_value' => variable_get('islandora_use_datastream_cache_headers', TRUE),
+      ),
     ),
     'islandora_namespace' => array(
       '#type' => 'fieldset',
diff --git a/includes/datastream.inc b/includes/datastream.inc
index 585fa2f5..1d506e3d 100644
--- a/includes/datastream.inc
+++ b/includes/datastream.inc
@@ -46,8 +46,6 @@ function islandora_view_datastream(AbstractDatastream $datastream, $download = F
     }
   }
 
-  header_remove('Cache-Control');
-  header_remove('Expires');
   header('Content-type: ' . $datastream->mimetype);
   if ($datastream->controlGroup == 'M' || $datastream->controlGroup == 'X') {
     header('Content-length: ' . $datastream->size);
@@ -59,13 +57,173 @@ function islandora_view_datastream(AbstractDatastream $datastream, $download = F
     $filename = $datastream->label . '.' . $extension;
     header("Content-Disposition: attachment; filename=\"$filename\"");
   }
+
+  $cache_check = islandora_view_datastream_cache_check($datastream);
+  if ($cache_check !== 200) {
+    if ($cache_check === 304) {
+      header('HTTP/1.1 304 Not Modified');
+    }
+    elseif ($cache_check === 412) {
+      header('HTTP/1.1 412 Precondition Failed');
+    }
+  }
+  islandora_view_datastream_set_cache_headers($datastream);
+
   drupal_page_is_cacheable(FALSE);
   // Try not to load the file into PHP memory!
-  ob_end_flush();
-  $datastream->getContent('php://output');
+  // Close and flush ALL the output buffers!
+  while (@ob_end_flush()) {
+  }
+
+  // New content needed.
+  if ($cache_check === 200) {
+    $datastream->getContent('php://output');
+  }
   exit();
 }
 
+/**
+ * Parse "etags" from HTTP If-Match or If-None-Match headers.
+ *
+ * Parses from the CSV-like structure supported by HTTP headers into an array,
+ * so `"asdf", "fdsa", W/"2132"` should become an array containing the strings:
+ * - asdf
+ * - fdsa
+ * - 2132
+ *
+ * @see http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.24
+ * @see http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.26
+ *
+ * @param string $header_value
+ *   The value from the headers.
+ *
+ * @return array
+ *   An array containing all the etags present.
+ */
+function islandora_parse_http_match_headers($header_value) {
+  $matches = array();
+  // Match the CSV-like structure supported by the HTTP headers.
+  preg_match_all('/(((W\/)?("?)(\*|.+?)\4)(, +)?)/', $header_value, $matches);
+  // The fifth sub-expression/group holds the etags; fall back to an empty
+  // array if the pattern failed to run, so callers always get an array.
+  return isset($matches[5]) ? $matches[5] : array();
+}
+
+/**
+ * Validate cache headers.
+ *
+ * Date headers that cannot be parsed are ignored, as if they were not sent.
+ *
+ * @param AbstractDatastream $datastream
+ *   The datastream for which to check the request headers against.
+ *
+ * @return int
+ *   An integer representing the HTTP response code. One of:
+ *   - 200: Proceed as normal. (Full download).
+ *   - 304: Resource hasn't changed; pass cache validation.
+ *   - 412: Resource has changed; fail cache validation.
+ *
+ * @see http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html
+ */
+function islandora_view_datastream_cache_check(AbstractDatastream $datastream) {
+  if (!variable_get('islandora_use_datastream_cache_headers', TRUE)) {
+    return 200;
+  }
+
+  // Let's assume that if we get here, we'll be able to complete the request.
+  $return = 200;
+
+  if (isset($_SERVER['HTTP_IF_MODIFIED_SINCE'])) {
+    $modified_since = DateTime::createFromFormat('D, d M Y H:i:s e', $_SERVER['HTTP_IF_MODIFIED_SINCE']);
+    // createFromFormat() gives FALSE for a malformed date; skip the check.
+    if ($modified_since !== FALSE) {
+      if ($datastream->createdDate->getTimestamp() - $modified_since->getTimestamp() > 0) {
+        // Changed!
+        return $return;
+      }
+      else {
+        $return = 304;
+      }
+    }
+  }
+  if ($return === 200 && isset($_SERVER['HTTP_IF_UNMODIFIED_SINCE'])) {
+    $unmodified_since = DateTime::createFromFormat('D, d M Y H:i:s e', $_SERVER['HTTP_IF_UNMODIFIED_SINCE']);
+    // createFromFormat() gives FALSE for a malformed date; skip the check.
+    if ($unmodified_since !== FALSE) {
+      if ($datastream->createdDate->getTimestamp() > $unmodified_since->getTimestamp()) {
+        // Changed since the given date; the precondition fails.
+        $return = 412;
+      }
+      else {
+        return $return;
+      }
+    }
+  }
+
+  // Only consider Etags we have provided.
+  if (isset($datastream->checksum)) {
+    $tags = array();
+    foreach ($datastream as $offset => $version) {
+      if (isset($version->checksum)) {
+        $tags[$offset] = $version->checksum;
+      }
+    }
+
+    if ($return === 200 && isset($_SERVER['HTTP_IF_MATCH'])) {
+      $request_tags = islandora_parse_http_match_headers($_SERVER['HTTP_IF_MATCH']);
+      if (in_array('*', $request_tags) || count(array_intersect($tags, $request_tags)) > 0) {
+        // There's a match... Let things go ahead.
+        return $return;
+      }
+      else {
+        $return = 412;
+      }
+    }
+    if (in_array($return, array(200, 304), TRUE) && isset($_SERVER['HTTP_IF_NONE_MATCH'])) {
+      $request_tags = islandora_parse_http_match_headers($_SERVER['HTTP_IF_NONE_MATCH']);
+      if (in_array('*', $request_tags) || count(array_intersect($tags, $request_tags)) > 0) {
+        $return = 304;
+      }
+      else {
+        $return = 200;
+      }
+    }
+  }
+
+  return $return;
+}
+
+/**
+ * Set various HTTP headers for caching.
+ *
+ * @param AbstractDatastream $datastream
+ *   The datastream being viewed/downloaded.
+ */
+function islandora_view_datastream_set_cache_headers(AbstractDatastream $datastream) {
+  if (variable_get('islandora_use_datastream_cache_headers', TRUE)) {
+    // Force cache revalidation.
+    header('Expires: Sun, 19 Nov 1978 05:00:00 GMT');
+    $cache_control = array();
+    if ($datastream->parent->repository->api->connection->username == 'anonymous') {
+      $cache_control[] = 'public';
+    }
+    else {
+      $cache_control[] = 'private';
+    }
+    $cache_control[] = 'must-revalidate';
+    $cache_control[] = 'max-age=0';
+    header('Cache-Control: ' . implode(', ', $cache_control));
+    header('Last-Modified: ' . $datastream->createdDate->format('D, d M Y H:i:s \G\M\T'));
+    if (isset($datastream->checksum)) {
+      header("Etag: \"{$datastream->checksum}\"");
+    }
+  }
+  else {
+    header_remove('Cache-Control');
+    header_remove('Expires');
+  }
+}
+
 /**
  * Get the human readable size of the given datastream.
  *
diff --git a/islandora.info b/islandora.info
index 4b48bc29..36ed0dff 100644
--- a/islandora.info
+++ b/islandora.info
@@ -18,5 +18,6 @@ files[] = tests/ingest.test
 files[] = tests/hooked_access.test
 files[] = tests/islandora_manage_permissions.test
 files[] = tests/datastream_versions.test
+files[] = tests/datastream_cache.test
 files[] = tests/derivatives.test
 php = 5.3
diff --git a/tests/datastream_cache.test b/tests/datastream_cache.test
new file mode 100644
index 00000000..8118c08a
--- /dev/null
+++ b/tests/datastream_cache.test
@@ -0,0 +1,160 @@
+      'Datastream Cache Headers',
+      'description' => 'Check our headers work as we expect them to.',
+      'group' => 'Islandora',
+    );
+  }
+
+  /**
+   * Creates an admin user and a connection to a fedora repository.
+   *
+   * @see IslandoraWebTestCase::setUp()
+   */
+  public function setUp() {
+    parent::setUp();
+    $this->repository = $this->admin->repository;
+    $this->purgeTestObjects();
+  }
+
+  /**
+   * Free any objects/resources created for this test.
+   *
+   * @see IslandoraWebTestCase::tearDown()
+   */
+  public function tearDown() {
+    $this->purgeTestObjects();
+    parent::tearDown();
+  }
+
+  /**
+   * Purge any objects created by the tests in this class.
+   */
+  public function purgeTestObjects() {
+    $objects = array(
+      'test:test',
+    );
+    foreach ($objects as $object) {
+      try {
+        $object = $this->repository->getObject($object);
+        $this->repository->purgeObject($object->id);
+      }
+      catch (Exception $e) {
+        // Meh... Either it didn't exist or the purge failed.
+      }
+    }
+  }
+
+  /**
+   * Create our test object.
+   */
+  protected function createTestObject() {
+    $object = $this->repository->constructObject('test:test');
+    $object->label = 'Test object';
+    $object->models = 'test:model';
+    $datastream = $object->constructDatastream('asdf', 'M');
+    $datastream->label = 'datastream of doom';
+    $datastream->mimetype = 'text/plain';
+    $datastream->content = 'And then things happened.';
+    $datastream->checksumType = 'SHA-1';
+    $object->ingestDatastream($datastream);
+    $this->repository->ingestObject($object);
+    return $object;
+  }
+
+  /**
+   * Test HTTP cache headers.
+   */
+  public function testCacheHeaders() {
+    $object = $this->createTestObject();
+    $datastream = $object['asdf'];
+
+    $user = $this->drupalCreateUser(array(ISLANDORA_VIEW_OBJECTS));
+    $this->drupalLogin($user);
+
+    // Test If-Modified-Since.
+    $result = $this->drupalGet("islandora/object/{$object->id}/datastream/{$datastream->id}/view", array(), array(
+      'If-Modified-Since: ' . $datastream->createdDate->format('D, d M Y H:i:s \G\M\T'),
+    ));
+    $this->assertResponse(304);
+    $result = $this->drupalGet("islandora/object/{$object->id}/datastream/{$datastream->id}/view", array(), array(
+      'If-Modified-Since: ' . $datastream->createdDate->sub(new DateInterval('P1M'))->format('D, d M Y H:i:s \G\M\T'),
+    ));
+    $this->assertResponse(200);
+
+    // Test If-Unmodified-Since.
+    $result = $this->drupalGet("islandora/object/{$object->id}/datastream/{$datastream->id}/view", array(), array(
+      'If-Unmodified-Since: ' . $datastream->createdDate->format('D, d M Y H:i:s \G\M\T'),
+    ));
+    $this->assertResponse(200);
+    $result = $this->drupalGet("islandora/object/{$object->id}/datastream/{$datastream->id}/view", array(), array(
+      'If-Unmodified-Since: ' . $datastream->createdDate->sub(new DateInterval('P1M'))->format('D, d M Y H:i:s \G\M\T'),
+    ));
+    $this->assertResponse(412);
+
+    // Test If-Match.
+    $result = $this->drupalGet("islandora/object/{$object->id}/datastream/{$datastream->id}/view", array(), array(
+      format_string('If-Match: "!checksum"', array(
+        '!checksum' => $datastream->checksum,
+      )),
+    ));
+    $this->assertResponse(200);
+    $result = $this->drupalGet("islandora/object/{$object->id}/datastream/{$datastream->id}/view", array(), array(
+      format_string('If-Match: "!checksum"', array(
+        '!checksum' => 'dont-match' . $datastream->checksum,
+      )),
+    ));
+    $this->assertResponse(412);
+
+    // Test If-None-Match.
+    $result = $this->drupalGet("islandora/object/{$object->id}/datastream/{$datastream->id}/view", array(), array(
+      format_string('If-None-Match: "!checksum"', array(
+        '!checksum' => $datastream->checksum,
+      )),
+    ));
+    $this->assertResponse(304);
+    $result = $this->drupalGet("islandora/object/{$object->id}/datastream/{$datastream->id}/view", array(), array(
+      format_string('If-None-Match: "!checksum"', array(
+        '!checksum' => 'dont-match' . $datastream->checksum,
+      )),
+    ));
+    $this->assertResponse(200);
+
+    // Test combination of If-None-Match and If-Modified-Since.
+    $result = $this->drupalGet("islandora/object/{$object->id}/datastream/{$datastream->id}/view", array(), array(
+      'If-Modified-Since: ' . $datastream->createdDate->format('D, d M Y H:i:s \G\M\T'),
+      format_string('If-None-Match: "!checksum"', array(
+        '!checksum' => $datastream->checksum,
+      )),
+    ));
+    $this->assertResponse(304);
+    $result = $this->drupalGet("islandora/object/{$object->id}/datastream/{$datastream->id}/view", array(), array(
+      'If-Modified-Since: ' . $datastream->createdDate->format('D, d M Y H:i:s \G\M\T'),
+      format_string('If-None-Match: "!checksum"', array(
+        '!checksum' => 'dont-match' . $datastream->checksum,
+      )),
+    ));
+    $this->assertResponse(200);
+    $result = $this->drupalGet("islandora/object/{$object->id}/datastream/{$datastream->id}/view", array(), array(
+      'If-Modified-Since: ' . $datastream->createdDate->sub(new DateInterval('P1M'))->format('D, d M Y H:i:s \G\M\T'),
+      format_string('If-None-Match: "!checksum"', array(
+        '!checksum' => $datastream->checksum,
+      )),
+    ));
+    $this->assertResponse(200);
+  }
+}