Browse Source

Stream results

main
astanley 2 months ago
parent
commit
279fbaf9f5
  1. 14
      composer.json
  2. 186
      src/Controller/HarvestDownloadController.php
  3. 86
      src/Form/CollectionHarvestForm.php

14
composer.json

@ -0,0 +1,14 @@
{
"name": "custom/islandora_collection_harvest",
"description": "Islandora module for zipping and streaming collection media using ZipStream.",
"type": "drupal-module",
"require": {
"php": ">=8.1",
"maennchen/zipstream-php": "^3.1"
},
"extra": {
"drupal": {
"extension_type": "module"
}
}
}

186
src/Controller/HarvestDownloadController.php

@ -4,24 +4,70 @@ declare(strict_types=1);
namespace Drupal\islandora_collection_harvest\Controller; namespace Drupal\islandora_collection_harvest\Controller;
use Drupal\Core\StreamWrapper\StreamWrapperManagerInterface; use Drupal\Core\Controller\ControllerBase;
use Drupal\flysystem\FlysystemFactory;
use Drupal\Core\File\FileSystemInterface; use Drupal\Core\File\FileSystemInterface;
use Drupal\Core\StreamWrapper\StreamWrapperManagerInterface;
use Drupal\Core\TempStore\PrivateTempStoreFactory; use Drupal\Core\TempStore\PrivateTempStoreFactory;
use Drupal\flysystem\FlysystemFactory;
use Symfony\Component\HttpFoundation\StreamedResponse; use Symfony\Component\HttpFoundation\StreamedResponse;
use Drupal\Core\Controller\ControllerBase;
use Symfony\Component\DependencyInjection\ContainerInterface; use Symfony\Component\DependencyInjection\ContainerInterface;
use ZipStream\ZipStream;
use Drupal\media\MediaInterface;
use Drupal\file\FileInterface;
/**
* Controller responsible for streaming ZIP downloads of collection media.
*
* This class:
* - Loads media entities stored by the form in PrivateTempStore.
* - Builds and streams a ZIP archive using ZipStream without
* temporary files or memory-heavy buffering.
* - Supports both local and Flysystem (remote) file schemes.
*
* @see \Drupal\islandora_collection_harvest\Form\CollectionHarvestForm
*/
final class HarvestDownloadController extends ControllerBase { final class HarvestDownloadController extends ControllerBase {
/**
* The file system service.
*
* @var \Drupal\Core\File\FileSystemInterface
*/
protected FileSystemInterface $fileSystem; protected FileSystemInterface $fileSystem;
/**
* The private tempstore factory.
*
* @var \Drupal\Core\TempStore\PrivateTempStoreFactory
*/
protected PrivateTempStoreFactory $tempStoreFactory; protected PrivateTempStoreFactory $tempStoreFactory;
/**
* The Flysystem factory service.
*
* @var \Drupal\flysystem\FlysystemFactory
*/
protected FlysystemFactory $flysystemFactory; protected FlysystemFactory $flysystemFactory;
/**
* The stream wrapper manager service.
*
* @var \Drupal\Core\StreamWrapper\StreamWrapperManagerInterface
*/
protected StreamWrapperManagerInterface $streamWrapperManager; protected StreamWrapperManagerInterface $streamWrapperManager;
/**
* Constructs a new HarvestDownloadController.
*
* @param \Drupal\Core\File\FileSystemInterface $file_system
* The file system service.
* @param \Drupal\Core\TempStore\PrivateTempStoreFactory $tempStoreFactory
* The private tempstore factory.
* @param \Drupal\flysystem\FlysystemFactory $flysystemFactory
* The Flysystem factory.
* @param \Drupal\Core\StreamWrapper\StreamWrapperManagerInterface $streamWrapperManager
* The stream wrapper manager.
*/
public function __construct( public function __construct(
FileSystemInterface $file_system, FileSystemInterface $file_system,
PrivateTempStoreFactory $tempStoreFactory, PrivateTempStoreFactory $tempStoreFactory,
@ -34,6 +80,9 @@ final class HarvestDownloadController extends ControllerBase {
$this->streamWrapperManager = $streamWrapperManager; $this->streamWrapperManager = $streamWrapperManager;
} }
/**
* {@inheritdoc}
*/
public static function create(ContainerInterface $container): self { public static function create(ContainerInterface $container): self {
return new self( return new self(
$container->get('file_system'), $container->get('file_system'),
@ -44,74 +93,123 @@ final class HarvestDownloadController extends ControllerBase {
} }
/**
 * Streams a ZIP file containing the requested media files.
 *
 * The media IDs were stored in PrivateTempStore by the form submit handler
 * under the generated filename. This method reloads the referenced media
 * entities and writes each media source file into a ZipStream archive that
 * is sent directly to the client. A file is read from its local path when
 * one resolves, and through Flysystem otherwise.
 *
 * @param string $filename
 *   The generated filename (used as the PrivateTempStore key and download
 *   name).
 *
 * @return \Symfony\Component\HttpFoundation\StreamedResponse
 *   The streaming ZIP response.
 *
 * @throws \Symfony\Component\HttpKernel\Exception\NotFoundHttpException
 *   Thrown if the media list cannot be found in tempstore, or if none of the
 *   stored IDs loads to a media entity any more.
 */
public function download(string $filename): StreamedResponse {
  // Streaming a large collection can outlive the default execution limit.
  // Both settings apply to the whole request, so they are set once here and
  // also cover the streaming callback below.
  set_time_limit(0);
  ignore_user_abort(TRUE);

  // Retrieve stored media IDs.
  $media_ids = $this->tempStoreFactory
    ->get('islandora_collection_harvest')
    ->get($filename);
  if (empty($media_ids) || !is_array($media_ids)) {
    throw new \Symfony\Component\HttpKernel\Exception\NotFoundHttpException('No media found.');
  }

  // Reload media entities for this request.
  /** @var \Drupal\media\MediaInterface[] $media_entities */
  $media_entities = $this->entityTypeManager()
    ->getStorage('media')
    ->loadMultiple($media_ids);
  if (empty($media_entities)) {
    // Every stored ID is stale; a 404 is clearer than an empty archive.
    throw new \Symfony\Component\HttpKernel\Exception\NotFoundHttpException('No media found.');
  }

  // Build the streamed ZIP response.
  $response = new StreamedResponse(function () use ($media_entities): void {
    // Discard every active output buffer, not just the innermost one, so
    // archive bytes are not held back or mixed with buffered output. Stop
    // if a buffer refuses to close to avoid looping forever.
    while (ob_get_level() > 0) {
      if (!@ob_end_clean()) {
        break;
      }
    }

    $zip = new ZipStream();

    // Entry names already assigned, keyed by name. Files from different
    // directories can share a basename, which would otherwise create
    // duplicate entries in the archive.
    $used_names = [];

    foreach ($media_entities as $media) {
      if (!$media instanceof MediaInterface) {
        continue;
      }

      $source_field = $media->getSource()->getConfiguration()['source_field'] ?? NULL;
      if (!$source_field || !$media->hasField($source_field) || $media->get($source_field)->isEmpty()) {
        continue;
      }

      /** @var \Drupal\file\FileInterface|null $file */
      $file = $media->get($source_field)->entity;
      if (!$file instanceof FileInterface) {
        continue;
      }

      $uri = $file->getFileUri();

      // Derive a unique entry name: "name.ext", then "name_1.ext", ...
      $name_in_zip = basename($uri);
      if (isset($used_names[$name_in_zip])) {
        $parts = pathinfo($name_in_zip);
        $extension = isset($parts['extension']) ? '.' . $parts['extension'] : '';
        $suffix = 1;
        do {
          $candidate = $parts['filename'] . '_' . $suffix . $extension;
          $suffix++;
        } while (isset($used_names[$candidate]));
        $name_in_zip = $candidate;
      }
      $used_names[$name_in_zip] = TRUE;

      try {
        // Try to use a local file path first.
        $realpath = $this->fileSystem->realpath($uri);
        if ($realpath && file_exists($realpath)) {
          $zip->addFileFromPath($name_in_zip, $realpath);
          continue;
        }

        // Fall back to Flysystem (remote storage). getScheme() may return
        // FALSE; under strict_types strlen(FALSE) would raise a TypeError
        // that the \Exception handler below does not catch.
        $scheme = $this->streamWrapperManager->getScheme($uri);
        if (!is_string($scheme) || $scheme === '') {
          $this->logger('islandora_collection_harvest')->warning(
            'Skipping @uri: no stream wrapper scheme could be determined.',
            ['@uri' => $uri]
          );
          continue;
        }

        // Strip "scheme://" to get the path inside the Flysystem filesystem.
        $path = substr($uri, strlen($scheme) + 3);
        $filesystem = $this->flysystemFactory->getFilesystem($scheme);
        if (!$filesystem || !$filesystem->has($path)) {
          $this->logger('islandora_collection_harvest')->warning(
            'Skipping @uri: file not found locally or in Flysystem.',
            ['@uri' => $uri]
          );
          continue;
        }

        // Prefer a stream-based read so the file is not held in memory.
        $stream = method_exists($filesystem, 'readStream')
          ? $filesystem->readStream($path)
          : FALSE;
        if (is_resource($stream)) {
          try {
            $zip->addFileFromStream($name_in_zip, $stream);
          }
          finally {
            // Release the handle even when ZipStream throws.
            if (is_resource($stream)) {
              fclose($stream);
            }
          }
          continue;
        }

        // Fallback to full read (less memory-efficient); also used when the
        // stream could not be opened.
        $contents = $filesystem->read($path);
        if ($contents !== FALSE) {
          $zip->addFile($name_in_zip, $contents);
        }
      }
      catch (\Exception $e) {
        // Best effort: log the failure and carry on with the next file.
        $this->logger('islandora_collection_harvest')->error(
          'Failed to add @uri to the archive: @msg',
          ['@uri' => $uri, '@msg' => $e->getMessage()]
        );
      }
    }

    // Finalize the ZIP output and flush to browser.
    $zip->finish();
    flush();
  });

  // Set appropriate download headers. makeDisposition() quotes and validates
  // the filename instead of concatenating a route parameter into the header.
  // NOTE(review): it throws \InvalidArgumentException for names containing
  // "/", "\", "%" or non-ASCII characters; the keys written by the form
  // ("collection_<timestamp>.zip") are safe - confirm no other writer exists.
  $response->headers->set('Content-Type', 'application/zip');
  $response->headers->set(
    'Content-Disposition',
    $response->headers->makeDisposition('attachment', $filename)
  );

  return $response;
}
} }

86
src/Form/CollectionHarvestForm.php

@ -12,24 +12,72 @@ use Drupal\flysystem\FlysystemFactory;
use Drupal\islandora\IslandoraUtils; use Drupal\islandora\IslandoraUtils;
use Drupal\Core\TempStore\PrivateTempStoreFactory; use Drupal\Core\TempStore\PrivateTempStoreFactory;
use Symfony\Component\DependencyInjection\ContainerInterface; use Symfony\Component\DependencyInjection\ContainerInterface;
use Drupal\node\NodeInterface;
use Drupal\media\MediaInterface;
use Drupal\taxonomy\TermInterface;
/** /**
* Form for selecting Collection and Media Use and preparing ZIP download. * Provides a form to select a collection and a media use term,
* and prepares a streamed ZIP download of all matching media.
*
* Workflow:
* - Lists Islandora "collection" nodes (field_model = Collection).
* - Lists all taxonomy terms from 'islandora_media_use'.
* - On submit, gathers all child nodes and their associated media
* of the chosen media use type.
* - Stores media IDs in PrivateTempStore and redirects to the
* HarvestDownloadController for streaming download.
*/ */
final class CollectionHarvestForm extends FormBase { final class CollectionHarvestForm extends FormBase {
/**
* The entity type manager.
*
* @var \Drupal\Core\Entity\EntityTypeManagerInterface
*/
protected EntityTypeManagerInterface $entityTypeManager; protected EntityTypeManagerInterface $entityTypeManager;
/**
* The Islandora utility service.
*
* @var \Drupal\islandora\IslandoraUtils
*/
protected IslandoraUtils $utils; protected IslandoraUtils $utils;
/**
* The Drupal file system service.
*
* @var \Drupal\Core\File\FileSystemInterface
*/
protected FileSystemInterface $fileSystem; protected FileSystemInterface $fileSystem;
/**
* The Flysystem factory service.
*
* @var \Drupal\flysystem\FlysystemFactory
*/
protected FlysystemFactory $flysystemFactory; protected FlysystemFactory $flysystemFactory;
/**
* The private tempstore factory.
*
* @var \Drupal\Core\TempStore\PrivateTempStoreFactory
*/
protected PrivateTempStoreFactory $tempStoreFactory; protected PrivateTempStoreFactory $tempStoreFactory;
/** /**
* {@inheritdoc} * Constructs a new CollectionHarvestForm object.
*
* @param \Drupal\Core\Entity\EntityTypeManagerInterface $entity_type_manager
* The entity type manager service.
* @param \Drupal\islandora\IslandoraUtils $utils
* The Islandora utility service.
* @param \Drupal\Core\File\FileSystemInterface $file_system
* The file system service.
* @param \Drupal\flysystem\FlysystemFactory $flysystem_factory
* The Flysystem factory service.
* @param \Drupal\Core\TempStore\PrivateTempStoreFactory $temp_store_factory
* The private tempstore factory service.
*/ */
public function __construct( public function __construct(
EntityTypeManagerInterface $entity_type_manager, EntityTypeManagerInterface $entity_type_manager,
@ -69,14 +117,19 @@ final class CollectionHarvestForm extends FormBase {
* {@inheritdoc} * {@inheritdoc}
*/ */
public function buildForm(array $form, FormStateInterface $form_state): array { public function buildForm(array $form, FormStateInterface $form_state): array {
// Retrieve the Islandora "Collection" term.
$term = $this->utils->getTermForUri('http://purl.org/dc/dcmitype/Collection'); $term = $this->utils->getTermForUri('http://purl.org/dc/dcmitype/Collection');
$collections = []; $collections = [];
if ($term) { if ($term instanceof TermInterface) {
// Query all Islandora objects using that model.
$query = $this->entityTypeManager->getStorage('node')->getQuery(); $query = $this->entityTypeManager->getStorage('node')->getQuery();
$query->condition('type', 'islandora_object'); $query->condition('type', 'islandora_object');
$query->condition('field_model', $term->id(), 'IN'); $query->condition('field_model', $term->id(), 'IN');
$query->accessCheck(FALSE); $query->accessCheck(FALSE);
$nids = $query->execute(); $nids = $query->execute();
/** @var \Drupal\node\NodeInterface[] $nodes */
$nodes = $this->entityTypeManager->getStorage('node') $nodes = $this->entityTypeManager->getStorage('node')
->loadMultiple($nids); ->loadMultiple($nids);
foreach ($nodes as $node) { foreach ($nodes as $node) {
@ -84,6 +137,7 @@ final class CollectionHarvestForm extends FormBase {
} }
} }
// Load media use taxonomy terms.
$vid = 'islandora_media_use'; $vid = 'islandora_media_use';
$terms = $this->entityTypeManager->getStorage('taxonomy_term') $terms = $this->entityTypeManager->getStorage('taxonomy_term')
->loadTree($vid); ->loadTree($vid);
@ -92,7 +146,7 @@ final class CollectionHarvestForm extends FormBase {
$media_use_options[$term->tid] = $term->name; $media_use_options[$term->tid] = $term->name;
} }
$form['#attributes']['id'] = 'collection-harvest-form'; // Collection selector.
$form['collection'] = [ $form['collection'] = [
'#type' => 'select', '#type' => 'select',
'#title' => $this->t('Collection'), '#title' => $this->t('Collection'),
@ -100,6 +154,7 @@ final class CollectionHarvestForm extends FormBase {
'#required' => TRUE, '#required' => TRUE,
]; ];
// Media Use selector.
$form['media_use'] = [ $form['media_use'] = [
'#type' => 'select', '#type' => 'select',
'#title' => $this->t('Media Use'), '#title' => $this->t('Media Use'),
@ -107,6 +162,7 @@ final class CollectionHarvestForm extends FormBase {
'#required' => TRUE, '#required' => TRUE,
]; ];
// Submit button.
$form['actions'] = [ $form['actions'] = [
'#type' => 'actions', '#type' => 'actions',
'submit' => [ 'submit' => [
@ -115,6 +171,7 @@ final class CollectionHarvestForm extends FormBase {
'#button_type' => 'primary', '#button_type' => 'primary',
], ],
]; ];
return $form; return $form;
} }
@ -125,6 +182,7 @@ final class CollectionHarvestForm extends FormBase {
$collection_id = $form_state->getValue('collection'); $collection_id = $form_state->getValue('collection');
$media_use_tid = $form_state->getValue('media_use'); $media_use_tid = $form_state->getValue('media_use');
/** @var \Drupal\taxonomy\TermInterface|null $term */
$term = $this->entityTypeManager->getStorage('taxonomy_term') $term = $this->entityTypeManager->getStorage('taxonomy_term')
->load($media_use_tid); ->load($media_use_tid);
if (!$term) { if (!$term) {
@ -132,7 +190,7 @@ final class CollectionHarvestForm extends FormBase {
return; return;
} }
// Load nodes in collection // Find all Islandora objects that are members of the selected collection.
$query = $this->entityTypeManager->getStorage('node')->getQuery(); $query = $this->entityTypeManager->getStorage('node')->getQuery();
$query->condition('type', 'islandora_object'); $query->condition('type', 'islandora_object');
$query->condition('field_member_of', $collection_id); $query->condition('field_member_of', $collection_id);
@ -141,9 +199,15 @@ final class CollectionHarvestForm extends FormBase {
$media_entities = []; $media_entities = [];
foreach ($nids as $nid) { foreach ($nids as $nid) {
/** @var \Drupal\node\NodeInterface $node */
$node = $this->entityTypeManager->getStorage('node')->load($nid); $node = $this->entityTypeManager->getStorage('node')->load($nid);
if (!$node instanceof NodeInterface) {
continue;
}
/** @var \Drupal\media\MediaInterface|null $media */
$media = $this->utils->getMediaWithTerm($node, $term); $media = $this->utils->getMediaWithTerm($node, $term);
if ($media) { if ($media instanceof MediaInterface) {
$source_field = $media->getSource() $source_field = $media->getSource()
->getConfiguration()['source_field'] ?? NULL; ->getConfiguration()['source_field'] ?? NULL;
if ($source_field && $media->hasField($source_field) && !$media->get($source_field) if ($source_field && $media->hasField($source_field) && !$media->get($source_field)
@ -159,14 +223,14 @@ final class CollectionHarvestForm extends FormBase {
return; return;
} }
// Store in tempstore // Store only IDs for serialization safety.
$temp_store = $this->tempStoreFactory->get('islandora_collection_harvest'); $temp_store = $this->tempStoreFactory->get('islandora_collection_harvest');
$zip_filename = 'collection_' . time() . '.zip'; $zip_filename = 'collection_' . time() . '.zip';
$temp_store->set($zip_filename, $media_entities); $temp_store->set($zip_filename, array_map(fn($m) => $m->id(), $media_entities));
$this->messenger()
->addStatus($this->t('Your ZIP download has been initiated. Please wait for the download to start.'));
// Redirect to download route // Notify the user and redirect to download.
$this->messenger()
->addStatus($this->t('Your ZIP download has been initiated. Please wait for it to start.'));
$form_state->setRedirect('islandora_collection_harvest.download_zip', ['filename' => $zip_filename]); $form_state->setRedirect('islandora_collection_harvest.download_zip', ['filename' => $zip_filename]);
} }

Loading…
Cancel
Save