From 279fbaf9f5a9727083d6144c6d80c452f074b225 Mon Sep 17 00:00:00 2001 From: astanley Date: Wed, 22 Oct 2025 12:57:29 -0300 Subject: [PATCH] Stream results --- composer.json | 14 ++ src/Controller/HarvestDownloadController.php | 186 ++++++++++++++----- src/Form/CollectionHarvestForm.php | 86 +++++++-- 3 files changed, 231 insertions(+), 55 deletions(-) create mode 100644 composer.json diff --git a/composer.json b/composer.json new file mode 100644 index 0000000..1c4457f --- /dev/null +++ b/composer.json @@ -0,0 +1,14 @@ +{ + "name": "custom/islandora_collection_harvest", + "description": "Islandora module for zipping and streaming collection media using ZipStream.", + "type": "drupal-module", + "require": { + "php": ">=8.1", + "maennchen/zipstream-php": "^3.1" + }, + "extra": { + "drupal": { + "extension_type": "module" + } + } +} diff --git a/src/Controller/HarvestDownloadController.php b/src/Controller/HarvestDownloadController.php index d5dc417..e96d856 100644 --- a/src/Controller/HarvestDownloadController.php +++ b/src/Controller/HarvestDownloadController.php @@ -4,24 +4,70 @@ declare(strict_types=1); namespace Drupal\islandora_collection_harvest\Controller; -use Drupal\Core\StreamWrapper\StreamWrapperManagerInterface; -use Drupal\flysystem\FlysystemFactory; +use Drupal\Core\Controller\ControllerBase; use Drupal\Core\File\FileSystemInterface; +use Drupal\Core\StreamWrapper\StreamWrapperManagerInterface; use Drupal\Core\TempStore\PrivateTempStoreFactory; +use Drupal\flysystem\FlysystemFactory; use Symfony\Component\HttpFoundation\StreamedResponse; -use Drupal\Core\Controller\ControllerBase; use Symfony\Component\DependencyInjection\ContainerInterface; - +use ZipStream\ZipStream; +use Drupal\media\MediaInterface; +use Drupal\file\FileInterface; + +/** + * Controller responsible for streaming ZIP downloads of collection media. + * + * This class: + * - Loads media entities stored by the form in PrivateTempStore. + * - Builds and streams a ZIP archive using ZipStream without + * temporary files or memory-heavy buffering. + * - Supports both local and Flysystem (remote) file schemes. + * + * @see \Drupal\islandora_collection_harvest\Form\CollectionHarvestForm + */ final class HarvestDownloadController extends ControllerBase { + /** + * The file system service. + * + * @var \Drupal\Core\File\FileSystemInterface + */ protected FileSystemInterface $fileSystem; + /** + * The private tempstore factory. + * + * @var \Drupal\Core\TempStore\PrivateTempStoreFactory + */ protected PrivateTempStoreFactory $tempStoreFactory; + /** + * The Flysystem factory service. + * + * @var \Drupal\flysystem\FlysystemFactory + */ protected FlysystemFactory $flysystemFactory; + /** + * The stream wrapper manager service. + * + * @var \Drupal\Core\StreamWrapper\StreamWrapperManagerInterface + */ protected StreamWrapperManagerInterface $streamWrapperManager; + /** + * Constructs a new HarvestDownloadController. + * + * @param \Drupal\Core\File\FileSystemInterface $file_system + * The file system service. + * @param \Drupal\Core\TempStore\PrivateTempStoreFactory $tempStoreFactory + * The private tempstore factory. + * @param \Drupal\flysystem\FlysystemFactory $flysystemFactory + * The Flysystem factory. + * @param \Drupal\Core\StreamWrapper\StreamWrapperManagerInterface $streamWrapperManager + * The stream wrapper manager. + */ public function __construct( FileSystemInterface $file_system, PrivateTempStoreFactory $tempStoreFactory, @@ -34,6 +80,9 @@ final class HarvestDownloadController extends ControllerBase { $this->streamWrapperManager = $streamWrapperManager; } + /** + * {@inheritdoc} + */ public static function create(ContainerInterface $container): self { return new self( $container->get('file_system'), @@ -44,74 +93,123 @@ final class HarvestDownloadController extends ControllerBase { } /** - * Builds and downloads archive. + * Streams a ZIP file containing the requested media files. + * + * The media IDs were stored in PrivateTempStore by the form submit handler. + * This method: + * - Reloads all referenced media entities. + * - Iterates through each file, adding it to a ZipStream stream. + * - Handles both local and remote file systems gracefully. * * @param string $filename + * The generated filename (used as the PrivateTempStore key and download name). * * @return \Symfony\Component\HttpFoundation\StreamedResponse + * The streaming ZIP response. + * + * @throws \Symfony\Component\HttpKernel\Exception\NotFoundHttpException + * Thrown if the media list cannot be found in tempstore. */ public function download(string $filename): StreamedResponse { + set_time_limit(0); + ignore_user_abort(true); + + // Retrieve stored media IDs. $temp_store = $this->tempStoreFactory->get('islandora_collection_harvest'); - $media_entities = $temp_store->get($filename); - if (empty($media_entities)) { + $media_ids = $temp_store->get($filename); + + if (empty($media_ids)) { throw new \Symfony\Component\HttpKernel\Exception\NotFoundHttpException('No media found.'); } - return new StreamedResponse(function() use ($media_entities) { - $zip = new \ZipArchive(); - $tmpfile = tempnam(sys_get_temp_dir(), 'collection_zip_'); - $zip->open($tmpfile, \ZipArchive::CREATE | \ZipArchive::OVERWRITE); + // Reload media entities for this request. + /** @var \Drupal\media\MediaInterface[] $media_entities */ + $media_entities = \Drupal::entityTypeManager() + ->getStorage('media') + ->loadMultiple($media_ids); + + // Build the streamed ZIP response. + $response = new StreamedResponse(function () use ($media_entities) { + // Clean any buffered output. + if (ob_get_level()) { + @ob_end_clean(); + } + + ignore_user_abort(true); + set_time_limit(0); + + // Initialize the ZipStream object. + $zip = new ZipStream(); + // Process each media entity. foreach ($media_entities as $media) { - $source_field = $media->getSource() - ->getConfiguration()['source_field'] ?? NULL; - if (!$source_field || !$media->hasField($source_field)) { + if (!$media instanceof MediaInterface) { continue; } + + $source_field = $media->getSource()->getConfiguration()['source_field'] ?? NULL; + if (!$source_field || !$media->hasField($source_field) || $media->get($source_field)->isEmpty()) { + continue; + } + + /** @var \Drupal\file\FileInterface|null $file */ $file = $media->get($source_field)->entity; - if (!$file) { + if (!$file instanceof FileInterface) { continue; } $uri = $file->getFileUri(); - $filename_in_zip = basename($uri); - $real_path = $this->fileSystem->realpath($uri); + $name_in_zip = basename($uri); - if ($real_path && file_exists($real_path)) { - $zip->addFile($real_path, $filename_in_zip); + // Try to use a local file path first. + $realpath = $this->fileSystem->realpath($uri); + if ($realpath && file_exists($realpath)) { + $zip->addFileFromPath($name_in_zip, $realpath); + continue; } - else { - // Flysystem v1 fallback - try { - $scheme = $this->streamWrapperManager->getScheme($uri); - $path = substr($uri, strlen($scheme) + 3); - $filesystem = $this->flysystemFactory->getFilesystem($scheme); - - if ($filesystem && $filesystem->has($path)) { + + // Fall back to Flysystem (remote storage). + try { + $scheme = $this->streamWrapperManager->getScheme($uri); + $path = substr($uri, strlen($scheme) + 3); + $filesystem = $this->flysystemFactory->getFilesystem($scheme); + + if ($filesystem && $filesystem->has($path)) { + // Try stream-based read first. + if (method_exists($filesystem, 'readStream')) { + $stream = $filesystem->readStream($path); + if ($stream) { + $zip->addFileFromStream($name_in_zip, $stream); + @fclose($stream); + } + } + else { + // Fallback to full read (less memory-efficient). $contents = $filesystem->read($path); - if ($contents !== FALSE) { - $tmp_fly = tempnam(sys_get_temp_dir(), 'zip_'); - file_put_contents($tmp_fly, $contents); - $zip->addFile($tmp_fly, $filename_in_zip); + if ($contents !== false) { + $zip->addFile($name_in_zip, $contents); } } } - catch (\Exception $e) { - $this->logger('islandora_collection_harvest')->error( - 'Error reading @uri from Flysystem: @msg', - ['@uri' => $uri, '@msg' => $e->getMessage()] - ); - } + } + catch (\Exception $e) { + $this->logger('islandora_collection_harvest')->error( + 'Flysystem read failed for @uri: @msg', + ['@uri' => $uri, '@msg' => $e->getMessage()] + ); } } - $zip->close(); - readfile($tmpfile); - unlink($tmpfile); - }, 200, [ - 'Content-Type' => 'application/zip', - 'Content-Disposition' => 'attachment; filename="' . $filename . '"', - ]); + // Finalize the ZIP output and flush to browser. + $zip->finish(); + @flush(); + }); + + // Set appropriate download headers. + $response->headers->set('Content-Type', 'application/zip'); + $response->headers->set('Content-Disposition', 'attachment; filename="' . $filename . '"'); + + return $response; } } diff --git a/src/Form/CollectionHarvestForm.php b/src/Form/CollectionHarvestForm.php index 36571f7..32fd545 100644 --- a/src/Form/CollectionHarvestForm.php +++ b/src/Form/CollectionHarvestForm.php @@ -12,24 +12,72 @@ use Drupal\flysystem\FlysystemFactory; use Drupal\islandora\IslandoraUtils; use Drupal\Core\TempStore\PrivateTempStoreFactory; use Symfony\Component\DependencyInjection\ContainerInterface; +use Drupal\node\NodeInterface; +use Drupal\media\MediaInterface; +use Drupal\taxonomy\TermInterface; /** - * Form for selecting Collection and Media Use and preparing ZIP download. + * Provides a form to select a collection and a media use term, + * and prepares a streamed ZIP download of all matching media. + * + * Workflow: + * - Lists Islandora "collection" nodes (field_model = Collection). + * - Lists all taxonomy terms from 'islandora_media_use'. + * - On submit, gathers all child nodes and their associated media + * of the chosen media use type. + * - Stores media IDs in PrivateTempStore and redirects to the + * HarvestDownloadController for streaming download. */ final class CollectionHarvestForm extends FormBase { + /** + * The entity type manager. + * + * @var \Drupal\Core\Entity\EntityTypeManagerInterface + */ protected EntityTypeManagerInterface $entityTypeManager; + /** + * The Islandora utility service. + * + * @var \Drupal\islandora\IslandoraUtils + */ protected IslandoraUtils $utils; + /** + * The Drupal file system service. + * + * @var \Drupal\Core\File\FileSystemInterface + */ protected FileSystemInterface $fileSystem; + /** + * The Flysystem factory service. + * + * @var \Drupal\flysystem\FlysystemFactory + */ protected FlysystemFactory $flysystemFactory; + /** + * The private tempstore factory. + * + * @var \Drupal\Core\TempStore\PrivateTempStoreFactory + */ protected PrivateTempStoreFactory $tempStoreFactory; /** - * {@inheritdoc} + * Constructs a new CollectionHarvestForm object. + * + * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entity_type_manager + * The entity type manager service. + * @param \Drupal\islandora\IslandoraUtils $utils + * The Islandora utility service. + * @param \Drupal\Core\File\FileSystemInterface $file_system + * The file system service. + * @param \Drupal\flysystem\FlysystemFactory $flysystem_factory + * The Flysystem factory service. + * @param \Drupal\Core\TempStore\PrivateTempStoreFactory $temp_store_factory + * The private tempstore factory service. */ public function __construct( EntityTypeManagerInterface $entity_type_manager, @@ -69,14 +117,19 @@ final class CollectionHarvestForm extends FormBase { * {@inheritdoc} */ public function buildForm(array $form, FormStateInterface $form_state): array { + // Retrieve the Islandora "Collection" term. $term = $this->utils->getTermForUri('http://purl.org/dc/dcmitype/Collection'); + $collections = []; - if ($term) { + if ($term instanceof TermInterface) { + // Query all Islandora objects using that model. $query = $this->entityTypeManager->getStorage('node')->getQuery(); $query->condition('type', 'islandora_object'); $query->condition('field_model', $term->id(), 'IN'); $query->accessCheck(FALSE); $nids = $query->execute(); + + /** @var \Drupal\node\NodeInterface[] $nodes */ $nodes = $this->entityTypeManager->getStorage('node') ->loadMultiple($nids); foreach ($nodes as $node) { @@ -84,6 +137,7 @@ final class CollectionHarvestForm extends FormBase { } } + // Load media use taxonomy terms. $vid = 'islandora_media_use'; $terms = $this->entityTypeManager->getStorage('taxonomy_term') ->loadTree($vid); @@ -92,7 +146,7 @@ final class CollectionHarvestForm extends FormBase { $media_use_options[$term->tid] = $term->name; } - $form['#attributes']['id'] = 'collection-harvest-form'; + // Collection selector. $form['collection'] = [ '#type' => 'select', '#title' => $this->t('Collection'), @@ -100,6 +154,7 @@ final class CollectionHarvestForm extends FormBase { '#required' => TRUE, ]; + // Media Use selector. $form['media_use'] = [ '#type' => 'select', '#title' => $this->t('Media Use'), @@ -107,6 +162,7 @@ final class CollectionHarvestForm extends FormBase { '#required' => TRUE, ]; + // Submit button. $form['actions'] = [ '#type' => 'actions', 'submit' => [ @@ -115,6 +171,7 @@ final class CollectionHarvestForm extends FormBase { '#button_type' => 'primary', ], ]; + return $form; } @@ -125,6 +182,7 @@ final class CollectionHarvestForm extends FormBase { $collection_id = $form_state->getValue('collection'); $media_use_tid = $form_state->getValue('media_use'); + /** @var \Drupal\taxonomy\TermInterface|null $term */ $term = $this->entityTypeManager->getStorage('taxonomy_term') ->load($media_use_tid); if (!$term) { @@ -132,7 +190,7 @@ final class CollectionHarvestForm extends FormBase { return; } - // Load nodes in collection + // Find all Islandora objects that are members of the selected collection. $query = $this->entityTypeManager->getStorage('node')->getQuery(); $query->condition('type', 'islandora_object'); $query->condition('field_member_of', $collection_id); @@ -141,9 +199,15 @@ final class CollectionHarvestForm extends FormBase { $media_entities = []; foreach ($nids as $nid) { + /** @var \Drupal\node\NodeInterface $node */ $node = $this->entityTypeManager->getStorage('node')->load($nid); + if (!$node instanceof NodeInterface) { + continue; + } + + /** @var \Drupal\media\MediaInterface|null $media */ $media = $this->utils->getMediaWithTerm($node, $term); - if ($media) { + if ($media instanceof MediaInterface) { $source_field = $media->getSource() ->getConfiguration()['source_field'] ?? NULL; if ($source_field && $media->hasField($source_field) && !$media->get($source_field) @@ -159,14 +223,14 @@ final class CollectionHarvestForm extends FormBase { return; } - // Store in tempstore + // Store only IDs for serialization safety. $temp_store = $this->tempStoreFactory->get('islandora_collection_harvest'); $zip_filename = 'collection_' . time() . '.zip'; - $temp_store->set($zip_filename, $media_entities); - $this->messenger() - ->addStatus($this->t('Your ZIP download has been initiated. Please wait for the download to start.')); + $temp_store->set($zip_filename, array_map(fn($m) => $m->id(), $media_entities)); - // Redirect to download route + // Notify the user and redirect to download. + $this->messenger() + ->addStatus($this->t('Your ZIP download has been initiated. Please wait for it to start.')); $form_state->setRedirect('islandora_collection_harvest.download_zip', ['filename' => $zip_filename]); }