<?php

// $Id$

// Export format identifiers. export_foxml_for_pid() takes one of these as its
// $format argument (defaulting to FOXML_11) and hands it to
// ObjectHelper::getObject() unchanged.
// NOTE(review): the values look like Fedora's serialization format URIs -
// confirm against the Fedora version in use.
define('FOXML_10', 'info:fedora/fedora-system:FOXML-1.0');
define('FOXML_11', 'info:fedora/fedora-system:FOXML-1.1');
define('METS_10', 'info:fedora/fedora-system:METSFedoraExt-1.0');
define('METS_11', 'info:fedora/fedora-system:METSFedoraExt-1.1');
// export_foxml_for_pid() has no path-rewriting branch for the two ATOM
// formats: when called with $remove_islandora set they reach the
// "Unknown or invalid format" default case of its switch.
define('ATOM_11', 'info:fedora/fedora-system:ATOM-1.1');
define('ATOMZip_11', 'info:fedora/fedora-system:ATOMZip-1.1');

/**
 * Exports all objects associated with a given pid to the export area.
 *
 * The datastream files are written first (export_objects_for_pid()), then the
 * object's XML is written (export_foxml_for_pid()) using the file paths the
 * first step produced.
 *
 * @param $pid
 *   PID of the object to export.
 * @param $foxml_dir
 *   Directory handed to export_foxml_for_pid() for the object XML.
 * @param $ob_dir
 *   Directory handed to export_objects_for_pid() for the datastream files.
 * @param $log
 *   Array of log lines; both steps append their messages to it by reference.
 *
 * @return
 *   TRUE when both steps succeed, FALSE as soon as one of them fails.
 */
function export_to_export_area($pid, $foxml_dir, $ob_dir, &$log = array()) {
  $paths = export_objects_for_pid($pid, $ob_dir, $log);

  // Only an explicit FALSE signals failure. An empty array just means there
  // was no datastream to write and must not abort the export.
  if ($paths === FALSE) {
    return FALSE;
  }

  return (bool) export_foxml_for_pid($pid, $foxml_dir, $paths, $log);
}

/**
 * Writes every datastream of an object to its own file below $dir.
 *
 * Files are named "<label>.<extension>", with the extension derived from the
 * datastream's MIME type by get_file_extension().
 *
 * @param $pid
 *   PID of the object whose datastreams are exported.
 * @param $dir
 *   Directory the datastream files are written to.
 * @param $log
 *   Array of log lines; messages are appended to it by reference.
 *
 * @return
 *   Array of target file paths keyed by datastream ID, or FALSE when the
 *   datastream list cannot be fetched or a file cannot be opened/written.
 *   A datastream whose content cannot be fetched is logged and skipped, but
 *   its path is still present in the returned array.
 */
function export_objects_for_pid($pid, $dir, &$log) {
  module_load_include('inc', 'fedora_repository', 'api/fedora_item');
  // Same include/class that export_foxml_for_pid() uses for its helper.
  module_load_include('inc', 'fedora_repository', 'ObjectHelper');

  $item = new Fedora_Item($pid);
  if (!$object = $item->get_datastreams_list_as_SimpleXML($pid)) {
    // No datastream is in scope yet, so only the pid can be reported here.
    $log[] = log_line(t("Failed to get datastream list for pid %pid", array('%pid' => $pid)), 'error');
    return FALSE;
  }

  $ob_helper = new ObjectHelper();

  // Datastreams added as a result of the ingest process
  $ignore_dsids = array('QUERY');

  $paths = array();
  foreach ($object->datastreamDef as $ds) {
    // Cast once: if the list really is SimpleXML the fields are objects, and
    // an object is not a legal array key. Plain strings are left untouched.
    $dsid = (string) $ds->ID;
    if (in_array($dsid, $ignore_dsids, TRUE)) {
      continue;
    }

    $file = $dir .'/'. (string) $ds->label .'.'. get_file_extension((string) $ds->MIMEType);
    $paths[$dsid] = $file;

    $content = $ob_helper->getStream($pid, $dsid, FALSE);
    if (!$content) {
      // Best effort: log and carry on with the remaining datastreams.
      $log[] = log_line(t("Failed to get datastream %dsid for pid %pid", array('%dsid' => $dsid, '%pid' => $pid)), 'error');
      continue;
    }

    // Warning suppressed on purpose: the failure is reported through $log.
    if (!$fp = @fopen($file, 'w')) {
      $log[] = log_line(t("Failed to open file %file to write datastream %dsid for pid %pid", array('%file' => $file, '%dsid' => $dsid, '%pid' => $pid)), 'error');
      return FALSE;
    }

    $written = fwrite($fp, $content);
    fclose($fp);

    if ($written === FALSE) {
      $log[] = log_line(t("Failed to write datastream %dsid for pid %pid to %file", array('%dsid' => $dsid, '%pid' => $pid, '%file' => $file)), 'error');
      return FALSE;
    }
  }

  return $paths;
}

/**
 * Writes the serialized XML of an object to "$dir/$pid.xml".
 *
 * When $remove_islandora is set, the XML is cleaned up before it is saved:
 * the hasModel / isMemberOfCollection relationships are removed, the QUERY
 * datastream is removed, and the content locations of the remaining
 * datastreams are pointed at the files listed in $paths.
 *
 * @param $pid
 *   PID of the object to export.
 * @param $dir
 *   Directory the XML file is written to.
 * @param $paths
 *   Array of exported datastream file paths keyed by datastream ID, as
 *   returned by export_objects_for_pid().
 * @param $log
 *   Array of log lines; messages are appended to it by reference.
 * @param $format
 *   One of the format constants; passed to ObjectHelper::getObject(). Only
 *   the FOXML_* and METS_* formats can be rewritten.
 * @param $remove_islandora
 *   TRUE to apply the clean-up described above, FALSE to save the XML as
 *   fetched.
 *
 * @return
 *   TRUE on success. FALSE when the XML cannot be fetched, parsed or saved,
 *   or when $remove_islandora is set and $format has no rewrite rules.
 */
function export_foxml_for_pid($pid, $dir, $paths, &$log, $format = FOXML_11, $remove_islandora = FALSE) {
  module_load_include('inc', 'fedora_repository', 'ObjectHelper');
  $ob_helper = new ObjectHelper();
  if (!$object_xml = $ob_helper->getObject($pid, 'migrate', $format)) {
    $log[] = log_line(t("Failed to get foxml for %pid", array('%pid' => $pid)), 'error');
    return FALSE;
  }

  $foxml = new DOMDocument();
  if (!$foxml->loadXML($object_xml)) {
    $log[] = log_line(t("Failed to parse foxml for %pid", array('%pid' => $pid)), 'error');
    return FALSE;
  }

  if ($remove_islandora) {
    $xpath = new DOMXPath($foxml);

    // Remove rdf elements added during ingest (if present)
    _export_foxml_strip_islandora_rels($xpath);

    // Update object paths in the foxml for this pid
    switch ($format) {
      case FOXML_10:
      case FOXML_11:
        _export_foxml_update_foxml_paths($xpath, $paths);
        break;

      case METS_10:
      case METS_11:
        _export_foxml_update_mets_paths($xpath, $paths);
        break;

      default:
        $log[] = log_line(t("Unknown or invalid format: %format", array('%format' => $format)), 'error');
        return FALSE;
    }
  }

  $file = $dir .'/'. $pid .'.xml';
  if (!$foxml->save($file)) {
    // The whole document is being written here, not a single datastream.
    $log[] = log_line(t("Failed to write foxml for pid %pid to %file", array('%pid' => $pid, '%file' => $file)), 'error');
    return FALSE;
  }

  $log[] = log_line(t("Exported %pid to %file", array('%pid' => $pid, '%file' => $file)), 'info');
  return TRUE;
}

/**
 * Removes the first hasModel and isMemberOfCollection relationship found
 * below the first rdf:RDF/rdf:Description element, if there is one.
 *
 * Elements are matched by local name so the lookup does not depend on the
 * namespace prefix used in the document.
 */
function _export_foxml_strip_islandora_rels($xpath) {
  $xpath->registerNamespace('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#');
  $descNode = $xpath->query("//rdf:RDF/rdf:Description")->item(0);
  if (!$descNode) {
    // Nothing to strip: the object carries no RDF description.
    return;
  }

  foreach (array('hasModel', 'isMemberOfCollection') as $localName) {
    $relNode = $xpath->query('.//*[local-name()="'. $localName .'"]', $descNode)->item(0);
    if ($relNode) {
      // Detach from the actual parent; the match may be nested deeper than
      // a direct child of the description element.
      $relNode->parentNode->removeChild($relNode);
    }
  }
}

/**
 * FOXML: drops the QUERY datastream and points every REF attribute of the
 * remaining, non-external datastreams at the exported file in $paths.
 */
function _export_foxml_update_foxml_paths($xpath, $paths) {
  // Control groups left untouched because their content is an external uri.
  $disallowed_groups = array('E', 'R');

  $xpath->registerNamespace('foxml', 'info:fedora/fedora-system:def/foxml#');
  foreach ($xpath->query("//foxml:datastream[@ID]") as $dsNode) {
    // Don't update datastreams having external uris
    if (in_array($dsNode->getAttribute('CONTROL_GROUP'), $disallowed_groups, TRUE)) {
      continue;
    }

    $dsId = $dsNode->getAttribute('ID');

    // Remove QUERY datastream; a removed node needs no further rewriting.
    if ($dsId == "QUERY") {
      $dsNode->parentNode->removeChild($dsNode);
      continue;
    }

    // No exported file known for this datastream: leave its REF as it is.
    if (!isset($paths[$dsId])) {
      continue;
    }
    $ref = url($paths[$dsId], array('absolute' => TRUE));

    foreach ($dsNode->getElementsByTagName('*') as $contentNode) {
      if ($contentNode->getAttribute('REF')) {
        $contentNode->setAttribute('REF', $ref);
      }
    }
  }
}

/**
 * METS: drops the QUERY file group and points the href of every FLocat of
 * the remaining, non-external files at the exported file in $paths.
 */
function _export_foxml_update_mets_paths($xpath, $paths) {
  $xpath->registerNamespace('METS', 'http://www.loc.gov/METS/');

  // The href is matched by local name within any namespace, so the lookup
  // works whichever URI the document binds its xlink prefix to.
  $href_query = 'METS:file[@OWNERID!="E"][@OWNERID!="R"]/METS:FLocat/@*[local-name()="href" and namespace-uri()!=""]';

  foreach ($xpath->query('//METS:fileGrp[@ID="DATASTREAMS"]/METS:fileGrp') as $dsNode) {
    $dsId = $dsNode->getAttribute('ID');

    // Remove QUERY datastream; a removed node needs no further rewriting.
    if ($dsId == "QUERY") {
      $dsNode->parentNode->removeChild($dsNode);
      continue;
    }

    // No exported file known for this datastream: leave its href as it is.
    if (!isset($paths[$dsId])) {
      continue;
    }
    $href = url($paths[$dsId], array('absolute' => TRUE));

    foreach ($xpath->query($href_query, $dsNode) as $hrefAttr) {
      // Go through the owning element so the value is escaped properly.
      $hrefAttr->ownerElement->setAttributeNS($hrefAttr->namespaceURI, $hrefAttr->nodeName, $href);
    }
  }
}

/**
 * Derives a file extension from a MIME type.
 *
 * The extension is the subtype, i.e. everything after the first "/", with
 * any parameters ("; charset=...") and surrounding whitespace removed.
 *
 * @param $mimeType
 *   MIME type string, e.g. "text/xml" or "text/xml; charset=utf-8".
 *
 * @return
 *   The subtype as a string; an empty string when $mimeType has no "/".
 */
function get_file_extension($mimeType) {
  $mimeType = (string) $mimeType;

  // Parameters are not part of the subtype and must not end up in file names.
  $semicolon = strpos($mimeType, ';');
  if ($semicolon !== FALSE) {
    $mimeType = substr($mimeType, 0, $semicolon);
  }

  $slash = strpos($mimeType, '/');
  if ($slash === FALSE) {
    return '';
  }

  // The cast keeps the result a string even where substr() yields FALSE for
  // an empty remainder.
  return trim((string) substr($mimeType, $slash + 1));
}

/**
 * Builds one log entry: timestamp, capitalised severity and message, in that
 * order, joined by $sep.
 *
 * @param $msg
 *   Message text.
 * @param $severity
 *   Severity label; its first character is upper-cased.
 * @param $sep
 *   Field separator, a tab by default.
 *
 * @return
 *   The formatted log line.
 */
function log_line($msg, $severity = 'info', $sep = "\t") {
  $fields = array(
    date("Y-m-d H:i:s"),
    ucfirst($severity),
    $msg,
  );
  return implode($sep, $fields);
}