<?php

/**
 * @file
 * Class for determining MIME types and file extensions.
 *
 * This class is inspired by Chris Jean's work, here:
 * http://chrisjean.com/2009/02/14/generating-mime-type-in-php-is-not-magic/
 *
 * It does some MIME trickery, inspired by the need to deal with Openoffice
 * and MS Office 2007 file formats -- which are often mis-interpreted by
 * mime-magic, fileinfo, and the *nix `file` command.
 *
 * In Drupal 6, we also make use of file_get_mimetype. See:
 * http://api.drupal.org/api/function/file_get_mimetype/6
 * ... however this only provides a uni-directional lookup (ext->mime).
 * While I don't have a specific use case for a mime->extension lookup, I think
 * it's good to have in here.
 *
 * Drupal 7 will have better mime handlers.  See:
 * http://api.drupal.org/api/function/file_default_mimetype_mapping/7
 */

/**
 * Bidirectional lookup between file extensions and MIME types.
 *
 * Lookups consult a built-in shortlist first, then (for extension -> MIME
 * only) Drupal's file_get_mimetype() when that function is available, and
 * finally a mime.types file. Nothing here inspects file contents; every
 * decision is made from the file name or the MIME type string alone.
 */
class MimeDetect {

  /**
   * Shortlist mapping file extensions (keys) to MIME types (values).
   *
   * This is a shortlist of mimetypes which should catch most
   * mimetype<-->extension lookups in the context of Islandora collections.
   *
   * It has been cut from a much longer list.
   *
   * Two types of mimetypes should be put in this list:
   *  1) Special emerging formats which may not yet be expressed in the system
   *     mime.types file.
   *  2) Heavily used mimetypes of particular importance to the Islandora
   *     project, as lookups against this list will be quicker and less
   *     resource intensive than other methods.
   *
   * Lookups are first checked against this short list.  If no results are
   * found, then the lookup function may move on to check other sources,
   * namely the system's mime.types file.
   *
   * In most cases though, this short list should suffice.
   *
   * If modifying this list, please note that for promiscuous mimetypes
   * (those which map to multiple extensions, such as text/plain)
   * the method getExtension() will always return the *LAST* extension in
   * this list (the reverse map is built with array_flip(), where later
   * entries overwrite earlier ones), so you should put your preferred
   * extension *LAST*.
   *
   * e.g...
   * "jpeg"    => "image/jpeg",
   * "jpe"     => "image/jpeg",
   * "jpg"     => "image/jpeg",
   *
   * $this->getExtension('image/jpeg') will always return 'jpg'.
   *
   * @var array
   */
  protected $protectedMimeTypes = array(
    // Openoffice:
    'odb' => 'application/vnd.oasis.opendocument.database',
    'odc' => 'application/vnd.oasis.opendocument.chart',
    'odf' => 'application/vnd.oasis.opendocument.formula',
    'odg' => 'application/vnd.oasis.opendocument.graphics',
    'odi' => 'application/vnd.oasis.opendocument.image',
    'odm' => 'application/vnd.oasis.opendocument.text-master',
    'odp' => 'application/vnd.oasis.opendocument.presentation',
    'ods' => 'application/vnd.oasis.opendocument.spreadsheet',
    'odt' => 'application/vnd.oasis.opendocument.text',
    'otg' => 'application/vnd.oasis.opendocument.graphics-template',
    'oth' => 'application/vnd.oasis.opendocument.text-web',
    'otp' => 'application/vnd.oasis.opendocument.presentation-template',
    'ots' => 'application/vnd.oasis.opendocument.spreadsheet-template',
    'ott' => 'application/vnd.oasis.opendocument.text-template',
    // Staroffice:
    'stc' => 'application/vnd.sun.xml.calc.template',
    'std' => 'application/vnd.sun.xml.draw.template',
    'sti' => 'application/vnd.sun.xml.impress.template',
    'stw' => 'application/vnd.sun.xml.writer.template',
    'sxc' => 'application/vnd.sun.xml.calc',
    'sxd' => 'application/vnd.sun.xml.draw',
    'sxg' => 'application/vnd.sun.xml.writer.global',
    'sxi' => 'application/vnd.sun.xml.impress',
    'sxm' => 'application/vnd.sun.xml.math',
    'sxw' => 'application/vnd.sun.xml.writer',
    // K-office:
    'kil' => 'application/x-killustrator',
    'kpt' => 'application/x-kpresenter',
    'kpr' => 'application/x-kpresenter',
    'ksp' => 'application/x-kspread',
    'kwt' => 'application/x-kword',
    'kwd' => 'application/x-kword',
    // Ms office 97:
    'doc' => 'application/msword',
    'xls' => 'application/vnd.ms-excel',
    'ppt' => 'application/vnd.ms-powerpoint',
    // Office2007:
    'docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    'docm' => 'application/vnd.ms-word.document.macroEnabled.12',
    'dotx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.template',
    'dotm' => 'application/vnd.ms-word.template.macroEnabled.12',
    'xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    'xlsm' => 'application/vnd.ms-excel.sheet.macroEnabled.12',
    'xltx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.template',
    'xltm' => 'application/vnd.ms-excel.template.macroEnabled.12',
    'xlsb' => 'application/vnd.ms-excel.sheet.binary.macroEnabled.12',
    'xlam' => 'application/vnd.ms-excel.addin.macroEnabled.12',
    'pptx' => 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
    'pptm' => 'application/vnd.ms-powerpoint.presentation.macroEnabled.12',
    'ppsx' => 'application/vnd.openxmlformats-officedocument.presentationml.slideshow',
    'ppsm' => 'application/vnd.ms-powerpoint.slideshow.macroEnabled.12',
    'potx' => 'application/vnd.openxmlformats-officedocument.presentationml.template',
    'potm' => 'application/vnd.ms-powerpoint.template.macroEnabled.12',
    'ppam' => 'application/vnd.ms-powerpoint.addin.macroEnabled.12',
    'sldx' => 'application/vnd.openxmlformats-officedocument.presentationml.slide',
    'sldm' => 'application/vnd.ms-powerpoint.slide.macroEnabled.12',
    // Wordperfect (who cares?):
    'wpd' => 'application/wordperfect',
    // Common and generic containers:
    'pdf' => 'application/pdf',
    'eps' => 'application/postscript',
    'ps' => 'application/postscript',
    'rtf' => 'text/rtf',
    'rtx' => 'text/richtext',
    'latex' => 'application/x-latex',
    'tex' => 'application/x-tex',
    'texi' => 'application/x-texinfo',
    'texinfo' => 'application/x-texinfo',
    // *ml:
    'css' => 'text/css',
    'htm' => 'text/html',
    'html' => 'text/html',
    'wbxml' => 'application/vnd.wap.wbxml',
    'xht' => 'application/xhtml+xml',
    'xhtml' => 'application/xhtml+xml',
    'xsl' => 'text/xsl',
    'xslt' => 'text/xsl',
    'xml' => 'text/xml',
    'csv' => 'text/csv',
    'tsv' => 'text/tab-separated-values',
    'txt' => 'text/plain',
    // images:
    "bmp" => "image/bmp",
    'dng' => 'image/x-adobe-dng',
    "gif" => "image/gif",
    "ief" => "image/ief",
    "jpeg" => "image/jpeg",
    "jpe" => "image/jpeg",
    "jpg" => "image/jpeg",
    "jp2" => "image/jp2",
    "png" => "image/png",
    "tiff" => "image/tiff",
    "tif" => "image/tiff",
    "djvu" => "image/vnd.djvu",
    "djv" => "image/vnd.djvu",
    "wbmp" => "image/vnd.wap.wbmp",
    "ras" => "image/x-cmu-raster",
    "pnm" => "image/x-portable-anymap",
    "pbm" => "image/x-portable-bitmap",
    "pgm" => "image/x-portable-graymap",
    "ppm" => "image/x-portable-pixmap",
    "rgb" => "image/x-rgb",
    "xbm" => "image/x-xbitmap",
    "xpm" => "image/x-xpixmap",
    "xwd" => "image/x-windowdump",
    // videos:
    "mkv" => "video/x-matroska",
    "mpeg" => "video/mpeg",
    "mpe" => "video/mpeg",
    "mpg" => "video/mpeg",
    "m4v" => "video/mp4",
    "mp4" => "video/mp4",
    "ogv" => "video/ogg",
    "qt" => "video/quicktime",
    "mov" => "video/quicktime",
    "mxu" => "video/vnd.mpegurl",
    "avi" => "video/x-msvideo",
    "movie" => "video/x-sgi-movie",
    "flv" => "video/x-flv",
    "swf" => "application/x-shockwave-flash",
    // Audio:
    "mp3" => "audio/mpeg",
    "mp4a" => "audio/mp4",
    "m4a" => "audio/mp4",
    "oga" => "audio/ogg",
    "ogg" => "audio/ogg",
    "flac" => "audio/x-flac",
    "wav" => "audio/vnd.wave",
    // Chemical:
    // MDL Molfile.
    "mol" => "chemical/x-mdl-molfile",
    // XYZ format.
    "xyz" => "chemical/x-xyz",
    // PDB.
    "pdb" => "chemical/x-pdb",
    // ChemDraw CDX.
    'cdx' => 'chemical/x-cdx',
    // ChemDraw 3D.
    "c3d" => "chemical/x-chem3d",
    // ChemDraw file.
    "chm" => "chemical/x-chemdraw",
    // Crystallographic Information File.
    "cif" => "chemical/x-cif",
    // Chemical Markup Language.
    "cml" => "chemical/x-cml",
    // GAMESS Input.
    "inp" => "chemical/x-gamess-input",
    // GAMESS Output.
    "gam" => "chemical/x-gamess-output",
    // Gaussian Cube.
    "cub" => "chemical/x-gaussian-cube",
    // Gaussian 98/03 Cartesian Input.
    "gau" => "chemical/x-gaussian-input",
    // JCAMP Spectroscopic Data Exchange Format.
    "jdx" => "chemical/x-jcamp-dx",
    // OpenDX Grid.
    "dx" => "chemical/x-jcamp-dx",
    // MOPAC Cartesian.
    "mop" => "chemical/x-mopac-input",
    // Compressed formats:
    // (note: http://svn.cleancode.org/svn/email/trunk/mime.types)
    "tgz" => "application/x-gzip",
    "gz" => "application/x-gzip",
    "tar" => "application/x-tar",
    "gtar" => "application/x-gtar",
    "zip" => "application/x-zip",
    "rng" => "application/xml",
    // others:
    'bin' => 'application/octet-stream',
    // Web Archives:
    "warc" => "application/warc",
  );

  /**
   * Reverse shortlist (MIME type => extension), built in the constructor.
   *
   * @var array
   */
  protected $protectedFileExtensions;

  /**
   * Extra MIME type => extension pairs merged into the reverse shortlist.
   *
   * @var array
   */
  protected $extensionExceptions = array(
    // XXX: Deprecated... Only here due to old 'tif' => 'image/tif' mapping...
    // The correct MIMEtype is 'image/tiff'.
    'image/tif' => 'tif',
  );

  /**
   * Lazily-loaded extension => MIME type map parsed from the mime.types file.
   *
   * @var array|null
   */
  protected $systemTypes;

  /**
   * Lazily-loaded MIME type => extension map parsed from the mime.types file.
   *
   * @var array|null
   */
  protected $systemExts;

  /**
   * Path of the mime.types file consulted when the shortlist has no answer.
   *
   * @var string
   */
  protected $etcMimeTypes = '/etc/mime.types';

  /**
   * Constructor.
   *
   * Builds the reverse (MIME type => extension) shortlist and switches to a
   * 'mime.types' file in the current working directory when one is readable.
   */
  public function __construct() {

    // Populate the reverse shortlist. array_flip() keeps the LAST extension
    // listed for each MIME type; the union (+=) only adds exception keys that
    // are not already present.
    $this->protectedFileExtensions = array_flip($this->protectedMimeTypes);
    $this->protectedFileExtensions += $this->extensionExceptions;

    // Pick up a local mime.types file if it is available.
    if (is_readable('mime.types')) {
      $this->etcMimeTypes = 'mime.types';
    }
  }

  /**
   * Gets the MIME type associated with the given file's extension.
   *
   * The extension is whatever follows the last '.' in $filename, lowercased.
   * A name containing no '.' is treated in its entirety as the extension, so
   * a bare extension such as 'pdf' may be passed as well.
   *
   * Sources are tried in order: the built-in shortlist, Drupal's
   * file_get_mimetype() (when defined and it returns something other than
   * 'application/octet-stream'), the mime.types file, and finally the
   * 'application/octet-stream' fallback.
   *
   * @param string $filename
   *   The filename.
   * @param bool $debug
   *   When truthy, return a debug array instead of a plain string.
   *
   * @return string|array
   *   The MIME type, or, in debug mode, an array with the keys 'mime_type'
   *   and 'method' ('from_array', 'file_get_mimetype', 'mime.types' or
   *   'last_resort').
   */
  public function getMimetype($filename, $debug = FALSE) {

    $segments = explode('.', $filename);
    $ext = $this->lowercase(array_pop($segments));

    if (!empty($this->protectedMimeTypes[$ext])) {
      return $this->formatResult('mime_type', $this->protectedMimeTypes[$ext], 'from_array', $debug);
    }

    if (function_exists('file_get_mimetype')) {
      $drupal_mimetype = file_get_mimetype($filename);
      // 'application/octet-stream' is treated as "no answer" so the remaining
      // sources still get a chance.
      if ('application/octet-stream' !== $drupal_mimetype) {
        return $this->formatResult('mime_type', $drupal_mimetype, 'file_get_mimetype', $debug);
      }
    }

    if (!isset($this->systemTypes)) {
      $this->systemTypes = $this->systemExtensionMimetypes();
    }
    if (isset($this->systemTypes[$ext])) {
      return $this->formatResult('mime_type', $this->systemTypes[$ext], 'mime.types', $debug);
    }

    return $this->formatResult('mime_type', 'application/octet-stream', 'last_resort', $debug);
  }

  /**
   * Gets one valid file extension for a given MIME type.
   *
   * Sources are tried in order: the reverse shortlist, the mime.types file,
   * and finally the 'bin' fallback.
   *
   * @param string $mime_type
   *   The MIME type.
   * @param bool $debug
   *   When truthy, return a debug array instead of a plain string.
   *
   * @return string|array
   *   A file extension associated with the given MIME type, or, in debug
   *   mode, an array with the keys 'extension' and 'method' ('from_array',
   *   'mime.types' or 'last_resort').
   */
  public function getExtension($mime_type, $debug = FALSE) {

    if (!empty($this->protectedFileExtensions[$mime_type])) {
      return $this->formatResult('extension', $this->protectedFileExtensions[$mime_type], 'from_array', $debug);
    }

    if (!isset($this->systemExts)) {
      $this->systemExts = $this->systemMimetypeExtensions();
    }
    if (isset($this->systemExts[$mime_type])) {
      return $this->formatResult('extension', $this->systemExts[$mime_type], 'mime.types', $debug);
    }

    return $this->formatResult('extension', 'bin', 'last_resort', $debug);
  }

  /**
   * Gets an associative array of MIME type and extension associations.
   *
   * Uses the system mime.types file, or a local mime.types if one is found.
   *
   * @see MimeDetect::__construct()
   *
   * @return array
   *   An associative array where the keys are MIME types and the values
   *   extensions. Only the first extension of the first line mentioning a
   *   MIME type is kept.
   */
  protected function systemMimetypeExtensions() {
    $out = array();
    foreach ($this->readSystemMimeTypes() as $entry) {
      list($type, $extensions) = $entry;
      if (!isset($out[$type])) {
        // We take the first ext from the line if many are present.
        $out[$type] = $extensions[0];
      }
    }
    return $out;
  }

  /**
   * Gets an associative array of extensions and MIME types.
   *
   * Uses the system mime.types file, or a local mime.types if one is found.
   *
   * @see MimeDetect::__construct()
   *
   * @return array
   *   An associative array where the keys are extensions and the values
   *   MIME types. When an extension appears on several lines, the last line
   *   wins.
   */
  protected function systemExtensionMimetypes() {
    $out = array();
    foreach ($this->readSystemMimeTypes() as $entry) {
      list($type, $extensions) = $entry;
      foreach ($extensions as $extension) {
        $out[$extension] = $type;
      }
    }
    return $out;
  }

  /**
   * Parses the configured mime.types file.
   *
   * Everything from a '#' to the end of a line is discarded, blank lines are
   * skipped, and the remainder is split on whitespace.
   *
   * @return array
   *   A list, in file order, of array($mime_type, $extensions) pairs where
   *   $extensions is a non-empty, zero-indexed list of strings. Empty when the
   *   file is missing, is not a regular file, or cannot be opened.
   */
  protected function readSystemMimeTypes() {
    $entries = array();
    $path = $this->etcMimeTypes;

    // A directory named 'mime.types' would pass the constructor's
    // is_readable() check, so insist on a regular, readable file here.
    if (!is_file($path) || !is_readable($path)) {
      return $entries;
    }

    $file = fopen($path, 'r');
    if ($file === FALSE) {
      // Opening can still fail after the checks above; never hand FALSE to
      // fgets()/fclose().
      return $entries;
    }

    while (($line = fgets($file)) !== FALSE) {
      $line = trim(preg_replace('/#.*/', '', $line));
      if ($line === '') {
        continue;
      }
      $parts = preg_split('/\s+/', $line);
      // A single part means a mimetype without extensions, which we ignore.
      if (count($parts) < 2) {
        continue;
      }
      $type = array_shift($parts);
      $entries[] = array($type, $parts);
    }
    fclose($file);

    return $entries;
  }

  /**
   * Lowercases a string, using Drupal's helper when it is available.
   *
   * @param string $text
   *   The text to lowercase.
   *
   * @return string
   *   The lowercased text. Outside of Drupal this falls back to PHP's
   *   strtolower().
   */
  protected function lowercase($text) {
    if (function_exists('drupal_strtolower')) {
      return drupal_strtolower($text);
    }
    return strtolower($text);
  }

  /**
   * Shapes a lookup result according to the debug flag.
   *
   * @param string $key
   *   Key under which $value is reported in debug mode ('mime_type' or
   *   'extension').
   * @param mixed $value
   *   The lookup result.
   * @param string $method
   *   Name of the source that produced the result.
   * @param mixed $debug
   *   Debug flag as received from the caller; any truthy value enables debug
   *   output.
   *
   * @return mixed
   *   $value itself, or array($key => $value, 'method' => $method) in debug
   *   mode.
   */
  protected function formatResult($key, $value, $method, $debug) {
    if ($debug) {
      return array($key => $value, 'method' => $method);
    }
    return $value;
  }

  /**
   * Gets MIME type array.
   *
   * @return array
   *   Returns the shortlist: an associative array with extensions as keys and
   *   MIME types as values.
   */
  public function getMimeTypes() {
    return $this->protectedMimeTypes;
  }

  /**
   * Gets all shortlisted extensions for this MIME type.
   *
   * Only the built-in shortlist is searched; the mime.types file and the
   * extension exceptions are not consulted.
   *
   * @param string $mimetype
   *   The MIME type we are searching for.
   *
   * @return array
   *   An array of valid extensions for this MIME type, in shortlist order.
   */
  public function getValidExtensions($mimetype) {
    // Strict matching so that non-string input (e.g. 0 on PHP < 8) can never
    // match every MIME type through loose comparison.
    return array_keys($this->protectedMimeTypes, $mimetype, TRUE);
  }

}