# -*- coding: utf-8 -*-
# Moovida - Home multimedia server
# Copyright (C) 2006-2009 Fluendo Embedded S.L. (www.fluendo.com).
# All rights reserved.
#
# This file is available under one of two license agreements.
#
# This file is licensed under the GPL version 3.
# See "LICENSE.GPL" in the root of this distribution including a special
# exception to use Moovida with Fluendo's plugins.
#
# The GPL part of Moovida is also available under a commercial licensing
# agreement from Fluendo.
# See "LICENSE.Moovida" in the root directory of this distribution package
# for details on that license.
#
# Authors: Alessandro Decina <alessandro@fluendo.com>
#          Julien Moutte <julien@fluendo.com>

import gobject
gobject.threads_init()
import gst
import os
import time
import platform

# for thumbnails
from PIL import Image, ImageStat
try:
    from hashlib import md5
except ImportError:
    from md5 import md5

from twisted.internet import defer, reactor

from elisa.core.log import Loggable
from elisa.core.utils import locale_helper
from elisa.core.components.metadata_provider import MetadataError

# tag-like metadata keys; in this file they are only used to build
# supported_keys below
supported_metadata_keys = set(['artist', 'album', 'song', 'track', 
        'WM/AlbumTitle', 'WM/AlbumArtist', 'WM/TrackNumber', 'WM/Year',
        'thumbnail','date', 'date-time-modified'])
# a request made of exactly these keys only asks for the media type
media_type_keys = set(['uri', 'file_type', 'mime_type'])
# a request made of exactly these keys only asks for a thumbnail
thumbnail_keys = set(['uri', 'thumbnail', 'file_type', 'mime_type'])
supported_keys = supported_metadata_keys.union(media_type_keys)

# values taken by GstMetadataPipeline._seek_status while thumbnailing
SEEK_SCHEDULED = 'scheduled'
SEEK_DONE = 'done'

# expanduser() is broken on windows if we give it a unicode object, so we need
# to call it with a byte string and decode the result ourselves
_user_path = \
    os.path.expanduser("~").decode(locale_helper.filesystem_encoding())
# directory where the generated thumbnails are stored
THUMBNAIL_DIR = os.path.join(_user_path,
                             u".thumbnails",
                             u"large")
# maximum thumbnail width passed to PIL's Image.thumbnail()
THUMBNAIL_SIZE = 320

class ThumbnailError(MetadataError):
    """Thumbnail related metadata error.

    It is not raised anywhere in this part of the file.
    """
    pass

class TimeoutError(MetadataError):
    """Error given to the request deferred when the pipeline timed out
    before anything useful was found (see GstMetadataPipeline._timeout).
    """
    pass

class GstMetadataError(MetadataError):
    """Error built from a GStreamer bus error message (see
    GstMetadataPipeline._bus_message_error_cb).
    """
    pass

# These thumbnail functions should be in a common utils file
def get_thumbnail_location(uri):
    """Return the path of the thumbnail file associated with uri.

    The file name is the hexadecimal MD5 digest of the URI followed by
    ".png" and it is located in THUMBNAIL_DIR. The file is not created nor
    checked for existence here.

    uri can be a byte string or a unicode object.
    """
    if isinstance(uri, unicode):
        # md5() implicitly encodes unicode objects as ASCII and raises
        # UnicodeEncodeError on any non-ASCII character: hash the UTF-8 bytes
        # instead, which gives the same digest for plain ASCII URIs
        uri = uri.encode('utf-8')
    thumbnail_filename = u"%s.png" % md5(uri).hexdigest()
    return os.path.join(THUMBNAIL_DIR, thumbnail_filename)

def check_thumbnail_directory():
    """Make sure that THUMBNAIL_DIR exists, creating it if needed.

    The directory is created with permissions restricted to the owner.

    Returns True when the directory is available (it already existed or it
    has just been created), False otherwise.
    """
    # function-scope import: S_IRWXU is the owner-only rwx mode (octal 700)
    from stat import S_IRWXU

    if os.path.isdir(THUMBNAIL_DIR):
        return True

    try:
        os.makedirs(THUMBNAIL_DIR, S_IRWXU)
    except OSError:
        # the creation failed; this is still fine if the directory has been
        # created by somebody else in the meantime
        return os.path.isdir(THUMBNAIL_DIR)
    return True

def have_thumbnail(uri):
    """Tell whether a non-empty thumbnail file exists for uri.

    Returns True if the thumbnail file exists and its size is not zero,
    False otherwise.
    """
    location = get_thumbnail_location(uri)
    try:
        # a single stat() call: checking for existence first would leave a
        # window where the file can disappear and stat() would raise
        return os.stat(location).st_size != 0
    except OSError:
        return False

def tweak_gstreamer_elements():
    """Adjust the default GStreamer registry for metadata scanning.

    The jpeg and png typefinders, souphttpsrc and metadatademux get their
    rank raised above gst.RANK_PRIMARY and some typefinders are removed
    from the registry.
    """
    # the plugins have to be loaded before the rank of their features can be
    # overridden... yuck!
    for plugin_name in ('typefindfunctions', 'png', 'jpeg', 'ffmpeg', 'soup'):
        gst.plugin_load_by_name(plugin_name)

    registry = gst.registry_get_default()

    # raising the rank of these typefinders speeds up typefinding when
    # browsing Pictures
    promoted_typefinders = ('image/jpeg', 'image/png')
    # we don't really want to typefind .py files as audio files...
    removed_typefinders = ['fftype_wsaud']
    if platform.system() == 'Windows':
        removed_typefinders.extend(['application/x-ape',
                                    'application/x-apetag'])

    for typefinder in gst.type_find_factory_get_list():
        typefinder_name = typefinder.get_name()
        if typefinder_name in promoted_typefinders:
            typefinder.set_rank(gst.RANK_PRIMARY + 1)
        elif typefinder_name in removed_typefinders:
            registry.remove_feature(typefinder)

    # souphttpsrc is better than gnomevfs for http and metadatademux is
    # needed to retrieve exif tags: raise the rank of both when available
    for factory_name in ('souphttpsrc', 'metadatademux'):
        factory = gst.element_factory_find(factory_name)
        if factory:
            factory.set_rank(gst.RANK_PRIMARY + 1)

class GstMetadataPipeline(Loggable):
    """GStreamer pipeline retrieving the media type, the tags and optionally
    a thumbnail for one URI at a time.

    initialize() has to be called before the first get_metadata() request.
    """
    # when False the pipeline is destroyed and rebuilt after each request,
    # when True it is kept and only the dynamically plugged elements are
    # removed
    reuse_elements = False
    # delay given to reactor.callLater() before _timeout() is first called
    timeout = 2
    # additional delay granted by _timeout() when the media looks like a video
    thumb_timeout = 3

    def __init__(self):
        """Set up an idle object; initialize() builds the actual pipeline."""
        super(GstMetadataPipeline, self).__init__()

        self._pipeline = None
        self._bus = None
        self._src = None
        self._ffmpegcolorspace = None
        self._plugged_elements = []
        # relative positions in the stream that are tried in turn when
        # looking for an interesting thumbnail frame
        self._frame_locations = [1.0 / 3.0, 2.0 / 3.0, 0.1, 0.9, 0.5]
        self._probe_pad = None
        self._buffer_probe_id = None
        self._event_probe_id = None

        # pending delayed calls: defined here so that clean() can safely be
        # called even if initialize() has never been called
        self._timeout_call = None
        self._seek_call = None

        # other instance variables that need to be reset for each new metadata
        # request are set directly in _reset()

    def clean(self):
        self._clean_pipeline(finalize=True)

        if self._timeout_call is not None:
            self._timeout_call.cancel()
            self._timeout_call = None

        if self._seek_call is not None:
            self._seek_call.cancel()
            self._seek_call = None

    def initialize(self):
        """Tweak the GStreamer registry and build the pipeline.

        The pipeline is built by _reset() because no pipeline exists yet.
        """
        tweak_gstreamer_elements()
        self._reset()

    def _clean_pipeline(self, finalize=False):
        """Detach the current request from the pipeline.

        The thumbnail probes and the source element are removed, then the
        pipeline is either destroyed (reuse_elements is False or finalize is
        True) or stripped of its dynamically plugged elements so that it can
        be reused for the next request.
        """
        # NOTE(review): _build_pipeline() calls add_signal_watch() on the bus
        # but nothing in this method calls remove_signal_watch() before the
        # pipeline is dropped - confirm that the watch is not leaked

        # disconnect probes if any
        if self._probe_pad is not None:
            if self._buffer_probe_id is not None:
                self._probe_pad.remove_buffer_probe(self._buffer_probe_id)
            if self._event_probe_id is not None:
                self._probe_pad.remove_event_probe(self._event_probe_id)
         
            self._probe_pad = None
            self._buffer_probe_id = None
            self._event_probe_id = None

        # reset the pipeline to READY
        if self._pipeline is not None:
            # make the bus drop the messages of the request being cleaned
            self._bus.set_flushing(True)
            self._pipeline.set_state(gst.STATE_READY)
            # wait for the state change to complete
            self._pipeline.get_state()

        # the source element is specific to the URI of the request
        if self._src is not None:
            self._pipeline.remove(self._src)
            self._src.set_state(gst.STATE_NULL)
            self._src = None

        if not self.reuse_elements or finalize:
            # destroy the pipeline
            if self._pipeline is not None:
                self._bus.set_flushing(True)
                self._pipeline.set_state(gst.STATE_NULL)
                self._pipeline = None
                self._ffmpegcolorspace = None
                self._plugged_elements = []
        else:
            # remove dynamically plugged elements
            # (_close_pad() stores each sink before its queue, see there)
            for element in self._plugged_elements:
                self._pipeline.remove(element)
                element.set_state(gst.STATE_NULL)
            self._plugged_elements = []

    def _build_pipeline(self):
        """Create the static part of the pipeline: typefind ! decodebin2.

        The source element is plugged later, by get_metadata(). The pipeline
        is left in the READY state.
        """
        self._pipeline = pipeline = gst.Pipeline()
        self._bus = bus = pipeline.get_bus()
        bus.add_signal_watch()

        # bus messages we are interested in and their handlers
        bus_handlers = (
            ('message::application', self._bus_message_application_cb),
            ('message::error', self._bus_message_error_cb),
            ('message::eos', self._bus_message_eos_cb),
            ('message::tag', self._bus_message_tag_cb),
            ('message::state-changed', self._bus_message_state_changed_cb),
        )
        for signal_name, handler in bus_handlers:
            bus.connect(signal_name, handler)

        self._src = None

        self._typefind = typefind = gst.element_factory_make('typefind')
        typefind.connect('have-type', self._typefind_have_type_cb)
        pipeline.add(typefind)

        decodebin = gst.element_factory_make('decodebin2')
        decodebin_handlers = (
            ('autoplug-continue', self._decodebin_autoplug_continue_cb),
            ('autoplug-select', self._decodebin_autoplug_select_cb),
            ('new-decoded-pad', self._decodebin_new_decoded_pad_cb),
            ('unknown-type', self._decodebin_unknown_type_cb),
        )
        for signal_name, handler in decodebin_handlers:
            decodebin.connect(signal_name, handler)
        pipeline.add(decodebin)

        typefind.link(decodebin)

        pipeline.set_state(gst.STATE_READY)

    def _reset(self):
        """Forget the current request and get ready for the next one."""
        # NOTE: we end up calling gst_element_set_state so we MUST NOT be
        # called from the streaming thread
        self.debug('cleaning up current session')
        # the pipeline is destroyed if reuse_elements == False, otherwise it is
        # cleaned so that it can be reused
        self._clean_pipeline()

        # current request: the metadata dictionary, its uri value and the
        # deferred that is fired once the dictionary has been filled
        self._req_metadata = self._req_uri = self._req_defer = None

        # what the typefind::have-type signal told us
        self._typefind_caps = None
        self._typefind_file_type = self._typefind_mime_type = None

        # video/audio/image caps collected from the decodebin pads
        self._video_caps = self._audio_caps = self._image_caps = None

        # all the tags found in the stream
        self._tags = gst.TagList()

        # stream duration and seek state, used when doing a thumbnail
        self._duration = self._seek_status = self._seek_call = None
        self._seek_location_index = 0

        self._timeout_call = None

        # timestamps used for logging purposes
        self._start_timestamp = self._end_timestamp = 0

        # no pipeline: either we are called from initialize() or
        # self.reuse_elements == False, so build one for the next run
        if self._pipeline is None:
            self._build_pipeline()

    def _bus_message_error_cb(self, bus, message):
        gerror, debug = message.parse_error() 
        if self._typefind_file_type is not None or \
                self._video_caps is not None or \
                self._audio_caps is not None or \
                self._image_caps is not None:
            # we got an error going to PAUSED but we still can report the info
            # that we got from have_type_cb
            self.debug('error going to paused %s: %s', gerror.message, debug)
            self._done()
        else:
            self._failed(GstMetadataError('error'
                    ' domain: %r code: %r message: %s debug: %s' % 
                    (gerror.domain, gerror.code, gerror.message, debug)))

    def _bus_message_application_cb(self, bus, message):
        if message.structure.get_name() == 'metadata-done':
            self._done()
            return

    def _bus_message_eos_cb(self, bus, message):
        """Finish the request when the end of the stream is reached."""
        # FIXME: fail in case of no interesting caps found ?
        self.log('got EOS')
        self._done()

    def _bus_message_tag_cb(self, bus, message):
        """Add the tags carried by a tag message to the ones already stored."""
        new_tags = message.parse_tag()
        self._tags = self._tags.merge(new_tags, gst.TAG_MERGE_APPEND)
   
    def _bus_message_state_changed_cb(self, bus, message):
        if message.src is not self._pipeline:
            return

        prev, current, pending = message.parse_state_changed()
        if prev == gst.STATE_READY and current == gst.STATE_PAUSED:
            self.debug('reached PAUSED')
            self._done()

    def _typefind_have_type_cb(self, typefind, probability, caps):
        """Store the media type found by typefind and finish the request
        early when nothing more than what is already known is needed.

        We are in the streaming thread here, so the request is not finished
        directly: a 'metadata-done' application message is posted on the bus
        instead.
        """
        self.debug('have type %s' % caps)

        # self._typefind_caps = caps is broken, bug in the bindings
        # FIXME: fix the bug and change this asap
        self._typefind_caps = caps.copy()
        # _get_media_type_from_caps() returns a dictionary: the values have to
        # be looked up by key, unpacking it would only give the key names
        media_type = self._get_media_type_from_caps(caps)
        file_type = media_type['file_type']
        gst_mime_type = media_type['mime_type']
        self._typefind_mime_type = gst_mime_type
        self._typefind_file_type = file_type

        # NB: id3 tags most of the time are used with mp3 (even if it isn't
        # uncommon to find them with AIFF or WAV). Given that mp3 is by far the
        # most used audio format at the moment we make the common case fast here
        # by assuming that the file_type is audio. By doing this we also set the
        # mime_type to application/x-id3, but this doesn't matter at the moment
        # since we don't use the mime_type anywhere.
        if gst_mime_type == 'application/x-id3':
            file_type = self._typefind_file_type = 'audio'
        elif gst_mime_type == 'audio/x-m4a':
            # FIXME: see http://bugzilla.gnome.org/show_bug.cgi?id=340375 and
            # use this hack until we write our typefinder for this
            file_type = None
        elif gst_mime_type == 'video/x-ms-asf':
            # this typefinder matches quite a few wma files. Disable for
            # now so that the pipeline goes to PAUSED and the correct file type
            # is detected
            file_type = self._typefind_file_type = None

        req_keys = set(self._req_metadata.keys())
        if req_keys == media_type_keys and \
                file_type in ('video', 'audio', 'image'):
            # only the media type was requested and we have it
            finished = True
        elif file_type == 'video':
            # no thumbnail to generate: not requested or already there
            finished = 'thumbnail' not in req_keys or \
                    have_thumbnail(self._req_uri)
        elif file_type == 'image':
            # only a thumbnail was requested and it is already there
            finished = req_keys.intersection(supported_keys) in \
                    (thumbnail_keys, media_type_keys.union(thumbnail_keys)) \
                    and have_thumbnail(self._req_uri)
        else:
            finished = False

        if finished:
            self.debug('got media_type for %s, NOT going to paused',
                    self._req_uri)
            # we are in the streaming thread so we post a message on the bus
            # here and when we read it from the main thread we call _done()
            structure = gst.Structure('metadata-done')
            self._bus.post(gst.message_new_application(self._pipeline, structure))

    def _seek_next_thumbnail_location(self):
        """Schedule a seek to the next thumbnail location.

        The seek itself is done by _seek_next_thumbnail_location_real(),
        called by the reactor; until then buffers reaching the thumbnail
        probe are dropped because the seek status is SEEK_SCHEDULED.
        """
        # NOTE(review): the only caller in this file is the RGB buffer probe
        # callback, which seems to run in the GStreamer streaming thread, and
        # Twisted documents callLater() as not thread safe - confirm whether
        # reactor.callFromThread() is needed here
        self._seek_status = SEEK_SCHEDULED

        self._seek_call = \
            reactor.callLater(0, self._seek_next_thumbnail_location_real)

    def _seek_next_thumbnail_location_real(self):
        self._seek_call = None

        if self._duration is None:
            # first seek, get the duration
            try:
                self._duration, format = self._pipeline.query_duration(gst.FORMAT_TIME)
            except gst.QueryError, e:
                self.debug('duration query failed: %s', e)
                
                return

            if self._duration == -1:
                self.debug('invalid duration, not seeking')
                return self._done()
           
            self.debug('stream duration %s' % self._duration)
        
        if self._seek_location_index == len(self._frame_locations):
            self.debug('no more seek locations')
            return self._done()

        location = self._frame_locations[self._seek_location_index]
        self.debug('seek to location %d, time %s duration %s' %
                (self._seek_location_index,
                gst.TIME_ARGS(int(location * self._duration)),
                gst.TIME_ARGS(self._duration)))
        self._seek_location_index += 1
        
        seek_event = gst.event_new_seek(1.0, gst.FORMAT_TIME,
                gst.SEEK_FLAG_FLUSH | gst.SEEK_FLAG_KEY_UNIT,
                gst.SEEK_TYPE_SET, int(location * self._duration),
                gst.SEEK_TYPE_NONE, 0)
        # now send our seek event on the probe pad
        res = self._probe_pad.send_event(seek_event)
        seek_event = None
        
        self.debug('seek done res %s' % res)

    def _close_pad(self, pad):
        """Terminate a decoded pad with a leaky queue and a fakesink."""
        leaky_queue = gst.element_factory_make('queue')
        # the queue is leaky so that the demuxer doesn't block on full queues
        # if we take some time to do the thumbnail
        leaky_queue.props.leaky = 1
        fakesink = gst.element_factory_make('fakesink')
        self._pipeline.add(leaky_queue, fakesink)
        # the sink is stored before the queue so that, when the elements are
        # cleaned in order, the sink goes first and unblocks the queue if it's
        # blocked prerolling
        self._plugged_elements.extend((fakesink, leaky_queue))
        pad.link(leaky_queue.get_pad('sink'))
        leaky_queue.link(fakesink)
        for element in (leaky_queue, fakesink):
            element.set_state(gst.STATE_PAUSED)

    def _video_frame_rgb_thumbnail_probe_cb(self, pad, buf, data):
        """Buffer probe receiving a frame converted to RGB: save it as the
        thumbnail of the current URI.

        For anything but pictures the frame is also checked for being
        "boring" (low variance on every band); in that case a seek to the
        next location is scheduled to try another frame.

        Always returns False: the buffer is not needed downstream.
        """
        caps = buf.get_caps()
        width = caps[0]['width']
        height = caps[0]['height']
        # size in bytes of one line of the frame
        stride = buf.size / height

        self.debug('got RGB thumbnail frame with caps %s pts %d '
                'size %d stride %d' % (caps, buf.timestamp, buf.size, stride))

        # build PIL image from that buffer stride is round_up_2
        try:
            img = Image.frombuffer("RGB", (width, height),
                    buf, "raw", "RGB", stride, 1)
        except Exception, e:
            self.debug("Invalid frame: %s" % e)
            return False

        # for pictures we don't care about thumbnail being boring
        if self._typefind_file_type == 'image':
            boring = False
            # only used by the debug message below
            i = 0
        else:
            # generate statitics from this image and check if it's entertaining. 
            stat = ImageStat.Stat(img)
            boring = True
            # the frame is interesting as soon as one band has a variance
            # above 1000; i keeps that variance for the debug message below
            for i in stat.var:
                if i > 1000:
                    boring = False
                    break

        # the thumbnail is saved even if it is boring, so that there is
        # something to show if no better frame is found
        img.thumbnail((THUMBNAIL_SIZE, 240), Image.NEAREST)
        img.save(get_thumbnail_location(self._req_uri))

        # force cleanup
        stat = None
        img = None

        if boring:
            # we saved this image to start with and we want to try again
            self.debug('image is boring, scheduling seek to next location')
            self._seek_next_thumbnail_location()
        else:
            # call _done from main thread
            self.debug('image is not boring, we are done (var %i)' % i)
            # if we are only looking for a thumbnail we can terminate here
            keys = set(self._req_metadata.keys())
            if keys == thumbnail_keys:
                structure = gst.Structure('metadata-done')
                self._bus.post(gst.message_new_application(self._pipeline, structure))

        # we don't care about that buffer
        return False

    def _video_frame_thumbnail_event_probe_cb(self, pad, event, data):
        """Event probe noticing the newsegment that follows a seek.

        Always returns True: events are never dropped.
        """
        if self._seek_status == SEEK_SCHEDULED:
            if event.type == gst.EVENT_NEWSEGMENT:
                # the scheduled seek has been done, buffers can be looked at
                # again
                self.debug('received newsegment after seek, resuming normal operations')
                self._seek_status = SEEK_DONE

        return True

    def _video_frame_thumbnail_probe_cb(self, pad, buf, data):
        """Buffer probe on the decoded video pad: try to make a thumbnail out
        of the frame.

        The frame is pushed through a temporary
        ffmpegcolorspace ! capsfilter ! fakesink chain;
        _video_frame_rgb_thumbnail_probe_cb() gets the RGB result and saves
        it.

        Returns False to drop the buffer (a seek is scheduled), True to let
        it through so that the pipeline can reach PAUSED.
        """
        if self._seek_status == SEEK_SCHEDULED:
            self.debug('a seek is scheduled flushing buffer')
            # make the CPU idle on that thread to leave some space to the seek
            time.sleep(0.01)
            # drop the buffer as we will seek
            return False

        caps = buf.get_caps()

        self.debug('received buffer in thumbnail probe with caps %s pts %d '
                'size %d' % (caps, buf.timestamp, buf.size))

        # temporary elements, not added to the pipeline
        csp = gst.element_factory_make('ffmpegcolorspace')
        cf = gst.element_factory_make('capsfilter')
        sink = gst.element_factory_make('fakesink')

        # 24 bits RGB with the size of the incoming frame
        outcaps = gst.caps_from_string('video/x-raw-rgb, width=%d, height=%d, '
                'bpp=24, depth=24, red_mask=16711680, green_mask=65280, '
                'blue_mask=255, endianness=4321' %
                (caps[0]['width'], caps[0]['height']))
        self.debug('setting output caps on rgb pad to %s' % outcaps)
        cf.props.caps = outcaps

        sinkpad = csp.get_pad('sink')
        srcpad = csp.get_pad('src')

        cf.link(sink)
        csp.link(cf)

        sink.set_state(gst.STATE_PAUSED)
        cf.set_state(gst.STATE_PAUSED)
        csp.set_state(gst.STATE_PAUSED)

        # monitor src pad for a buffer
        id = srcpad.add_buffer_probe(self._video_frame_rgb_thumbnail_probe_cb, None)
        # push our incoming frame in there
        self.debug('push buffer for RGB conversion')
        sinkpad.chain(buf)

        srcpad.remove_buffer_probe(id)

        srcpad = None
        sinkpad = None

        # clean colorspace conversion element
        csp.set_state(gst.STATE_NULL)
        cf.set_state(gst.STATE_NULL)
        sink.set_state(gst.STATE_NULL)
        csp = None
        cf = None
        sink = None

        # the RGB probe schedules a seek when it finds the frame boring
        if self._seek_status == SEEK_SCHEDULED:
            self.debug('a seek is now scheduled dropping this buffer')
            # this frame was not interesting drop this buffer
            return False
        else:
            self.debug('this buffer was interesting trying to commit PAUSED')
            # we authorize that frame to complete PAUSED state
            return True

    def _decodebin_autoplug_continue_cb(self, decodebin, pad, caps):
        '''This function is called for each new pad detected while autoplugging.
        It's a good opportunity to optimize performance by considering some
        compressed media types as final caps and to gather caps information 
        about the media clip'''

        ret = True
        name = caps[0].get_name()
        req_keys = set(self._req_metadata.keys())
        
        if 'thumbnail' in req_keys:
            thumbnail = True
        else:
            thumbnail = False

        # when not thumbnailing and trying to reach PAUSED we can skip plugging
        # an expensive video decoder
        if not thumbnail and name in \
            ('video/x-h264', 'video/x-wmv', 'video/x-vp6', 'video/x-divx', \
             'video/x-xvid', 'video/x-h263', 'video/x-vp6-flash', \
             'video/x-msmpeg', 'video/x-flash-video'):
            self.debug('do not plug a video decoder for name %s' % name)
            if self._video_caps is None:
                self._video_caps = caps.copy()
            ret = False
        # we have a list of audio caps that won't ever contain tags
        if name in ('audio/x-wma', 'audio/x-ac3', 'audio/x-dts'):
            self.debug('do not plug an audio decoder for name %s' % name)
            if self._audio_caps is None:
                self._audio_caps = caps.copy()
            ret = False

        if name.startswith('video/x-raw'):
            ret = False
            if self._video_caps is None:
                if isinstance(pad, gst.GhostPad):
                    real_pad = pad.get_target()
                else:
                    real_pad = pad
                parent = real_pad.get_parent_element()
                sink_pad = parent.get_pad('sink')
                self._video_caps = sink_pad.get_caps()
                self.debug('storing video caps %s' % self._video_caps)
            # we want to plug a video decoder for thumbnailing
            if 'thumbnail' in req_keys and self._probe_pad is None:
                self.debug('adding a buffer probe on video pad with caps '
                        '%s for thumbnailing' % caps)
                self._buffer_probe_id = pad.add_buffer_probe(self._video_frame_thumbnail_probe_cb, None)
                self._event_probe_id = pad.add_event_probe(self._video_frame_thumbnail_event_probe_cb, None)
                self._probe_pad = pad
                    
        elif name.startswith('audio/x-raw'):
            if self._audio_caps is None:
                if isinstance(pad, gst.GhostPad):
                    real_pad = pad.get_target()
                else:
                    real_pad = pad
                parent = real_pad.get_parent_element()
                sink_pad = parent.get_pad('sink')
                self._audio_caps = sink_pad.get_caps()
                self.debug('storing audio caps %s' % self._audio_caps)
        elif name.startswith('image/'):
            if self._image_caps is None:
                self.debug('storing image caps %s' % caps)
                self._image_caps = caps.copy()
            if not thumbnail:
                ret = False

        return ret

    def _decodebin_autoplug_select_cb(self, decodebin, pad, caps, factory):
        name = caps[0].get_name()
        
        # prohibit hardware decoders for autoplugging
        if name.startswith('video'):
            # we can quickly filter on some factory names (like VA dec)
            if factory.get_name() == 'fluvadec':
                self.debug('detected hardware decoder %s, skipping' \
                        % factory.get_name())
                return 2 # AUTOPLUG_SELECT_SKIP
            for t in factory.get_static_pad_templates():
                if t.direction == gst.PAD_SRC:
                    for c in t.get_caps():
                        if c.get_name () == 'video/x-raw-ismd':
                            self.debug('detected hardware decoder %s, ' \
                                    'skipping' % factory.get_name())
                            return 2 # AUTOPLUG_SELECT_SKIP
        
        return 0 # AUTOPLUG_SELECT_TRY

    def _decodebin_new_decoded_pad_cb(self, decodebin, pad, is_last):
        """Terminate each new pad exposed by decodebin that is not linked
        yet (see _close_pad).
        """
        self.debug('new decoded pad %s, caps %s, is_last %s' % (pad,
                pad.get_caps(), is_last))

        if not pad.is_linked():
            self._close_pad(pad)
    
    def _decodebin_unknown_type_cb(self, decodebin, pad, caps):
        """Log the pads whose type is unknown to decodebin; nothing else is
        done about them here.
        """
        self.debug('unknown pad %s, caps %s' % (pad, caps))

    def _plug_src(self, uri):
        """Create and return the source element able to read uri.

        The element is neither added to the pipeline nor linked here.
        """
        src = gst.element_make_from_uri(gst.URI_SRC, uri)
        # FIXME: workaround for jpegdec that does a gst_buffer_join for each
        # gst_pad_chain.
        # (read the data in 1 MiB blocks)
        src.props.blocksize = 1 * 1024 * 1024

        return src

    def get_metadata(self, requested_metadata):
        """Start retrieving the metadata of requested_metadata['uri'].

        requested_metadata is the dictionary of the wanted keys. Returns a
        deferred fired when the request is finished. Only one request can be
        running at a time.
        """
        assert self._timeout_call is None

        self._req_metadata = requested_metadata
        self._req_uri = uri = requested_metadata['uri']
        self._req_defer = request_defer = defer.Deferred()

        self.debug('getting metadata %s' % self._req_metadata)

        self._start_timestamp = time.time()

        # thumbnails need a directory to be saved to, better check for it
        # before starting
        if 'thumbnail' in set(requested_metadata.keys()):
            check_thumbnail_directory()

        # plug the source element for this uri in front of typefind
        self._src = source = self._plug_src(uri)
        self._pipeline.add(source)
        source.link(self._typefind)

        self._timeout_call = reactor.callLater(self.timeout, self._timeout)

        # the bus may still be flushing if this is not the first request
        self._bus.set_flushing(False)
        self._pipeline.set_state(gst.STATE_PLAYING)

        return request_defer

    def _get_media_type_from_caps(self, caps):
        res = {}
        mime_type = caps[0].get_name()
        file_type = mime_type.split('/', 1)[0]

        return {'file_type': file_type, 'mime_type': mime_type}

    def _done(self):
        """Finish the current request successfully.

        Everything collected so far (media type, thumbnail location, tags) is
        gathered; the keys of the request dictionary whose value is None are
        filled in; then the object is reset and the request deferred is
        called back with the dictionary.
        """
        self.debug('_done called, gathering information')
        
        # cancel() raises on a delayed call that already fired or was already
        # cancelled
        if not self._timeout_call.called and not self._timeout_call.cancelled:
            self._timeout_call.cancel()

        # we can't check self._seek_call.called here because we don't know if we
        # scheduled a seek call at all
        if self._seek_call is not None:
            self._seek_call.cancel()
            self._seek_call = None

        self._end_timestamp = time.time()

        # keep references to the request: _reset() below clears the attributes
        metadata = self._req_metadata
        metadata_defer = self._req_defer

        # the media type comes from the first caps available, video caps
        # having precedence over audio caps and audio caps over image caps
        available_metadata = {}
        for caps in (self._video_caps, self._audio_caps,
                self._image_caps):
            if caps is not None:
                available_metadata.update(self._get_media_type_from_caps(caps))
                break

        # fallback to typefind caps
        if available_metadata.get('file_type') is None:
            available_metadata['file_type'] = self._typefind_file_type
            available_metadata['mime_type'] = self._typefind_mime_type

        if available_metadata['file_type'] in ('video', 'image') and \
            have_thumbnail(self._req_uri):
            thumbnail_location = get_thumbnail_location(self._req_uri)
            # We cannot pass unicode objects through AMP for now, we encode it
            # in pigment-friendly encoding, since this is what will be used to
            # open the file
            available_metadata['thumbnail'] = \
                    thumbnail_location.encode(locale_helper.pgm_file_encoding())

        tags = self._tags

        # this tag is never reported
        try:
            del tags['extended-comment']
        except KeyError:
            pass

        tag_keys = tags.keys()
        # tags reported under a different name than the GStreamer one
        for gst_key, elisa_key in (('track-number', 'track'),
                    ('title', 'song')):
            try:
                available_metadata[elisa_key] = tags[gst_key]
            except KeyError:
                pass

            
        # all the tags are also reported under their GStreamer name
        for key in tag_keys:
            value = tags[key]
            # FIXME: this was an old assumption, let's keep it until we update
            # all the old code
            # (only the first value of a multi-valued tag is kept)
            if isinstance(value, list):
                try:
                    value = value[0]
                except IndexError:
                    continue

            available_metadata[key] = value
       
        if 'date' in available_metadata:
            # we need to rewrite the date to a timestamp
            date = available_metadata['date']
            if date is not None:
                try:
                    time_stamp = time.mktime((date.year, date.month, \
                                              date.day, 0, 0, 0, 0, 0, -1))
                    available_metadata['date'] = time_stamp
                except (OverflowError, ValueError):
                    # the date is left as it is when it cannot be converted
                    pass

        # only fill in the requested keys that do not have a value yet
        for key, value in available_metadata.iteritems():
            try:
                if metadata[key] is None:
                    metadata[key] = value
            except KeyError:
                pass

        self.info('finished getting metadata %s, elapsed time %s' % 
                (metadata, self._end_timestamp - self._start_timestamp))
        
        # reset before firing the deferred
        self._reset()
        metadata_defer.callback(metadata)

    def _timeout(self, thumb_timeout=False):
        """Handle the expiration of the request timeout.

        thumb_timeout is True when the additional delay granted to videos
        has expired as well.
        """
        self.debug('timeout thumb %s video caps %s',
                thumb_timeout, self._video_caps)

        looks_like_video = self._typefind_file_type == 'video' or \
                self._video_caps is not None
        if looks_like_video and not thumb_timeout:
            # we may be trying to make a thumbnail: give some more time to
            # the pipeline
            self._timeout_call = \
                reactor.callLater(self.thumb_timeout, self._timeout, True)
            return

        thumbnail_only = set(self._req_metadata.keys()) == thumbnail_keys
        media_type_known = self._typefind_file_type is not None or \
                self._video_caps is not None or \
                self._audio_caps is not None or \
                self._image_caps is not None
        if media_type_known and not thumbnail_only:
            # timeout while going to paused. This can happen on really slow
            # machines while doing the thumbnail. Even if we didn't do the
            # thumbnail, we have some clue about the media type here.
            self._done()
        else:
            self._failed(TimeoutError('timeout'))

    def _failed(self, error):
        # cancel delayed calls
        if not self._timeout_call.called:
            self._timeout_call.cancel()

        if self._seek_call is not None:
            self._seek_call.cancel()
            self._seek_call = None
 
        self._end_timestamp = time.time()

        metadata = self._req_metadata
        metadata_defer = self._req_defer
        self.debug('error getting metadata %s, error: %s, '
                'elapsed time: %s, timeout %s' % (metadata, error,
                self._end_timestamp - self._start_timestamp,
                self._timeout_call.called))

        self._reset()

        metadata_defer.errback(error)
