# -*- coding: utf-8 -*-
# Copyright (C) 2011  Michał Masłowski  <mtjm@mtjm.eu>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


"""
A plugin to find all media linked to by a chosen page.
"""


from urlparse import urljoin

from getmediumurl.plugin import Plugin
from getmediumurl.xmlhtml import read_html, read_xml


__all__ = ("AllLinks",)


class AllLinks(Plugin):

    """A plugin using other plugins on linked pages.

    The page is fetched and parsed as HTML or XML; every ``href`` or
    ``src`` attribute found in it is resolved against the page URL and
    offered to the matcher.  Iterating an instance yields the media of
    all plugins matched this way.
    """

    # Largest number of matched links for which the page is still
    # accepted; pages with more matches are ignored so that using the
    # plugins does not cause too many requests.
    _MAX_LINKED_PLUGINS = 3

    def __init__(self, mediumid, matcher, plugins=None):
        """Initialize.

        The *plugins* argument is a sequence of plugin instances with
        media linked to by the page.  If `None` is used, then it will
        be determined using the page URL stored in *mediumid*.
        """
        super(AllLinks, self).__init__(mediumid, matcher)
        if plugins is None:
            plugins = self._match_plugins(mediumid, matcher)
        self.plugins = plugins

    def __iter__(self):
        """Iterate media of all linked plugins, in plugin order."""
        for plugin in self.plugins:
            for medium in plugin:
                yield medium

    @classmethod
    def _match_plugins(cls, mediumid, matcher):
        """Return plugins matched on links from the page at *mediumid*.

        The result is a list of plugin instances, in document order of
        the links they were matched on.  An empty tuple is returned
        instead if the URL reader raises `ValueError`, if the content
        type is neither HTML nor XML, or if more than
        `_MAX_LINKED_PLUGINS` links were matched.
        """
        try:
            reader = matcher.urlreader(mediumid)
        except ValueError:
            # NOTE(review): presumably raised for URLs which cannot be
            # read -- confirm against the matcher's URL reader.
            return ()
        # Treat a missing content type as "not parseable".
        content_type = reader.content_type or ""
        if "html" in content_type:
            page = read_html(reader.content)
        elif "xml" in content_type:
            page = read_xml(reader.content)
        else:
            return ()
        plugins = []
        for element in page.getiterator("*"):
            link = element.get("href") or element.get("src")
            if not link:
                # Check before joining: urljoin() returns the base URL
                # for an empty or missing second argument, which would
                # make every element without a link match the page
                # itself.
                continue
            plugin = matcher.match(urljoin(mediumid, link), fast=True)
            if plugin is not None:
                plugins.append(plugin)
        # Don't make too many requests when using the plugins.
        if len(plugins) > cls._MAX_LINKED_PLUGINS:
            return ()
        return plugins

    @classmethod
    def match(cls, url, matcher):
        """Match every page with links to matching pages.

        Return an instance for *url* if at least one linked page is
        matched by another plugin, otherwise `None`.
        """
        plugins = cls._match_plugins(url, matcher)
        if plugins:
            return cls(url, matcher, plugins)
        return None
