# -*- coding: utf-8 -*-
# Copyright (C) 2010  Michał Masłowski  <mtjm@mtjm.eu>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


"""
Getting a delimited substring of a string.
"""


import re

from getmediumurl.compat import basestring


__all__ = ("get_substring", "set_get_substring_defaults")


#: Regular expression for typical start of an URL: matches a string that
#: consists solely of a lowercase scheme followed by ``://``.
_BEGIN_URL = re.compile(r"^[a-z]+://$")
#: Regular expression for typical end of a file path: matches a string
#: that consists solely of a dot followed by lowercase letters, digits
#: or further dots (e.g. ``.flv``, ``.tar.gz``).
# Raw strings keep ``\.`` from being parsed as an (invalid) string escape.
_END_URL = re.compile(r"^\.[.a-z0-9]+$")


def _fix_str(target, string):
    """Return `string` in a form usable for searching inside `target`.

    If `target` is a ``bytes`` object and `string` is a ``str``, then
    `string` is encoded as ASCII and returned as ``bytes``.  In every
    other case `string` is returned unchanged.
    """
    try:
        if not (isinstance(target, bytes) and isinstance(string, str)):
            return string
        return bytes(string, "ascii")
    except (NameError, TypeError):
        # Old Python: ``bytes`` is missing or does not accept an encoding
        # argument, and no conversion is needed there.
        return string


# Functions inspired by WatchVideo SVN revision 22 file src/plugin.py
# method Plugin.getUrlInPage.


def get_substring(string, start, end, include_start, include_end):
    """Return substring of `string` with specified `start` and `end`.

    On Python 3, if `string` is a ``bytes`` object and ``start`` or
    ``end`` is ``str``, they are assumed to be ASCII-only strings and
    are converted to ``bytes``.

    The first occurrence of `start` in `string` is used.  Each candidate
    end is searched for only after that occurrence of `start`; the
    candidates are tried in iteration order and the first one that is
    found wins, even if a later candidate occurs earlier in `string`.

    :Parameters:
      `string`
        the whole string
      `start`
        beginning of the substring to be returned
      `end`
        end of the substring to be returned, or an iterable of
        such substrings of which the first one found will be used
      `include_start`
        if true, then the returned value will begin with `start`
      `include_end`
        if true, then the returned value will end with `end`

    :Return:
      the substring, or `None` if `start` or every candidate `end` is
      not found

    """
    # A lone delimiter must be wrapped before the loop below: iterating
    # a text string would try each of its characters as a separate end
    # marker, and iterating ``bytes`` on Python 3 yields integers.
    try:
        single_types = (basestring, bytes)
    except NameError:
        # Old Python without ``bytes``.
        single_types = (basestring,)
    if isinstance(end, single_types):
        end = (end,)
    start = _fix_str(string, start)
    begin = string.find(start)
    if begin == -1:
        return None
    content_begin = begin + len(start)
    for an_end in end:
        an_end = _fix_str(string, an_end)
        content_end = string.find(an_end, content_begin)
        if content_end != -1:
            break
    else:
        # No candidate end was found (or `end` was empty).
        return None
    if not include_start:
        begin = content_begin
    if include_end:
        return string[begin:content_end + len(an_end)]
    return string[begin:content_end]


def set_get_substring_defaults(start="http://", end=".flv",
                               include_start=None, include_end=None):
    """Return arguments for `get_substring` with defaults filled in.

    :Parameters:
      `start`
        beginning of the substring to be returned
      `end`
        end of the substring to be returned, or an iterable of
        such substrings of which the first one found will be used
      `include_start`
        if true, then the returned value will begin with `start`
      `include_end`
        if true, then the returned value will end with `end`

    :Return:
      a tuple ``(start, end, include_start, include_end)`` with
      default values replaced; a single-string `end` is returned
      wrapped in a one-element tuple

    If `include_start` or `include_end` is `None` (default), then
    it will be set to true if it looks like start or end of an
    URL, i.e. if they match respectively `_BEGIN_URL` or
    `_END_URL` regular expression.

    If `end` is not a single substring, then all of them must match
    `_END_URL` to have `include_end` be true by default.
    """
    if isinstance(end, basestring):
        end = (end,)
    # Conversion to booleans is unnecessary, but makes tests simpler.
    if include_start is None:
        include_start = bool(_BEGIN_URL.match(start))
    if include_end is None:
        include_end = all(_END_URL.match(an_end) is not None
                          for an_end in end)
    return (start, end, include_start, include_end)
