Adding new stuff

2017-10-08 12:00:02 +01:00 · 2017-10-08 12:00:02 +01:00 · a410da0e04
commit a410da0e04
parent 39ee792ad4
722 changed files with 331 additions and 189 deletions
--- a/vim-plugins/python-mode/pymode/libs/logilab/common/textutils.py
+++ b/vim-plugins/python-mode/pymode/libs/logilab/common/textutils.py
@@ -1,537 +0,0 @@
-# copyright 2003-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
-# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
-#
-# This file is part of logilab-common.
-#
-# logilab-common is free software: you can redistribute it and/or modify it under
-# the terms of the GNU Lesser General Public License as published by the Free
-# Software Foundation, either version 2.1 of the License, or (at your option) any
-# later version.
-#
-# logilab-common is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-# FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
-# details.
-#
-# You should have received a copy of the GNU Lesser General Public License along
-# with logilab-common.  If not, see <http://www.gnu.org/licenses/>.
-"""Some text manipulation utility functions.
-
-
-:group text formatting: normalize_text, normalize_paragraph, pretty_match,\
-unquote, colorize_ansi
-:group text manipulation: searchall, splitstrip
-:sort: text formatting, text manipulation
-
-:type ANSI_STYLES: dict(str)
-:var ANSI_STYLES: dictionary mapping style identifier to ANSI terminal code
-
-:type ANSI_COLORS: dict(str)
-:var ANSI_COLORS: dictionary mapping color identifier to ANSI terminal code
-
-:type ANSI_PREFIX: str
-:var ANSI_PREFIX:
-  ANSI terminal code notifying the start of an ANSI escape sequence
-
-:type ANSI_END: str
-:var ANSI_END:
-  ANSI terminal code notifying the end of an ANSI escape sequence
-
-:type ANSI_RESET: str
-:var ANSI_RESET:
-  ANSI terminal code resetting format defined by a previous ANSI escape sequence
-"""
-__docformat__ = "restructuredtext en"
-
-import sys
-import re
-import os.path as osp
-from warnings import warn
-from unicodedata import normalize as _uninormalize
-try:
-    from os import linesep
-except ImportError:
-    linesep = '\n' # gae
-
-from logilab.common.deprecation import deprecated
-
-# Hand-written ascii replacements, keyed by unicode character. `unormalize`
-# looks a character up here first and only falls back to NFKD decomposition
-# when it is not listed.
-MANUAL_UNICODE_MAP = {
-    u'\xa1': u'!',    # INVERTED EXCLAMATION MARK
-    u'\u0142': u'l',  # LATIN SMALL LETTER L WITH STROKE
-    u'\u2044': u'/',  # FRACTION SLASH
-    u'\xc6': u'AE',   # LATIN CAPITAL LETTER AE
-    u'\xa9': u'(c)',  # COPYRIGHT SIGN
-    u'\xab': u'"',    # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xe6': u'ae',   # LATIN SMALL LETTER AE
-    u'\xae': u'(r)',  # REGISTERED SIGN
-    u'\u0153': u'oe', # LATIN SMALL LIGATURE OE
-    u'\u0152': u'OE', # LATIN CAPITAL LIGATURE OE
-    u'\xd8': u'O',    # LATIN CAPITAL LETTER O WITH STROKE
-    u'\xf8': u'o',    # LATIN SMALL LETTER O WITH STROKE
-    u'\xbb': u'"',    # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xdf': u'ss',   # LATIN SMALL LETTER SHARP S
-    }
-
-def unormalize(ustring, ignorenonascii=None, substitute=None):
-    """return an ascii-only approximation of a unicode string
-
-    Each character is first looked up in `MANUAL_UNICODE_MAP`. When it is not
-    listed there, it is decomposed with the NFKD normal form (compatibility
-    decomposition) and only the first character of the result is kept.
-
-    :type ustring: unicode
-    :param ustring: the string to transliterate
-
-    :type ignorenonascii: bool or None
-    :param ignorenonascii:
-      deprecated: any value other than None triggers a DeprecationWarning,
-      and a true value is handled as `substitute=''`
-
-    :type substitute: str
-    :param substitute: replacement character to use if decomposition fails
-
-    :raise ValueError:
-      if a character has no ascii equivalent and `substitute` is None
-
-    :rtype: unicode
-    :return: the transliterated string
-
-    :see: Another project about ASCII transliterations of Unicode text
-          http://pypi.python.org/pypi/Unidecode
-    """
-    if ignorenonascii is not None:
-        # backward compatibility, ignorenonascii was a boolean
-        warn("ignorenonascii is deprecated, use substitute named parameter instead",
-             DeprecationWarning, stacklevel=2)
-        if ignorenonascii:
-            substitute = ''
-    converted = []
-    for char in ustring[:]:
-        if char in MANUAL_UNICODE_MAP:
-            converted.append(MANUAL_UNICODE_MAP[char])
-            continue
-        # keep the base character only, dropping combining marks
-        base = _uninormalize('NFKD', char)[0]
-        if ord(base) < 2 ** 7:
-            converted.append(base)
-        elif substitute is None:
-            raise ValueError("can't deal with non-ascii based characters")
-        else:
-            converted.append(substitute)
-    return u''.join(converted)
-
-def unquote(string):
-    """remove optional quotes (simple or double) from the string
-
-    The leading and the trailing character are checked independently, so an
-    unbalanced quote is removed as well.
-
-    :type string: str or unicode
-    :param string: an optionally quoted string
-
-    :rtype: str or unicode
-    :return: the unquoted string (or the input string if it wasn't quoted)
-    """
-    if not string:
-        return string
-    if string[0] in '"\'':
-        string = string[1:]
-    # the string is empty at this point when the input was a lone quote
-    # character, in which case there is no last character to look at
-    if string and string[-1] in '"\'':
-        string = string[:-1]
-    return string
-
-
-# separator between two paragraphs: an empty line, with or without '\r'
-_BLANKLINES_RGX = re.compile('\r?\n\r?\n')
-# any run of whitespace; raw string since '\s' is not a valid string escape
-_NORM_SPACES_RGX = re.compile(r'\s+')
-
-def normalize_text(text, line_len=80, indent='', rest=False):
-    """normalize a text to display it with a maximum line size and
-    optionally arbitrary indentation. Line jumps are normalized but blank
-    lines are kept. The indentation string may be used to insert a
-    comment (#) or a quoting (>) mark  for instance.
-
-    The text is cut into paragraphs on blank lines, each paragraph is
-    normalized on its own, and the results are joined back with a line
-    holding only the indentation string.
-
-    :type text: str or unicode
-    :param text: the input text to normalize
-
-    :type line_len: int
-    :param line_len: expected maximum line's length, default to 80
-
-    :type indent: str or unicode
-    :param indent: optional string to use as indentation
-
-    :type rest: bool
-    :param rest:
-      when true, paragraphs are normalized with `normalize_rest_paragraph`
-      instead of `normalize_paragraph`
-
-    :rtype: str or unicode
-    :return:
-      the input text normalized to fit on lines with a maximized size
-      inferior to `line_len`, and optionally prefixed by an
-      indentation string
-    """
-    if rest:
-        normp = normalize_rest_paragraph
-    else:
-        normp = normalize_paragraph
-    paragraphs = [normp(paragraph, line_len, indent)
-                  for paragraph in _BLANKLINES_RGX.split(text)]
-    return ('%s%s%s' % (linesep, indent, linesep)).join(paragraphs)
-
-
-def normalize_paragraph(text, line_len=80, indent=''):
-    """normalize a paragraph to display it with a maximum line size and
-    optionally arbitrary indentation. Every run of whitespace (line jumps
-    included) is collapsed into a single space before the text is wrapped.
-    The indentation string may be used to insert a comment mark for
-    instance.
-
-    :type text: str or unicode
-    :param text: the input text to normalize
-
-    :type line_len: int
-    :param line_len: expected maximum line's length, default to 80
-
-    :type indent: str or unicode
-    :param indent: optional string to use as indentation
-
-    :rtype: str or unicode
-    :return:
-      the input text normalized to fit on lines with a maximized size
-      inferior to `line_len`, and optionally prefixed by an
-      indentation string
-    """
-    remaining = _NORM_SPACES_RGX.sub(' ', text)
-    # room left for the text itself once the indentation is written
-    width = line_len - len(indent)
-    wrapped = []
-    while remaining:
-        chunk, remaining = splittext(remaining.strip(), width)
-        wrapped.append(indent + chunk)
-    return linesep.join(wrapped)
-
-def normalize_rest_paragraph(text, line_len=80, indent=''):
-    """normalize a ReST text to display it with a maximum line size and
-    optionally arbitrary indentation. The indentation string may be used
-    to insert a comment mark for instance.
-
-    Unlike `normalize_paragraph`, the text is processed one input line at a
-    time: whitespace runs inside a line are collapsed, too long lines are
-    split with `splittext`, and input lines are never merged together.
-
-    :type text: str or unicode
-    :param text: the input text to normalize
-
-    :type line_len: int
-    :param line_len: expected maximum line's length, default to 80
-
-    :type indent: str or unicode
-    :param indent: optional string to use as indentation
-
-    :rtype: str or unicode
-    :return:
-      the input text normalized to fit on lines with a maximized size
-      inferior to `line_len`, and optionally prefixed by an
-      indentation string
-    """
-    toreport = ''
-    lines = []
-    # room left for the text itself once the indentation is written
-    line_len = line_len - len(indent)
-    for line in text.splitlines():
-        # NOTE(review): `toreport` is reset to '' before every pass through
-        # this statement, so the prefix added here is always empty
-        line = toreport + _NORM_SPACES_RGX.sub(' ', line.strip())
-        toreport = ''
-        while len(line) > line_len:
-            # too long line, need split
-            line, toreport = splittext(line, line_len)
-            lines.append(indent + line)
-            if toreport:
-                # keep splitting what is left; the added blank is stripped
-                # below if this ends up being the last piece
-                line = toreport + ' '
-                toreport = ''
-            else:
-                line = ''
-        if line:
-            lines.append(indent + line.strip())
-    return linesep.join(lines)
-
-
-def splittext(text, line_len):
-    """cut `text` on a space so that its first part fits in `line_len`
-
-    return a 2-uple:
-    * a line <= line_len if possible (when there is no space to cut on
-      before the limit, the cut is done on the first space after it)
-    * the rest of the text which has to be reported on another line
-    """
-    size = len(text)
-    if size <= line_len:
-        return text, ''
-    if line_len > 0:
-        # last space found in text[1:line_len + 1]
-        cut = text.rfind(' ', 1, line_len + 1)
-        if cut == -1:
-            cut = 0
-    else:
-        cut = line_len
-    if cut == 0:
-        # nothing to cut on before the limit: use the first space after it,
-        # or keep the whole text when there is none
-        cut = text.find(' ', line_len)
-        if cut == -1:
-            cut = size
-    head = text[:cut]
-    tail = text[cut + 1:].strip()
-    return head, tail
-
-
-def splitstrip(string, sep=','):
-    """return a list of stripped string by splitting the string given as
-    argument on `sep` (',' by default). Empty string are discarded.
-
-    >>> splitstrip('a, b, c   ,  4,,')
-    ['a', 'b', 'c', '4']
-    >>> splitstrip('a')
-    ['a']
-    >>>
-
-    :type string: str or unicode
-    :param string: a csv line
-
-    :type sep: str or unicode
-    :param sep: field separator, default to the comma (',')
-
-    :rtype: list of str or unicode
-    :return: the non empty fields, without surrounding whitespace
-    """
-    # strip each field once, then drop the ones left empty
-    stripped = (word.strip() for word in string.split(sep))
-    return [word for word in stripped if word]
-
-# former name of `splitstrip`, kept available through the `deprecated`
-# decorator imported from logilab.common.deprecation
-get_csv = deprecated('get_csv is deprecated, use splitstrip')(splitstrip)
-
-
-def split_url_or_path(url_or_path):
-    """split a string containing either an url of the form
-    <scheme>://<path> or a local file system path on its last separator
-
-    Trailing separators are ignored. Both the leading part and the last
-    component are returned: as a list for an url (split on '/'), as the
-    tuple built by `os.path.split` for a file system path.
-    """
-    if '://' not in url_or_path:
-        return osp.split(url_or_path.rstrip(osp.sep))
-    trimmed = url_or_path.rstrip('/')
-    return trimmed.rsplit('/', 1)
-
-
-def text_to_dict(text):
-    """parse multilines text containing simple 'key=value' lines and return a
-    dict of {'key': 'value'}. When the same key is encountered multiple time,
-    value is turned into a list containing all values.
-
-    Blank lines and lines starting with '#' are skipped; keys and values are
-    stripped, and only the first '=' of a line is used as separator.
-
-    >>> d = text_to_dict('''multiple=1
-    ... multiple= 2
-    ... single =3
-    ... ''')
-    >>> d['single']
-    '3'
-    >>> d['multiple']
-    ['1', '2']
-
-    """
-    parsed = {}
-    if not text:
-        return parsed
-    for raw in text.splitlines():
-        entry = raw.strip()
-        if not entry or entry.startswith('#'):
-            continue
-        key, value = [part.strip() for part in entry.split('=', 1)]
-        if key not in parsed:
-            parsed[key] = value
-            continue
-        previous = parsed[key]
-        try:
-            # third and later occurrences: the value is already a list
-            previous.append(value)
-        except AttributeError:
-            # second occurrence: turn the single value into a list
-            parsed[key] = [previous, value]
-    return parsed
-
-
-# one or more whitespace characters or commas
-_BLANK_URE = r'(\s|,)+'
-_BLANK_RE = re.compile(_BLANK_URE)
-# optionally negative number: either digits followed by a dot and optional
-# digits, or digits optionally prefixed with '0' or '0x'
-__VALUE_URE = r'-?(([0-9]+\.[0-9]*)|((0x?)?[0-9]+))'
-# a unit is a run of ascii letters
-__UNITS_URE = r'[a-zA-Z]+'
-# a number and its optional unit, available as the "value" and "unit" groups
-_VALUE_RE = re.compile(r'(?P<value>%s)(?P<unit>%s)?'%(__VALUE_URE, __UNITS_URE))
-# a whole string is accepted when it is made of any count of number+unit
-# pairs, optionally followed by one last number without unit
-_VALIDATION_RE = re.compile(r'^((%s)(%s))*(%s)?$' % (__VALUE_URE, __UNITS_URE,
-                                                    __VALUE_URE))
-
-# size units, expressed in bytes
-BYTE_UNITS = {
-    "b": 1,
-    "kb": 1024,
-    "mb": 1024 ** 2,
-    "gb": 1024 ** 3,
-    "tb": 1024 ** 4,
-}
-
-# duration units, expressed in seconds
-TIME_UNITS = {
-    "ms": 0.001,  # a millisecond is a thousandth of a second
-    "s": 1,
-    "min": 60,
-    "h": 60 * 60,
-    "d": 60 * 60 *24,
-}
-
-def apply_units(string, units, inter=None, final=float, blank_reg=_BLANK_RE,
-                value_reg=_VALUE_RE):
-    """Parse the string applying the units defined in units
-    (e.g.: "1.5m", {'m': 60} -> 90.0).
-
-    Every value found in the string is multiplied by its unit, if any, and
-    the values are summed up.
-
-    :type string: str or unicode
-    :param string: the string to parse
-
-    :type units: dict (or any object with __getitem__ using basestring key)
-    :param units: a dict mapping a unit string repr to its value
-
-    :type inter: type
-    :param inter:
-      used to parse every intermediate value (the results are multiplied by
-      the unit value and summed), default to `final`
-
-    :type final: type
-    :param final: used to convert the sum of all values, default to float
-
-    :type blank_reg: regexp
-    :param blank_reg: should match every blank char to ignore.
-
-    :type value_reg: regexp with "value" and optional "unit" group
-    :param value_reg: match a value and its optional unit into the
-      "value" and "unit" named groups
-
-    :raise ValueError:
-      if the string is empty once blanks are removed or if it does not match
-      the module level validation pattern
-    :raise KeyError: if a unit is not defined in `units`
-
-    :return: the sum of every parsed value, converted using `final`
-    """
-    if inter is None:
-        inter = final
-    # honour the caller's pattern (this used to be hard-coded to _BLANK_RE)
-    fstring = blank_reg.sub('', string)
-    if not (fstring and _VALIDATION_RE.match(fstring)):
-        raise ValueError("Invalid unit string: %r." % string)
-    values = []
-    for match in value_reg.finditer(fstring):
-        dic = match.groupdict()
-        lit, unit = dic["value"], dic.get("unit")
-        value = inter(lit)
-        if unit is not None:
-            try:
-                # units are looked up in lower case
-                value *= units[unit.lower()]
-            except KeyError:
-                raise KeyError('invalid unit %s. valid units are %s' %
-                               (unit, units.keys()))
-        values.append(value)
-    return final(sum(values))
-
-
-# any end of line: '\r\n', a run of '\r', or '\n'
-_LINE_RGX = re.compile('\r\n|\r+|\n')
-
-def pretty_match(match, string, underline_char='^'):
-    """return a string with the match location underlined:
-
-    >>> import re
-    >>> print(pretty_match(re.search('mange', 'il mange du bacon'), 'il mange du bacon'))
-    il mange du bacon
-       ^^^^^
-    >>>
-
-    :type match: _sre.SRE_match
-    :param match: object returned by re.match, re.search or re.finditer
-
-    :type string: str or unicode
-    :param string:
-      the string on which the regular expression has been applied to
-      obtain the `match` object
-
-    :type underline_char: str or unicode
-    :param underline_char:
-      character to use to underline the matched section, default to the
-      caret '^'
-
-    :rtype: str or unicode
-    :return:
-      the original string with an inserted line to underline the match
-      location
-    """
-    start = match.start()
-    end = match.end()
-    # NOTE(review): start/end were computed on the string as given, while the
-    # searches below run on the string with its ends of line rewritten; the
-    # offsets presumably drift when that rewriting changes the length - confirm
-    string = _LINE_RGX.sub(linesep, string)
-    # locate the beginning of the line holding the start of the match
-    start_line_pos = string.rfind(linesep, 0, start)
-    if start_line_pos == -1:
-        start_line_pos = 0
-        result = []
-    else:
-        # keep everything before that line as the first item of the result
-        result = [string[:start_line_pos]]
-        start_line_pos += len(linesep)
-    offset = start - start_line_pos
-    underline = ' ' * offset + underline_char * (end - start)
-    # locate the end of the line holding the end of the match
-    end_line_pos = string.find(linesep, end)
-    if end_line_pos == -1:
-        string = string[start_line_pos:]
-        result.append(string)
-        result.append(underline)
-    else:
-        # `end` is reused here to hold the text following the underlined line
-        end = string[end_line_pos + len(linesep):]
-        string = string[start_line_pos:end_line_pos]
-        result.append(string)
-        result.append(underline)
-        result.append(end)
-    return linesep.join(result).rstrip()
-
-
-# Ansi colorization ###########################################################
-
-# start of an ANSI escape sequence
-ANSI_PREFIX = '\033['
-# end of an ANSI escape sequence
-ANSI_END = 'm'
-# complete sequence resetting the format set by a previous escape sequence
-ANSI_RESET = '\033[0m'
-# style identifier -> ANSI terminal code
-ANSI_STYLES = {
-    'reset': "0",
-    'bold': "1",
-    'italic': "3",
-    'underline': "4",
-    'blink': "5",
-    'inverse': "7",
-    'strike': "9",
-}
-# color identifier -> ANSI terminal code
-ANSI_COLORS = {
-    'reset': "0",
-    'black': "30",
-    'red': "31",
-    'green': "32",
-    'yellow': "33",
-    'blue': "34",
-    'magenta': "35",
-    'cyan': "36",
-    'white': "37",
-}
-
-def _get_ansi_code(color=None, style=None):
-    """build the ansi escape code matching the given color and style
-
-    :type color: str or None
-    :param color:
-      the color name (see `ANSI_COLORS` for available values)
-      or the color number when 256 colors are available
-
-    :type style: str or None
-    :param style:
-      style string (see `ANSI_STYLES` for available values). To get
-      several style effects at the same time, use a comma as separator.
-
-    :raise KeyError: if an unexistent color or style identifier is given
-
-    :rtype: str
-    :return:
-      the built escape code, or an empty string when neither a color nor a
-      style is given
-    """
-    codes = []
-    if style:
-        codes.extend(ANSI_STYLES[effect] for effect in splitstrip(style))
-    if color:
-        if color.isdigit():
-            # color given as a number: '38;5;<number>'
-            codes += ['38', '5', color]
-        else:
-            codes.append(ANSI_COLORS[color])
-    if not codes:
-        return ''
-    return ANSI_PREFIX + ';'.join(codes) + ANSI_END
-
-def colorize_ansi(msg, color=None, style=None):
-    """wrap a message with the ansi escape codes giving it a color and style
-
-    :type msg: str or unicode
-    :param msg: the message string to colorize
-
-    :type color: str or None
-    :param color:
-      the color identifier (see `ANSI_COLORS` for available values)
-
-    :type style: str or None
-    :param style:
-      style string (see `ANSI_STYLES` for available values). To get
-      several style effects at the same time, use a comma as separator.
-
-    :raise KeyError: if an unexistent color or style identifier is given
-
-    :rtype: str or unicode
-    :return:
-      the ansi escaped string, or the message itself when there is no
-      escape code to apply
-    """
-    # nothing requested: leave the text as is
-    if color is None and style is None:
-        return msg
-    prefix = _get_ansi_code(color, style)
-    # an empty escape code means there is nothing to wrap the message with
-    if not prefix:
-        return msg
-    return '%s%s%s' % (prefix, msg, ANSI_RESET)
-
-# color used for each kind of diff line
-DIFF_STYLE = {'separator': 'cyan', 'remove': 'red', 'add': 'green'}
-
-def diff_colorize_ansi(lines, out=sys.stdout, style=DIFF_STYLE):
-    """write the lines of a diff to `out`, colorized according to their kind
-
-    Lines starting with '--- ' or '+++ ' use the 'separator' color, other
-    lines starting with '-' the 'remove' color and other lines starting with
-    '+' the 'add' color. Any other line, empty ones included, is written
-    unchanged.
-
-    :type lines: iterable of str or unicode
-    :param lines: the lines of the diff
-
-    :type out: object with a `write` method
-    :param out:
-      where to write the result, default to the `sys.stdout` object found
-      when this function was defined
-
-    :type style: dict
-    :param style:
-      color identifier for each of the 'separator', 'remove' and 'add'
-      keys, default to `DIFF_STYLE`
-    """
-    for line in lines:
-        # startswith is used rather than indexing so that an empty line
-        # falls through to the last branch instead of raising IndexError
-        if line.startswith(('--- ', '+++ ')):
-            out.write(colorize_ansi(line, style['separator']))
-        elif line.startswith('-'):
-            out.write(colorize_ansi(line, style['remove']))
-        elif line.startswith('+'):
-            out.write(colorize_ansi(line, style['add']))
-        else:
-            out.write(line)
-