123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358 |
- from __future__ import absolute_import, unicode_literals, print_function
- import sys
- import re
- import inspect
- import itertools
- import textwrap
- import functools
- import six
- import jaraco.collections
- from jaraco.functools import compose
def substitution(old, new):
    """
    Build a callable that replaces every occurrence of ``old`` with
    ``new`` in the string passed to it.

    >>> substitution('a', 'b')('banana')
    'bbnbnb'
    """
    def replace_in(s):
        return s.replace(old, new)

    return replace_in
def multi_substitution(*substitutions):
    """
    Take a sequence of ``(old, new)`` pairs and create a function that
    performs those substitutions on a string, in the order given.

    >>> multi_substitution(('foo', 'bar'), ('bar', 'baz'))('foo')
    'baz'

    With no pairs at all, the returned function hands back its input
    unchanged.

    >>> multi_substitution()('foo')
    'foo'
    """
    # Unpack eagerly so a malformed pair is reported here, when the
    # function is built, rather than on the first call of the result.
    pairs = tuple((old, new) for old, new in substitutions)

    def substitute(s):
        # Apply each replacement to the output of the previous one.
        for old, new in pairs:
            s = s.replace(old, new)
        return s

    return substitute
class FoldedCase(six.text_type):
    """
    A case insensitive string class; behaves just like str
    except compares equal when the only variation is case.

    >>> s = FoldedCase('hello world')

    >>> s == 'Hello World'
    True

    >>> 'Hello World' == s
    True

    Inequality and the inclusive orderings agree with equality.

    >>> s != 'Hello World'
    False

    >>> s <= 'HELLO WORLD'
    True

    >>> s >= 'HELLO WORLD'
    True

    Comparing with something that is not string-like is simply unequal.

    >>> s == None
    False

    >>> s.index('O')
    4

    >>> s.split('O')
    ['hell', ' w', 'rld']

    >>> sorted(map(FoldedCase, ['GAMMA', 'alpha', 'Beta']))
    ['alpha', 'Beta', 'GAMMA']
    """

    # The string base type supplies its own case-sensitive versions of
    # all six rich comparisons, so every one of them is overridden here;
    # leaving any out would make that operator disagree with the others.

    def __lt__(self, other):
        return self.lower() < other.lower()

    def __le__(self, other):
        return self.lower() <= other.lower()

    def __gt__(self, other):
        return self.lower() > other.lower()

    def __ge__(self, other):
        return self.lower() >= other.lower()

    def __eq__(self, other):
        try:
            other_lower = other.lower()
        except AttributeError:
            # Not string-like: let Python fall back to its default
            # handling (which yields False) instead of raising.
            return NotImplemented
        return self.lower() == other_lower

    def __ne__(self, other):
        try:
            other_lower = other.lower()
        except AttributeError:
            # Not string-like: default handling yields True.
            return NotImplemented
        return self.lower() != other_lower

    def __hash__(self):
        # Hash the folded form so equal values hash equally.
        return hash(self.lower())

    # cache lower since it's likely to be called frequently.
    def lower(self):
        try:
            return self._lower
        except AttributeError:
            # First call: compute once and remember it on the instance.
            # Storing only the value (not a closure over self) avoids
            # creating a reference cycle on every instance.
            self._lower = super(FoldedCase, self).lower()
            return self._lower

    def index(self, sub):
        """Return the position of ``sub``, ignoring case."""
        return self.lower().index(sub.lower())

    def split(self, splitter=' ', maxsplit=0):
        """
        Split on ``splitter``, matched literally but ignoring case.

        ``maxsplit`` is passed to the regex split, where 0 means
        "no limit".
        """
        pattern = re.compile(re.escape(splitter), re.I)
        return pattern.split(self, maxsplit)
def local_format(string):
    """
    Interpolate ``string`` using the local variables of the calling frame.

    >>> a = 3
    >>> local_format("{a:5}")
    '    3'
    """
    # The frame lookup must stay in this function body: f_back is
    # relative to the frame that executes it.
    caller_locals = inspect.currentframe().f_back.f_locals
    # str.format_map is used on Python 3.2 and later; older versions
    # fall back to keyword expansion.
    if sys.version_info >= (3, 2):
        return string.format_map(caller_locals)
    return string.format(**caller_locals)
def global_format(string):
    """
    Interpolate ``string`` using the global variables of the calling frame.

    >>> a = 3
    >>> fmt = "The func name: {global_format.__name__}"
    >>> global_format(fmt)
    'The func name: global_format'
    """
    # The frame lookup must stay in this function body: f_back is
    # relative to the frame that executes it.
    caller_globals = inspect.currentframe().f_back.f_globals
    # str.format_map is used on Python 3.2 and later; older versions
    # fall back to keyword expansion.
    if sys.version_info >= (3, 2):
        return string.format_map(caller_globals)
    return string.format(**caller_globals)
def namespace_format(string):
    """
    Interpolate ``string`` using names visible in the caller's scope
    (its globals together with its locals).

    >>> a = 3
    >>> fmt = "A is {a} and this func is {namespace_format.__name__}"
    >>> namespace_format(fmt)
    'A is 3 and this func is namespace_format'
    """
    # The frame lookup must stay in this function body: f_back is
    # relative to the frame that executes it.
    caller = inspect.currentframe().f_back
    scope = jaraco.collections.DictStack()
    # Globals go on first, locals last.
    # NOTE(review): presumably the most recently pushed mapping wins on
    # a name clash, so locals shadow globals -- confirm against DictStack.
    scope.push(caller.f_globals)
    scope.push(caller.f_locals)
    if sys.version_info >= (3, 2):
        return string.format_map(scope)
    return string.format(**scope)
def is_decodable(value):
    r"""
    Report whether ``value`` can be decoded using the default encoding.

    >>> is_decodable(b'\xff')
    False
    >>> is_decodable(b'\x32')
    True
    """
    # TODO: jaraco.context.ExceptionTrap would express this more
    # concisely and directly, but it drags in an unfortunately long
    # dependency tree, so for now spell the booleans out by hand.
    try:
        value.decode()
    except UnicodeDecodeError:
        decodable = False
    else:
        decodable = True
    return decodable
def is_binary(value):
    """
    Report whether ``value`` looks like binary data: it must be a byte
    string, and that byte string must fail to decode.
    """
    # Anything that is not bytes is never considered binary, and is
    # never handed to is_decodable.
    if not isinstance(value, bytes):
        return False
    return not is_decodable(value)
def trim(s):
    r"""
    Strip the indentation shared by every line of ``s`` (as found in a
    docstring, for example) and then the surrounding whitespace.

    >>> trim("\n\tfoo = bar\n\t\tbar = baz\n")
    'foo = bar\n\tbar = baz'
    """
    dedented = textwrap.dedent(s)
    return dedented.strip()
class Splitter(object):
    """
    Callable that splits each string it is given, using the same
    ``split`` arguments every time.

    >>> s = Splitter(',')
    >>> s('hello, world, this is your, master calling')
    ['hello', ' world', ' this is your', ' master calling']
    """

    def __init__(self, *args):
        # Kept as given and forwarded to ``split`` on every call.
        self.args = args

    def __call__(self, s):
        split_args = self.args
        return s.split(*split_args)
def indent(string, prefix=' ' * 4):
    """
    Return ``string`` with ``prefix`` (four spaces by default) placed
    in front of it.

    The prefix is added once, at the very start; later lines of a
    multi-line string are not touched.

    >>> indent('foo')
    '    foo'
    """
    indented = prefix + string
    return indented
class WordSet(tuple):
    """
    Given a Python identifier, return the words that identifier represents,
    whether in camel case, underscore-separated, etc.

    >>> WordSet.parse("camelCase")
    ('camel', 'Case')

    >>> WordSet.parse("under_sep")
    ('under', 'sep')

    Acronyms should be retained

    >>> WordSet.parse("firstSNL")
    ('first', 'SNL')

    >>> WordSet.parse("you_and_I")
    ('you', 'and', 'I')

    >>> WordSet.parse("A simple test")
    ('A', 'simple', 'test')

    Multiple caps should not interfere with the first cap of another word.

    >>> WordSet.parse("myABCClass")
    ('my', 'ABC', 'Class')

    The result is a WordSet, so you can get the form you need.

    >>> WordSet.parse("myABCClass").underscore_separated()
    'my_ABC_Class'

    >>> WordSet.parse('a-command').camel_case()
    'ACommand'

    >>> WordSet.parse('a-command').headless_camel_case()
    'aCommand'

    >>> WordSet.parse('someIdentifier').lowered().space_separated()
    'some identifier'

    Slices of the result should return another WordSet.

    >>> WordSet.parse('taken-out-of-context')[1:].underscore_separated()
    'out_of_context'

    >>> WordSet.from_class_name(WordSet()).lowered().space_separated()
    'word set'
    """
    # A word is either an optionally-capitalized run of lowercase
    # letters, or a run of capitals that stops before a capital which
    # begins the next lowercase word (so "ABCClass" -> "ABC", "Class").
    _pattern = re.compile('([A-Z]?[a-z]+)|([A-Z]+(?![a-z]))')

    def capitalized(self):
        """Return a WordSet with each word capitalized."""
        return WordSet(word.capitalize() for word in self)

    def lowered(self):
        """Return a WordSet with each word lower-cased."""
        return WordSet(word.lower() for word in self)

    def camel_case(self):
        """Join the capitalized words with no separator."""
        return ''.join(self.capitalized())

    def headless_camel_case(self):
        """
        Like camel_case, but with the first word lower-cased.

        Returns a string; an empty WordSet yields ''.
        """
        words = iter(self)
        # Default to '' so an empty WordSet does not raise StopIteration.
        first = next(words, '').lower()
        return first + WordSet(words).camel_case()

    def underscore_separated(self):
        """Join the words with underscores."""
        return '_'.join(self)

    def dash_separated(self):
        """Join the words with dashes."""
        return '-'.join(self)

    def space_separated(self):
        """Join the words with single spaces."""
        return ' '.join(self)

    def __getitem__(self, item):
        result = super(WordSet, self).__getitem__(item)
        # tuple slicing returns a plain tuple; re-wrap so the
        # formatting methods remain available on slices.
        if isinstance(item, slice):
            result = WordSet(result)
        return result

    # for compatibility with Python 2
    def __getslice__(self, i, j):
        return self.__getitem__(slice(i, j))

    @classmethod
    def parse(cls, identifier):
        """Split ``identifier`` into words and return them as a WordSet."""
        matches = cls._pattern.finditer(identifier)
        return WordSet(match.group(0) for match in matches)

    @classmethod
    def from_class_name(cls, subject):
        """Parse the name of the class of the ``subject`` instance."""
        return cls.parse(subject.__class__.__name__)


# for backward compatibility
words = WordSet.parse
def simple_html_strip(s):
    r"""
    Return ``s`` with HTML comments and tags removed.

    >>> str(simple_html_strip(''))
    ''

    >>> print(simple_html_strip('A <bold>stormy</bold> day in paradise'))
    A stormy day in paradise

    >>> print(simple_html_strip('Somebody <!-- do not --> tell the truth.'))
    Somebody  tell the truth.

    >>> print(simple_html_strip('What about<br/>\nmultiple lines?'))
    What about
    multiple lines?
    """
    # Three alternatives, tried in order: a comment, a tag, or a run of
    # plain text. Only the plain-text group (3) is kept. DOTALL lets a
    # comment span several lines.
    tag_or_text = re.compile('(<!--.*?-->)|(<[^>]*>)|([^<]+)', re.DOTALL)
    kept = []
    for found in tag_or_text.finditer(s):
        text = found.group(3)
        if text:
            kept.append(text)
    return ''.join(kept)
class SeparatedValues(six.text_type):
    """
    A string holding several values joined by a separator. Iterating
    over it yields the individual values instead of characters.

    >>> list(SeparatedValues('a,b,c'))
    ['a', 'b', 'c']

    Whitespace is stripped and empty values are discarded.

    >>> list(SeparatedValues(' a, b , c, '))
    ['a', 'b', 'c']
    """
    separator = ','

    def __iter__(self):
        # The split happens right away; stripping and the dropping of
        # empty values happen lazily as the result is consumed.
        stripped = (piece.strip() for piece in self.split(self.separator))
        return (piece for piece in stripped if piece)
class Stripper:
    r"""
    Given a series of lines, find the common prefix and strip it from them.

    >>> lines = [
    ...     'abcdefg\n',
    ...     'abc\n',
    ...     'abcde\n',
    ... ]
    >>> res = Stripper.strip_prefix(lines)
    >>> res.prefix
    'abc'
    >>> list(res.lines)
    ['defg\n', '\n', 'de\n']

    If no prefix is common, nothing should be stripped.

    >>> lines = [
    ...     'abcd\n',
    ...     '1234\n',
    ... ]
    >>> res = Stripper.strip_prefix(lines)
    >>> res.prefix
    ''
    >>> list(res.lines)
    ['abcd\n', '1234\n']
    """

    def __init__(self, prefix, lines):
        self.prefix = prefix
        # The instance itself is the per-line stripping function.
        self.lines = map(self, lines)

    @classmethod
    def strip_prefix(cls, lines):
        """
        Compute the prefix shared by all ``lines`` and return a
        Stripper whose ``lines`` have that prefix removed.

        An empty series of lines gives an empty prefix.
        """
        # tee so the lines can be scanned for the prefix and still be
        # handed, unconsumed, to the new instance.
        prefix_lines, lines = itertools.tee(lines)
        first = next(prefix_lines, None)
        if first is None:
            # No lines at all: reduce would raise on an empty sequence.
            prefix = ''
        else:
            prefix = functools.reduce(cls.common_prefix, prefix_lines, first)
        return cls(prefix, lines)

    def __call__(self, line):
        """Return ``line`` with the prefix removed."""
        # partition rejects an empty separator, so a series with no
        # common prefix must be passed through untouched.
        if not self.prefix:
            return line
        _, _, rest = line.partition(self.prefix)
        return rest

    @staticmethod
    def common_prefix(s1, s2):
        """
        Return the common prefix of two lines.
        """
        # Start from the longest possible prefix and shrink until the
        # two slices agree; index 0 (empty prefix) always agrees.
        index = min(len(s1), len(s2))
        while s1[:index] != s2[:index]:
            index -= 1
        return s1[:index]
|