521 lines
22 KiB
Python
521 lines
22 KiB
Python
|
"""
|
||
|
This is a copy of sensai.util.string from sensAI commit d7b4afcc89b4d2e922a816cb07dffde27f297354
|
||
|
"""
|
||
|
|
||
|
|
||
|
import functools
|
||
|
import logging
|
||
|
import re
|
||
|
import sys
|
||
|
import types
|
||
|
from abc import ABC, abstractmethod
|
||
|
from typing import Union, List, Dict, Any, Sequence, Iterable, Optional, Mapping, Callable
|
||
|
|
||
|
# matches a comma followed by one or more whitespace characters (which includes line breaks);
# to_string substitutes every such match with ", "
reCommaWhitespacePotentiallyBreaks = re.compile(r",\s+")

# logger of this module
log = logging.getLogger(__name__)
|
||
|
|
||
|
|
||
|
class StringConverter(ABC):
    """
    Interface for objects which define how a value is converted to its string representation
    """
    @abstractmethod
    def to_string(self, x) -> str:
        """
        :param x: the object to convert
        :return: the string representation of the object
        """
|
||
|
|
||
|
|
||
|
def dict_string(d: Mapping, brackets: Optional[str] = None, converter: StringConverter = None):
    """
    Renders a mapping as "<key>=<value>, <key>=<value>, ...", where each value is converted via :func:`to_string`;
    the result can optionally be wrapped in a pair of brackets.

    :param d: the dictionary
    :param brackets: a two-character string containing the opening and closing bracket to use, e.g. ``"{}"``;
        if None, do not use enclosing brackets
    :param converter: the string converter to use for values
    :return: the string representation
    """
    entries = []
    for key, value in d.items():
        # the key is passed on as context, which is reported if the conversion runs into a recursion error
        entries.append(f'{key}={to_string(value, converter=converter, context=key)}')
    joined = ', '.join(entries)
    if brackets is None:
        return joined
    return brackets[:1] + joined + brackets[-1:]
|
||
|
|
||
|
|
||
|
def list_string(l: Iterable[Any], brackets="[]", quote: Optional[str] = None, converter: StringConverter = None):
    """
    Renders a list (or any other iterable) as "[<value>, <value>, ...]", where each value is converted via
    :func:`to_string`; the enclosing brackets can be changed or omitted, and the values can optionally be quoted.

    :param l: the list
    :param brackets: a two-character string containing the opening and closing bracket to use, e.g. ``"[]"``;
        if None, do not use enclosing brackets
    :param quote: a 1-character string defining the quote to use around each value, e.g. ``"'"``.
    :param converter: the string converter to use for values
    :return: the string representation
    """
    rendered = []
    for element in l:
        text = to_string(element, converter=converter, context="list")
        rendered.append(text if quote is None else quote + text + quote)
    joined = ", ".join(rendered)
    if brackets is None:
        return joined
    return brackets[:1] + joined + brackets[-1:]
|
||
|
|
||
|
|
||
|
def to_string(x, converter: StringConverter = None, apply_converter_to_non_complex_objects=True, context=None):
    """
    Converts the given object to a string. Objects whose type is exactly list, tuple or dict are rendered element-wise
    (via :func:`list_string` and :func:`dict_string`), bound methods are rendered as ``Method[<name>]``, and all other
    objects are converted via the converter (if applicable) or ``str``.
    For the latter, any whitespace following a comma (including line breaks, as present, in particular, in sklearn's
    string representations) is collapsed to a single space.

    :param x: the object to convert
    :param converter: the converter with which to convert objects to strings
    :param apply_converter_to_non_complex_objects: whether to apply/pass on the converter (if any) not only when converting complex objects
        but also non-complex, primitive objects; use of this flag enables converters to implement their conversion functionality using this
        function for complex objects without causing an infinite recursion.
    :param context: context in which the object is being converted (e.g. dictionary key for case where x is the corresponding
        dictionary value), only for debugging purposes (will be reported in log messages upon recursion exception)
    :return: the string representation
    """
    try:
        kind = type(x)
        if kind is list:
            return list_string(x, converter=converter)
        if kind is tuple:
            return list_string(x, brackets="()", converter=converter)
        if kind is dict:
            return dict_string(x, brackets="{}", converter=converter)
        if kind is types.MethodType:
            # could be bound method of a ToStringMixin instance (which would print the repr of the instance, which can
            # potentially cause an infinite recursion)
            return f"Method[{x.__name__}]"

        text = converter.to_string(x) if converter and apply_converter_to_non_complex_objects else str(x)

        # remove any unwanted line breaks and indentation after commas (as generated, for example, by sklearn objects)
        return reCommaWhitespacePotentiallyBreaks.sub(", ", text)
    except RecursionError:
        log.error(f"Recursion in string conversion detected; context={context}")
        raise
|
||
|
|
||
|
|
||
|
def object_repr(obj, member_names_or_dict: Union[List[str], Dict[str, Any]]):
    """
    Creates a representation of the form "<class name>[<name>=<value>, ...]" for the given object.

    :param obj: the object whose class name is to be used (and whose attributes are read if member names are given)
    :param member_names_or_dict: either a dictionary containing the entries to show (used as is) or a collection of
        names of attributes of ``obj``, whose values are converted via :func:`to_string`
    :return: the string representation
    """
    if type(member_names_or_dict) is dict:
        members = member_names_or_dict
    else:
        members = {}
        for name in member_names_or_dict:
            members[name] = to_string(getattr(obj, name))
    return f"{obj.__class__.__name__}[{dict_string(members)}]"
|
||
|
|
||
|
|
||
|
def or_regex_group(allowed_names: Sequence[str]):
    """
    Builds a regular expression group of alternatives from the given names, each of which is escaped and
    therefore treated as a literal.

    :param allowed_names: strings to include as literals in the regex
    :return: a regular expression string of the form (<name1>| ...|<nameN>), which matches any of the given names
    """
    alternatives = "|".join(map(re.escape, allowed_names))
    return f"({alternatives})"
|
||
|
|
||
|
|
||
|
def function_name(x: Callable) -> str:
    """
    Determines a name for the given callable.

    :param x: the callable; if it is a ``functools.partial``, the function it wraps is considered instead
    :return: the value of the ``__name__`` attribute if present, otherwise the result of applying ``str``
    """
    target = x
    # unwrap (possibly nested) partial applications
    while isinstance(target, functools.partial):
        target = target.func
    if hasattr(target, "__name__"):
        return target.__name__
    return str(target)
|
||
|
|
||
|
|
||
|
class ToStringMixin:
    """
    Provides implementations for ``__str__`` and ``__repr__`` which are based on the format ``"<class name>[<object info>]"`` and
    ``"<class name>[id=<object id>, <object info>]"`` respectively, where ``<object info>`` is usually a list of entries of the
    form ``"<name>=<value>, ..."``.

    By default, ``<class name>`` will be the qualified name of the class, and ``<object info>`` will include all properties
    of the class, including private ones starting with an underscore (though the underscore will be dropped in the string
    representation).

        * To exclude private properties, override :meth:`_tostring_exclude_private` to return True. If there are exceptions
          (and some private properties shall be retained), additionally override :meth:`_tostring_exclude_exceptions`.
        * To exclude a particular set of properties, override :meth:`_tostring_excludes`.
        * To include only select properties (introducing inclusion semantics), override :meth:`_tostring_includes`.
        * To add values to the properties list that aren't actually properties of the object (i.e. derived properties),
          override :meth:`_tostring_additional_entries`.
        * To define a fully custom representation for ``<object info>`` which is not based on the above principles, override
          :meth:`_tostring_object_info`.

    For well-defined string conversions within a class hierarchy, it can be a good practice to define additional
    inclusions/exclusions by overriding the respective method once more and basing the return value on an extended
    version of the value returned by superclass.
    In some cases, the requirements of a subclass can be at odds with the definitions in the superclass: The superclass
    may make use of exclusion semantics, but the subclass may want to use inclusion semantics (and include
    only some of the many properties it adds). In this case, if the subclass used :meth:`_tostring_includes`, the exclusion semantics
    of the superclass would be void and none of its properties would actually be included.
    In such cases, override :meth:`_tostring_includes_forced` to add inclusions regardless of the semantics otherwise used along
    the class hierarchy.

    .. document private functions
    .. automethod:: _tostring_class_name
    .. automethod:: _tostring_object_info
    .. automethod:: _tostring_excludes
    .. automethod:: _tostring_exclude_exceptions
    .. automethod:: _tostring_includes
    .. automethod:: _tostring_includes_forced
    .. automethod:: _tostring_additional_entries
    .. automethod:: _tostring_exclude_private
    """
    # marker element which, if it is the sole element of the list of inclusions, stands for "all attributes"
    _TOSTRING_INCLUDE_ALL = "__all__"

    def _tostring_class_name(self):
        """
        :return: the string use for <class name> in the string representation ``"<class name>[<object info>]"``
        """
        return type(self).__qualname__

    def _tostring_properties(self,
            exclude: Optional[Union[str, Iterable[str]]] = None,
            include: Optional[Union[str, Iterable[str]]] = None,
            exclude_exceptions: Optional[List[str]] = None,
            include_forced: Optional[List[str]] = None,
            additional_entries: Dict[str, Any] = None,
            converter: StringConverter = None) -> str:
        """
        Creates a string of the class attributes, with optional exclusions/inclusions/additions.
        Exclusions take precedence over regular inclusions, but not over forced inclusions and exclusion exceptions.

        :param exclude: attributes to be excluded
        :param include: attributes to be included; if it consists solely of the marker element for "all attributes",
            all attributes of the instance are considered; otherwise only the specified attributes will be printed
            (bar the ones excluded by ``exclude``)
        :param exclude_exceptions: attributes which shall not be excluded even though they are listed in ``exclude``
            or are private (while private attributes are being excluded)
        :param include_forced: additional attributes to be included
        :param additional_entries: additional key-value entries to be added
        :param converter: the string converter to use; if None, use default (which avoids infinite recursions)
        :return: a string containing entry/property names and values
        """
        def mklist(x):
            if x is None:
                return []
            if isinstance(x, str):
                return [x]
            # materialise as a list, such that arbitrary iterables (tuples, sets, generators) can be concatenated
            # and can be checked for membership repeatedly
            return list(x)

        exclude = mklist(exclude)
        include = mklist(include)
        include_forced = mklist(include_forced)
        exclude_exceptions = mklist(exclude_exceptions)

        def is_excluded(k):
            if k in include_forced or k in exclude_exceptions:
                return False
            if k in exclude:
                return True
            if self._tostring_exclude_private():
                return k.startswith("_")
            return False

        # determine relevant attribute dictionary
        if len(include) == 1 and include[0] == self._TOSTRING_INCLUDE_ALL:  # exclude semantics (include everything by default)
            attribute_dict = self.__dict__
        else:  # include semantics (include only inclusions)
            # de-duplicate while retaining the order in which the names were given, such that the
            # resulting string is deterministic (a set would yield a hash-dependent order)
            included_names = dict.fromkeys(include + include_forced)
            attribute_dict = {k: getattr(self, k) for k in included_names
                if hasattr(self, k) and k != self._TOSTRING_INCLUDE_ALL}

        # apply exclusions and remove underscores from attribute names
        d = {k.strip("_"): v for k, v in attribute_dict.items() if not is_excluded(k)}

        if additional_entries is not None:
            d.update(additional_entries)

        if converter is None:
            # the instance itself is registered as already handled, such that nested references to it are abbreviated
            converter = self._StringConverterAvoidToStringMixinRecursion(self)
        return dict_string(d, converter=converter)

    def _tostring_object_info(self) -> str:
        """
        Override this method to use a fully custom definition of the ``<object info>`` part in the full string
        representation ``"<class name>[<object info>]"`` to be generated.
        As soon as this method is overridden, any property-based exclusions, inclusions, etc. will have no effect
        (unless the implementation is specifically designed to make use of them - as is the default
        implementation).
        NOTE: Overrides must not internally use super() because of a technical limitation in the proxy
        object that is used for nested object structures.

        :return: a string containing the string to use for ``<object info>``
        """
        return self._tostring_properties(exclude=self._tostring_excludes(), include=self._tostring_includes(),
            exclude_exceptions=self._tostring_exclude_exceptions(), include_forced=self._tostring_includes_forced(),
            additional_entries=self._tostring_additional_entries())

    def _tostring_excludes(self) -> List[str]:
        """
        Makes the string representation exclude the returned attributes.
        This method can be conveniently overridden by subclasses which can call super and extend the list returned.

        This method will have no effect if :meth:`_tostring_object_info` is overridden to not use its result.

        :return: a list of attribute names
        """
        return []

    def _tostring_includes(self) -> List[str]:
        """
        Makes the string representation include only the returned attributes (i.e. introduces inclusion semantics);
        By default, the list contains only a marker element, which is interpreted as "all attributes included".

        This method can be conveniently overridden by sub-classes which can call super and extend the list returned.
        Note that it is not a problem for a list containing the aforementioned marker element (which stands for all attributes)
        to be extended; the marker element will be ignored and only the user-added elements will be considered as included.

        Note: To add an included attribute in a sub-class, regardless of any super-classes using exclusion or inclusion semantics,
        use :meth:`_tostring_includes_forced` instead.

        This method will have no effect if :meth:`_tostring_object_info` is overridden to not use its result.

        :return: a list of attribute names to be included in the string representation
        """
        return [self._TOSTRING_INCLUDE_ALL]

    # noinspection PyMethodMayBeStatic
    def _tostring_includes_forced(self) -> List[str]:
        """
        Defines a list of attribute names that are required to be present in the string representation, regardless of the
        instance using include semantics or exclude semantics, thus facilitating added inclusions in sub-classes.

        This method will have no effect if :meth:`_tostring_object_info` is overridden to not use its result.

        :return: a list of attribute names
        """
        return []

    def _tostring_additional_entries(self) -> Dict[str, Any]:
        """
        :return: a dictionary of entries to be included in the ``<object info>`` part of the string representation
        """
        return {}

    def _tostring_exclude_private(self) -> bool:
        """
        :return: whether to exclude properties that are private (start with an underscore); forcibly included attributes
            will still be considered - as will properties exempt from the rule via :meth:`_tostring_exclude_exceptions`.
        """
        return False

    def _tostring_exclude_exceptions(self) -> List[str]:
        """
        Defines attribute names which should not be excluded even though other rules (particularly the exclusion of private members
        via :meth:`_tostring_exclude_private`) would otherwise exclude them.

        :return: a list of attribute names
        """
        return []

    def __str__(self):
        return f"{self._tostring_class_name()}[{self._tostring_object_info()}]"

    def __repr__(self):
        info = f"id={id(self)}"
        property_info = self._tostring_object_info()
        if len(property_info) > 0:
            info += ", " + property_info
        return f"{self._tostring_class_name()}[{info}]"

    def pprint(self, file=sys.stdout):
        """
        Prints a prettily formatted string representation of the object (with line breaks and indentations)
        to ``stdout`` or the given file.

        :param file: the file to print to
        """
        print(self.pprints(), file=file)

    def pprints(self) -> str:
        """
        :return: a prettily formatted string representation with line breaks and indentations
        """
        return pretty_string_repr(self)

    class _StringConverterAvoidToStringMixinRecursion(StringConverter):
        """
        Avoids recursions when converting objects implementing :class:`ToStringMixin` which may contain themselves to strings.
        Use of this object prevents infinite recursions caused by a :class:`ToStringMixin` instance recursively containing itself in
        either a property of another :class:`ToStringMixin`, a list or a tuple.
        It handles all :class:`ToStringMixin` instances recursively encountered.

        A previously handled instance is converted to a string of the form "<class name>[<<]".
        """
        def __init__(self, *handled_objects: "ToStringMixin"):
            """
            :param handled_objects: objects which are initially assumed to have been handled already
            """
            # instances are tracked by identity (object id)
            self._handled_to_string_mixin_ids = {id(o) for o in handled_objects}

        def to_string(self, x) -> str:
            if isinstance(x, ToStringMixin):
                oid = id(x)
                if oid in self._handled_to_string_mixin_ids:
                    return f"{x._tostring_class_name()}[<<]"
                self._handled_to_string_mixin_ids.add(oid)
                # convert via the proxy, which ensures that this converter is used for the instance's properties
                return str(self._ToStringMixinProxy(x, self))
            else:
                # the converter is passed on for the elements of lists, tuples and dicts only, such that primitive
                # objects do not lead back to this method
                return to_string(x, converter=self, apply_converter_to_non_complex_objects=False, context=x.__class__)

        class _ToStringMixinProxy:
            """
            A proxy object which wraps a ToStringMixin to ensure that the converter is applied when creating the properties string.
            The proxy is to achieve that all ToStringMixin methods that aren't explicitly overwritten are bound to this proxy
            (rather than the original object), such that the transitive call to _tostring_properties will call the new
            implementation.
            """

            # methods where we assume that they could transitively call _tostring_properties (others are assumed not to)
            TOSTRING_METHODS_TRANSITIVELY_CALLING_TOSTRINGPROPERTIES = {"_tostring_object_info"}

            def __init__(self, x: "ToStringMixin", converter):
                self.x = x
                self.converter = converter

            def _tostring_properties(self, *args, **kwargs):
                return self.x._tostring_properties(*args, **kwargs, converter=self.converter)

            def _tostring_class_name(self):
                return self.x._tostring_class_name()

            def __getattr__(self, attr: str):
                if attr.startswith("_tostring"):  # ToStringMixin method which we may bind to use this proxy to ensure correct transitive call
                    method = getattr(self.x.__class__, attr)
                    obj = self if attr in self.TOSTRING_METHODS_TRANSITIVELY_CALLING_TOSTRINGPROPERTIES else self.x
                    return lambda *args, **kwargs: method(obj, *args, **kwargs)
                else:
                    return getattr(self.x, attr)

            def __str__(self: "ToStringMixin"):
                return ToStringMixin.__str__(self)
|
||
|
|
||
|
|
||
|
# NOTE(review): the default of indentation_string is reproduced exactly as it appears in the reviewed text
# (a single space) - confirm that this is the intended width of one indentation level
def pretty_string_repr(s: Any, initial_indentation_level=0, indentation_string=" "):
    """
    Reformats an object's string representation (as generated, for example, via ToStringMixin) using line breaks
    and indentations. The indentation level is increased after an opening bracket (``[`` or ``(``) unless the
    bracketed section up to the matching closing bracket does not contain both ``=`` and ``,``, in which case the
    section is retained as is. Sections enclosed in single quotes are always retained as is.
    A line break is furthermore added after every ``", "`` that is not part of a retained section.

    :param s: an object or object string representation
    :param initial_indentation_level: the initial indentation level
    :param indentation_string: the string which corresponds to a single indentation level
    :return: a reformatted version of the input string with added indentations and line breaks
    """
    text = s if type(s) is str else str(s)
    length = len(text)
    closers = {"[": "]", "(": ")", "'": "'"}
    level = initial_indentation_level
    chunks = [indentation_string * level]
    pos = 0

    def emit(count):
        # copies the next `count` characters of the input to the output
        nonlocal pos
        chunks.append(text[pos:pos + count])
        pos += count

    def break_line():
        chunks.append("\n" + indentation_string * level)

    def matching_index(start):
        # determines the index of the character closing the section opened at `start` (None if there is none);
        # brackets can be nested, whereas a quote is closed by the very next quote
        opener = text[start]
        closer = closers[opener]
        nests = closer != opener
        depth = 0
        for idx in range(start, length):
            ch = text[idx]
            if ch == opener and (nests or idx == start):
                depth += 1
            elif ch == closer:
                depth -= 1
            if depth == 0:
                return idx
        return None

    while pos < length:
        current = text[pos]
        if current in "[('":
            end = matching_index(pos)
            keep_inline = False
            if end is not None:
                section = text[pos:end + 1]
                keep_inline = current == "'" or "=" not in section or "," not in section
            if keep_inline:
                emit(end + 1 - pos)
            else:
                emit(1)
                level += 1
                break_line()
        elif current in "])":
            emit(1)
            level -= 1
        elif text.startswith(", ", pos):
            emit(2)
            break_line()
        else:
            emit(1)

    return "".join(chunks)
|
||
|
|
||
|
|
||
|
class TagBuilder:
    """
    Assists in building strings made up of components that are joined via a glue string.
    All ``with_*`` methods return the builder itself, such that calls can be chained.
    """
    def __init__(self, *initial_components: str, glue="_"):
        """
        :param initial_components: initial components to always include at the beginning
        :param glue: the glue string which joins components
        """
        self.glue = glue
        self.components = list(initial_components)

    def with_component(self, component: str):
        """
        Unconditionally adds the given component

        :param component: the component to add
        :return: the builder
        """
        self.components.append(component)
        # return the builder (as the other with_* methods do) in order to support chaining
        return self

    def with_conditional(self, cond: bool, component: str):
        """
        Conditionally adds the given component

        :param cond: the condition
        :param component: the component to add if the condition holds
        :return: the builder
        """
        if cond:
            self.components.append(component)
        return self

    def with_alternative(self, cond: bool, true_component: str, false_component: str):
        """
        Adds a component depending on a condition

        :param cond: the condition
        :param true_component: the component to add if the condition holds
        :param false_component: the component to add if the condition does not hold
        :return: the builder
        """
        self.components.append(true_component if cond else false_component)
        return self

    def build(self):
        """
        :return: the string (with all components joined)
        """
        return self.glue.join(self.components)
|