Source code for got.asts.utils
import itertools
import os
import random
import re
import sys
try:
from got.asts import consts
except ImportError:
import consts
class ImmutableMixin(object):
    """
    Mixin that freezes an instance once its initializer has run.

    Subclasses assign their attributes first and then call this
    ``__init__``; from that point on every attribute assignment on the
    instance raises NotImplementedError.
    """

    # Class-level default: instances are writable until __init__ flips it.
    _inited = False

    def __init__(self):
        # Still allowed: the flag is False while this assignment happens.
        self._inited = True

    def __setattr__(self, key, value):
        if not self._inited:
            super().__setattr__(key, value)
            return
        raise NotImplementedError
class EnumMixin(object):
    """
    Mixin that makes an object iterable over its public attribute values.

    Iteration yields the value of every attribute reported by ``dir(self)``
    whose name does not start with an underscore, in ``dir()`` order.
    """

    def __iter__(self):
        for name in dir(self):
            # The value is fetched before the name is filtered, so private
            # attributes are still read even though they are never yielded.
            value = getattr(self, name)
            if name.startswith('_'):
                continue
            yield value
def tokenize(text):
    """
    Split text into word tokens.

    A token is a maximal run of word characters (letters, digits and
    underscore, Unicode-aware) and apostrophes, so contractions such as
    "it's" stay in one piece. Returns the list of tokens in order of
    appearance, or an empty list when the text contains none.
    """
    # Raw string: a backslash-w inside a plain literal is an invalid escape
    # sequence, which newer Python versions warn about.
    return re.findall(r"[\w']+", text, flags=re.UNICODE)
def itersubclasses(cls, _seen=None):
    """
    Generate all subclasses of cls, direct and indirect, depth-first.

    Each subclass is yielded exactly once, even when it is reachable
    through several bases.

    cls   -- the class whose subclasses are wanted.
    _seen -- set of classes already yielded; used by the recursion. A set
             supplied by the caller is updated in place.

    Raises TypeError (on first iteration) if cls is not a class.
    """
    if not isinstance(cls, type):
        # Wrap in a tuple so a tuple argument is reported, not unpacked
        # as formatting arguments.
        raise TypeError(('itersubclasses must be called with '
                         'new-style classes, not %.100r') % (cls,))
    # Compare against None: "_seen or set()" would throw away an empty set
    # that the caller passed in to collect the visited classes.
    if _seen is None:
        _seen = set()
    try:
        subs = cls.__subclasses__()
    except TypeError:
        # The no-argument call fails for ``type`` itself, where the method
        # is unbound and needs the class passed explicitly.
        subs = cls.__subclasses__(cls)
    for sub in subs:
        if sub in _seen:
            continue
        _seen.add(sub)
        yield sub
        yield from itersubclasses(sub, _seen)
def import_modules_from_package(package):
    """
    Import every module found under a package directory.

    The package is located on disk relative to the parent of the directory
    containing this file: "a.b" maps to <this directory>/../a/b. That tree
    is walked recursively and every ``*.py`` file whose name does not start
    with a double underscore is imported under its dotted name, unless that
    name is already present in sys.modules.

    package -- dotted package name, e.g. "a.b".
    """
    base = os.path.join(os.path.dirname(__file__), '..')
    path = os.path.join(base, *package.split('.'))
    for root, _dirs, files in os.walk(path):
        # Derive the dotted name from the location relative to base rather
        # than from the textual shape of the path, so the result does not
        # depend on the form __file__ takes (absolute, relative, or a bare
        # file name with an empty dirname).
        new_package = os.path.relpath(root, base).replace(os.sep, '.')
        for filename in files:
            if filename.startswith('__') or not filename.endswith('.py'):
                continue
            module_name = '%s.%s' % (new_package, filename[:-3])
            if module_name not in sys.modules:
                __import__(module_name)
def index(array, key, start=0):
    """
    Return the first position at or after start where array holds key.

    The scan indexes array directly, so a key that never occurs makes the
    lookup run off the end and raise whatever array raises for an invalid
    position (IndexError for lists and strings).
    """
    position = start
    while True:
        if array[position] != key:
            position += 1
            continue
        return position
def match_strings(str1, str2):
    """
    Returns the length of the longest common prefix of str1 and str2,
    i.e. the largest i such that str1[:i] == str2[:i].
    """
    matched = 0
    # zip stops at the shorter input, which bounds the comparison.
    for left, right in zip(str1, str2):
        if left == right:
            matched += 1
        else:
            break
    return matched
def make_unique_endings(strings_collection):
    """
    Make each string in the collection end with a unique character.
    Essential for correct building of a generalized annotated suffix tree.

    The i-th string gets the character whose code point is
    consts.String.UNICODE_SPECIAL_SYMBOLS_START + i appended to it.
    Returns a new list with the updated strings; the input collection is
    not modified.

    The collection size is limited by the code points available above the
    start value (max strings_collection ~ 1.100.000); chr() raises
    ValueError once the code point leaves the Unicode range.
    """
    first_code = consts.String.UNICODE_SPECIAL_SYMBOLS_START
    # chr() maps a code point straight to its character, replacing the
    # manual "\UXXXXXXXX" escape that was built and then decoded.
    return [string + chr(first_code + offset)
            for offset, string in enumerate(strings_collection)]