# -*- coding: utf-8 -*-
__doc__ = """\
A module of useful functions for analyzing Python code.
"""
import keyword
import os
import re
import tokenize
# Matches a shebang line such as "#!/usr/bin/env python".  Raw strings are
# used so the regex escapes are not mistaken for (invalid) string escapes.
shebang = re.compile(r"^#!.*$")
# Matches an encoding declaration such as "# -*- coding: utf-8 -*-" and
# captures the codec name in group 1.
encoding = re.compile(r".*coding[:=]\s*([-\w.]+)")
# The contents of __builtins__ vary from module to module, so the names we
# treat as builtins are spelled out by hand here:
builtins = [
    "ArithmeticError",
    "AssertionError",
    "AttributeError",
    "BaseException",
    "BufferError",
    "BytesWarning",
    "DeprecationWarning",
    "EOFError",
    "Ellipsis",
    "EnvironmentError",
    "Exception",
    "False",
    "FloatingPointError",
    "FutureWarning",
    "GeneratorExit",
    "IOError",
    "ImportError",
    "ImportWarning",
    "IndentationError",
    "IndexError",
    "KeyError",
    "KeyboardInterrupt",
    "LookupError",
    "MemoryError",
    "NameError",
    "None",
    "NotImplemented",
    "NotImplementedError",
    "OSError",
    "OverflowError",
    "PendingDeprecationWarning",
    "ReferenceError",
    "RuntimeError",
    "RuntimeWarning",
    "StandardError",
    "StopIteration",
    "SyntaxError",
    "SyntaxWarning",
    "SystemError",
    "SystemExit",
    "TabError",
    "True",
    "TypeError",
    "UnboundLocalError",
    "UnicodeDecodeError",
    "UnicodeEncodeError",
    "UnicodeError",
    "UnicodeTranslateError",
    "UnicodeWarning",
    "UserWarning",
    "ValueError",
    "Warning",
    "ZeroDivisionError",
    "__IPYTHON__",
    "__IPYTHON__active",
    "__debug__",
    "__doc__",
    "__import__",
    "__name__",
    "__package__",
    "abs",
    "all",
    "any",
    "apply",
    "basestring",
    "bin",
    "bool",
    "buffer",
    "bytearray",
    "bytes",
    "callable",
    "chr",
    "classmethod",
    "cmp",
    "coerce",
    "compile",
    "complex",
    "copyright",
    "credits",
    "delattr",
    "dict",
    "dir",
    "divmod",
    "dreload",
    "enumerate",
    "eval",
    "execfile",
    "exit",
    "file",
    "filter",
    "float",
    "format",
    "frozenset",
    "getattr",
    "globals",
    "hasattr",
    "hash",
    "help",
    "hex",
    "id",
    "input",
    "int",
    "intern",
    "ip_set_hook",
    "ipalias",
    "ipmagic",
    "ipsystem",
    "isinstance",
    "issubclass",
    "iter",
    "jobs",
    "len",
    "license",
    "list",
    "locals",
    "long",
    "map",
    "max",
    "min",
    "next",
    "object",
    "oct",
    "open",
    "ord",
    "pow",
    "print",
    "property",
    "quit",
    "range",
    "raw_input",
    "reduce",
    "reload",
    "repr",
    "reversed",
    "round",
    "set",
    "setattr",
    "slice",
    "sorted",
    "staticmethod",
    "str",
    "sum",
    "super",
    "tuple",
    "type",
    "unichr",
    "unicode",
    "vars",
    "xrange",
    "zip",
]

# Every name that must not be mistaken for a module: the language keywords
# first, followed by the builtins listed above.
reserved_words = keyword.kwlist + builtins
def enumerate_keyword_args(tokens):
    """
    Iterates over *tokens* and returns a dictionary with function names as the
    keys and lists of keyword arguments as the values.

    A keyword argument is any NAME token that is directly followed by ``=``
    on the same logical line as the ``def`` introducing the function.  When
    the same function name is defined more than once the last definition
    wins.

    *tokens* must be an indexable sequence of tokenize-style tuples
    ``(type, string, ...)``.
    """
    keyword_args = {}
    current_function = None  # Name of the "def" whose signature we are in
    last_index = len(tokens) - 1
    for index, tok in enumerate(tokens):
        token_type, token_string = tok[0], tok[1]
        if token_type == tokenize.NEWLINE:
            # The logical line holding the signature has ended
            current_function = None
            continue
        if token_type != tokenize.NAME or index == last_index:
            # Not a name, or there is no following token left to inspect
            continue
        next_string = tokens[index + 1][1]
        if token_string == "def":
            current_function = next_string
            keyword_args[current_function] = []
        elif current_function and next_string == "=":  # keyword argument
            keyword_args[current_function].append(token_string)
    return keyword_args
def enumerate_imports(tokens):
    """
    Iterates over *tokens* and returns a list of all imported modules.

    Each NAME on an ``import`` line is recorded once, in order of first
    appearance, unless it is a reserved word.  Dotted imports are recorded
    one component at a time.

    .. note:: This ignores imports using the "as" and "from" keywords: a name
        directly followed by "as" is skipped, and nothing is recorded on a
        line that contains "from".
    """
    imported_modules = []
    import_line = False
    from_import = False
    last_index = len(tokens) - 1
    for index, tok in enumerate(tokens):
        token_type, token_string = tok[0], tok[1]
        if token_type == tokenize.NEWLINE:
            import_line = False
            from_import = False
        elif token_string == "import":
            import_line = True
        elif token_string == "from":
            from_import = True
        elif import_line and not from_import and token_type == tokenize.NAME:
            # A name may be the very last token; treat "nothing follows" as
            # "not followed by 'as'" instead of indexing past the end.
            next_string = tokens[index + 1][1] if index < last_index else ""
            if (next_string != "as"
                    and token_string not in reserved_words
                    and token_string not in imported_modules):
                imported_modules.append(token_string)
    return imported_modules
def enumerate_global_imports(tokens):
    """
    Returns a list of all globally imported modules (skips modules imported
    inside of classes, methods, or functions).  Example::

        >>> enumerate_global_imports(tokens)
        ["sys", "os", "tokenize", "re"]

    Dotted imports such as ``import os.path`` are returned as a single
    dotted string ("os.path").  Each module is listed once, in order of
    first appearance.

    .. note::

        Does not enumerate imports using the "from" or "as" keywords.
    """
    imported_modules = []
    import_line = False
    from_import = False
    parent_module = ""  # Dotted prefix gathered so far ("os." for os.path)
    function_count = 0
    indentation = 0
    last_index = len(tokens) - 1
    for index, tok in enumerate(tokens):
        token_type, token_string = tok[0], tok[1]
        if token_type == tokenize.INDENT:
            indentation += 1
        elif token_type == tokenize.DEDENT:
            indentation -= 1
        elif token_type == tokenize.NEWLINE:
            import_line = False
            from_import = False
            parent_module = ""  # A prefix never carries over to the next line
        elif token_type == tokenize.NAME:
            # Scope heuristic (unchanged): function_count is bumped on every
            # def/class and dropped again by the first NAME seen while
            # indentation equals function_count - 1 (that NAME is not
            # examined any further).  Names are only looked at while
            # function_count >= indentation.
            # NOTE(review): this is an approximation of "module level", not
            # a real scope tracker -- confirm before relying on edge cases.
            if token_string in ("def", "class"):
                function_count += 1
            if indentation == function_count - 1:
                function_count -= 1
            elif function_count >= indentation:
                if token_string == "import":
                    import_line = True
                elif token_string == "from":
                    from_import = True
                elif import_line and not from_import:
                    # A name may be the very last token; do not index past
                    # the end of *tokens*.
                    next_string = (
                        tokens[index + 1][1] if index < last_index else "")
                    if next_string == "as" or token_string in reserved_words:
                        # Aliased or reserved names are not modules; drop any
                        # prefix so it cannot leak into the next name.
                        parent_module = ""
                    elif next_string == ".":  # module.module
                        parent_module += token_string + "."
                    else:
                        module_string = parent_module + token_string
                        parent_module = ""
                        # De-duplicate on the full dotted name
                        if module_string not in imported_modules:
                            imported_modules.append(module_string)
    return imported_modules
# TODO: Finish this (even though it isn't used):
def enumerate_dynamic_imports(tokens):
    """
    Returns a list of the module names found on plain ``import`` statements
    in *tokens* (``from x import y`` lines are ignored, as are names directly
    followed by "as" and reserved words).

    .. note::

        This function is unfinished.  The stated goal is a dictionary of all
        dynamically imported modules (those inside of classes or functions)
        in the form of {<func or class name>: [<modules>]}, e.g.
        ``{"myfunc": ["zlib", "base64"]}``, but no scope tracking is done
        yet: a flat list is returned and module-level imports are included.
    """
    # Token types that can directly precede the first token of a statement
    line_starters = (
        tokenize.NEWLINE, tokenize.NL, tokenize.INDENT, tokenize.DEDENT)
    imported_modules = []
    import_line = False
    last_index = len(tokens) - 1
    for index, tok in enumerate(tokens):
        token_type, token_string = tok[0], tok[1]
        if token_type == tokenize.NEWLINE:
            import_line = False
        elif token_string == "import":
            # Only an "import" that opens a statement counts; this is what
            # excludes the "import" of "from x import y".  Index 0 is checked
            # explicitly because tokens[-1] would wrap around to the last
            # token instead of signalling "first line".
            if index == 0 or tokens[index - 1][0] in line_starters:
                import_line = True
        elif import_line and token_type == tokenize.NAME:
            # A name may be the very last token; do not index past the end
            next_string = tokens[index + 1][1] if index < last_index else ""
            if (next_string != "as"
                    and token_string not in reserved_words
                    and token_string not in imported_modules):
                imported_modules.append(token_string)
    return imported_modules
def enumerate_method_calls(tokens, modules):
    """
    Returns a list of all object (not module) method calls in the given tokens.

    *modules* is expected to be a list of all global modules imported into the
    source code we're working on; calls made on those names are left out.
    Calls made on the result of another expression (the token before the dot
    is ``)``, ``]``, ``}`` or an empty string literal) are left out as well.
    Each call is listed once, in order of first appearance.

    For example::

        >>> enumerate_method_calls(tokens, ["re", "sys"])
        ["f.write"]
    """
    # Tokens that end an expression rather than name an object
    non_objects = ('""', "''", "]", ")", "}")
    out = []
    last_index = len(tokens) - 1
    for index, tok in enumerate(tokens):
        if tok[0] != tokenize.NAME:
            continue
        # We need "<owner> . <name> (": two tokens before and one after.
        # Checking the bounds avoids both an IndexError at the end and
        # negative indexes wrapping around at the start.
        if index < 2 or index == last_index:
            continue
        if tokens[index + 1][1] != "(":  # Not a call
            continue
        if tokens[index - 1][1] != ".":  # Not attached to anything
            continue
        owner = tokens[index - 2][1]
        if owner in non_objects or owner in modules:
            continue
        method_call = "%s.%s" % (owner, tok[1])
        if method_call not in out:
            out.append(method_call)
    return out
def enumerate_builtins(tokens):
    """
    Returns a list of all the builtins being used in *tokens*.

    A token counts when its string is in the module-level ``builtins`` list,
    does not start with a double underscore, is not preceded by ``.`` and is
    not followed by ``=``.  Each builtin is listed once, in order of first
    appearance.
    """
    out = []
    last_index = len(tokens) - 1
    for index, tok in enumerate(tokens):
        token_string = tok[1]
        if token_string not in builtins:
            continue
        # todo: I need to test if print can be replaced
        if token_string.startswith("__"):  # Don't count magic funcs
            continue
        # The neighbours are only consulted when they exist, so the first
        # token does not wrap around and the last one does not overrun.
        if index > 0 and tokens[index - 1][1] == ".":
            continue  # Attribute of something else (e.g. obj.open)
        if index < last_index and tokens[index + 1][1] == "=":
            continue  # Followed by "=", i.e. being assigned or passed by name
        if token_string not in out:
            out.append(token_string)
    return out
def enumerate_import_methods(tokens):
    """
    Returns a list of imported module methods (such as re.compile) inside
    *tokens*.

    For every module reported by :func:`enumerate_global_imports`, each
    ``<module>.<name>`` token sequence is recorded once as the string
    "module.name".  Results are grouped by module, in import order.
    """
    out = []
    # A match needs "<module> . <name>", i.e. two more tokens after the
    # module name, so the last two positions can never start one.
    scan_range = range(len(tokens) - 2)
    for module in enumerate_global_imports(tokens):
        for index in scan_range:
            if tokens[index][1] != module:
                continue
            if tokens[index + 1][1] != ".":  # Not accessing an attribute
                continue
            module_method = "%s.%s" % (module, tokens[index + 2][1])
            if module_method not in out:
                out.append(module_method)
    return out
def enumerate_local_modules(tokens, path):
    """
    Walks *path* recursively and returns a list of the dotted names of the
    ".py" files found there (e.g. "pkg.sub.mod" for "<path>/pkg/sub/mod.py")
    that are not in the list returned by :func:`enumerate_imports` for
    *tokens*.

    .. note::

        NOTE(review): the original summary read "Returns a list of modules
        inside *tokens* that are local to *path*", which suggests the filter
        should keep modules that ARE imported.  The long-standing
        ``not in`` test is preserved here; confirm against the callers
        before changing it.
    """
    # Have to get a list of all modules before we can do anything else
    modules = enumerate_imports(tokens)
    local_modules = []
    # Now check the local dir for matching modules
    for root, _dirs, files in os.walk(path):
        # Package prefix = directory relative to *path*.  relpath copes with
        # a trailing separator on *path* and with directory names that repeat
        # inside the path, both of which broke the old split()-based logic.
        relative = os.path.relpath(root, path)
        if relative == os.curdir:
            module_tree = ""
        else:
            module_tree = relative.replace("\\", "/").replace("/", ".")
            module_tree = module_tree.lstrip(".")
        for f in files:
            if not f.endswith(".py"):
                continue
            name = f[:-3]  # Strip .py
            if module_tree:
                module = "%s.%s" % (module_tree, name)
            else:
                module = name
            if module not in modules:
                local_modules.append(module)
    return local_modules
def get_shebang(tokens):
    """
    Returns the shebang string in *tokens* if it exists.  None if not.

    The value returned is the source line stored in the matching token
    (``tok[4]``), e.g. "#!/usr/bin/env python" plus whatever line ending the
    token carries.  *tokens* must support slicing.
    """
    # A shebang will always be in the first four tokens, so there is no
    # need to look any further than that.
    for tok in tokens[:4]:
        line = tok[4]  # The source line this token was read from
        # Only a line that *starts* with "#!" qualifies
        # (e.g. "#!/usr/bin/env python")
        if shebang.match(line):
            return line
    return None