codeapi-new/pg_logger.py

# Online Python Tutor
# https://github.com/pgbovine/OnlinePythonTutor/
#
# Copyright (C) Philip J. Guo (philip@pgbovine.net)
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


# This is the meat of the Online Python Tutor back-end.  It implements a
# full logger for Python program execution (based on pdb, the standard
# Python debugger imported via the bdb module), printing out the values
# of all in-scope data structures after each executed instruction.

# NB: try to import the minimal amount of stuff in this module to lessen
# the security attack surface

import sys
import bdb  # the KEY import here!
import re
import traceback
import types

# TODO: use the 'six' package to smooth out Py2 and Py3 differences
is_python3 = sys.version_info[0] == 3

# NB: don't use cStringIO since it doesn't support unicode!!!
if is_python3:
    import io as StringIO
    import io  # expose regular io for Python3 users too
else:
    import StringIO
import pg_encoder


# Upper bound on the number of executed lines, in order to guard against
# infinite loops.
# History: on 2016-05-01 the limit was increased from 300 to 1000 for Python
# due to popular user demand (the warning message was improved at the same
# time).
# NOTE(review): the value below is 5000, so the "1000" in that note is stale
# -- confirm when/why the limit was raised again.
# MAX_EXECUTED_LINES = 300
MAX_EXECUTED_LINES = 5000

# DEBUG = False
DEBUG = True

# NOTE(review): presumably the marker for breakpoint lines (see the
# "breakpoint mode" comment in PGLogger.__init__); the code that scans the
# script for it is not in this part of the file -- confirm.
BREAKPOINT_STR = "#break"

# If a line starts with this string, then look for a comma-separated
# list of variables after the colon, and *hide* those variables in the trace.
#
# 2018-06-17:
# - now supports unix-style shell globs using the syntax in
#   https://docs.python.org/3/library/fnmatch.html so you can write things
#   like '#pythontutor_hide: _*' to hide all private instance variables
# - also now filters class and instance fields in addition to top-level vars
PYTUTOR_HIDE_STR = "#pythontutor_hide:"
# 2018-06-17: a comma-separated list of types that should be displayed *inline*
# like primitives, with their actual values HIDDEN to save space. For details
# of what types are legal to specify, see:
# pg_encoder.py:should_inline_object_by_type()
# - also accepts shell globs, just like PYTUTOR_HIDE_STR
PYTUTOR_INLINE_TYPE_STR = "#pythontutor_hide_type:"

# Matches the keyword 'class' followed by whitespace at the start of a string.
# PGLogger.interaction applies it to the left-stripped first source line of a
# called code object to recognise "calls" that are really class definitions.
CLASS_RE = re.compile(r"class\s+")


# copied-pasted from translate() in https://github.com/python/cpython/blob/2.7/Lib/fnmatch.py
def globToRegex(pat):
    """Translate a shell-style glob PATTERN into a regular expression string.

    Supported syntax (same as the Python 2.7 ``fnmatch.translate`` this was
    copied from): ``*`` matches anything, ``?`` matches one character,
    ``[seq]`` / ``[!seq]`` match (or exclude) a character set.  Every other
    character is matched literally.  There is no way to quote
    meta-characters.

    The returned pattern starts with the inline flags ``(?ms)`` and ends with
    ``\\Z``, so when used with ``re.match`` it must consume the whole string.
    """

    i, n = 0, len(pat)
    res = ""
    while i < n:
        c = pat[i]
        i = i + 1
        if c == "*":
            res = res + ".*"
        elif c == "?":
            res = res + "."
        elif c == "[":
            # scan ahead for the closing bracket of the character set
            j = i
            if j < n and pat[j] == "!":
                j = j + 1
            # a ']' right after '[' (or '[!') is a literal member of the set
            if j < n and pat[j] == "]":
                j = j + 1
            while j < n and pat[j] != "]":
                j = j + 1
            if j >= n:
                # unterminated set: treat the '[' as a literal character
                res = res + "\\["
            else:
                stuff = pat[i:j].replace("\\", "\\\\")
                i = j + 1
                if stuff[0] == "!":
                    # glob negation '!' becomes regex negation '^'
                    stuff = "^" + stuff[1:]
                elif stuff[0] == "^":
                    # a leading literal '^' must not be read as negation
                    stuff = "\\" + stuff
                res = "%s[%s]" % (res, stuff)
        else:
            res = res + re.escape(c)
    # The global inline flags have to LEAD the expression: Python 3.11+
    # raises re.error ("global flags not at the start of the expression")
    # when they trail it, and 3.6-3.10 emit a DeprecationWarning.  Putting
    # them first is accepted by every Python 2 and 3 version.
    return "(?ms)" + res + r"\Z"


def compileGlobMatch(pattern):
    """Return the bound ``match`` method of the regex compiled from a glob.

    The caller gets a callable taking one string and returning a match
    object or None.
    """
    regex_source = globToRegex(pattern)
    compiled = re.compile(regex_source)
    # it is very important to hand back match and *not* search, so that the
    # pattern is anchored at the start of the candidate string
    return compiled.match


# test globToRegex and compileGlobMatch
"""
for e in ('_*', '__*', '__*__', '*_$'):
    stuff = compileGlobMatch(e)
    for s in ('_test', 'test_', '_test_', '__test', '__test__'):
        print(e, s, stuff(s) is not None)
"""


# Hint appended to the error messages raised by __restricted_import__,
# open_wrapper and the banned-builtin wrappers in this file.
TRY_ANACONDA_STR = '\n\nYou can also try "Python 3.6 with Anaconda (experimental)",\nwhich is slower but lets you import many more modules.\n'


# simple sandboxing scheme:
#
# - use resource.setrlimit to deprive this process of ANY file descriptors
#   (which will cause file read/write and subprocess shell launches to fail)
# - restrict user builtins and module imports
#   (beware that this is NOT foolproof at all ... there are known flaws!)
#
# ALWAYS use defense-in-depth and don't just rely on these simple mechanisms
try:
    import resource

    resource_module_loaded = True
except ImportError:
    # Google App Engine doesn't seem to have the 'resource' module
    resource_module_loaded = False


# From http://coreygoldberg.blogspot.com/2009/05/python-redirect-or-turn-off-stdout-and.html
class NullDevice:
    """Write-only sink: accepts anything passed to write() and discards it."""

    def write(self, s):
        # intentionally drop the data on the floor
        return None


# __builtins__ is a CPython implementation detail: depending on how this
# module is loaded it is either the builtins module itself or that module's
# dict, so handle both shapes when grabbing the real __import__ ... UGLY!
if type(__builtins__) is not dict:
    assert type(__builtins__) is types.ModuleType
    BUILTIN_IMPORT = __builtins__.__import__
else:
    BUILTIN_IMPORT = __builtins__["__import__"]


# Whitelist of module imports.
# __restricted_import__ concatenates this tuple with OTHER_STDLIB_WHITELIST
# and only lets a module through if its name is in the combined list
# (after dropping "StringIO" on Python 3 and "typing" on Python 2).
ALLOWED_STDLIB_MODULE_IMPORTS = (
    "math",
    "random",
    "time",
    "datetime",
    "functools",
    "itertools",
    "operator",
    "string",
    "collections",
    "re",
    "json",
    "heapq",
    "bisect",
    "copy",
    "hashlib",
    "typing",
    # the above modules were first added in 2012-09
    # and then incrementally appended to up until
    # 2016-ish (see git blame logs)
    # added these additional ones on 2018-06-15
    # after seeing usage logs of what users tried
    # importing a lot but we didn't support yet
    # (ignoring imports that heavily deal with
    # filesystem, networking, or 3rd-party libs)
    "__future__",
    "cmath",
    "decimal",
    "fractions",
    "pprint",
    "calendar",
    "pickle",
    "types",
    "array",
    "locale",
    "abc",
    "doctest",
    "unittest",
)

# allow users to import but don't explicitly import it since it's
# already been done above
OTHER_STDLIB_WHITELIST = ("StringIO", "io")


# Restrict imports to a whitelist
def __restricted_import__(*args):
    """Whitelist-enforcing stand-in for the builtin ``__import__``.

    Only the positional arguments that are *exactly* of type ``str`` are
    kept; the first of them is treated as the module name.  If that name is
    in the whitelist (ALLOWED_STDLIB_MODULE_IMPORTS plus
    OTHER_STDLIB_WHITELIST, minus "StringIO" on Python 3 / "typing" on
    Python 2) the module is imported through BUILTIN_IMPORT using the
    filtered arguments, the attributes ``os``, ``sys``, ``posix`` and ``gc``
    are deleted from it if present, and the module is returned.

    Raises:
        ImportError: if no plain-string module name was supplied, or if the
            requested module is not whitelisted (the message then lists
            every importable module).
    """
    # filter args to ONLY take in real strings so that someone can't
    # subclass str and bypass the 'in' test below
    args = [e for e in args if type(e) is str]

    # If the module name was not a plain str (e.g. a str subclass), nothing
    # is left to look up.  Report that as an import failure instead of
    # leaking an IndexError from args[0].
    if not args:
        raise ImportError("module name must be a plain string")

    all_allowed_imports = sorted(ALLOWED_STDLIB_MODULE_IMPORTS + OTHER_STDLIB_WHITELIST)
    if is_python3:
        all_allowed_imports.remove("StringIO")
    else:
        all_allowed_imports.remove("typing")

    if args[0] in all_allowed_imports:
        imported_mod = BUILTIN_IMPORT(*args)
        # somewhat weak protection against imported modules that contain one
        # of these troublesome builtins. again, NOTHING is foolproof ...
        # just more defense in depth :)
        #
        # unload it so that if someone attempts to reload it, then it has to be
        # loaded from the filesystem, which is (supposedly!) blocked by setrlimit
        for mod in ("os", "sys", "posix", "gc"):
            if hasattr(imported_mod, mod):
                delattr(imported_mod, mod)

        return imported_mod
    else:
        # original error message ...
        # raise ImportError('{0} not supported'.format(args[0]))

        # 2017-12-06: added a better error message to tell the user what
        # modules *can* be imported in python tutor ...
        ENTRIES_PER_LINE = 6

        # chunk the sorted whitelist into rows of ENTRIES_PER_LINE names
        # adapted from https://stackoverflow.com/questions/312443/how-do-you-split-a-list-into-evenly-sized-chunks
        lines_to_print = [
            all_allowed_imports[i : i + ENTRIES_PER_LINE]
            for i in range(0, len(all_allowed_imports), ENTRIES_PER_LINE)
        ]
        pretty_printed_imports = ",\n  ".join([", ".join(e) for e in lines_to_print])

        raise ImportError(
            "{0} not found or not supported\nOnly these modules can be imported:\n  {1}{2}".format(
                args[0], pretty_printed_imports, TRY_ANACONDA_STR
            )
        )


# Support interactive user input by:
#
# 1. running the entire program up to a call to raw_input (or input in py3),
# 2. bailing and returning a trace ending in a special 'raw_input' event,
# 3. letting the web frontend issue a prompt to the user to grab a string,
# 4. RE-RUNNING the whole program with that string added to input_string_queue,
# 5. which should bring execution to the next raw_input call (if
#    available), or to termination.
# Repeat until no more raw_input calls are encountered.
# Note that this is mad inefficient, but is simple to implement!

# VERY IMPORTANT -- set random seed to 0 to ensure deterministic execution
# (this runs once, at import time of this module):
import random

random.seed(0)

# queue of input strings passed from either raw_input or mouse_input;
# raw_input_wrapper, python2_input_wrapper and mouse_input_wrapper below
# consume it from the front with pop(0)
input_string_queue = []


def open_wrapper(*args):
    """Stand-in for open() that always raises with a StringIO usage hint.

    The arguments are ignored.  The message differs between Python 2 and 3
    only in which StringIO class and example link it points the user to.
    """
    if is_python3:
        message = """open() is not supported by Python Tutor.
Instead use io.StringIO() to simulate a file.
Example: http://goo.gl/uNvBGl"""
    else:
        message = """open() is not supported by Python Tutor.
Instead use StringIO.StringIO() to simulate a file.
Example: http://goo.gl/Q9xQ4p"""
    raise Exception(message + TRY_ANACONDA_STR)


# create a more sensible error message for unsupported features
def create_banned_builtins_wrapper(fn_name):
    """Build a replacement for builtin *fn_name* that always raises.

    The returned function accepts any positional arguments and raises an
    Exception saying that *fn_name* is not supported.
    """

    def err_func(*args):
        # the message is assembled lazily, only when the builtin is called
        message = "'" + fn_name + "' is not supported by Python Tutor."
        raise Exception(message + TRY_ANACONDA_STR)

    return err_func


class RawInputException(Exception):
    """Raised by the raw_input/input wrappers when no queued input is left.

    Its single argument is the prompt string; PGLogger.user_exception turns
    it into a 'raw_input' trace event and stops tracing.
    """

    pass


def raw_input_wrapper(prompt=""):
    """Replacement for raw_input() (input() in Python 3) fed from the queue.

    Pops and returns the next string of input_string_queue after echoing
    the prompt to stdout.  When the queue is empty, raises
    RawInputException carrying the prompt (always converted to a string).
    """
    if not input_string_queue:
        raise RawInputException(str(prompt))

    next_line = input_string_queue.pop(0)

    # write the prompt to stdout, to emulate what happens at the terminal;
    # the user's input itself is deliberately NOT echoed, to avoid
    # duplicating it in the debug output
    sys.stdout.write(str(prompt))
    return next_line


# Python 2 input() does eval(raw_input())
def python2_input_wrapper(prompt=""):
    """Replacement for Python 2's input(): eval() of the next queued string.

    Echoes the prompt to stdout and returns the evaluated input.  When the
    queue is empty, raises RawInputException carrying the prompt (always
    converted to a string).
    """
    if not input_string_queue:
        raise RawInputException(str(prompt))

    next_line = input_string_queue.pop(0)

    # write the prompt to stdout, to emulate what happens at the terminal;
    # the user's input itself is deliberately NOT echoed, to avoid
    # duplicating it in the debug output
    sys.stdout.write(str(prompt))

    # SECURITY NOTE: eval() of user-supplied text is intentional here (it is
    # what Python 2's input() does) -- it relies on the sandboxing layers.
    return eval(next_line)


class MouseInputException(Exception):
    """Raised by mouse_input_wrapper when no queued input is left.

    Its single argument is the prompt; PGLogger.user_exception turns it
    into a 'mouse_input' trace event and stops tracing.
    """

    pass


def mouse_input_wrapper(prompt=""):
    """Return the next queued input string, or signal that one is needed.

    Raises MouseInputException with the (unconverted) prompt when
    input_string_queue is empty.
    """
    if not input_string_queue:
        raise MouseInputException(prompt)
    return input_string_queue.pop(0)


# blacklist of builtins
BANNED_BUILTINS = []  # 2018-06-15 don't ban any builtins since that's just security by obscurity
# we should rely on other layered security mechanisms

# old banned built-ins prior to 2018-06-15
# BANNED_BUILTINS = ['reload', 'open', 'compile',
#                   'file', 'eval', 'exec', 'execfile',
#                   'exit', 'quit', 'help',
#                   'dir', 'globals', 'locals', 'vars']
# Peter says 'apply' isn't dangerous, so don't ban it

IGNORE_VARS = set(
    ("__builtins__", "__name__", "__exception__", "__doc__", "__package__")
)


"""
2013-12-26

Okay, what's with this f_valuestack business?

If you compile your own CPython and patch Objects/frameobject.c to add a
Python accessor for f_valuestack, then you can actually access the value
stack, which is useful for, say, grabbing the objects within
list/set/dict comprehensions as they're being built. e.g., try:

    z = [x*y for x in range(5) for y in range(5)]

Note that on pythontutor.com, I am currently running custom-compiled
versions of Python-2.7.6 and Python-3.3.3 with this f_valuestack hack.
Unless you run your own custom CPython, you won't get these benefits.
- update as of 2018-06-16: I don't think the above has been true for a while


Patch:

 static PyObject *
 frame_getlineno(PyFrameObject *f, void *closure)
 {
     return PyLong_FromLong(PyFrame_GetLineNumber(f));
 }

+// copied from Py2crazy, which was for Python 2, but let's hope this still works!
+static PyObject *
+frame_getvaluestack(PyFrameObject* f) {
+    // pgbovine - TODO: will this memory leak? hopefully not,
+    // since all other accessors seem to follow the same idiom
+    PyObject* lst = PyList_New(0);
+    if (f->f_stacktop != NULL) {
+        PyObject** p = NULL;
+        for (p = f->f_valuestack; p < f->f_stacktop; p++) {
+            PyList_Append(lst, *p);
+        }
+    }
+
+    return lst;
+}
+
 /* Setter for f_lineno - you can set f_lineno from within a trace function in
  * order to jump to a given line of code, subject to some restrictions.  Most
  * lines are OK to jump to because they don't make any assumptions about the
@@ -368,6 +384,11 @@

 static PyGetSetDef frame_getsetlist[] = {
     {"f_locals",        (getter)frame_getlocals, NULL, NULL},
     {"f_lineno",        (getter)frame_getlineno,
                     (setter)frame_setlineno, NULL},
     {"f_trace",         (getter)frame_gettrace, (setter)frame_settrace, NULL},
+
+    // pgbovine
+    {"f_valuestack",(getter)frame_getvaluestack,
+                    (setter)NULL /* don't let it be set */, NULL},
+
     {0}
 };
"""


# at_global_scope should be true only if 'frame' represents the global scope
def get_user_globals(frame, at_global_scope=False):
    """Return a filtered copy of the globals visible from *frame*.

    Names in IGNORE_VARS are dropped, and so is '__return__' (for globals
    only, NOT for locals).  at_global_scope should be true only if 'frame'
    represents the global scope; it is currently unused because the code
    that dumped the whole value stack is disabled.
    """
    user_globals = filter_var_dict(frame.f_globals)

    # don't blurt out all of f_valuestack for now ... (disabled code:)
    #   if at_global_scope and hasattr(frame, 'f_valuestack'):
    #     for (i, e) in enumerate(frame.f_valuestack):
    #       d['_tmp' + str(i+1)] = e

    # print out list objects being built up in Python 2.x list comprehensions
    # (which don't have its own special <listcomp> frame, sadly)
    if not is_python3 and hasattr(frame, "f_valuestack"):
        partial_lists = [item for item in frame.f_valuestack if type(item) is list]
        for position, partial in enumerate(partial_lists, 1):
            user_globals["_tmp" + str(position)] = partial

    # also filter out __return__ for globals only, but NOT for locals
    user_globals.pop("__return__", None)
    return user_globals


def get_user_locals(frame):
    """Return a filtered copy of the locals of *frame*.

    Names in IGNORE_VARS are dropped.  If the frame exposes an
    'f_valuestack' attribute, partially-built comprehension results found on
    it are added under the synthetic names '_tmp1', '_tmp2', ...
    """
    user_locals = filter_var_dict(frame.f_locals)

    # don't blurt out all of f_valuestack for now ... (disabled code:)
    #   if hasattr(frame, 'f_valuestack'):
    #     for (i, e) in enumerate(frame.f_valuestack):
    #       ret['_tmp' + str(i+1)] = e

    # special printing of list/set/dict comprehension objects as they are
    # being built up incrementally ...
    code_name = frame.f_code.co_name
    if not hasattr(frame, "f_valuestack"):
        return user_locals

    # print out list objects being built up in Python 2.x list comprehensions
    # (which don't have its own special <listcomp> frame, sadly)
    if not is_python3:
        partial_lists = [item for item in frame.f_valuestack if type(item) is list]
        for position, partial in enumerate(partial_lists, 1):
            user_locals["_tmp" + str(position)] = partial

    # for dict and set comprehensions, which have their own frames:
    if code_name.endswith("comp>"):
        partial_containers = [
            item for item in frame.f_valuestack if type(item) in (list, set, dict)
        ]
        for position, partial in enumerate(partial_containers, 1):
            user_locals["_tmp" + str(position)] = partial

    return user_locals


def filter_var_dict(d):
    """Return a new dict with every entry of *d* whose key is not in IGNORE_VARS."""
    return {name: value for name, value in d.items() if name not in IGNORE_VARS}


# yield all function objects locally-reachable from frame,
# making sure to traverse inside all compound objects ...
def visit_all_locally_reachable_function_objs(frame):
    """Yield every function/method object reachable from the locals of *frame*.

    Each local value is traversed independently (with its own fresh
    'seen' set), descending into compound objects via visit_function_obj.
    """
    function_types = (types.FunctionType, types.MethodType)
    for local_value in get_user_locals(frame).values():
        for found in visit_function_obj(local_value, set()):
            if not found:
                # visit_function_obj yields None as a filler; skip those
                continue
            assert type(found) in function_types
            yield found


# TODO: this might be slow if we're traversing inside lots of objects:
def visit_function_obj(v, ids_seen_set):
    """Yield the function/method objects found inside *v*, plus None fillers.

    *ids_seen_set* collects the id() of every object visited so far and is
    mutated in place; an object whose id is already in it only yields None
    (this prevents infinite loops on cyclic structures).  Lists, tuples and
    sets are traversed element-wise; dicts by key and value; classes and
    instances through their __dict__ when they have one.  Every first visit
    ends with one extra None.
    """
    obj_id = id(v)

    # to prevent infinite loop
    if obj_id in ids_seen_set:
        yield None
        return

    ids_seen_set.add(obj_id)
    kind = type(v)

    if kind in (types.FunctionType, types.MethodType):
        # simple base case
        yield v
    elif kind in (list, tuple, set):
        # recursive case: flat containers
        for item in v:
            for found in visit_function_obj(item, ids_seen_set):
                yield found
    elif kind == dict or pg_encoder.is_class(v) or pg_encoder.is_instance(v):
        # recursive case: mappings, and objects carrying a __dict__
        if kind == dict:
            mapping = v
        elif hasattr(v, "__dict__"):
            mapping = v.__dict__
        else:
            # warning: some classes or instances don't have __dict__ attributes
            mapping = None

        if mapping:
            for key, value in mapping.items():
                for part in (key, value):
                    for found in visit_function_obj(part, ids_seen_set):
                        yield found

    # degenerate base case
    yield None


class PGLogger(bdb.Bdb):
    # if custom_modules is non-empty, it should be a dict mapping module
    # names to the python source code of each module. when _runscript is
    # called, it will do "from <module> import *" for all modules in
    # custom_modules before running the user's script and then trace all
    # code within custom_modules
    #
    # if separate_stdout_by_module, then have a separate stdout stream
    # for each module rather than all stdout going to a single stream
    def __init__(
        self,
        cumulative_mode,
        heap_primitives,
        show_only_outputs,
        finalizer_func,
        disable_security_checks=False,
        allow_all_modules=False,
        crazy_mode=False,
        custom_modules=None,
        separate_stdout_by_module=False,
        probe_exprs=None,
    ):
        """Set up all per-run bookkeeping for one traced execution.

        Args:
            cumulative_mode: if True, display ALL stack frames that have
                ever existed rather than only those currently on the stack
                (and their lexical parents).
            heap_primitives: if True, render certain primitive objects as
                heap objects.
            show_only_outputs: if True, don't render any data structures in
                the trace, and show only outputs.
            finalizer_func: a function that takes the output trace as a
                parameter and processes it.
            disable_security_checks: stored as-is, except that it is forced
                to True whenever allow_all_modules is true.
            allow_all_modules: stored as-is; when true it also switches the
                security checks off (see the comment below).
            crazy_mode: run using the custom Py2crazy Python interpreter.
            custom_modules: None/empty, or a dict mapping module names to
                the Python source code of each module; every name in it is
                added to the set of traced modules.
            separate_stdout_by_module: if true, keep a separate stdout
                stream for each module rather than a single stream.
            probe_exprs: stored unchanged when truthy, otherwise None.
                NOTE(review): how it is consumed is not visible in this part
                of the file.
        """
        bdb.Bdb.__init__(self)
        self.mainpyfile = ""
        self._wait_for_mainpyfile = 0

        if probe_exprs:
            self.probe_exprs = probe_exprs
        else:
            self.probe_exprs = None

        self.separate_stdout_by_module = separate_stdout_by_module
        self.stdout_by_module = {}  # Key: module name, Value: StringIO faux-stdout

        self.modules_to_trace = set(["__main__"])  # always trace __main__!

        # Key: module name
        # Value: module's python code as a string
        self.custom_modules = custom_modules
        if self.custom_modules:
            for module_name in self.custom_modules:
                self.modules_to_trace.add(module_name)

        self.disable_security_checks = disable_security_checks
        self.allow_all_modules = allow_all_modules
        # if we allow all modules, we shouldn't do security checks
        # either since otherwise users can't really import anything
        # because that will likely involve opening files on disk, which
        # is disallowed by security checks
        if self.allow_all_modules:
            self.disable_security_checks = True

        # if True, then displays ALL stack frames that have ever existed
        # rather than only those currently on the stack (and their
        # lexical parents)
        self.cumulative_mode = cumulative_mode

        # if True, then render certain primitive objects as heap objects
        self.render_heap_primitives = heap_primitives

        # if True, then don't render any data structures in the trace,
        # and show only outputs
        self.show_only_outputs = show_only_outputs

        # Run using the custom Py2crazy Python interpreter
        self.crazy_mode = crazy_mode

        # a function that takes the output trace as a parameter and
        # processes it
        self.finalizer_func = finalizer_func

        # each entry contains a dict with the information for a single
        # executed line
        self.trace = []

        # if this is true, don't put any more stuff into self.trace
        self.done = False

        # if this is non-null, don't do any more tracing until a
        # 'return' instruction with a stack gotten from
        # get_stack_code_IDs() that matches wait_for_return_stack
        self.wait_for_return_stack = None

        # http://stackoverflow.com/questions/2112396/in-python-in-google-app-engine-how-do-you-capture-output-produced-by-the-print
        self.GAE_STDOUT = sys.stdout

        # Key:   function object
        # Value: parent frame
        self.closures = {}

        # Key:   code object for a lambda
        # Value: parent frame
        self.lambda_closures = {}

        # set of function objects that were defined in the global scope
        self.globally_defined_funcs = set()

        # Key: frame object
        # Value: monotonically increasing small ID, based on call order
        self.frame_ordered_ids = {}
        self.cur_frame_id = 1

        # List of frames to KEEP AROUND after the function exits.
        # If cumulative_mode is True, then keep ALL frames in
        # zombie_frames; otherwise keep only frames where
        # nested functions were defined within them.
        self.zombie_frames = []

        # set of elements within zombie_frames that are also
        # LEXICAL PARENTS of other frames
        self.parent_frames_set = set()

        # all globals that ever appeared in the program, in the order in
        # which they appeared. note that this might be a superset of all
        # the globals that exist at any particular execution point,
        # since globals might have been deleted (using, say, 'del')
        self.all_globals_in_order = []

        # very important for this single object to persist throughout
        # execution, or else canonical small IDs won't be consistent.
        # NOTE: the encoder receives this logger while it is still being
        # initialised; the attributes assigned below do not exist yet at
        # this point.
        self.encoder = pg_encoder.ObjectEncoder(self)

        self.executed_script = None  # Python script to be executed!

        # if there is at least one line that ends with BREAKPOINT_STR,
        # then activate "breakpoint mode", where execution should stop
        # ONLY at breakpoint lines.
        self.breakpoints = []

        self.vars_to_hide = set()  # a set of pattern-matching callables
        # (the bound .match methods returned by compileGlobMatch()), built
        # from the contents of PYTUTOR_HIDE_STR; see should_hide_var
        self.types_to_inline = (
            set()
        )  # a set of regex match objects derived from PYTUTOR_INLINE_TYPE_STR

        self.prev_lineno = -1  # keep track of previous line just executed

    def should_hide_var(self, var):
        """Return True if any matcher in self.vars_to_hide accepts *var*."""
        return any(matcher(var) for matcher in self.vars_to_hide)

    def get_user_stdout(self):
        """Return everything the user's program has printed so far.

        With separate_stdout_by_module set, the result is a dict mapping
        each module name in self.stdout_by_module to its captured text;
        otherwise it is the text of the single self.user_stdout stream.
        """

        def _text_of(stream):
            # This is SUPER KRAZY! In Python 2, the buflist inside of a
            # StringIO instance can be made up of both str and unicode, so
            # convert every str to unicode (replacing invalid characters
            # with the Unicode '?') and leave unicode elements alone, so
            # that EVERYTHING inside buflist is unicode. (In Python 3
            # everything is already unicode, so nothing to do.)
            if not is_python3:
                stream.buflist = [
                    (chunk.decode("utf-8", "replace") if type(chunk) is str else chunk)
                    for chunk in stream.buflist
                ]
            return stream.getvalue()

        if not self.separate_stdout_by_module:
            # common case - single stdout stream
            return _text_of(self.user_stdout)

        return dict(
            (module_name, _text_of(stream))
            for module_name, stream in self.stdout_by_module.items()
        )

    def get_frame_id(self, cur_frame):
        """Return the small ordered ID recorded for *cur_frame* (KeyError if none)."""
        ordered_ids = self.frame_ordered_ids
        return ordered_ids[cur_frame]

    # Returns the (lexical) parent of a function value.
    def get_parent_of_function(self, val):
        """Return the frame ID of the lexical parent of *val*, or None.

        *val* is looked up first among the recorded closures (keyed by
        function object), then among the lambda closures (keyed by code
        object).
        """
        for registry in (self.closures, self.lambda_closures):
            if val in registry:
                return self.get_frame_id(registry[val])
        return None

    # Returns the (lexical) parent frame of the function that was called
    # to create the stack frame 'frame'.
    #
    # OKAY, this is a SUPER hack, but I don't see a way around it
    # since it's impossible to tell exactly which function
    # ('closure') object was called to create 'frame'.
    #
    # The Python interpreter doesn't maintain this information,
    # so unless we hack the interpreter, we will simply have
    # to make an educated guess based on the contents of local
    # variables inherited from possible parent frame candidates.
    def get_parent_frame(self, frame):
        """Guess the lexical parent frame of *frame*; None if there is none.

        A recorded closure is a candidate when its code object equals
        frame.f_code.  The candidate is accepted if no inherited variable
        disagrees: names local to the code object and '__return__' are
        ignored, and every other name present in both frames must compare
        equal.  Failing that, the lambda closures are searched by code
        object alone.
        """
        # print >> sys.stderr, 'get_parent_frame: frame.f_code', frame.f_code
        for func_obj, candidate in self.closures.items():
            if not (func_obj.__code__ == frame.f_code):
                continue

            # ok, there's a possible match, but let's compare the local
            # variables in the candidate to those of frame to make sure.
            # this is a hack that happens to work because in Python, each
            # stack frame inherits ('inlines') a copy of the variables from
            # its (lexical) parent frame.
            for name in frame.f_locals:
                # Do not try to match local names
                if name in frame.f_code.co_varnames:
                    continue
                if (
                    name != "__return__"
                    and name in candidate.f_locals
                    and candidate.f_locals[name] != frame.f_locals[name]
                ):
                    break
            else:
                # no disagreement found
                return candidate

        for lambda_code_obj, candidate in self.lambda_closures.items():
            if lambda_code_obj == frame.f_code:
                # TODO: should we do more verification like above?!?
                return candidate

        return None

    def lookup_zombie_frame_by_id(self, frame_id):
        """Return the frame in self.zombie_frames whose ordered ID is *frame_id*.

        Raises:
            AssertionError: if no zombie frame has that ID (this should
                never happen).  Raised explicitly rather than through an
                ``assert`` statement so the check survives ``python -O``
                instead of silently returning None.
        """
        # TODO: kinda inefficient
        for zombie in self.zombie_frames:
            if self.get_frame_id(zombie) == frame_id:
                return zombie
        raise AssertionError("no zombie frame with id %r" % (frame_id,))

    # unused ...
    # def reset(self):
    #    bdb.Bdb.reset(self)
    #    self.forget()

    def forget(self):
        """Reset the cached stack state (lineno, stack, curindex, curframe)."""
        self.lineno = self.curframe = None
        self.curindex = 0
        self.stack = []

    def setup(self, f, t):
        self.forget()
        self.stack, self.curindex = self.get_stack(f, t)
        self.curframe = self.stack[self.curindex][0]

    # should be a reasonably unique ID to match calls and returns:
    def get_stack_code_IDs(self):
        """Return id() of the code object of every frame in self.stack, in order."""
        code_ids = []
        for entry in self.stack:
            stack_frame = entry[0]
            code_ids.append(id(stack_frame.f_code))
        return code_ids

    # Override Bdb methods

    def user_call(self, frame, argument_list):
        """This method is called when there is the remote possibility
        that we ever need to stop in this function.

        Does nothing once tracing is done or while still waiting for the
        main file; otherwise, if stop_here() accepts the frame, records a
        'call' event through interaction().
        """
        # TODO: figure out a way to move this down to 'def interaction'
        # or right before self.trace.append ...
        if self.done or self._wait_for_mainpyfile:
            return
        if not self.stop_here(frame):
            return

        # delete __return__ so that on subsequent calls to
        # a generator function, the OLD yielded (returned)
        # value gets deleted from the frame ...
        try:
            del frame.f_locals["__return__"]
        except KeyError:
            pass

        self.interaction(frame, None, "call")

    def user_line(self, frame):
        """This function is called when we stop or break at this line.

        While waiting for the main file, lines are ignored until one shows
        up in a traced module with a positive line number; that line clears
        the waiting flag.  Every accepted line is recorded as a 'step_line'
        event through interaction().
        """
        if self.done:
            return

        if self._wait_for_mainpyfile:
            in_traced_module = frame.f_globals["__name__"] in self.modules_to_trace
            # older code:
            # if (self.canonic(frame.f_code.co_filename) != "<string>" or
            #    frame.f_lineno <= 0):
            if not in_traced_module or frame.f_lineno <= 0:
                return
            self._wait_for_mainpyfile = 0

        self.interaction(frame, None, "step_line")

    def user_return(self, frame, return_value):
        """This function is called when a return trap is set here.

        Unless tracing is done, stores the value under '__return__' in the
        frame's locals and records a 'return' event through interaction().
        """
        if not self.done:
            frame.f_locals["__return__"] = return_value
            self.interaction(frame, None, "return")

    def user_exception(self, frame, exc_info):
        """This function is called if an exception occurs,
        but only if we are to stop at or just below this level."""
        if self.done:
            return

        exc_type, exc_value, exc_traceback = exc_info
        frame.f_locals["__exception__"] = exc_type, exc_value
        if type(exc_type) == type(""):
            exc_type_name = exc_type
        else:
            exc_type_name = exc_type.__name__

        if exc_type_name == "RawInputException":
            raw_input_arg = str(
                exc_value.args[0]
            )  # make sure it's a string so it's JSON serializable!
            self.trace.append(dict(event="raw_input", prompt=raw_input_arg))
            self.done = True
        elif exc_type_name == "MouseInputException":
            mouse_input_arg = str(
                exc_value.args[0]
            )  # make sure it's a string so it's JSON serializable!
            self.trace.append(dict(event="mouse_input", prompt=mouse_input_arg))
            self.done = True
        else:
            self.interaction(frame, exc_traceback, "exception")

    def get_script_line(self, n):
        """Return line number *n* (1-based) of self.executed_script_lines."""
        index = n - 1
        return self.executed_script_lines[index]

    # General interaction function

    def interaction(self, frame, traceback, event_type):
        """Record one execution step and append it to ``self.trace``.

        After calling ``self.setup(frame, traceback)``, the top-of-stack
        entry ``self.stack[self.curindex]`` is inspected.  The event is
        dropped (early return, nothing recorded) when the top frame's module
        is not in ``self.modules_to_trace``, when the code object is named
        ``__new__`` or ``__repr__``, or while a class-definition "call" is
        being waited out via ``self.wait_for_return_stack``.  Otherwise a
        trace entry dict is built (line, event, func_name, globals,
        ordered_globals, stack_to_render, heap, stdout, plus optional
        fields) and appended to ``self.trace`` subject to the breakpoint
        filter.

        Parameters:
            frame: frame object forwarded to ``self.setup``; also read for
                ``f_locals["__exception__"]`` on exception events and for
                column info when ``self.crazy_mode`` is set.
            traceback: forwarded to ``self.setup``.  NB: inside this method
                the name shadows the ``traceback`` module imported at the
                top of the file.
            event_type: event name string.  This method special-cases
                "call", "return" and "exception"; the value is stored
                verbatim as the entry's ``event`` field.

        Returns:
            None.

        NOTE(review): the early returns skip the ``self.forget()`` call made
        at the end of the method -- confirm that is intended.
        """
        # presumably setup() populates self.stack / self.curindex, since both
        # are read immediately afterwards -- verify against its definition
        self.setup(frame, traceback)
        tos = self.stack[self.curindex]
        top_frame = tos[0]
        lineno = tos[1]

        topframe_module = top_frame.f_globals["__name__"]

        # debug ...
        """
        print >> sys.stderr
        print >> sys.stderr, '=== STACK ===', 'curindex:', self.curindex
        for (e,ln) in self.stack:
          print >> sys.stderr, e.f_code.co_name + ' ' + e.f_code.co_filename + ' ' + str(ln)
        print >> sys.stderr, "top_frame", top_frame.f_code.co_name, top_frame.f_code
        """

        # don't trace inside of ANY functions that aren't user-written code
        # (e.g., those from imported modules -- e.g., random, re -- or the
        # __restricted_import__ function in this file)
        #
        # empirically, it seems like the FIRST entry in self.stack is
        # the 'run' function from bdb.py, but everything else on the
        # stack is the user program's "real stack"

        # Look only at the "topmost" frame on the stack ...

        # if we're not in a module that we are explicitly tracing, skip:
        # (this comes up in tests/backend-tests/namedtuple.txt)
        if topframe_module not in self.modules_to_trace:
            return
        # also don't trace inside of the magic "constructor" code
        if top_frame.f_code.co_name == "__new__":
            return
        # or __repr__, which is often called when running print statements
        if top_frame.f_code.co_name == "__repr__":
            return

        # don't trace if wait_for_return_stack is non-null ...
        if self.wait_for_return_stack:
            # stop waiting only when the matching 'return' arrives, i.e. the
            # current stack's code IDs equal the ones saved when the wait began
            if event_type == "return" and (
                self.wait_for_return_stack == self.get_stack_code_IDs()
            ):
                self.wait_for_return_stack = None  # reset!
            return  # always bail!
        else:
            # Skip all "calls" that are actually class definitions, since
            # those faux calls produce lots of ugly cruft in the trace.
            #
            # NB: Only trigger on calls to functions defined in
            # user-written code (i.e., co_filename == '<string>'), but that
            # should already be ensured by the above check for whether we're
            # in user-written code.
            if event_type == "call":
                # fetch the source text of the line where this code object
                # starts, so it can be matched against CLASS_RE below
                first_lineno = top_frame.f_code.co_firstlineno
                if topframe_module == "__main__":
                    func_line = self.get_script_line(first_lineno)
                elif topframe_module in self.custom_modules:
                    module_code = self.custom_modules[topframe_module]
                    module_code_lines = (
                        module_code.splitlines()
                    )  # TODO: maybe pre-split lines?
                    func_line = module_code_lines[first_lineno - 1]
                else:
                    # you're hosed
                    func_line = ""
                # print >> sys.stderr, func_line

                if CLASS_RE.match(func_line.lstrip()):  # ignore leading spaces
                    # remember the current stack so the matching 'return'
                    # event can be recognized by the branch above
                    self.wait_for_return_stack = self.get_stack_code_IDs()
                    return

        self.encoder.reset_heap()  # VERY VERY VERY IMPORTANT,
        # or else we won't properly capture heap object mutations in the trace!

        if event_type == "call":
            # Don't be so strict about this assertion because it FAILS
            # when you're calling a generator (not for the first time),
            # since that frame has already previously been on the stack ...
            # assert top_frame not in self.frame_ordered_ids

            # give the newly-called frame the next sequential frame id
            self.frame_ordered_ids[top_frame] = self.cur_frame_id
            self.cur_frame_id += 1

            if self.cumulative_mode:
                self.zombie_frames.append(top_frame)

        # kinda tricky to get the timing right -- basically, as soon as you
        # make a call, set sys.stdout to the stream for the appropriate
        # module, and as soon as you return, set sys.stdout to the
        # stream for your caller's module. we need to do this on the
        # return call since we want to immediately start picking up
        # prints to stdout *right after* this function returns
        if self.separate_stdout_by_module:
            if event_type == "call":
                if topframe_module in self.stdout_by_module:
                    sys.stdout = self.stdout_by_module[topframe_module]
                else:
                    sys.stdout = self.stdout_by_module["<other>"]
            elif event_type == "return" and self.curindex > 0:
                # the entry just below the top of the stack is the caller
                prev_tos = self.stack[self.curindex - 1]
                prev_topframe = prev_tos[0]
                prev_topframe_module = prev_topframe.f_globals["__name__"]
                if prev_topframe_module in self.stdout_by_module:
                    sys.stdout = self.stdout_by_module[prev_topframe_module]
                else:
                    sys.stdout = self.stdout_by_module["<other>"]

        # only render zombie frames that are NO LONGER on the stack
        #
        # subtle: self.stack[:self.curindex+1] is the real stack, since
        # everything after self.curindex+1 is beyond the top of the
        # stack. this seems to be relevant only when there's an exception,
        # since the ENTIRE stack is preserved but self.curindex
        # starts decrementing as the exception bubbles up the stack.
        cur_stack_frames = [e[0] for e in self.stack[: self.curindex + 1]]
        zombie_frames_to_render = [
            e for e in self.zombie_frames if e not in cur_stack_frames
        ]

        # each element is a dict produced by create_encoded_stack_entry()
        encoded_stack_locals = []

        # returns a dict with keys: func_name, is_parent, frame_id,
        # parent_frame_id_list, encoded_locals, ordered_varnames
        def create_encoded_stack_entry(cur_frame):
            # print >> sys.stderr, '- create_encoded_stack_entry', cur_frame, self.closures, self.lambda_closures
            ret = {}  # NOTE(review): never read in this function

            parent_frame_id_list = []

            # follow the chain of parent frames (as reported by
            # self.get_parent_frame), collecting their ids nearest-first
            f = cur_frame
            while True:
                p = self.get_parent_frame(f)
                if p:
                    pid = self.get_frame_id(p)
                    assert pid
                    parent_frame_id_list.append(pid)
                    f = p
                else:
                    break

            cur_name = cur_frame.f_code.co_name

            if cur_name == "":
                cur_name = "unnamed function"

            # augment lambdas with line number
            if cur_name == "<lambda>":
                cur_name += pg_encoder.create_lambda_line_number(
                    cur_frame.f_code, self.encoder.line_to_lambda_code
                )

            # encode in a JSON-friendly format now, in order to prevent ill
            # effects of aliasing later down the line ...
            encoded_locals = {}

            for k, v in get_user_locals(cur_frame).items():
                is_in_parent_frame = False

                # don't display locals that appear in your parents' stack frames,
                # since that's redundant
                for pid in parent_frame_id_list:
                    parent_frame = self.lookup_zombie_frame_by_id(pid)
                    if k in parent_frame.f_locals:
                        # ignore __return__, which is never copied
                        if k != "__return__":
                            # these values SHOULD BE ALIASES
                            # (don't do an 'is' check since it might not fire for primitives)
                            if parent_frame.f_locals[k] == v:
                                is_in_parent_frame = True

                # a name that is also one of this frame's own co_varnames is
                # still shown even when a parent frame holds an equal value
                if is_in_parent_frame and k not in cur_frame.f_code.co_varnames:
                    continue

                # don't display some built-in locals ...
                if k == "__module__":
                    continue

                if self.should_hide_var(k):
                    continue

                encoded_val = self.encoder.encode(v, self.get_parent_of_function)
                encoded_locals[k] = encoded_val

            # order the variable names in a sensible way:

            # Let's start with co_varnames, since it (often) contains all
            # variables in this frame, some of which might not exist yet.
            ordered_varnames = []
            for e in cur_frame.f_code.co_varnames:
                if e in encoded_locals:
                    ordered_varnames.append(e)

            # sometimes co_varnames doesn't contain all of the true local
            # variables: e.g., when executing a 'class' definition.  in that
            # case, iterate over encoded_locals and push them onto the end
            # of ordered_varnames in alphabetical order
            for e in sorted(encoded_locals.keys()):
                if e != "__return__" and e not in ordered_varnames:
                    ordered_varnames.append(e)

            # finally, put __return__ at the very end
            if "__return__" in encoded_locals:
                ordered_varnames.append("__return__")

            # doctor Python 3 initializer to look like a normal function (denero)
            if "__locals__" in encoded_locals:
                ordered_varnames.remove("__locals__")
                local = encoded_locals.pop("__locals__")
                # the default of True means a missing __return__ is left
                # alone; only an encoded __return__ of None gets replaced
                if encoded_locals.get("__return__", True) is None:
                    encoded_locals["__return__"] = local

            # crucial sanity checks!
            assert len(ordered_varnames) == len(encoded_locals)
            for e in ordered_varnames:
                assert e in encoded_locals

            return dict(
                func_name=cur_name,
                is_parent=(cur_frame in self.parent_frames_set),
                frame_id=self.get_frame_id(cur_frame),
                parent_frame_id_list=parent_frame_id_list,
                encoded_locals=encoded_locals,
                ordered_varnames=ordered_varnames,
            )

        i = self.curindex

        # look for whether a nested function has been defined during
        # this particular call:
        if i > 1:  # i == 1 implies that there's only a global scope visible
            for v in visit_all_locally_reachable_function_objs(top_frame):
                if v not in self.closures and v not in self.globally_defined_funcs:
                    # Look for the presence of the code object (v.func_code
                    # for Python 2 or v.__code__ for Python 3) in the
                    # constant pool (f_code.co_consts) of an enclosing
                    # stack frame, and set that frame as your parent.
                    #
                    # This technique properly handles lambdas passed as
                    # function parameters. e.g., this example:
                    #
                    # def foo(x):
                    #   bar(lambda y: x + y)
                    # def bar(a):
                    #   print a(20)
                    # foo(10)
                    chosen_parent_frame = None
                    # SUPER hacky but seems to work -- use reversed(self.stack)
                    # because we want to traverse starting from the TOP of the stack
                    # (most recent frame) and find the first frame containing
                    # a constant code object that matches v.__code__ or v.func_code
                    #
                    # required for this example from Berkeley CS61a:
                    #
                    # def f(p, k):
                    #     def g():
                    #         print(k)
                    #     if k == 0:
                    #         f(g, 1)
                    # f(None, 0)
                    #
                    # there are two calls to f, each of which defines a
                    # closure g that should point to the respective frame.
                    #
                    # note that for the second call to f, the parent of the
                    # g defined in there should be that frame, which is at
                    # the TOP of the stack. this reversed() hack does the
                    # right thing. note that if you don't traverse the stack
                    # backwards, then you will mistakenly get the parent as
                    # the FIRST f frame (bottom of the stack).
                    for my_frame, my_lineno in reversed(self.stack):
                        if chosen_parent_frame:
                            break

                        for frame_const in my_frame.f_code.co_consts:
                            if frame_const is (
                                v.__code__ if is_python3 else v.func_code
                            ):
                                chosen_parent_frame = my_frame
                                break

                    # 2013-12-01 commented out this line so tests/backend-tests/papajohn-monster.txt
                    # works without an assertion failure ...
                    # assert chosen_parent_frame # I hope this always passes :0

                    # this condition should be False for functions declared in global scope ...
                    if chosen_parent_frame in self.frame_ordered_ids:
                        self.closures[v] = chosen_parent_frame
                        self.parent_frames_set.add(
                            chosen_parent_frame
                        )  # unequivocally add to this set!!!
                        if not chosen_parent_frame in self.zombie_frames:
                            self.zombie_frames.append(chosen_parent_frame)
            # NOTE(review): this 'else' belongs to the 'for v' loop above, not
            # to an 'if'.  The only 'break' statements are inside the inner
            # loops, so this body runs every time the 'for v' loop finishes.
            else:
                # look for code objects of lambdas defined within this
                # function, which comes up in cases like line 2 of:
                # def x(y):
                #   (lambda z: lambda w: z+y)(y)
                #
                # x(42)
                if top_frame.f_code.co_consts:
                    for e in top_frame.f_code.co_consts:
                        if type(e) == types.CodeType and e.co_name == "<lambda>":
                            # TODO: what if it's already in lambda_closures?
                            self.lambda_closures[e] = top_frame
                            self.parent_frames_set.add(
                                top_frame
                            )  # copy-paste from above
                            if not top_frame in self.zombie_frames:
                                self.zombie_frames.append(top_frame)
        else:
            # if there is only a global scope visible ...
            for k, v in get_user_globals(top_frame).items():
                if (
                    type(v) in (types.FunctionType, types.MethodType)
                    and v not in self.closures
                ):
                    self.globally_defined_funcs.add(v)

        # climb up until you find '<module>', which is (hopefully) the global scope
        #
        # NB: top_frame is re-bound here. From this point on it is the
        # topmost frame that has an entry in self.frame_ordered_ids (or None
        # if there is none); the original top-of-stack frame is still tos[0].
        top_frame = None
        while True:
            cur_frame = self.stack[i][0]
            cur_name = cur_frame.f_code.co_name
            if cur_name == "<module>":
                break

            # do this check because in some cases, certain frames on the
            # stack might NOT be tracked, so don't push a stack entry for
            # those frames. this happens when you have a callback function
            # in an imported module. e.g., your code:
            #     def foo():
            #         bar(baz)
            #
            #     def baz(): pass
            #
            # imported module code:
            #     def bar(callback_func):
            #         callback_func()
            #
            # when baz is executing, the real stack is [foo, bar, baz] but
            # bar is in imported module code, so pg_logger doesn't trace
            # it, and it doesn't show up in frame_ordered_ids. thus, the
            # stack to render should only be [foo, baz].
            if cur_frame in self.frame_ordered_ids:
                encoded_stack_locals.append(create_encoded_stack_entry(cur_frame))
                if not top_frame:
                    top_frame = cur_frame
            i -= 1

        zombie_encoded_stack_locals = [
            create_encoded_stack_entry(e) for e in zombie_frames_to_render
        ]

        # encode in a JSON-friendly format now, in order to prevent ill
        # effects of aliasing later down the line ...
        encoded_globals = {}
        cur_globals_dict = get_user_globals(
            tos[0], at_global_scope=(self.curindex <= 1)
        )
        for k, v in cur_globals_dict.items():
            if self.should_hide_var(k):
                continue

            encoded_val = self.encoder.encode(v, self.get_parent_of_function)
            encoded_globals[k] = encoded_val

            # remember the order in which global names were first seen
            if k not in self.all_globals_in_order:
                self.all_globals_in_order.append(k)

        # filter out globals that don't exist at this execution point
        # (because they've been, say, deleted with 'del')
        ordered_globals = [e for e in self.all_globals_in_order if e in encoded_globals]
        assert len(ordered_globals) == len(encoded_globals)

        # merge zombie_encoded_stack_locals and encoded_stack_locals
        # into one master ordered list using some simple rules for
        # making it look aesthetically pretty
        stack_to_render = []
        # first push all regular stack entries
        if encoded_stack_locals:
            for e in encoded_stack_locals:
                e["is_zombie"] = False
                e["is_highlighted"] = False
                stack_to_render.append(e)

            # highlight the top-most active stack entry
            # (index 0, because the climb above appended frames top-down)
            stack_to_render[0]["is_highlighted"] = True

        # now push all zombie stack entries
        for e in zombie_encoded_stack_locals:
            # don't display return value for zombie frames
            # TODO: reconsider ...
            """
          try:
            e['ordered_varnames'].remove('__return__')
          except ValueError:
            pass
          """

            e["is_zombie"] = True
            e["is_highlighted"] = False  # never highlight zombie entries

            stack_to_render.append(e)

        # now sort by frame_id since that sorts frames in "chronological
        # order" based on the order they were invoked
        stack_to_render.sort(key=lambda e: e["frame_id"])

        # create a unique hash for this stack entry, so that the
        # frontend can uniquely identify it when doing incremental
        # rendering. the strategy is to use a frankenstein-like mix of the
        # relevant fields to properly disambiguate closures and recursive
        # calls to the same function
        for e in stack_to_render:
            hash_str = e["func_name"]
            # frame_id is UNIQUE, so it can disambiguate recursive calls
            hash_str += "_f" + str(e["frame_id"])

            # needed to refresh GUI display ...
            if e["is_parent"]:
                hash_str += "_p"

            # TODO: this is no longer needed, right? (since frame_id is unique)
            # if e['parent_frame_id_list']:
            #  hash_str += '_p' + '_'.join([str(i) for i in e['parent_frame_id_list']])
            if e["is_zombie"]:
                hash_str += "_z"

            e["unique_hash"] = hash_str

        # handle probe_exprs *before* encoding the heap with self.encoder.get_heap
        encoded_probe_vals = {}
        if self.probe_exprs:
            if top_frame:  # are we in a function call?
                top_frame_locals = get_user_locals(top_frame)
            else:
                top_frame_locals = {}
            for e in self.probe_exprs:
                try:
                    # evaluate it with globals + locals of the top frame ...
                    # NOTE(review): eval() runs arbitrary expressions --
                    # confirm where self.probe_exprs comes from
                    probe_val = eval(e, cur_globals_dict, top_frame_locals)
                    encoded_probe_vals[e] = self.encoder.encode(
                        probe_val, self.get_parent_of_function
                    )
                # NOTE(review): a bare except also catches SystemExit and
                # KeyboardInterrupt; it looks like deliberate best-effort
                except:
                    pass  # don't encode the value if there's been an error

        if self.show_only_outputs:
            # stripped-down entry: only position, event and stdout are kept
            trace_entry = dict(
                line=lineno,
                event=event_type,
                func_name=tos[0].f_code.co_name,
                globals={},
                ordered_globals=[],
                stack_to_render=[],
                heap={},
                stdout=self.get_user_stdout(),
            )
        else:
            trace_entry = dict(
                line=lineno,
                event=event_type,
                func_name=tos[0].f_code.co_name,
                globals=encoded_globals,
                ordered_globals=ordered_globals,
                stack_to_render=stack_to_render,
                heap=self.encoder.get_heap(),
                stdout=self.get_user_stdout(),
            )
            # probe values are attached only in this (full) branch
            if encoded_probe_vals:
                trace_entry["probe_exprs"] = encoded_probe_vals

        # optional column numbers for greater precision
        # (only relevant in Py2crazy, a hacked CPython that supports column numbers)
        if self.crazy_mode:
            # at the very least, grab the column number
            trace_entry["column"] = frame.f_colno

            # now try to find start_col and extent
            # (-1 is an invalid instruction index)
            if frame.f_lasti >= 0:
                key = (
                    frame.f_code.co_code,
                    frame.f_lineno,
                    frame.f_colno,
                    frame.f_lasti,
                )
                if key in self.bytecode_map:
                    v = self.bytecode_map[key]
                    trace_entry["expr_start_col"] = v.start_col
                    trace_entry["expr_width"] = v.extent
                    trace_entry["opcode"] = v.opcode

        # set a 'custom_module_name' field if we're executing in a module
        # that's not the __main__ script:
        if topframe_module != "__main__":
            trace_entry["custom_module_name"] = topframe_module

        # if there's an exception, then record its info:
        if event_type == "exception":
            # always check in f_locals
            # (exc[0] is used as the exception class, exc[1] as its value)
            exc = frame.f_locals["__exception__"]
            trace_entry["exception_msg"] = exc[0].__name__ + ": " + str(exc[1])

        # append to the trace only the breakpoint line and the next
        # executed line, so that if you set only ONE breakpoint, OPT shows
        # the state before and after that line gets executed.
        append_to_trace = True
        if self.breakpoints:
            if not (
                (lineno in self.breakpoints) or (self.prev_lineno in self.breakpoints)
            ):
                append_to_trace = False

            # TRICKY -- however, if there's an exception, then ALWAYS
            # append it to the trace, so that the error can be displayed
            if event_type == "exception":
                append_to_trace = True

        self.prev_lineno = lineno

        if append_to_trace:
            self.trace.append(trace_entry)

        # sanity check to make sure the state of the world at a 'call' instruction
        # is identical to that at the instruction immediately following it ...
        """
        if len(self.trace) > 1:
          cur = self.trace[-1]
          prev = self.trace[-2]
          if prev['event'] == 'call':
            assert cur['globals'] == prev['globals']
            for (s1, s2) in zip(cur['stack_to_render'], prev['stack_to_render']):
              assert s1 == s2
            assert cur['heap'] == prev['heap']
            assert cur['stdout'] == prev['stdout']
        """

        # once the trace holds MAX_EXECUTED_LINES entries, append one final
        # 'instruction_limit_reached' entry and ask the tracer to terminate
        if len(self.trace) >= MAX_EXECUTED_LINES:
            self.trace.append(
                dict(
                    event="instruction_limit_reached",
                    exception_msg="最多执行到 "
                    + str(MAX_EXECUTED_LINES)
                    + " 步，请减少循环的次数",
                )
            )
            self.force_terminate()

        self.forget()

    def _runscript(self, script_str):
        self.executed_script = script_str
        self.executed_script_lines = self.executed_script.splitlines()

        for i, line in enumerate(self.executed_script_lines):
            line_no = i + 1
            # subtle -- if the stripped line starts with '#break', that
            # means it may be a commented-out version of a normal Python
            # 'break' statement, which shouldn't be confused with an
            # OPT user-defined breakpoint!
            #
            # TODO: this still fails when someone writes something like
            # '##break' since it doesn't start with '#break'!!! i just
            # picked an unfortunate name that's also a python keyword :0
            if line.endswith(BREAKPOINT_STR) and not line.strip().startswith(
                BREAKPOINT_STR
            ):
                self.breakpoints.append(line_no)

            if line.startswith(PYTUTOR_HIDE_STR):
                hide_vars = line[len(PYTUTOR_HIDE_STR) :]
                # remember to call strip() -> compileGlobMatch()
                hide_vars = [compileGlobMatch(e.strip()) for e in hide_vars.split(",")]
                self.vars_to_hide.update(hide_vars)

            if line.startswith(PYTUTOR_INLINE_TYPE_STR):
                listed_types = line[len(PYTUTOR_INLINE_TYPE_STR) :]
                # remember to call strip() -> compileGlobMatch()
                listed_types = [
                    compileGlobMatch(e.strip()) for e in listed_types.split(",")
                ]
                self.types_to_inline.update(listed_types)

        # populate an extent map to get more accurate ranges from code
        if self.crazy_mode:
            # in Py2crazy standard library as Python-2.7.5/Lib/super_dis.py
            import super_dis

            try:
                self.bytecode_map = super_dis.get_bytecode_map(self.executed_script)
            except:
                # failure oblivious
                self.bytecode_map = {}

        # When bdb sets tracing, a number of call and line events happens
        # BEFORE debugger even reaches user's code (and the exact sequence of
        # events depends on python version). So we take special measures to
        # avoid stopping before we reach the main script (see user_line and
        # user_call for details).
        self._wait_for_mainpyfile = 1

        # ok, let's try to sorta 'sandbox' the user script by not
        # allowing certain potentially dangerous operations.
        user_builtins = {}

        # ugh, I can't figure out why in Python 2, __builtins__ seems to
        # be a dict, but in Python 3, __builtins__ seems to be a module,
        # so just handle both cases ... UGLY!
        if type(__builtins__) is dict:
            builtin_items = __builtins__.items()
        else:
            assert type(__builtins__) is types.ModuleType
            builtin_items = []
            for k in dir(__builtins__):
                builtin_items.append((k, getattr(__builtins__, k)))

        for k, v in builtin_items:
            if (
                k == "open" and not self.allow_all_modules
            ):  # put this before BANNED_BUILTINS
                user_builtins[k] = open_wrapper
            elif k in BANNED_BUILTINS:
                user_builtins[k] = create_banned_builtins_wrapper(k)
            elif k == "__import__" and not self.allow_all_modules:
                user_builtins[k] = __restricted_import__
            else:
                if k == "raw_input":
                    user_builtins[k] = raw_input_wrapper
                elif k == "input":
                    if is_python3:
                        # Python 3 input() is Python 2 raw_input()
                        user_builtins[k] = raw_input_wrapper
                    else:
                        user_builtins[k] = python2_input_wrapper
                else:
                    user_builtins[k] = v

        user_builtins["mouse_input"] = mouse_input_wrapper

        if self.separate_stdout_by_module:
            self.stdout_by_module["__main__"] = StringIO.StringIO()
            if self.custom_modules:
                for module_name in self.custom_modules:
                    self.stdout_by_module[module_name] = StringIO.StringIO()
            self.stdout_by_module["<other>"] = (
                StringIO.StringIO()
            )  # catch-all for all other modules we're NOT tracing
            sys.stdout = self.stdout_by_module["<other>"]  # start with <other>
        else:
            # default -- a single unified stdout stream
            self.user_stdout = StringIO.StringIO()
            sys.stdout = self.user_stdout

        self.ORIGINAL_STDERR = sys.stderr

        # don't do this, or else certain kinds of errors, such as syntax
        # errors, will be silently ignored. WEIRD!
        # sys.stderr = NullDevice # silence errors

        user_globals = {}

        # if there are custom_modules, 'import' them into user_globals,
        # which emulates "from <module> import *"
        if self.custom_modules:
            for mn in self.custom_modules:
                # http://code.activestate.com/recipes/82234-importing-a-dynamically-generated-module/
                new_m = types.ModuleType(mn)
                exec(self.custom_modules[mn], new_m.__dict__)  # exec in custom globals
                user_globals.update(new_m.__dict__)

        # important: do this LAST to get precedence over values in custom_modules
        user_globals.update({"__name__": "__main__", "__builtins__": user_builtins})

        try:
            # if allow_all_modules is on, then try to parse script_str into an
            # AST, traverse the tree to find all modules that it imports, and then
            # try to PRE-IMPORT all of those. if we *don't* pre-import a module,
            # then when it's imported in the user's code, it may take *forever*
            # because the bdb debugger tries to single-step thru that code
            # (i think!). run 'import pandas' to quickly test this.
            if self.allow_all_modules:
                import ast

                try:
                    all_modules_to_preimport = []
                    tree = ast.parse(script_str)
                    for node in ast.walk(tree):
                        if isinstance(node, ast.Import):
                            for n in node.names:
                                all_modules_to_preimport.append(n.name)
                        elif isinstance(node, ast.ImportFrom):
                            all_modules_to_preimport(node.module)

                    for m in all_modules_to_preimport:
                        if (
                            m in script_str
                        ):  # optimization: load only modules that appear in script_str
                            try:
                                __import__(m)
                            except ImportError:
                                pass
                except:
                    pass

            # enforce resource limits RIGHT BEFORE running script_str

            # set ~200MB virtual memory limit AND a 5-second CPU time
            # limit (tuned for Webfaction shared hosting) to protect against
            # memory bombs such as:
            #   x = 2
            #   while True: x = x*x
            if resource_module_loaded and (not self.disable_security_checks):
                assert not self.allow_all_modules  # <-- shouldn't be on!

                # PREEMPTIVELY import all of these modules, so that when the user's
                # script imports them, it won't try to do a file read (since they've
                # already been imported and cached in memory). Remember that when
                # the user's code runs, resource.setrlimit(resource.RLIMIT_NOFILE, (0, 0))
                # will already be in effect, so no more files can be opened.
                for m in ALLOWED_STDLIB_MODULE_IMPORTS:
                    if (
                        m in script_str
                    ):  # optimization: load only modules that appear in script_str
                        try:
                            __import__(m)
                        except ImportError:
                            pass

                resource.setrlimit(resource.RLIMIT_AS, (200000000, 200000000))
                resource.setrlimit(resource.RLIMIT_CPU, (5, 5))

                # protect against unauthorized filesystem accesses ...
                resource.setrlimit(
                    resource.RLIMIT_NOFILE, (0, 0)
                )  # no opened files allowed

                # VERY WEIRD. If you activate this resource limitation, it
                # ends up generating an EMPTY trace for the following program:
                #   "x = 0\nfor i in range(10):\n  x += 1\n   print x\n  x += 1\n"
                # (at least on my Webfaction hosting with Python 2.7)
                # resource.setrlimit(resource.RLIMIT_FSIZE, (0, 0))  # (redundancy for paranoia)

                # The posix module is a built-in and has a ton of OS access
                # facilities ... if you delete those functions from
                # sys.modules['posix'], it seems like they're gone EVEN IF
                # someone else imports posix in a roundabout way. Of course,
                # I don't know how foolproof this scheme is, though.
                # (It's not sufficient to just "del sys.modules['posix']";
                #  it can just be reimported without accessing an external
                #  file and tripping RLIMIT_NOFILE, since the posix module
                #  is baked into the python executable, ergh. Actually DON'T
                #  "del sys.modules['posix']", since re-importing it will
                #  refresh all of the attributes. ergh^2)
                for a in dir(sys.modules["posix"]):
                    delattr(sys.modules["posix"], a)
                # do the same with os
                for a in dir(sys.modules["os"]):
                    # 'path' is needed for __restricted_import__ to work
                    # and 'stat' is needed for some errors to be reported properly
                    if a not in ("path", "stat"):
                        delattr(sys.modules["os"], a)
                # ppl can dig up trashed objects with gc.get_objects()
                import gc

                for a in dir(sys.modules["gc"]):
                    delattr(sys.modules["gc"], a)
                del sys.modules["gc"]

                # sys.modules contains an in-memory cache of already-loaded
                # modules, so if you delete modules from here, they will
                # need to be re-loaded from the filesystem.
                #
                # Thus, as an extra precaution, remove these modules so that
                # they can't be re-imported without opening a new file,
                # which is disallowed by resource.RLIMIT_NOFILE
                #
                # Of course, this isn't a foolproof solution by any means,
                # and it might lead to UNEXPECTED FAILURES later in execution.
                del sys.modules["os"]
                del sys.modules["os.path"]
                del sys.modules["sys"]

            self.run(script_str, user_globals, user_globals)
        # sys.exit ...
        except SystemExit:
            # sys.exit(0)
            raise bdb.BdbQuit
        except:
            if DEBUG:
                traceback.print_exc()

            trace_entry = dict(event="uncaught_exception")

            (exc_type, exc_val, exc_tb) = sys.exc_info()
            if hasattr(exc_val, "lineno"):
                trace_entry["line"] = exc_val.lineno
            if hasattr(exc_val, "offset"):
                trace_entry["offset"] = exc_val.offset

            trace_entry["exception_msg"] = type(exc_val).__name__ + ": " + str(exc_val)

            # SUPER SUBTLE! if ANY exception has already been recorded by
            # the program, then DON'T record it again as an uncaught_exception.
            # This looks kinda weird since the exact exception message doesn't
            # need to match up, but in practice, there should be at most only
            # ONE exception per trace.
            already_caught = False
            for e in self.trace:
                if e["event"] == "exception":
                    already_caught = True
                    break

            if not already_caught:
                if not self.done:
                    self.trace.append(trace_entry)

            raise bdb.BdbQuit  # need to forceably STOP execution

    def force_terminate(self):
        """Abort the traced run right away by raising bdb.BdbQuit.

        This method does not call finalize() itself.

        Raises:
            bdb.BdbQuit: always.
        """
        raise bdb.BdbQuit()  # forcibly STOP execution

    def finalize(self):
        """Restore the output streams, tidy the trace and run finalizer_func.

        sys.stdout and sys.stderr are reset to self.GAE_STDOUT and
        self.ORIGINAL_STDERR. If the trace ends with an 'exception' entry
        followed by a 'return' from '<module>', that trailing return entry
        is dropped. Entries after the module-level return are otherwise
        kept as-is (they are no longer filtered out).

        Returns:
            Whatever self.finalizer_func returns. It is called with
            (executed_script, trace) in the common case, or with
            (dict(main_code=..., custom_modules=...), trace) when
            self.custom_modules is truthy.
        """
        # very important! put the real streams back before anything else
        sys.stdout = self.GAE_STDOUT
        sys.stderr = self.ORIGINAL_STDERR

        assert len(self.trace) <= (MAX_EXECUTED_LINES + 1)

        entries = self.trace

        # for aesthetic reasons :) axe a final "return from <module>" entry
        # when the entry right before it is an 'exception'
        if len(entries) >= 2:
            second_to_last, last = entries[-2], entries[-1]
            if (
                second_to_last["event"] == "exception"
                and last["event"] == "return"
                and last["func_name"] == "<module>"
            ):
                entries.pop()

        self.trace = entries

        if not self.custom_modules:
            # common case
            return self.finalizer_func(self.executed_script, self.trace)

        # when there's custom_modules, call with a dict as the first parameter
        script_info = dict(
            main_code=self.executed_script, custom_modules=self.custom_modules
        )
        return self.finalizer_func(script_info, self.trace)


import json


# the MAIN meaty function!!!
def exec_script_str(script_str, raw_input_lst_json, options_json, finalizer_func):
    """Trace script_str with a PGLogger and then finalize the trace.

    Unlike exec_script_str_local, this entry point does not pass
    disable_security_checks to PGLogger and does not return the result
    of finalize().

    Args:
        script_str: source text of the program to execute and trace.
        raw_input_lst_json: despite the name, an already-decoded Python
            list of canned input values (or a falsy value for none). Each
            element is converted with str() and stored in the module-level
            input_string_queue.
        options_json: JSON-encoded object of options, or a falsy value to
            use the defaults. Recognized keys are "cumulative_mode",
            "heap_primitives", "show_only_outputs" and "py_crazy_mode";
            any key that is absent is treated as False.
        finalizer_func: callback handed to PGLogger.

    Returns:
        None. The value produced by logger.finalize() is discarded.
    """
    # Start from the defaults and overlay whatever the caller supplied, so
    # that a partial options object (e.g. only "py_crazy_mode") does not
    # blow up with a KeyError below.
    options = {
        "cumulative_mode": False,
        "heap_primitives": False,
        "show_only_outputs": False,
    }
    if options_json:
        options.update(json.loads(options_json))

    py_crazy_mode = options.get("py_crazy_mode", False)

    logger = PGLogger(
        options["cumulative_mode"],
        options["heap_primitives"],
        options["show_only_outputs"],
        finalizer_func,
        crazy_mode=py_crazy_mode,
    )

    # TODO: refactor these NOT to be globals
    global input_string_queue
    input_string_queue = []
    if raw_input_lst_json:
        # TODO: if we want to support unicode, remove str() cast
        # raw_input_lst_json is already a Python list, no need to json.loads
        input_string_queue = [str(e) for e in raw_input_lst_json]

    try:
        logger._runscript(script_str)
    except bdb.BdbQuit:
        # raised by the logger to stop execution; nothing more to do here
        pass
    finally:
        # always finalize, even if something unexpected propagates
        logger.finalize()


# disables security check and returns the result of finalizer_func
# WARNING: ONLY RUN THIS LOCALLY and never over the web, since
# security checks are disabled
#
# [optional] probe_exprs is a list of strings representing
# expressions whose values to probe at each step (advanced)
def exec_script_str_local(
    script_str,
    raw_input_lst_json,
    cumulative_mode,
    heap_primitives,
    finalizer_func,
    probe_exprs=None,
    allow_all_modules=False,
):
    """Trace script_str with security checks disabled; return the finalized result.

    WARNING: only run this locally, never over the web.

    Args:
        script_str: source text of the program to execute and trace.
        raw_input_lst_json: despite the name, an already-decoded Python
            list of canned input values (or a falsy value for none). Each
            element is converted with str() and stored in the module-level
            input_string_queue.
        cumulative_mode: forwarded to PGLogger as its first argument.
        heap_primitives: forwarded to PGLogger as its second argument.
        finalizer_func: callback handed to PGLogger.
        probe_exprs: optional list of expression strings whose values are
            probed at each step (advanced).
        allow_all_modules: forwarded to PGLogger unchanged.

    Returns:
        The value returned by the logger's finalize().
    """
    # TODO: refactor these NOT to be globals
    global input_string_queue

    tracer = PGLogger(
        cumulative_mode,
        heap_primitives,
        False,
        finalizer_func,
        disable_security_checks=True,
        allow_all_modules=allow_all_modules,
        probe_exprs=probe_exprs,
    )

    # raw_input_lst_json is already a Python list, no need to json.loads
    # TODO: if we want to support unicode, remove str() cast
    if raw_input_lst_json:
        input_string_queue = [str(item) for item in raw_input_lst_json]
    else:
        input_string_queue = []

    try:
        tracer._runscript(script_str)
    except bdb.BdbQuit:
        # raised by the logger to stop execution
        pass
    finally:
        # NOTE: returning from inside `finally` discards any exception that
        # is still propagating, so the finalized result is returned even if
        # _runscript raised something other than BdbQuit.
        return tracer.finalize()