import os
import sys
#import pg_logger
import json

abspath = os.path.abspath(sys.argv[1])
dname = os.path.dirname(abspath)
os.chdir(dname)

# Online Python Tutor
# https://github.com/pgbovine/OnlinePythonTutor/
#
# Copyright (C) 2010-2012 Philip J. Guo (philip@pgbovine.net)
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

# Thanks to John DeNero for making the encoder work on both Python 2 and 3

# Given an arbitrary piece of Python data, encode it in such a manner
# that it can later be serialized into JSON.
# http://json.org/
#
# We use this function to encode run-time traces of data structures
# to send to the front-end.
#
# Format:
#   Primitives:
#   * None, int, long, float, str, bool - unchanged
#     (json.dumps encodes these fine verbatim)
#
#   Compound objects:
#   * list     - ['LIST', elt1, elt2, elt3, ..., eltN]
#   * tuple    - ['TUPLE', elt1, elt2, elt3, ..., eltN]
#   * set      - ['SET', elt1, elt2, elt3, ..., eltN]
#   * dict     - ['DICT', [key1, value1], [key2, value2], ..., [keyN, valueN]]
#   * instance - ['INSTANCE', class name, [attr1, value1], [attr2, value2], ..., [attrN, valueN]]
#   * class    - ['CLASS', class name, [list of superclass names], [attr1, value1], [attr2, value2], ..., [attrN, valueN]]
#   * function - ['FUNCTION', function name, parent frame ID (for nested functions)]
#   * module   - ['module', module name]
#   * other    - [<type name>, string representation of object]
#   * compound object reference - ['REF', target object's unique_id]
#
# the unique_id is derived from id(), which allows us to capture aliasing
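
# A hedged illustration (not in the original source) of the format above,
# showing how aliasing is captured via 'REF' entries:
#
#   enc = ObjectEncoder()
#   lst = [1, 2]
#   enc.encode([lst, lst], None)   # -> ['REF', 1]
#   enc.get_heap()
#   # -> {1: ['LIST', ['REF', 2], ['REF', 2]], 2: ['LIST', 1, 2]}
#
# Both elements of the outer list share unique_id 2, so the front-end can
# draw them as two pointers to a single heap object.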

# number of significant digits for floats
FLOAT_PRECISION = 4


import re, types
import sys
typeRE = re.compile("<type '(.*)'>")
classRE = re.compile("<class '(.*)'>")

import inspect

is_python3 = (sys.version_info[0] == 3)
if is_python3:
    long = None # Avoid NameError when evaluating "long"

def is_class(dat):
    """Return whether dat is a class."""
    if is_python3:
        return isinstance(dat, type)
    else:
        return type(dat) in (types.ClassType, types.TypeType)


def is_instance(dat):
    """Return whether dat is an instance of a class."""
    if is_python3:
        return isinstance(type(dat), type) and not isinstance(dat, type)
    else:
        # ugh, classRE match is a bit of a hack :(
        return type(dat) == types.InstanceType or classRE.match(str(type(dat)))


def get_name(obj):
    """Return the name of an object."""
    return obj.__name__ if hasattr(obj, '__name__') else get_name(type(obj))


# Note that this might BLOAT MEMORY CONSUMPTION since we're holding on
# to every reference ever created by the program without ever releasing
# anything!
class ObjectEncoder:
    def __init__(self):
        # Key: canonicalized small ID
        # Value: encoded (compound) heap object
        self.encoded_heap_objects = {}

        self.id_to_small_IDs = {}
        self.cur_small_ID = 1

    def get_heap(self):
        return self.encoded_heap_objects

    def reset_heap(self):
        # VERY IMPORTANT to reassign to an empty dict rather than just
        # clearing the existing dict, since get_heap() could have been
        # called earlier to return a reference to a previous heap state
        self.encoded_heap_objects = {}

    def set_function_parent_frame_ID(self, ref_obj, enclosing_frame_id):
        assert ref_obj[0] == 'REF'
        func_obj = self.encoded_heap_objects[ref_obj[1]]
        assert func_obj[0] == 'FUNCTION'
        func_obj[-1] = enclosing_frame_id

    # return either a primitive object or an object reference;
    # and as a side effect, update encoded_heap_objects
    def encode(self, dat, get_parent):
        """Encode a data value DAT using the GET_PARENT function for parent ids."""
        # primitive type
        if type(dat) in (int, long, float, str, bool, type(None)):
            if type(dat) is float:
                return round(dat, FLOAT_PRECISION)
            else:
                return dat

        # compound type - return an object reference and update encoded_heap_objects
        else:
            my_id = id(dat)

            try:
                my_small_id = self.id_to_small_IDs[my_id]
            except KeyError:
                my_small_id = self.cur_small_ID
                self.id_to_small_IDs[my_id] = self.cur_small_ID
                self.cur_small_ID += 1

            del my_id # to prevent bugs later in this function

            ret = ['REF', my_small_id]

            # punt early if you've already encoded this object
            if my_small_id in self.encoded_heap_objects:
                return ret

            # major side-effect!
            new_obj = []
            self.encoded_heap_objects[my_small_id] = new_obj

            typ = type(dat)

            if typ == list:
                new_obj.append('LIST')
                for e in dat:
                    new_obj.append(self.encode(e, get_parent))
            elif typ == tuple:
                new_obj.append('TUPLE')
                for e in dat:
                    new_obj.append(self.encode(e, get_parent))
            elif typ == set:
                new_obj.append('SET')
                for e in dat:
                    new_obj.append(self.encode(e, get_parent))
            elif typ == dict:
                new_obj.append('DICT')
                for (k, v) in dat.items():
                    # don't display some built-in locals ...
                    if k not in ('__module__', '__return__', '__locals__'):
                        new_obj.append([self.encode(k, get_parent), self.encode(v, get_parent)])
            elif typ in (types.FunctionType, types.MethodType):
                if is_python3:
                    argspec = inspect.getfullargspec(dat)
                else:
                    argspec = inspect.getargspec(dat)

                printed_args = [e for e in argspec.args]
                if argspec.varargs:
                    printed_args.append('*' + argspec.varargs)

                if is_python3:
                    if argspec.varkw:
                        printed_args.append('**' + argspec.varkw)
                    if argspec.kwonlyargs:
                        printed_args.extend(argspec.kwonlyargs)
                else:
                    if argspec.keywords:
                        printed_args.append('**' + argspec.keywords)

                func_name = get_name(dat)
                pretty_name = func_name + '(' + ', '.join(printed_args) + ')'
                encoded_val = ['FUNCTION', pretty_name, None]
                if get_parent:
                    enclosing_frame_id = get_parent(dat)
                    encoded_val[2] = enclosing_frame_id
                new_obj.extend(encoded_val)
            elif typ is types.BuiltinFunctionType:
                pretty_name = get_name(dat) + '(...)'
                new_obj.extend(['FUNCTION', pretty_name, None])
            elif is_class(dat) or is_instance(dat):
                self.encode_class_or_instance(dat, new_obj)
            elif typ is types.ModuleType:
                new_obj.extend(['module', dat.__name__])
            else:
                typeStr = str(typ)
                m = typeRE.match(typeStr)

                if not m:
                    m = classRE.match(typeStr)

                assert m, typ
                new_obj.extend([m.group(1), str(dat)])

            return ret

    def encode_class_or_instance(self, dat, new_obj):
        """Encode dat as a class or instance."""
        if is_instance(dat):
            if hasattr(dat, '__class__'):
                # common case ...
                class_name = get_name(dat.__class__)
            else:
                # super special case for something like
                # "from datetime import datetime_CAPI" in Python 3.2,
                # which is some weird 'PyCapsule' type ...
                # http://docs.python.org/release/3.1.5/c-api/capsule.html
                class_name = get_name(type(dat))

            new_obj.extend(['INSTANCE', class_name])
            # don't traverse inside modules, or else risk EXPLODING the visualization
            if class_name == 'module':
                return
        else:
            superclass_names = [e.__name__ for e in dat.__bases__ if e is not object]
            new_obj.extend(['CLASS', get_name(dat), superclass_names])

        # traverse inside of its __dict__ to grab attributes
        # (filter out useless-seeming ones):
        hidden = ('__doc__', '__module__', '__return__', '__dict__',
                  '__locals__', '__weakref__')
        if hasattr(dat, '__dict__'):
            user_attrs = sorted([e for e in dat.__dict__ if e not in hidden])
        else:
            user_attrs = []

        for attr in user_attrs:
            new_obj.append([self.encode(attr, None), self.encode(dat.__dict__[attr], None)])
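
    # A hedged illustration (not in the original file) of the two shapes
    # this method produces, given a class like:
    #
    #   class Point:
    #       def __init__(self, x, y):
    #           self.x = x
    #           self.y = y
    #
    # encoding Point(1, 2) yields a heap entry of the form
    #   ['INSTANCE', 'Point', ['x', 1], ['y', 2]]
    # and encoding the Point class itself yields something like
    #   ['CLASS', 'Point', [], ['__init__', ['REF', n]], ...]
    # (the exact set of class attributes varies by Python version).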


# This is the meat of the Online Python Tutor back-end.  It implements a
# full logger for Python program execution (based on pdb, the standard
# Python debugger imported via the bdb module), printing out the values
# of all in-scope data structures after each executed instruction.

import sys
import bdb # the KEY import here!
import re
import traceback
import types

is_python3 = (sys.version_info[0] == 3)

if is_python3:
    import io as cStringIO
else:
    import cStringIO
#import pg_encoder

# TODO: not threadsafe:

# upper-bound on the number of executed lines, in order to guard against
# infinite loops
MAX_EXECUTED_LINES = 300

#DEBUG = False
DEBUG = True

# simple sandboxing scheme:
#
# - use resource.setrlimit to deprive this process of ANY file descriptors
#   (which will cause file read/write and subprocess shell launches to fail)
# - restrict user builtins and module imports
#   (beware that this is NOT foolproof at all ... there are known flaws!)
#
# ALWAYS use defense-in-depth and don't just rely on these simple mechanisms
try:
    import resource
    resource_module_loaded = True
except ImportError:
    # Google App Engine doesn't seem to have the 'resource' module
    resource_module_loaded = False

# ugh, I can't figure out why in Python 2, __builtins__ seems to
# be a dict, but in Python 3, __builtins__ seems to be a module,
# so just handle both cases ... UGLY!
if type(__builtins__) is dict:
    BUILTIN_IMPORT = __builtins__['__import__']
else:
    assert type(__builtins__) is types.ModuleType
    BUILTIN_IMPORT = __builtins__.__import__

# whitelist of module imports
ALLOWED_MODULE_IMPORTS = ('math', 'random', 'datetime',
                          'functools', 'operator', 'string',
                          'collections', 're', 'json',
                          'heapq', 'bisect', 'time')

# PREEMPTIVELY import all of these modules, so that when the user's
# script imports them, it won't try to do a file read (since they've
# already been imported and cached in memory).  Remember that when
# the user's code runs, resource.setrlimit(resource.RLIMIT_NOFILE, (0, 0))
# will already be in effect, so no more files can be opened.
#for m in ALLOWED_MODULE_IMPORTS:
#    __import__(m)

# Restrict imports to a whitelist.
# (Note: the whitelist check below is currently commented out, so this
# wrapper passes every import straight through to the builtin importer.)
def __restricted_import__(*args):
    #if args[0] in ALLOWED_MODULE_IMPORTS:
    return BUILTIN_IMPORT(*args)
    #else:
    #    raise ImportError('{0} not supported'.format(args[0]))

# blacklist of builtins (currently empty -- every entry is commented out,
# so no builtins are actually banned)
BANNED_BUILTINS = (#'reload', 'input', 'apply', 'open', 'compile',
                   #'file', 'eval', 'exec', 'execfile',
                   #'exit', 'quit', 'raw_input', 'help',
                   #'dir', 'globals', 'locals', 'vars'
                   )

IGNORE_VARS = set(('__user_stdout__', '__builtins__', '__name__', '__exception__', '__doc__', '__package__'))

def get_user_stdout(frame):
    return frame.f_globals['__user_stdout__'].getvalue()

def get_user_globals(frame):
    d = filter_var_dict(frame.f_globals)
    # also filter out __return__ for globals only, but NOT for locals
    if '__return__' in d:
        del d['__return__']
    return d

def get_user_locals(frame):
    return filter_var_dict(frame.f_locals)

def filter_var_dict(d):
    ret = {}
    for (k, v) in d.items():
        if k not in IGNORE_VARS:
            ret[k] = v
    return ret

# yield all function objects locally-reachable from frame,
# making sure to traverse inside all compound objects ...
def visit_all_locally_reachable_function_objs(frame):
    for (k, v) in get_user_locals(frame).items():
        for e in visit_function_obj(v, set()):
            if e: # only non-null if it's a function object
                assert type(e) in (types.FunctionType, types.MethodType)
                yield e

# TODO: this might be slow if we're traversing inside lots of objects:
def visit_function_obj(v, ids_seen_set):
    v_id = id(v)

    # to prevent infinite loop
    if v_id in ids_seen_set:
        yield None
    else:
        ids_seen_set.add(v_id)

        typ = type(v)

        # simple base case
        if typ in (types.FunctionType, types.MethodType):
            yield v

        # recursive cases
        elif typ in (list, tuple, set):
            for child in v:
                for child_res in visit_function_obj(child, ids_seen_set):
                    yield child_res

        elif typ == dict or is_class(v) or is_instance(v):
            contents_dict = None

            if typ == dict:
                contents_dict = v
            # warning: some classes or instances don't have __dict__ attributes
            elif hasattr(v, '__dict__'):
                contents_dict = v.__dict__

            if contents_dict:
                for (key_child, val_child) in contents_dict.items():
                    for key_child_res in visit_function_obj(key_child, ids_seen_set):
                        yield key_child_res
                    for val_child_res in visit_function_obj(val_child, ids_seen_set):
                        yield val_child_res

        # degenerate base case
        yield None
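
# A hedged sketch (not part of the original file) of what this generator
# yields -- function objects interleaved with None placeholders, which
# callers filter out:
#
#   f = lambda: None
#   data = {'funcs': [f, f], 'n': 42}
#   [e for e in visit_function_obj(data, set()) if e]   # -> [f]
#
# The second occurrence of f is skipped because its id() is already in
# ids_seen_set, which is also what prevents infinite loops on cyclic
# structures.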


class PGLogger(bdb.Bdb):

    def __init__(self, cumulative_mode, finalizer_func):
        bdb.Bdb.__init__(self)
        self.mainpyfile = ''
        self._wait_for_mainpyfile = 0

        # a function that takes the output trace as a parameter and
        # processes it
        self.finalizer_func = finalizer_func

        # if True, then displays ALL stack frames that have ever existed
        # rather than only those currently on the stack (and their
        # lexical parents)
        self.cumulative_mode = cumulative_mode

        # each entry contains a dict with the information for a single
        # executed line
        self.trace = []

        #http://stackoverflow.com/questions/2112396/in-python-in-google-app-engine-how-do-you-capture-output-produced-by-the-print
        self.GAE_STDOUT = sys.stdout

        # Key: function object
        # Value: parent frame
        self.closures = {}

        # set of function objects that were defined in the global scope
        self.globally_defined_funcs = set()

        # Key: frame object
        # Value: monotonically increasing small ID, based on call order
        self.frame_ordered_ids = {}
        self.cur_frame_id = 1

        # List of frames to KEEP AROUND after the function exits.
        # If cumulative_mode is True, then keep ALL frames in
        # zombie_frames; otherwise keep only frames where
        # nested functions were defined within them.
        self.zombie_frames = []

        # set of elements within zombie_frames that are also
        # LEXICAL PARENTS of other frames
        self.parent_frames_set = set()

        # all globals that ever appeared in the program, in the order in
        # which they appeared.  note that this might be a superset of all
        # the globals that exist at any particular execution point,
        # since globals might have been deleted (using, say, 'del')
        self.all_globals_in_order = []

        # very important for this single object to persist throughout
        # execution, or else canonical small IDs won't be consistent.
        self.encoder = ObjectEncoder()

        self.executed_script = None # Python script to be executed!

    def get_frame_id(self, cur_frame):
        return self.frame_ordered_ids[cur_frame]

    # Returns the (lexical) parent of a function value.
    def get_parent_of_function(self, val):
        if val not in self.closures:
            return None
        return self.get_frame_id(self.closures[val])

    # Returns the (lexical) parent frame of the function that was called
    # to create the stack frame 'frame'.
    #
    # OKAY, this is a SUPER hack, but I don't see a way around it
    # since it's impossible to tell exactly which function
    # ('closure') object was called to create 'frame'.
    #
    # The Python interpreter doesn't maintain this information,
    # so unless we hack the interpreter, we will simply have
    # to make an educated guess based on the contents of local
    # variables inherited from possible parent frame candidates.
    def get_parent_frame(self, frame):
        for (func_obj, parent_frame) in self.closures.items():
            # ok, there's a possible match, but let's compare the
            # local variables in parent_frame to those of frame
            # to make sure.  this is a hack that happens to work because in
            # Python, each stack frame inherits ('inlines') a copy of the
            # variables from its (lexical) parent frame.
            if func_obj.__code__ == frame.f_code:
                all_matched = True
                for k in frame.f_locals:
                    # Do not try to match local names
                    if k in frame.f_code.co_varnames:
                        continue
                    if k != '__return__' and k in parent_frame.f_locals:
                        if parent_frame.f_locals[k] != frame.f_locals[k]:
                            all_matched = False
                            break

                if all_matched:
                    return parent_frame

        return None
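
    # A hedged illustration (not in the original file) of the ambiguity this
    # heuristic resolves: both calls below create frames running the SAME
    # code object for inner, so only the captured value of x (a free
    # variable, absent from inner's co_varnames) distinguishes which
    # make_adder frame is the lexical parent.
    #
    #   def make_adder(x):
    #       def inner(y): return x + y
    #       return inner
    #   a, b = make_adder(1), make_adder(2)
    #   a(10); b(10)   # two frames, same f_code, different parents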

    def lookup_zombie_frame_by_id(self, frame_id):
        # TODO: kinda inefficient
        for e in self.zombie_frames:
            if self.get_frame_id(e) == frame_id:
                return e
        assert False # should never get here

    # unused ...
    #def reset(self):
    #    bdb.Bdb.reset(self)
    #    self.forget()

    def forget(self):
        self.lineno = None
        self.stack = []
        self.curindex = 0
        self.curframe = None

    def setup(self, f, t):
        self.forget()
        self.stack, self.curindex = self.get_stack(f, t)
        self.curframe = self.stack[self.curindex][0]

    # Override Bdb methods

    def user_call(self, frame, argument_list):
        """This method is called when there is the remote possibility
        that we ever need to stop in this function."""
        if self._wait_for_mainpyfile:
            return
        if self.stop_here(frame):
            # delete __return__ so that on subsequent calls to
            # a generator function, the OLD yielded (returned)
            # value gets deleted from the frame ...
            try:
                del frame.f_locals['__return__']
            except KeyError:
                pass

            self.interaction(frame, None, 'call')

    def user_line(self, frame):
        """This function is called when we stop or break at this line."""
        if self._wait_for_mainpyfile:
            if (self.canonic(frame.f_code.co_filename) != "<string>" or
                frame.f_lineno <= 0):
                return
            self._wait_for_mainpyfile = 0
        self.interaction(frame, None, 'step_line')

    def user_return(self, frame, return_value):
        """This function is called when a return trap is set here."""
        frame.f_locals['__return__'] = return_value
        self.interaction(frame, None, 'return')

    def user_exception(self, frame, exc_info):
        """This function is called if an exception occurs,
        but only if we are to stop at or just below this level."""
        exc_type, exc_value, exc_traceback = exc_info
        frame.f_locals['__exception__'] = exc_type, exc_value
        if type(exc_type) == type(''):
            exc_type_name = exc_type
        else:
            exc_type_name = exc_type.__name__ # (kept for parity with pdb; currently unused)
        self.interaction(frame, exc_traceback, 'exception')

    # General interaction function

    def interaction(self, frame, traceback, event_type):
        self.setup(frame, traceback)
        tos = self.stack[self.curindex]
        top_frame = tos[0]
        lineno = tos[1]

        # don't trace inside of ANY functions that aren't user-written code
        # (e.g., those from imported modules -- e.g., random, re -- or the
        # __restricted_import__ function in this file)
        #
        # empirically, it seems like the FIRST entry in self.stack is
        # the 'run' function from bdb.py, but everything else on the
        # stack is the user program's "real stack"
        for (cur_frame, cur_line) in self.stack[1:]:
            # it seems like user-written code has a filename of '<string>',
            # but maybe there are false positives too?
            if self.canonic(cur_frame.f_code.co_filename) != '<string>':
                return
            # also don't trace inside of the magic "constructor" code
            if cur_frame.f_code.co_name == '__new__':
                return
            # or __repr__, which is often called when running print statements
            if cur_frame.f_code.co_name == '__repr__':
                return

        # debug ...
        #print('===', file=sys.stderr)
        #for (e, ln) in self.stack:
        #    print(e.f_code.co_name + ' ' + e.f_code.co_filename + ' ' + str(ln), file=sys.stderr)
        #print('', file=sys.stderr)

        # don't trace inside of our __restricted_import__ helper function
        # (this check is now subsumed by the above check)
        #if top_frame.f_code.co_name == '__restricted_import__':
        #    return

        self.encoder.reset_heap() # VERY VERY VERY IMPORTANT,
        # or else we won't properly capture heap object mutations in the trace!

        if event_type == 'call':
            # Don't be so strict about this assertion because it FAILS
            # when you're calling a generator (not for the first time),
            # since that frame has already previously been on the stack ...
            #assert top_frame not in self.frame_ordered_ids

            self.frame_ordered_ids[top_frame] = self.cur_frame_id
            self.cur_frame_id += 1

            if self.cumulative_mode:
                self.zombie_frames.append(top_frame)

        # only render zombie frames that are NO LONGER on the stack
        cur_stack_frames = [e[0] for e in self.stack]
        zombie_frames_to_render = [e for e in self.zombie_frames if e not in cur_stack_frames]

        # each element is a pair of (function name, ENCODED locals dict)
        encoded_stack_locals = []

        # returns a dict with keys: function name, frame id, id of parent frame, encoded_locals dict
        def create_encoded_stack_entry(cur_frame):
            ret = {}

            parent_frame_id_list = []

            f = cur_frame
            while True:
                p = self.get_parent_frame(f)
                if p:
                    pid = self.get_frame_id(p)
                    assert pid
                    parent_frame_id_list.append(pid)
                    f = p
                else:
                    break

            cur_name = cur_frame.f_code.co_name

            if cur_name == '':
                cur_name = 'unnamed function'

            # encode in a JSON-friendly format now, in order to prevent ill
            # effects of aliasing later down the line ...
            encoded_locals = {}

            for (k, v) in get_user_locals(cur_frame).items():
                is_in_parent_frame = False

                # don't display locals that appear in your parents' stack frames,
                # since that's redundant
                for pid in parent_frame_id_list:
                    parent_frame = self.lookup_zombie_frame_by_id(pid)
                    if k in parent_frame.f_locals:
                        # ignore __return__, which is never copied
                        if k != '__return__':
                            # these values SHOULD BE ALIASES
                            # (don't do an 'is' check since it might not fire for primitives)
                            if parent_frame.f_locals[k] == v:
                                is_in_parent_frame = True

                if is_in_parent_frame and k not in cur_frame.f_code.co_varnames:
                    continue

                # don't display some built-in locals ...
                if k == '__module__':
                    continue

                encoded_val = self.encoder.encode(v, self.get_parent_of_function)
                encoded_locals[k] = encoded_val

            # order the variable names in a sensible way:

            # Let's start with co_varnames, since it (often) contains all
            # variables in this frame, some of which might not exist yet.
            ordered_varnames = []
            for e in cur_frame.f_code.co_varnames:
                if e in encoded_locals:
                    ordered_varnames.append(e)

            # sometimes co_varnames doesn't contain all of the true local
            # variables: e.g., when executing a 'class' definition.  in that
            # case, iterate over encoded_locals and push them onto the end
            # of ordered_varnames in alphabetical order
            for e in sorted(encoded_locals.keys()):
                if e != '__return__' and e not in ordered_varnames:
                    ordered_varnames.append(e)

            # finally, put __return__ at the very end
            if '__return__' in encoded_locals:
                ordered_varnames.append('__return__')

            # doctor Python 3 initializer to look like a normal function (denero)
            if '__locals__' in encoded_locals:
                ordered_varnames.remove('__locals__')
                local = encoded_locals.pop('__locals__')
                if encoded_locals.get('__return__', True) is None:
                    encoded_locals['__return__'] = local

            # crucial sanity checks!
            assert len(ordered_varnames) == len(encoded_locals)
            for e in ordered_varnames:
                assert e in encoded_locals

            return dict(func_name=cur_name,
                        is_parent=(cur_frame in self.parent_frames_set),
                        frame_id=self.get_frame_id(cur_frame),
                        parent_frame_id_list=parent_frame_id_list,
                        encoded_locals=encoded_locals,
                        ordered_varnames=ordered_varnames)

        i = self.curindex

        # look for whether a nested function has been defined during
        # this particular call:
        if i > 1: # i == 1 implies that there's only a global scope visible
            for v in visit_all_locally_reachable_function_objs(top_frame):
                if (v not in self.closures and
                    v not in self.globally_defined_funcs):

                    # Look for the presence of the code object (v.func_code
                    # for Python 2 or v.__code__ for Python 3) in the
                    # constant pool (f_code.co_consts) of an enclosing
                    # stack frame, and set that frame as your parent.
                    #
                    # This technique properly handles lambdas passed as
                    # function parameters.  e.g., this example:
                    #
                    #   def foo(x):
                    #       bar(lambda y: x + y)
                    #   def bar(a):
                    #       print a(20)
                    #   foo(10)
                    chosen_parent_frame = None
                    for (my_frame, my_lineno) in self.stack:
                        if chosen_parent_frame:
                            break

                        for frame_const in my_frame.f_code.co_consts:
                            if frame_const is (v.__code__ if is_python3 else v.func_code):
                                chosen_parent_frame = my_frame
                                break

                    assert chosen_parent_frame # I hope this always passes :0

                    # this condition should be False for functions declared in global scope ...
                    if chosen_parent_frame in self.frame_ordered_ids:
                        self.closures[v] = chosen_parent_frame
                        self.parent_frames_set.add(chosen_parent_frame) # unequivocally add to this set!!!
                        if chosen_parent_frame not in self.zombie_frames:
                            self.zombie_frames.append(chosen_parent_frame)
        else:
            # if there is only a global scope visible ...
            for (k, v) in get_user_globals(top_frame).items():
                if (type(v) in (types.FunctionType, types.MethodType) and
                    v not in self.closures):
                    self.globally_defined_funcs.add(v)

        # climb up until you find '<module>', which is (hopefully) the global scope
        while True:
            cur_frame = self.stack[i][0]
            cur_name = cur_frame.f_code.co_name
            if cur_name == '<module>':
                break

            encoded_stack_locals.append(create_encoded_stack_entry(cur_frame))
            i -= 1

        zombie_encoded_stack_locals = [create_encoded_stack_entry(e) for e in zombie_frames_to_render]

        # encode in a JSON-friendly format now, in order to prevent ill
        # effects of aliasing later down the line ...
        encoded_globals = {}
        for (k, v) in get_user_globals(tos[0]).items():
            encoded_val = self.encoder.encode(v, self.get_parent_of_function)
            encoded_globals[k] = encoded_val

            if k not in self.all_globals_in_order:
                self.all_globals_in_order.append(k)

        # filter out globals that don't exist at this execution point
        # (because they've been, say, deleted with 'del')
        ordered_globals = [e for e in self.all_globals_in_order if e in encoded_globals]
        assert len(ordered_globals) == len(encoded_globals)

        # merge zombie_encoded_stack_locals and encoded_stack_locals
        # into one master ordered list using some simple rules for
        # making it look aesthetically pretty
        stack_to_render = []

        # first push all regular stack entries
        if encoded_stack_locals:
            for e in encoded_stack_locals:
                e['is_zombie'] = False
                e['is_highlighted'] = False
                stack_to_render.append(e)

            # highlight the top-most active stack entry
            stack_to_render[0]['is_highlighted'] = True

        # now push all zombie stack entries
        for e in zombie_encoded_stack_locals:
            # don't display return value for zombie frames
            # TODO: reconsider ...
            '''
            try:
                e['ordered_varnames'].remove('__return__')
            except ValueError:
                pass
            '''

            e['is_zombie'] = True
            e['is_highlighted'] = False # never highlight zombie entries

            stack_to_render.append(e)

        # now sort by frame_id since that sorts frames in "chronological
        # order" based on the order they were invoked
        stack_to_render.sort(key=lambda e: e['frame_id'])

        # create a unique hash for this stack entry, so that the
        # frontend can uniquely identify it when doing incremental
        # rendering.  the strategy is to use a frankenstein-like mix of the
        # relevant fields to properly disambiguate closures and recursive
        # calls to the same function
        for e in stack_to_render:
            hash_str = e['func_name']
            # frame_id is UNIQUE, so it can disambiguate recursive calls
            hash_str += '_f' + str(e['frame_id'])

            # needed to refresh GUI display ...
            if e['is_parent']:
                hash_str += '_p'

            # TODO: this is no longer needed, right? (since frame_id is unique)
            #if e['parent_frame_id_list']:
            #    hash_str += '_p' + '_'.join([str(i) for i in e['parent_frame_id_list']])
            if e['is_zombie']:
                hash_str += '_z'

            e['unique_hash'] = hash_str
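
        # A hedged illustration (not in the original file): a currently-active
        # call to foo whose frame is also a lexical parent would get a
        # unique_hash like 'foo_f3_p', while a zombie frame of the same
        # function from an earlier call might be 'foo_f2_p_z'.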

        trace_entry = dict(line=lineno,
                           event=event_type,
                           func_name=tos[0].f_code.co_name,
                           globals=encoded_globals,
                           ordered_globals=ordered_globals,
                           stack_to_render=stack_to_render,
                           heap=self.encoder.get_heap(),
                           stdout=get_user_stdout(tos[0]))

        # if there's an exception, then record its info:
        if event_type == 'exception':
            # always check in f_locals
            exc = frame.f_locals['__exception__']
            trace_entry['exception_msg'] = exc[0].__name__ + ': ' + str(exc[1])

        self.trace.append(trace_entry)
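
        # A hedged sketch (not in the original file) of one trace entry for a
        # one-line program like "x = 5" -- field names match the dict built
        # above:
        #
        #   {'line': 1, 'event': 'step_line', 'func_name': '<module>',
        #    'globals': {}, 'ordered_globals': [],
        #    'stack_to_render': [], 'heap': {}, 'stdout': ''}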

        # sanity check to make sure the state of the world at a 'call' instruction
        # is identical to that at the instruction immediately following it ...
        '''
        if len(self.trace) > 1:
            cur = self.trace[-1]
            prev = self.trace[-2]
            if prev['event'] == 'call':
                assert cur['globals'] == prev['globals']
                for (s1, s2) in zip(cur['stack_to_render'], prev['stack_to_render']):
                    assert s1 == s2
                assert cur['heap'] == prev['heap']
                assert cur['stdout'] == prev['stdout']
        '''

        if len(self.trace) >= MAX_EXECUTED_LINES:
            self.trace.append(dict(event='instruction_limit_reached', exception_msg='(stopped after ' + str(MAX_EXECUTED_LINES) + ' steps to prevent possible infinite loop)'))
            self.force_terminate()

        self.forget()

    def _runscript(self, script_str):
        self.executed_script = script_str

        # When bdb sets tracing, a number of call and line events happen
        # BEFORE the debugger even reaches the user's code (and the exact
        # sequence of events depends on the Python version).  So we take
        # special measures to avoid stopping before we reach the main
        # script (see user_line and user_call for details).
        self._wait_for_mainpyfile = 1

        # ok, let's try to sorta 'sandbox' the user script by not
        # allowing certain potentially dangerous operations.
        user_builtins = {}

        # ugh, I can't figure out why in Python 2, __builtins__ seems to
        # be a dict, but in Python 3, __builtins__ seems to be a module,
        # so just handle both cases ... UGLY!
        if type(__builtins__) is dict:
            builtin_items = __builtins__.items()
        else:
            assert type(__builtins__) is types.ModuleType
            builtin_items = []
            for k in dir(__builtins__):
                builtin_items.append((k, getattr(__builtins__, k)))

        for (k, v) in builtin_items:
            if k in BANNED_BUILTINS:
                continue
            elif k == '__import__':
                user_builtins[k] = __restricted_import__
            else:
                user_builtins[k] = v

        user_stdout = cStringIO.StringIO()

        sys.stdout = user_stdout
        user_globals = {"__name__" : "__main__",
                        "__builtins__" : user_builtins,
                        "__user_stdout__" : user_stdout}

        try:
            # enforce resource limits RIGHT BEFORE running script_str

            # set ~200MB virtual memory limit AND a 10-second CPU time
            # limit (tuned for Webfaction shared hosting) to protect against
            # memory bombs such as:
            #   x = 2
            #   while True: x = x*x
            if resource_module_loaded:
                resource.setrlimit(resource.RLIMIT_AS, (200000000, 200000000))
                resource.setrlimit(resource.RLIMIT_CPU, (10, 10))

                # protect against unauthorized filesystem accesses ...
                #resource.setrlimit(resource.RLIMIT_NOFILE, (0, 0)) # no opened files allowed

                # VERY WEIRD.  If you activate this resource limitation, it
                # ends up generating an EMPTY trace for the following program:
                #   "x = 0\nfor i in range(10):\n    x += 1\n    print x\n    x += 1\n"
                # (at least on my Webfaction hosting with Python 2.7)
                #resource.setrlimit(resource.RLIMIT_FSIZE, (0, 0)) # (redundancy for paranoia)

            # sys.modules contains an in-memory cache of already-loaded
            # modules, so if you delete modules from here, they will
            # need to be re-loaded from the filesystem.
            #
            # Thus, as an extra precaution, remove these modules so that
            # they can't be re-imported without opening a new file,
            # which is disallowed by resource.RLIMIT_NOFILE
            #
            # Of course, this isn't a foolproof solution by any means,
            # and it might lead to UNEXPECTED FAILURES later in execution.
            #del sys.modules['os']
            #del sys.modules['sys']

            self.run(script_str, user_globals, user_globals)
        # sys.exit ...
        except SystemExit:
            #sys.exit(0)
            raise bdb.BdbQuit
        except:
            if DEBUG:
                traceback.print_exc()

            trace_entry = dict(event='uncaught_exception')

            (exc_type, exc_val, exc_tb) = sys.exc_info()
            if hasattr(exc_val, 'lineno'):
                trace_entry['line'] = exc_val.lineno
            if hasattr(exc_val, 'offset'):
                trace_entry['offset'] = exc_val.offset

            trace_entry['exception_msg'] = type(exc_val).__name__ + ": " + str(exc_val)

            # SUPER SUBTLE! if this exact same exception has already been
            # recorded by the program, then DON'T record it again as an
            # uncaught_exception
            already_caught = False
            for e in self.trace:
                if e['event'] == 'exception' and e['exception_msg'] == trace_entry['exception_msg']:
                    already_caught = True
                    break

            if not already_caught:
                self.trace.append(trace_entry)

            raise bdb.BdbQuit # need to forcibly STOP execution

    def force_terminate(self):
        #self.finalize()
        raise bdb.BdbQuit # need to forcibly STOP execution

    def finalize(self):
        sys.stdout = self.GAE_STDOUT # very important!

        assert len(self.trace) <= (MAX_EXECUTED_LINES + 1)

        # don't do this anymore ...
        '''
        # filter all entries after 'return' from '<module>', since they
        # seem extraneous:
        res = []
        for e in self.trace:
            res.append(e)
            if e['event'] == 'return' and e['func_name'] == '<module>':
                break
        '''

        res = self.trace

        # if the SECOND to last entry is an 'exception'
        # and the last entry is return from <module>, then axe the last
        # entry, for aesthetic reasons :)
        if len(res) >= 2 and \
           res[-2]['event'] == 'exception' and \
           res[-1]['event'] == 'return' and res[-1]['func_name'] == '<module>':
            res.pop()

        self.trace = res

        self.finalizer_func(self.executed_script, self.trace)


# the MAIN meaty function!!!
def exec_script_str(script_str, cumulative_mode, finalizer_func):
    logger = PGLogger(cumulative_mode, finalizer_func)

    try:
        logger._runscript(script_str)
    except bdb.BdbQuit:
        pass
    finally:
        logger.finalize()


def json_finalizer(input_code, output_trace):
    ret = dict(code=input_code, trace=output_trace)
    json_output = json.dumps(ret, indent=None) # use indent=None for most compact repr
    print(json_output)


fin = open(sys.argv[1])

exec_script_str(fin.read(), False, json_finalizer)
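
# Usage sketch (hedged; the script file name below is hypothetical):
#
#   python opt_backend.py user_program.py
#
# This executes user_program.py under PGLogger and prints a single JSON
# object of the form {"code": ..., "trace": [...]} to stdout, with one
# trace entry per execution step (line, call, return, or exception event),
# capped at MAX_EXECUTED_LINES steps.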