Repository URL to install this package:
|
Version:
2.15 ▾
|
google-python-cloud-debugger
/
module_explorer.py
|
|---|
# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Finds all the code objects defined by a module."""
import gc
import os
import sys
import types
import six
# Maximum traversal depth when looking for all the code objects referenced by
# a module or another code object. Bounds the BFS loop in
# _FindCodeObjectsReferents.
_MAX_REFERENTS_BFS_DEPTH = 15
# Absolute limit on the number of objects to scan when looking for all the code
# objects implemented in a module. Enforced by _VisitRecorder.Record, which
# refuses to record new objects once this many have been recorded.
_MAX_VISIT_OBJECTS = 100000
# Maximum referents an object can have before it is skipped in the BFS
# traversal. This is to prevent things like long objects or dictionaries that
# probably do not contain code objects from using the _MAX_VISIT_OBJECTS quota.
# The module's own __dict__ is exempt from this limit.
_MAX_OBJECT_REFERENTS = 1000
# Object types to ignore when looking for the code objects. Instances of these
# types are neither recorded as visited nor traversed any further.
_BFS_IGNORE_TYPES = (types.ModuleType, type(None), bool, float, six.binary_type,
six.text_type, types.BuiltinFunctionType,
types.BuiltinMethodType, list) + six.integer_types
def GetCodeObjectAtLine(module, line):
    """Searches for a code object at the specified line in the specified module.

    Args:
      module: module to explore.
      line: 1-based line number of the statement.

    Returns:
      (True, Code object) on success or (False, (prev_line, next_line)) on
      failure, where prev_line and next_line are the closest lines with code
      above and below the specified line, or None if they do not exist.
      Modules without a source path (no __file__ attribute, or __file__ set to
      None) always yield (False, (None, None)).
    """
    # The traversal compares code object file names against module.__file__,
    # so a module without one cannot be explored. __file__ may be present but
    # None (as can happen for namespace packages), which would otherwise blow
    # up in os.path.splitext further down.
    if getattr(module, '__file__', None) is None:
        return (False, (None, None))

    prev_line = 0
    next_line = six.MAXSIZE

    for code_object in _GetModuleCodeObjects(module):
        # Every line of the code object is examined: line numbers are not
        # guaranteed to be increasing within a code object (line deltas are
        # signed since CPython 3.6), so stopping at the first larger line
        # could miss an exact match that appears later in the table.
        for co_line_number in _GetLineNumbers(code_object):
            if co_line_number == line:
                return (True, code_object)
            if co_line_number < line:
                prev_line = max(prev_line, co_line_number)
            else:
                next_line = min(next_line, co_line_number)

    # The sentinels mean "no such line was seen"; report them as None.
    prev_line = None if prev_line == 0 else prev_line
    next_line = None if next_line == six.MAXSIZE else next_line
    return (False, (prev_line, next_line))
def _GetLineNumbers(code_object):
    """Generator for getting the line numbers of a code object.

    Decodes the line number table (co_lnotab) of the code object, starting
    from co_firstlineno and applying each line delta in turn. The first line
    itself is not yielded, only the lines reached through the table.

    Args:
      code_object: the code object.

    Yields:
      The next line number in the code object. Values may repeat or decrease
      when the table contains zero or negative line deltas.
    """
    # Get the line number deltas, which are the odd number entries, from the
    # lnotab. See
    # https://svn.python.org/projects/python/branches/pep-0384/Objects/lnotab_notes.txt
    # co_lnotab is a byte array in Python 3 and a string in Python 2;
    # bytearray() turns either one into a sequence of integers.
    line_incrs = bytearray(code_object.co_lnotab[1::2])

    # Starting with CPython 3.6 the line deltas are signed 8-bit integers, so
    # values of 0x80 and above encode a jump back to an earlier line. Older
    # interpreters store them unsigned.
    signed_deltas = sys.version_info >= (3, 6)

    current_line = code_object.co_firstlineno
    for line_incr in line_incrs:
        if signed_deltas and line_incr >= 0x80:
            line_incr -= 0x100
        current_line += line_incr
        yield current_line
def _GetModuleCodeObjects(module):
    """Collects every code object defined in the given module.

    Two breadth-first traversals cooperate here. The outer one, in this
    function, follows code objects into the code objects nested inside them
    and has no depth limit, because code in a module may be nested
    arbitrarily deep. The inner one, _FindCodeObjectsReferents, walks through
    class definitions and other references; it is far more expensive and
    likely to wander into unrelated objects, so it is depth limited.

    A shared visit recorder additionally caps the total number of objects
    examined, so that the lookup cannot hang if something goes wrong.

    Args:
      module: module to explore.

    Returns:
      Set of code objects defined in module.
    """
    recorder = _VisitRecorder()
    found = set()
    frontier = [module]

    while frontier:
        batch = _FindCodeObjectsReferents(module, frontier, recorder)
        found.update(batch)

        # Code objects do not implement tp_traverse, so gc.get_referents
        # cannot see what they reference. Continue the search explicitly from
        # the constants of each code object found in this round.
        frontier = [nested.co_consts for nested in batch]

    return found
def _FindCodeObjectsReferents(module, start_objects, visit_recorder):
    """Looks for all the code objects referenced by objects in start_objects.

    The traversal implemented by this function is a shallow one. In other
    words if the reference chain is a -> b -> co1 -> c -> co2, this function
    will return {co1} only.

    The traversal is implemented with BFS. The maximum depth is limited to
    avoid touching all the objects in the process. Each object is only visited
    once using visit_recorder.

    Args:
      module: module in which we are looking for code objects.
      start_objects: initial set of objects for the BFS traversal.
      visit_recorder: instance of _VisitRecorder class to ensure each object is
          visited at most once.

    Returns:
      Set of code objects.
    """

    def CheckIgnoreCodeObject(code_object):
        """Checks if the code object can be ignored.

        Code objects that are not implemented in the module, or are from a
        lambda or generator expression can be ignored.

        If the module was precompiled, the code object may point to .py file,
        while the module says that it originated from .pyc file. We just strip
        extension altogether to work around it.

        Args:
          code_object: code object that we want to check against module.

        Returns:
          True if the code object can be ignored, False otherwise.
        """
        if code_object.co_name in ('<lambda>', '<genexpr>'):
            return True

        code_object_file = os.path.splitext(code_object.co_filename)[0]
        module_file = os.path.splitext(module.__file__)[0]

        # Only code whose file matches the module's file is kept.
        return code_object_file != module_file

    def CheckIgnoreClass(cls):
        """Returns True if the class is definitely not coming from "module"."""
        cls_module = sys.modules.get(cls.__module__)
        if not cls_module:
            return False  # We can't tell for sure, so explore this class.

        return (
            cls_module is not module and
            getattr(cls_module, '__file__', None) != module.__file__)

    code_objects = set()
    current = start_objects

    # Mark every start object as visited so that reference cycles leading back
    # to one of them do not cause it to be traversed a second time. The return
    # value is deliberately ignored: start objects are always traversed.
    for obj in current:
        visit_recorder.Record(obj)

    depth = 0
    while current and depth < _MAX_REFERENTS_BFS_DEPTH:
        new_current = []
        for current_obj in current:
            referents = gc.get_referents(current_obj)

            # Objects with a huge number of referents are unlikely to lead to
            # code objects and would eat the visit quota. The module's own
            # namespace is always explored regardless of its size.
            if (current_obj is not module.__dict__ and
                    len(referents) > _MAX_OBJECT_REFERENTS):
                continue

            for obj in referents:
                if (isinstance(obj, _BFS_IGNORE_TYPES) or
                        not visit_recorder.Record(obj)):
                    continue

                if isinstance(obj, types.CodeType):
                    # Code objects end this branch of the traversal: they are
                    # either collected or dropped, never expanded here.
                    if not CheckIgnoreCodeObject(obj):
                        code_objects.add(obj)
                    continue

                if isinstance(obj, six.class_types) and CheckIgnoreClass(obj):
                    continue

                new_current.append(obj)

        current = new_current
        depth += 1

    return code_objects
class _VisitRecorder(object):
    """Remembers which objects were already visited and enforces a quota.

    Visited objects are stored in a dictionary keyed by their identity (the
    value returned by id()). The object itself is stored as the value so that
    it stays alive for the duration of the traversal: an identity is only
    guaranteed to be unique while the object it belongs to is alive.
    """

    def __init__(self):
        # Maps id(obj) -> obj for every object recorded so far.
        self._visit_recorder_objects = {}

    def Record(self, obj):
        """Marks the object as visited.

        Args:
          obj: visited object.

        Returns:
          True if the object was recorded by this call. False if it had been
          recorded before, or if the quota of _MAX_VISIT_OBJECTS recorded
          objects has already been reached.
        """
        seen = self._visit_recorder_objects

        # The quota is checked first, so once it is exhausted every call
        # reports False regardless of the object.
        if len(seen) >= _MAX_VISIT_OBJECTS:
            return False

        key = id(obj)
        if key in seen:
            return False

        seen[key] = obj
        return True