Gemfury

hemamaps / newrelic python

Repository URL to install this package:
Details
newrelic / newrelic / api / web_transaction.py
import sys
import cgi
import time
import string
import re
import logging
import functools

try:
    import urlparse
except ImportError:
    import urllib.parse as urlparse

from .application import application_instance
from .transaction import Transaction, current_transaction
from .function_trace import FunctionTrace
from .html_insertion import insert_html_snippet, verify_body_exists

from ..common.object_names import callable_name
from ..common.object_wrapper import wrap_object, FunctionWrapper
from ..common.encoding_utils import (obfuscate, deobfuscate, json_encode,
    json_decode)

from ..core.attribute_filter import DST_BROWSER_MONITORING

from ..packages import six

_logger = logging.getLogger(__name__)

_js_agent_header_fragment = '<script type="text/javascript">%s</script>'
_js_agent_footer_fragment = '<script type="text/javascript">'\
                            'window.NREUM||(NREUM={});NREUM.info=%s</script>'

# Seconds since epoch for Jan 1 2000

JAN_1_2000 = time.mktime((2000, 1, 1, 0, 0, 0, 0, 0, 0))

def _lookup_environ_setting(environ, name, default=False):
    flag = environ.get(name, default)
    if default is None or default:
        try:
            flag = not flag.lower() in ['off', 'false', '0']
        except AttributeError:
            pass
    else:
        try:
            flag = flag.lower() in ['on', 'true', '1']
        except AttributeError:
            pass
    return flag

def _parse_synthetics_header(header):
    # Return a dictionary of values from Synthetics header
    # Returns empty dict, if version is not supported.

    synthetics = {}
    version = None

    if len(header) > 0:
        version = int(header[0])

    if version == 1:
        synthetics['version'] = version
        synthetics['account_id'] = int(header[1])
        synthetics['resource_id'] = header[2]
        synthetics['job_id'] = header[3]
        synthetics['monitor_id'] = header[4]

    return synthetics

def _remove_query_string(url):
    out = urlparse.urlsplit(url)
    return urlparse.urlunsplit((out.scheme, out.netloc, out.path, '', ''))

def _is_websocket(environ):
    return environ.get('HTTP_UPGRADE') == 'websocket'

class WebTransaction(Transaction):

    report_unicode_error = True

    def __init__(self, application, environ):

        # The web transaction can be enabled/disabled by
        # the value of the variable "newrelic.enabled"
        # in the WSGI environ dictionary. We need to check
        # this before initialising the transaction as needs
        # to be passed in base class constructor. The
        # default is None, which would then result in the
        # base class making the decision based on whether
        # application or agent as a whole are enabled.

        enabled = _lookup_environ_setting(environ,
                'newrelic.enabled', None)

        # Initialise the common transaction base class.

        super(WebTransaction, self).__init__(application, enabled)

        # Disable transactions for websocket connections.
        # Also disable autorum if this is a websocket. This is a good idea for
        # two reasons. First, RUM is unnecessary for websocket transactions
        # anyway. Secondly, due to a bug in the gevent-websocket (0.9.5)
        # package, if our _WSGIApplicationMiddleware is applied a websocket
        # connection cannot be made.

        if _is_websocket(environ):
            self.autorum_disabled = True
            self.enabled = False

        # Bail out if the transaction is running in a
        # disabled state.

        if not self.enabled:
            return

        # Will need to check the settings a number of times.

        settings = self._settings

        # Check for override settings from WSGI environ.

        self.background_task = _lookup_environ_setting(environ,
                'newrelic.set_background_task', False)

        self.ignore_transaction = _lookup_environ_setting(environ,
                'newrelic.ignore_transaction', False)
        self.suppress_apdex = _lookup_environ_setting(environ,
                'newrelic.suppress_apdex_metric', False)
        self.suppress_transaction_trace = _lookup_environ_setting(environ,
                'newrelic.suppress_transaction_trace', False)
        self.capture_params = _lookup_environ_setting(environ,
                'newrelic.capture_request_params',
                settings.capture_params)
        self.autorum_disabled = _lookup_environ_setting(environ,
                'newrelic.disable_browser_autorum',
                not settings.browser_monitoring.auto_instrument)

        # Make sure that if high security mode is enabled that
        # capture of request params is still being disabled.
        # No warning is issued for this in the logs because it
        # is a per request configuration and would create a lot
        # of noise.

        if settings.high_security:
            self.capture_params = False

        # WSGI spec says SERVER_PORT "can never be empty string",
        # but I'm going to set a default value anyway...

        port = environ.get('SERVER_PORT', None)
        if port:
            try:
                self._port = int(port)
            except Exception:
                pass

        # Extract from the WSGI environ dictionary
        # details of the URL path. This will be set as
        # default path for the web transaction. This can
        # be overridden by framework to be more specific
        # to avoid metrics explosion problem resulting
        # from too many distinct URLs for same resource
        # due to use of REST style URL concepts or
        # otherwise.

        request_uri = environ.get('REQUEST_URI', None)

        if request_uri is None:
            # The gunicorn WSGI server uses RAW_URI instead
            # of the more typical REQUEST_URI used by Apache
            # and other web servers.

            request_uri = environ.get('RAW_URI', None)

        script_name = environ.get('SCRIPT_NAME', None)
        path_info = environ.get('PATH_INFO', None)
        http_cookie = environ.get('HTTP_COOKIE', None)

        self._request_uri = request_uri

        if self._request_uri is not None:
            # Need to make sure we drop off any query string
            # arguments on the path if we have to fallback
            # to using the original REQUEST_URI. Can't use
            # attribute access on result as only support for
            # Python 2.5+.

            self._request_uri = urlparse.urlparse(self._request_uri)[2]

        if script_name is not None or path_info is not None:
            if path_info is None:
                path = script_name
            elif script_name is None:
                path = path_info
            else:
                path = script_name + path_info

            self.set_transaction_name(path, 'Uri', priority=1)

            if self._request_uri is None:
                self._request_uri = path
        else:
            if self._request_uri is not None:
                self.set_transaction_name(self._request_uri, 'Uri', priority=1)

        # See if the WSGI environ dictionary includes the
        # special 'X-Request-Start' or 'X-Queue-Start' HTTP
        # headers. These header are optional headers that can be
        # set within the underlying web server or WSGI server to
        # indicate when the current request was first received
        # and ready to be processed. The difference between this
        # time and when application starts processing the
        # request is the queue time and represents how long
        # spent in any explicit request queuing system, or how
        # long waiting in connecting state against listener
        # sockets where request needs to be proxied between any
        # processes within the application server.
        #
        # Note that mod_wsgi sets its own distinct variables
        # automatically. Initially it set mod_wsgi.queue_start,
        # which equated to when Apache first accepted the
        # request. This got changed to mod_wsgi.request_start
        # however, and mod_wsgi.queue_start was instead used
        # just for when requests are to be queued up for the
        # daemon process and corresponded to the point at which
        # they are being proxied, after Apache does any
        # authentication etc. We check for both so older
        # versions of mod_wsgi will still work, although we
        # don't try and use the fact that it is possible to
        # distinguish the two points and just pick up the
        # earlier of the two.
        #
        # Checking for the mod_wsgi values means it is not
        # necessary to enable and use mod_headers to add X
        # -Request-Start or X-Queue-Start. But we still check
        # for the headers and give priority to the explicitly
        # added header in case that header was added in front
        # end server to Apache instead.
        #
        # Which ever header is used, we accommodate the value
        # being in seconds, milliseconds or microseconds. Also
        # handle it being prefixed with 't='.

        now = time.time()

        def _parse_time_stamp(time_stamp):
            """
            Converts time_stamp to seconds. Input can be microseconds,
            milliseconds or seconds

            Divide the timestamp by the highest resolution divisor. If
            the result is older than Jan 1 2000, then pick a lower
            resolution divisor and repeat.  It is safe to assume no
            requests were queued for more than 10 years.

            """
            for divisor in (1000000.0, 1000.0, 1.0):
                converted_time = time_stamp/divisor

                # If queue_start is in the future, return 0.0.

                if converted_time > now:
                    return 0.0

                if converted_time > JAN_1_2000:
                    return converted_time

            return 0.0

        queue_time_headers = ('HTTP_X_REQUEST_START', 'HTTP_X_QUEUE_START',
                'mod_wsgi.request_start', 'mod_wsgi.queue_start')

        for queue_time_header in queue_time_headers:
            value = environ.get(queue_time_header, None)

            try:
                if value.startswith('t='):
                    try:
                        self.queue_start = _parse_time_stamp(float(value[2:]))
                    except Exception:
                        pass
                else:
                    try:
                        self.queue_start = _parse_time_stamp(float(value))
                    except Exception:
                        pass

            except Exception:
                pass

            if self.queue_start > 0.0:
                break

        # Capture query request string parameters, unless we're in
        # High Security Mode.

        if not settings.high_security:

            value = environ.get('QUERY_STRING', None)

            if value:
                try:
                    params = urlparse.parse_qs(value, keep_blank_values=True)
                except Exception:
                    params = cgi.parse_qs(value, keep_blank_values=True)

                self._request_params.update(params)

        # Check for Synthetics header

        if settings.synthetics.enabled and \
                settings.trusted_account_ids and settings.encoding_key:

            try:
                header_name = 'HTTP_X_NEWRELIC_SYNTHETICS'
                header = self.decode_newrelic_header(environ, header_name)
                synthetics = _parse_synthetics_header(header)

                if synthetics['account_id'] in settings.trusted_account_ids:

                    # Save obfuscated header, because we will pass it along
                    # unchanged in all external requests.

                    self.synthetics_header = environ.get(header_name)

                    if synthetics['version'] == 1:
                        self.synthetics_resource_id = synthetics['resource_id']
                        self.synthetics_job_id = synthetics['job_id']
                        self.synthetics_monitor_id = synthetics['monitor_id']

            except Exception:
                pass

        # Check for the New Relic cross process ID header and extract
        # the relevant details.

        if settings.cross_application_tracer.enabled and \
                settings.cross_process_id and settings.trusted_account_ids and \
                settings.encoding_key:

            client_cross_process_id = environ.get('HTTP_X_NEWRELIC_ID')

            if client_cross_process_id:
                try:
                    client_cross_process_id = deobfuscate(
                            client_cross_process_id, settings.encoding_key)

                    # The cross process ID consists of the client
                    # account ID and the ID of the specific application
                    # the client is recording requests against. We need
                    # to validate that the client account ID is in the
                    # list of trusted account IDs and ignore it if it
                    # isn't. The trusted account IDs list has the
                    # account IDs as integers, so save the client ones
                    # away as integers here so easier to compare later.

                    client_account_id, client_application_id = \
                            map(int, client_cross_process_id.split('#'))

                    if client_account_id in settings.trusted_account_ids:
                        self.client_cross_process_id = client_cross_process_id
                        self.client_account_id = client_account_id
                        self.client_application_id = client_application_id

                        header_name = 'HTTP_X_NEWRELIC_TRANSACTION'
                        txn_header = self.decode_newrelic_header(
                                environ, header_name)

                        if txn_header:
                            self.is_part_of_cat = True
                            self.referring_transaction_guid = txn_header[0]

                            # Incoming record_tt is OR'd with existing
                            # record_tt. In the scenario where we make multiple
                            # ext request, this will ensure we don't set the
                            # record_tt to False by a later request if it was
                            # set to True by an earlier request.

                            self.record_tt = self.record_tt or txn_header[1]

                            if isinstance(txn_header[2], six.string_types):
                                self._trip_id = txn_header[2]
                            if isinstance(txn_header[3], six.string_types):
                                self._referring_path_hash = txn_header[3]

                except Exception:
                    pass

        # Capture WSGI request environ dictionary values. We capture
        # content length explicitly as will need it for cross process
        # metrics.

        self._read_length = int(environ.get('CONTENT_LENGTH') or -1)

        if settings.capture_environ:
            for name in settings.include_environ:
                if name in environ:
                    self._request_environment[name] = environ[name]

        # Strip out the query params from the HTTP_REFERER if capture_params
        # is disabled in the settings.

        if (self._request_environment.get('HTTP_REFERER') and
                not self.capture_params):
            self._request_environment['HTTP_REFERER'] = \
                _remove_query_string(self._request_environment['HTTP_REFERER'])

        try:
            if 'CONTENT_LENGTH' in self._request_environment:
                self._request_environment['CONTENT_LENGTH'] = int(
                        self._request_environment['CONTENT_LENGTH'])
        except Exception:
            del self._request_environment['CONTENT_LENGTH']

        # Flags for tracking whether RUM header and footer have been
        # generated.

        self.rum_header_generated = False
        self.rum_footer_generated = False

    def decode_newrelic_header(self, environ, header_name):
        encoded_header = environ.get(header_name)
        if encoded_header:
            try:
                decoded_header = json_decode(deobfuscate(
                        encoded_header, self._settings.encoding_key))
            except Exception:
                decoded_header = None

        return decoded_header

    def process_response(self, status, response_headers, *args):
        """Processes response status and headers, extracting any
        details required and returning a set of additional headers
        to merge into that being returned for the web transaction.

        """

        additional_headers = []

        # Extract the HTTP status response code.

        try:
            self.response_code = int(status.split(' ')[0])
        except Exception:
            pass

        # Extract response content length and type for inclusion in agent
        # attributes

        try:

            for header, value in response_headers:
                lower_header = header.lower()
                if 'content-length' == lower_header:
                    self._response_properties['CONTENT_LENGTH'] = int(value)
                elif 'content-type' == lower_header:
                    self._response_properties['CONTENT_TYPE'] = value

        except Exception:
            pass

        # Generate metrics and response headers for inbound cross
        # process web external calls.

        if self.client_cross_process_id is not None:

            # Need to work out queueing time and duration up to this
            # point for inclusion in metrics and response header. If the
            # recording of the transaction had been prematurely stopped
            # via an API call, only return time up until that call was
            # made so it will match what is reported as duration for the
            # transaction.

            if self.queue_start:
                queue_time = self.start_time - self.queue_start
            else:
                queue_time = 0

            if self.end_time:
                duration = self.end_time = self.start_time
            else:
                duration = time.time() - self.start_time

            # Generate the metric identifying the caller.

            metric_name = 'ClientApplication/%s/all' % (
                    self.client_cross_process_id)
            self.record_custom_metric(metric_name, duration)

            # Generate the additional response headers which provide
            # information back to the caller. We need to freeze the
            # transaction name before adding to the header.

            self._freeze_path()

            payload = (self._settings.cross_process_id, self.path, queue_time,
                    duration, self._read_length, self.guid, self.record_tt)
            app_data = json_encode(payload)

            additional_headers.append(('X-NewRelic-App-Data', obfuscate(
                    app_data, self._settings.encoding_key)))

        # The additional headers returned need to be merged into the
        # original response headers passed back by the application.

        return additional_headers

    def browser_timing_header(self):
        """Returns the JavaScript header to be included in any HTML
        response to perform real user monitoring. This function returns
        the header as a native Python string. In Python 2 native strings
        are stored as bytes. In Python 3 native strings are stored as
        unicode.

        """

        if not self.enabled:
            return ''

        if self._state != self.STATE_RUNNING:
            return ''

        if self.background_task:
            return ''

        if self.ignore_transaction:
            return ''

        if not self._settings:
            return ''

        if not self._settings.browser_monitoring.enabled:
            return ''

        if not self._settings.license_key:
            return ''

        # Don't return the header a second time if it has already
        # been generated.

        if self.rum_header_generated:
            return ''

        # Requirement is that the first 13 characters of the account
        # license key is used as the key when obfuscating values for
        # the RUM footer. Will not be able to perform the obfuscation
        # if license key isn't that long for some reason.

        if len(self._settings.license_key) < 13:
            return ''

        # Return the RUM header only if the agent received a valid value
        # for js_agent_loader from the data collector. The data
        # collector is not meant to send a non empty value for the
        # js_agent_loader value if browser_monitoring.loader is set to
        # 'none'.

        if self._settings.js_agent_loader:
            header = _js_agent_header_fragment % self._settings.js_agent_loader

            # To avoid any issues with browser encodings, we will make sure that
            # the javascript we inject for the browser agent is ASCII encodable.
            # Since we obfuscate all agent and user attributes, and the transaction
            # name with base 64 encoding, this will preserve those strings, if
            # they have values outside of the ASCII character set.
            # In the case of Python 2, we actually then use the encoded value
            # as we need a native string, which for Python 2 is a byte string.
            # If encoding as ASCII fails we will return an empty string.

            try:
                if six.PY2:
                    header = header.encode('ascii')
                else:
                    header.encode('ascii')

            except UnicodeError:
                if not WebTransaction.unicode_error_reported:
                    _logger.error('ASCII encoding of js-agent-header failed.',
                            header)
                    WebTransaction.unicode_error_reported = True

                header = ''

        else:
            header = ''

        # We remember if we have returned a non empty string value and
        # if called a second time we will not return it again. The flag
        # will also be used to check whether the footer should be
        # generated.

        if header:
            self.rum_header_generated = True

        return header

    def browser_timing_footer(self):
        """Returns the JavaScript footer to be included in any HTML
        response to perform real user monitoring. This function returns
        the footer as a native Python string. In Python 2 native strings
        are stored as bytes. In Python 3 native strings are stored as
        unicode.

        """

        if not self.enabled:
            return ''

        if self._state != self.STATE_RUNNING:
            return ''

        if self.ignore_transaction:
            return ''

        # Only generate a footer if the header had already been
        # generated and we haven't already generated the footer.

        if not self.rum_header_generated:
            return ''

        if self.rum_footer_generated:
            return ''

        # Make sure we freeze the path.

        self._freeze_path()

        # When obfuscating values for the footer, we only use the
        # first 13 characters of the account license key.

        obfuscation_key = self._settings.license_key[:13]

        intrinsics = self.browser_monitoring_intrinsics(obfuscation_key)

        attributes = {}

        user_attributes = {}
        for attr in self.user_attributes:
            if attr.destinations & DST_BROWSER_MONITORING:
                user_attributes[attr.name] = attr.value

        if user_attributes:
            attributes['u'] = user_attributes

        agent_attributes = {}
        for attr in self.request_parameters_attributes:
            if attr.destinations & DST_BROWSER_MONITORING:
                agent_attributes[attr.name] = attr.value

        if agent_attributes:
            attributes['a'] = agent_attributes

        # create the data structure that pull all our data in

        footer_data = intrinsics

        if attributes:
            attributes = obfuscate(json_encode(attributes), obfuscation_key)
            footer_data['atts'] = attributes

        footer = _js_agent_footer_fragment % json_encode(footer_data)

        # To avoid any issues with browser encodings, we will make sure that
        # the javascript we inject for the browser agent is ASCII encodable.
        # Since we obfuscate all agent and user attributes, and the transaction
        # name with base 64 encoding, this will preserve those strings, if
        # they have values outside of the ASCII character set.
        # In the case of Python 2, we actually then use the encoded value
        # as we need a native string, which for Python 2 is a byte string.
        # If encoding as ASCII fails we will return an empty string.

        try:
            if six.PY2:
                footer = footer.encode('ascii')
            else:
                footer.encode('ascii')

        except UnicodeError:
            if not WebTransaction.unicode_error_reported:
                _logger.error('ASCII encoding of js-agent-footer failed.',
                        footer)
                WebTransaction.unicode_error_reported = True

            footer = ''

        # We remember if we have returned a non empty string value and
        # if called a second time we will not return it again.

        if footer:
            self.rum_footer_generated = True

        return footer

    def browser_monitoring_intrinsics(self, obfuscation_key):
        txn_name = obfuscate(self.path, obfuscation_key)

        queue_start = self.queue_start or self.start_time
        start_time = self.start_time
        end_time = time.time()

        queue_duration = int((start_time - queue_start) * 1000)
        request_duration = int((end_time - start_time) * 1000)

        intrinsics = {
            "beacon": self._settings.beacon,
            "errorBeacon": self._settings.error_beacon,
            "licenseKey": self._settings.browser_key,
            "applicationID": self._settings.application_id,
            "transactionName": txn_name,
            "queueTime": queue_duration,
            "applicationTime": request_duration,
            "agent": self._settings.js_agent_file,
        }

        if self._settings.browser_monitoring.ssl_for_http is not None:
            ssl_for_http = self._settings.browser_monitoring.ssl_for_http
            intrinsics['sslForHttp'] = ssl_for_http

        return intrinsics

class _WSGIApplicationIterable(object):

    def __init__(self, transaction, generator):
        self.transaction = transaction
        self.generator = generator
        self.response_trace = None

    def __iter__(self):
        if not self.transaction._sent_start:
            self.transaction._sent_start = time.time()
        try:
            self.response_trace = FunctionTrace(self.transaction,
                    name='Response', group='Python/WSGI')
            self.response_trace.__enter__()

            for item in self.generator:
                yield item
                try:
                    self.transaction._calls_yield += 1
                    self.transaction._bytes_sent += len(item)
                except Exception:
                    pass

            self.response_trace.__exit__(None, None, None)
            self.response_trace = None

        except GeneratorExit:
            raise

        except:  # Catch all
            self.transaction.record_exception(*sys.exc_info())
            raise

    def close(self):
        try:
            if self.response_trace:
                self.response_trace.__exit__(None, None, None)
                self.response_trace = None

            with FunctionTrace(self.transaction, name='Finalize',
                    group='Python/WSGI'):
                if hasattr(self.generator, 'close'):
                    name = callable_name(self.generator.close)
                    with FunctionTrace(self.transaction, name):
                        self.generator.close()

        except:  # Catch all
            self.transaction.__exit__(*sys.exc_info())
            raise

        else:
            self.transaction.__exit__(None, None, None)
            self.transaction._sent_end = time.time()

class _WSGIInputWrapper(object):

    def __init__(self, transaction, input):
        self.__transaction = transaction
        self.__input = input

    def __getattr__(self, name):
        return getattr(self.__input, name)

    def close(self):
        if hasattr(self.__input, 'close'):
            self.__input.close()

    def read(self, *args, **kwargs):
        if not self.__transaction._read_start:
            self.__transaction._read_start = time.time()
        try:
            data = self.__input.read(*args, **kwargs)
            try:
                self.__transaction._calls_read += 1
                self.__transaction._bytes_read += len(data)
            except Exception:
                pass
        finally:
            self.__transaction._read_end = time.time()
        return data

    def readline(self, *args, **kwargs):
        if not self.__transaction._read_start:
            self.__transaction._read_start = time.time()
        try:
            line = self.__input.readline(*args, **kwargs)
            try:
                self.__transaction._calls_readline += 1
                self.__transaction._bytes_read += len(line)
            except Exception:
                pass
        finally:
            self.__transaction._read_end = time.time()
        return line

    def readlines(self, *args, **kwargs):
        if not self.__transaction._read_start:
            self.__transaction._read_start = time.time()
        try:
            lines = self.__input.readlines(*args, **kwargs)
            try:
                self.__transaction._calls_readlines += 1
                self.__transaction._bytes_read += sum(map(len, lines))
            except Exception:
                pass
        finally:
            self.__transaction._read_end = time.time()
        return lines

class _WSGIApplicationMiddleware(object):

    # This is a WSGI middleware for automatically inserting RUM into
    # HTML responses. It only works for where a WSGI application is
    # returning response content via a iterable/generator. It does not
    # work if the WSGI application write() callable is being used. It
    # will buffer response content up to the start of <body>. This is
    # technically in violation of the WSGI specification if one is
    # strict, but will still work with all known WSGI servers. Because
    # it does buffer, then technically it may cause a problem with
    # streamed responses. For that to occur then it would have to be a
    # HTML response that doesn't actually use <body> and so technically
    # is not a valid HTML response. It is assumed though that in
    # streaming a response, the <head> itself isn't streamed out only
    # gradually.

    search_maximum = 64*1024

    def __init__(self, application, environ, start_response, transaction):
        self.application = application

        self.pass_through = True

        self.request_environ = environ
        self.outer_start_response = start_response
        self.outer_write = None

        self.transaction = transaction

        self.response_status = None
        self.response_headers = []
        self.response_args = ()

        self.content_length = None

        self.response_length = 0
        self.response_data = []

        settings = transaction.settings

        self.debug = settings and settings.debug.log_autorum_middleware

    def process_data(self, data):
        # If this is the first data block, then immediately try
        # for an insertion using full set of criteria. If this
        # works then we are done, else we move to next phase of
        # buffering up content until we find the body element.

        def html_to_be_inserted():
            header = self.transaction.browser_timing_header()

            if not header:
                return b''

            footer = self.transaction.browser_timing_footer()

            return six.b(header) + six.b(footer)

        if not self.response_data:
            modified = insert_html_snippet(data, html_to_be_inserted)

            if modified is not None:
                if self.debug:
                    _logger.debug('RUM insertion from WSGI middleware '
                            'triggered on first yielded string from '
                            'response. Bytes added was %r.',
                            len(modified) - len(data))

                if self.content_length is not None:
                    length = len(modified) - len(data)
                    self.content_length += length

                return [modified]

        # Buffer up the data. If we haven't found the start of
        # the body element, that is all we do. If we have reached
        # the limit of buffering allowed, then give up and return
        # the buffered data.

        if not self.response_data or not verify_body_exists(data):
            self.response_length += len(data)
            self.response_data.append(data)

            if self.response_length >= self.search_maximum:
                buffered_data = self.response_data
                self.response_data = []
                return buffered_data

            return

        # Now join back together any buffered data into a single
        # string. This makes it easier to process, but there is a
        # risk that we could temporarily double memory use for
        # the response content if had small data blocks followed
        # by very large data block. Expect that the risk of this
        # occurring is very small.

        if self.response_data:
            self.response_data.append(data)
            data = b''.join(self.response_data)
            self.response_data = []

        # Perform the insertion of the HTML. This should always
        # succeed as we would only have got here if we had found
        # the body element, which is the fallback point for
        # insertion.

        modified = insert_html_snippet(data, html_to_be_inserted)

        if modified is not None:
            if self.debug:
                _logger.debug('RUM insertion from WSGI middleware '
                        'triggered on subsequent string yielded from '
                        'response. Bytes added was %r.',
                        len(modified) - len(data))

            if self.content_length is not None:
                length = len(modified) - len(data)
                self.content_length += length

            return [modified]

        # Something went very wrong as we should never get here.

        return [data]

    def flush_headers(self):
        # Add back in any response content length header. It will
        # have been updated with the adjusted length by now if
        # additional data was inserted into the response.

        if self.content_length is not None:
            header = (('Content-Length', str(self.content_length)))
            self.response_headers.append(header)

        self.outer_write = self.outer_start_response(self.response_status,
                self.response_headers, *self.response_args)

    def inner_write(self, data):
        # If the write() callable is used, we do not attempt to
        # do any insertion at all here after.

        self.pass_through = True

        # Flush the response headers if this hasn't yet been done.

        if self.outer_write is None:
            self.flush_headers()

        # Now write out any buffered response data in case the
        # WSGI application was doing something evil where it
        # mixed use of yield and write. Technically if write()
        # is used, it is supposed to be before any attempt to
        # yield a string. When done switch to pass through mode.

        if self.response_data:
            for buffered_data in self.response_data:
                self.outer_write(buffered_data)
            self.response_data = []

        return self.outer_write(data)

    def start_response(self, status, response_headers, *args):
        # The start_response() function can be called more than
        # once. In that case, the values derived from the most
        # recent call are used. We therefore need to reset any
        # calculated values.

        self.pass_through = True

        self.response_status = status
        self.response_headers = response_headers
        self.response_args = args

        self.content_length = None

        # We need to check again if auto RUM has been disabled.
        # This is because it can be disabled using an API call.
        # Also check whether RUM insertion has already occurred.

        if (self.transaction.autorum_disabled or
                self.transaction.rum_header_generated):

            self.flush_headers()
            self.pass_through = True

            return self.inner_write

        # Extract values for response headers we need to work. Do
        # not copy across the content length header at this time
        # as we will need to adjust the length later if we are
        # able to inject our Javascript.

        pass_through = False

        headers = []

        content_type = None
        content_length = None
        content_encoding = None
        content_disposition = None

        for (name, value) in response_headers:
            _name = name.lower()

            if _name == 'content-length':
                try:
                    content_length = int(value)
                    continue

                except ValueError:
                    pass_through = True

            elif _name == 'content-type':
                content_type = value

            elif _name == 'content-encoding':
                content_encoding = value

            elif _name == 'content-disposition':
                content_disposition = value

            headers.append((name, value))

        # We can only inject our Javascript if the content type
        # is an allowed value, no content encoding has been set
        # and an attachment isn't being used.

        def should_insert_html():
            if pass_through:
                return False

            if content_encoding is not None:
                # This will match any encoding, including if the
                # value 'identity' is used. Technically the value
                # 'identity' should only be used in the header
                # Accept-Encoding and not Content-Encoding. In
                # other words, a WSGI application should not be
                # returning identity. We could check and allow it
                # anyway and still do RUM insertion, but don't.

                return False

            if (content_disposition is not None and
                    content_disposition.split(';')[0].strip().lower() ==
                    'attachment'):
                return False

            if content_type is None:
                return False

            settings = self.transaction.settings
            allowed_content_type = settings.browser_monitoring.content_type

            if content_type.split(';')[0] not in allowed_content_type:
                return False

            return True

        if should_insert_html():
            self.pass_through = False

            self.content_length = content_length
            self.response_headers = headers

        # If in pass through mode at this point, we need to flush
        # out the headers. We technically might do this again
        # later if start_response() was called more than once.

        if self.pass_through:
            self.flush_headers()

        return self.inner_write

    def __call__(self):
        iterable = None

        try:
            # Grab the iterable returned by the wrapped WSGI
            # application.

            iterable = self.application(self.request_environ,
                    self.start_response)

            # Process the response content from the iterable.

            for data in iterable:
                # If we are in pass through mode, simply pass it
                # through. If we are in pass through mode then
                # the headers should already have been flushed.

                if self.pass_through:
                    yield data

                    continue

                # If the headers haven't been flushed we need to
                # check for the potential insertion point and
                # buffer up data as necessary if we can't find it.

                if self.outer_write is None:
                    # Ignore any empty strings.

                    if not data:
                        continue

                    # Check for the insertion point. Will return
                    # None if data was buffered.

                    buffered_data = self.process_data(data)

                    if buffered_data is None:
                        continue

                    # The data was returned, with it being
                    # potentially modified. It would not have
                    # been modified if we had reached maximum to
                    # be buffer. Flush out the headers, switch to
                    # pass through mode and yield the data.

                    self.flush_headers()
                    self.pass_through = True

                    for data in buffered_data:
                        yield data

                else:
                    # Depending on how the WSGI specification is
                    # interpreted, this shouldn't occur. That is,
                    # nothing should be yielded prior to the
                    # start_response() function being called. The
                    # CGI/WSGI example in the WSGI specification
                    # does allow that though as do various WSGI
                    # servers that followed that example.

                    yield data

            # Ensure that headers have been written if the
            # response was actually empty.

            if self.outer_write is None:
                self.flush_headers()
                self.pass_through = True

            # Ensure that any remaining buffered data is also
            # written. Technically this should never be able
            # to occur at this point, but do it just in case.

            if self.response_data:
                for data in self.response_data:
                    yield data

        finally:
            # Call close() on the iterable as required by the
            # WSGI specification.

            if hasattr(iterable, 'close'):
                name = callable_name(iterable.close)
                with FunctionTrace(self.transaction, name):
                    iterable.close()

def WSGIApplicationWrapper(wrapped, application=None, name=None,
        group=None, framework=None):

    if framework is not None and not isinstance(framework, tuple):
        framework = (framework, None)

    def _nr_wsgi_application_wrapper_(wrapped, instance, args, kwargs):
        # Check to see if any transaction is present, even an inactive
        # one which has been marked to be ignored or which has been
        # stopped already.

        transaction = current_transaction(active_only=False)

        if transaction:
            # If there is any active transaction we will return without
            # applying a new WSGI application wrapper context. In the
            # case of a transaction which is being ignored or which has
            # been stopped, we do that without doing anything further.

            if transaction.ignore_transaction or transaction.stopped:
                return wrapped(*args, **kwargs)

            # For any other transaction, we record the details of any
            # framework against the transaction for later reporting as
            # supportability metrics.

            if framework:
                transaction.add_framework_info(
                        name=framework[0], version=framework[1])

            # Also override the web transaction name to be the name of
            # the wrapped callable if not explicitly named, and we want
            # the default name to be that of the WSGI component for the
            # framework. This will override the use of a raw URL which
            # can result in metric grouping issues where a framework is
            # not instrumented or is leaking URLs.

            settings = transaction._settings

            if name is None and settings:
                if framework is not None:
                    naming_scheme = settings.transaction_name.naming_scheme
                    if naming_scheme in (None, 'framework'):
                        transaction.set_transaction_name(
                                callable_name(wrapped), priority=1)

            elif name:
                transaction.set_transaction_name(name, group, priority=1)

            return wrapped(*args, **kwargs)

        # Otherwise treat it as top level transaction. We have to though
        # look first to see whether the application name has been
        # overridden through the WSGI environ dictionary.

        def _args(environ, start_response, *args, **kwargs):
            return environ, start_response

        environ, start_response = _args(*args, **kwargs)

        app_name = environ.get('newrelic.app_name')

        target_application = application

        if app_name:
            if app_name.find(';') != -1:
                app_names = [string.strip(n) for n in app_name.split(';')]
                app_name = app_names[0]
                target_application = application_instance(app_name)
                for altname in app_names[1:]:
                    target_application.link_to_application(altname)
            else:
                target_application = application_instance(app_name)
        else:
            # If application has an activate() method we assume it is an
            # actual application. Do this rather than check type so that
            # can easily mock it for testing.

            # FIXME Should this allow for multiple apps if a string.

            if not hasattr(application, 'activate'):
                target_application = application_instance(application)

        # Now start recording the actual web transaction.

        transaction = WebTransaction(target_application, environ)
        transaction.__enter__()

        # Record details of framework against the transaction for later
        # reporting as supportability metrics.

        if framework:
            transaction.add_framework_info(
                    name=framework[0], version=framework[1])

        # Override the initial web transaction name to be the supplied
        # name, or the name of the wrapped callable if wanting to use
        # the callable as the default. This will override the use of a
        # raw URL which can result in metric grouping issues where a
        # framework is not instrumented or is leaking URLs.
        #
        # Note that at present if default for naming scheme is still
        # None and we aren't specifically wrapping a designated
        # framework, then we still allow old URL based naming to
        # override. When we switch to always forcing a name we need to
        # check for naming scheme being None here.

        settings = transaction._settings

        if name is None and settings:
            naming_scheme = settings.transaction_name.naming_scheme

            if framework is not None:
                if naming_scheme in (None, 'framework'):
                    transaction.set_transaction_name(
                            callable_name(wrapped), priority=1)

            elif naming_scheme in ('component', 'framework'):
                transaction.set_transaction_name(
                        callable_name(wrapped), priority=1)

        elif name:
            transaction.set_transaction_name(name, group, priority=1)

        def _start_response(status, response_headers, *args):

            additional_headers = transaction.process_response(
                    status, response_headers, *args)

            _write = start_response(status,
                    response_headers+additional_headers, *args)

            def write(data):
                if not transaction._sent_start:
                    transaction._sent_start = time.time()
                result = _write(data)
                transaction._calls_write += 1
                try:
                    transaction._bytes_sent += len(data)
                except Exception:
                    pass
                transaction._sent_end = time.time()
                return result

            return write

        try:
            # Should always exist, but check as test harnesses may not
            # have it.

            if 'wsgi.input' in environ:
                environ['wsgi.input'] = _WSGIInputWrapper(transaction,
                        environ['wsgi.input'])

            with FunctionTrace(transaction, name='Application',
                    group='Python/WSGI'):
                with FunctionTrace(transaction, name=callable_name(wrapped)):
                    if (settings and settings.browser_monitoring.enabled and
                            not transaction.autorum_disabled):
                        middleware = _WSGIApplicationMiddleware(wrapped,
                                environ, _start_response, transaction)
                        result = middleware()
                    else:
                        result = wrapped(environ, _start_response)

        except:  # Catch all
            transaction.__exit__(*sys.exc_info())
            raise

        return _WSGIApplicationIterable(transaction, result)

    return FunctionWrapper(wrapped, _nr_wsgi_application_wrapper_)

def wsgi_application(application=None, name=None, group=None, framework=None):
    return functools.partial(WSGIApplicationWrapper, application=application,
            name=name, group=group, framework=framework)

def wrap_wsgi_application(module, object_path, application=None,
            name=None, group=None, framework=None):
    wrap_object(module, object_path, WSGIApplicationWrapper,
            (application, name, group, framework))
hemamaps / newrelic python

Products

About

Resources

Contact Gemfury