'''A high-level interface to the pycurl extension'''
# ** mfx NOTE: the CGI class uses "black magic" using COOKIEFILE in
# combination with a non-existant file name. See the libcurl docs
# for more info.
import sys, pycurl
py3 = sys.version_info[0] == 3
# python 2/3 compatibility
if py3:
import urllib.parse as urllib_parse
from urllib.parse import urljoin
from io import BytesIO
else:
import urllib as urllib_parse
from urlparse import urljoin
try:
from cStringIO import StringIO as BytesIO
except ImportError:
from StringIO import StringIO as BytesIO
# We should ignore SIGPIPE when using pycurl.NOSIGNAL - see
# the libcurl tutorial for more info.
try:
import signal
from signal import SIGPIPE, SIG_IGN
except ImportError:
pass
else:
signal.signal(SIGPIPE, SIG_IGN)
class Curl:
"High-level interface to pycurl functions."
def __init__(self, base_url="", fakeheaders=[]):
self.handle = pycurl.Curl()
# These members might be set.
self.set_url(base_url)
self.verbosity = 0
self.fakeheaders = fakeheaders
# Nothing past here should be modified by the caller.
self.payload = None
self.payload_io = BytesIO()
self.hrd = ""
# Verify that we've got the right site; harmless on a non-SSL connect.
self.set_option(pycurl.SSL_VERIFYHOST, 2)
# Follow redirects in case it wants to take us to a CGI...
self.set_option(pycurl.FOLLOWLOCATION, 1)
self.set_option(pycurl.MAXREDIRS, 5)
self.set_option(pycurl.NOSIGNAL, 1)
# Setting this option with even a nonexistent file makes libcurl
# handle cookie capture and playback automatically.
self.set_option(pycurl.COOKIEFILE, "/dev/null")
# Set timeouts to avoid hanging too long
self.set_timeout(30)
# Use password identification from .netrc automatically
self.set_option(pycurl.NETRC, 1)
self.set_option(pycurl.WRITEFUNCTION, self.payload_io.write)
def header_callback(x):
self.hdr += x.decode('ascii')
self.set_option(pycurl.HEADERFUNCTION, header_callback)
def set_timeout(self, timeout):
"Set timeout for a retrieving an object"
self.set_option(pycurl.TIMEOUT, timeout)
def set_url(self, url):
"Set the base URL to be retrieved."
self.base_url = url
self.set_option(pycurl.URL, self.base_url)
def set_option(self, *args):
"Set an option on the retrieval."
self.handle.setopt(*args)
def set_verbosity(self, level):
"Set verbosity to 1 to see transactions."
self.set_option(pycurl.VERBOSE, level)
def __request(self, relative_url=None):
"Perform the pending request."
if self.fakeheaders:
self.set_option(pycurl.HTTPHEADER, self.fakeheaders)
if relative_url:
self.set_option(pycurl.URL, urljoin(self.base_url, relative_url))
self.payload = None
self.payload_io.seek(0)
self.payload_io.truncate()
self.hdr = ""
self.handle.perform()
self.payload = self.payload_io.getvalue()
return self.payload
def get(self, url="", params=None):
"Ship a GET request for a specified URL, capture the response."
if params:
url += "?" + urllib_parse.urlencode(params)
self.set_option(pycurl.HTTPGET, 1)
return self.__request(url)
def post(self, cgi, params):
"Ship a POST request to a specified CGI, capture the response."
self.set_option(pycurl.POST, 1)
self.set_option(pycurl.POSTFIELDS, urllib_parse.urlencode(params))
return self.__request(cgi)
def body(self):
"Return the body from the last response."
return self.payload
def header(self):
"Return the header from the last response."
return self.hdr
def get_info(self, *args):
"Get information about retrieval."
return self.handle.getinfo(*args)
def info(self):
"Return a dictionary with all info on the last response."
m = {}
m['effective-url'] = self.handle.getinfo(pycurl.EFFECTIVE_URL)
m['http-code'] = self.handle.getinfo(pycurl.HTTP_CODE)
m['total-time'] = self.handle.getinfo(pycurl.TOTAL_TIME)
m['namelookup-time'] = self.handle.getinfo(pycurl.NAMELOOKUP_TIME)
m['connect-time'] = self.handle.getinfo(pycurl.CONNECT_TIME)
m['pretransfer-time'] = self.handle.getinfo(pycurl.PRETRANSFER_TIME)
m['redirect-time'] = self.handle.getinfo(pycurl.REDIRECT_TIME)
m['redirect-count'] = self.handle.getinfo(pycurl.REDIRECT_COUNT)
m['size-upload'] = self.handle.getinfo(pycurl.SIZE_UPLOAD)
m['size-download'] = self.handle.getinfo(pycurl.SIZE_DOWNLOAD)
m['speed-upload'] = self.handle.getinfo(pycurl.SPEED_UPLOAD)
m['header-size'] = self.handle.getinfo(pycurl.HEADER_SIZE)
m['request-size'] = self.handle.getinfo(pycurl.REQUEST_SIZE)
m['content-length-download'] = self.handle.getinfo(pycurl.CONTENT_LENGTH_DOWNLOAD)
m['content-length-upload'] = self.handle.getinfo(pycurl.CONTENT_LENGTH_UPLOAD)
m['content-type'] = self.handle.getinfo(pycurl.CONTENT_TYPE)
m['response-code'] = self.handle.getinfo(pycurl.RESPONSE_CODE)
m['speed-download'] = self.handle.getinfo(pycurl.SPEED_DOWNLOAD)
m['ssl-verifyresult'] = self.handle.getinfo(pycurl.SSL_VERIFYRESULT)
m['filetime'] = self.handle.getinfo(pycurl.INFO_FILETIME)
m['starttransfer-time'] = self.handle.getinfo(pycurl.STARTTRANSFER_TIME)
m['redirect-time'] = self.handle.getinfo(pycurl.REDIRECT_TIME)
m['redirect-count'] = self.handle.getinfo(pycurl.REDIRECT_COUNT)
m['http-connectcode'] = self.handle.getinfo(pycurl.HTTP_CONNECTCODE)
m['httpauth-avail'] = self.handle.getinfo(pycurl.HTTPAUTH_AVAIL)
m['proxyauth-avail'] = self.handle.getinfo(pycurl.PROXYAUTH_AVAIL)
m['os-errno'] = self.handle.getinfo(pycurl.OS_ERRNO)
m['num-connects'] = self.handle.getinfo(pycurl.NUM_CONNECTS)
m['ssl-engines'] = self.handle.getinfo(pycurl.SSL_ENGINES)
m['cookielist'] = self.handle.getinfo(pycurl.INFO_COOKIELIST)
m['lastsocket'] = self.handle.getinfo(pycurl.LASTSOCKET)
m['ftp-entry-path'] = self.handle.getinfo(pycurl.FTP_ENTRY_PATH)
return m
def answered(self, check):
"Did a given check string occur in the last payload?"
return self.payload.find(check) >= 0
def close(self):
"Close a session, freeing resources."
if self.handle:
self.handle.close()
self.handle = None
self.hdr = ""
self.payload = ""
def __del__(self):
self.close()
if __name__ == "__main__":
if len(sys.argv) < 2:
url = 'https://curl.haxx.se'
else:
url = sys.argv[1]
c = Curl()
c.get(url)
print(c.body())
print('='*74 + '\n')
import pprint
pprint.pprint(c.info())
print(c.get_info(pycurl.OS_ERRNO))
print(c.info()['os-errno'])
c.close()