pycurl是libcurl的Python接口(可以看作curl的Python版本),pycurl模块的常用方法如下:
###模块导入###
>>> import pycurl ###创建curl对象### >>> curl = pycurl.Curl() ###连接等待时间,0则不等待### >>> curl.setopt(pycurl.CONNECTTIMEOUT,5) ###超时时间### >>> curl.setopt(pycurl.TIMEOUT,5) ###下载进度条,非0则屏蔽### >>> curl.setopt(pycurl.NOPROGRESS,0) ###指定HTTP最大次数### >>> curl.setopt(pycurl.MAXREDIRS,5) ###完成交互后强制断开连接,不重用### >>> curl.setopt(pycurl.FORBID_REUSE,1) ###设置DNS信息保存时间,默认为120秒### >>> curl.setopt(pycurl.DNS_CACHE_TIMEOUT,60) ###设置HTTP的User-Agent(自行设置时需跟着常规标准走)### >>> curl.setopt(pycurl.USERAGENT,"www.plcxue.com") ###设置请求的Url### >>> curl.setopt(pycurl.URL,"http://www.plcxue.com") ###将返回的HTTP HEADER定向到getheader### >>> curl.setopt(pycurl.HEADERFUNCTION,getheader) ###将返回的内容定向到回调函数getbody### >>> curl.setopt(pycurl.WRITEHEADERFUNCTION,getbody) ###将返回的HTTP HEADER定向到fileobj文件对象### >>> curl.setopt(pycurl.WRITEHEADER,fileobj) ###将返回的HTML内容定向到fileobj文件对象### >>> curl.setopt(pycurl.WRITEDATE,fileobj)
常用的getinfo方法(用于获取传输信息)如下:
>>> curl = pycurl.Curl()###返回HTTP状态码###>>> curl.getinfo(pycurl.HTTP_CODE)###传输结束时所消耗的总时间###>>> curl.getinfo(pycurl.TOTAL_TIME)###DNS解析所消耗的时间###>>> curl.getinfo(pycurl.NAMELOOKUP_TIME)###建立连接所消耗的时间###>>> curl.getinfo(pycurl.CONNECT_TIME)###从建立连接到准备传输所消耗的时间###>>> curl.getinfo(pycurl.PRETRANSFER_TIME)###从建立连接到数据开始传输所消耗的时间###>>> curl.getinfo(pycurl.STARTTRANSFER_TIME)###重定向所消耗的时间###>>> curl.getinfo(pycurl.REDIRECT_TIME)###上传数据包大小###>>> curl.getinfo(pycurl.SIZE_UPLOAD)###下载数据包大小###>>> curl.getinfo(pycurl.SIZE_DOWNLOAD)###平均下载速度###>>> curl.getinfo(pycurl.SPEED_DOWNLOAD)###平均上传速度###>>> curl.getinfo(pycurl.SPEED_UPLOAD)###HTTP头部大小###>>> curl.getinfo(pycurl.HEADER_SIZE)
还有一个网上的实现POST的例子:
# Example: send an HTTP POST with pycurl and print the response body.
# Runs on both Python 2 and Python 3.
from io import BytesIO

import pycurl

try:
    from urllib.parse import urlencode  # Python 3
except ImportError:
    from urllib import urlencode  # Python 2

url = "http://www.google.com/"
post_data_dic = {"name": "value"}

crl = pycurl.Curl()
try:
    crl.setopt(pycurl.VERBOSE, 1)
    crl.setopt(pycurl.FOLLOWLOCATION, 1)
    crl.setopt(pycurl.MAXREDIRS, 5)
    # Optional: set the Referer header automatically when following redirects.
    # crl.setopt(pycurl.AUTOREFERER, 1)

    crl.setopt(pycurl.CONNECTTIMEOUT, 60)
    crl.setopt(pycurl.TIMEOUT, 300)
    # Optional: route the request through a proxy (define `proxy` first).
    # crl.setopt(pycurl.PROXY, proxy)
    crl.setopt(pycurl.HTTPPROXYTUNNEL, 1)
    # Optional: stop libcurl from using signals (needed in multi-threaded programs).
    # crl.setopt(pycurl.NOSIGNAL, 1)

    # pycurl hands the body to the write callback as bytes, so collect it
    # in a bytes buffer rather than a text buffer.
    crl.fp = BytesIO()
    crl.setopt(pycurl.USERAGENT, "dhgu hoho")

    # Same as curl's -d/--data option: HTTP POST data.
    crl.setopt(pycurl.POSTFIELDS, urlencode(post_data_dic))

    crl.setopt(pycurl.URL, url)
    crl.setopt(pycurl.WRITEFUNCTION, crl.fp.write)
    crl.perform()

    body = crl.fp.getvalue()
    # On Python 3 the body is bytes; decode it so print shows text rather
    # than a b'...' literal. On Python 2 it is already a str.
    if not isinstance(body, str):
        body = body.decode("utf-8", "replace")
    print(body)
finally:
    # Always release the handle, even when perform() raises pycurl.error.
    crl.close()