Python从Route Views Archive上下载并解析.bz2数据包
1.下载压缩包
2.解压缩包
3.解析解压缩后文件
#encoding:utf-8
import os
import requests
import urllib
import urllib2
import bz2
url = 'http://archive.routeviews.org/bgpdata/2001.10/RIBS/rib.20011026.1648.bz2'
# Step 1: download the compressed RIB dump.
# Empty any previous download first. Mode 'wb' creates the file when it is
# missing and truncates it otherwise (the former 'r+' mode raised IOError on
# a first run); the with-block closes the handle before the download writes
# to the same path.
with open('/root/python-file-download/ribs.bz2', 'wb'):
    pass
print("emptying file successful!")
# Fetch the archive to the fixed local path.
urllib.urlretrieve(url, '/root/python-file-download/ribs.bz2')
print("downloading sucessful!")
print("")
# Step 2: decompress the downloaded archive.
f = '/root/python-file-download/ribs.bz2'
# Destination path for the decompressed data.
newfilepath = '/root/python-file-download/ribs'
# Empty any previous decompressed output. Mode 'wb' creates the file when it
# is missing (the former 'r+' mode raised IOError in that case).
with open(newfilepath, 'wb'):
    pass
print("emptying decompression file is successful!")
# Stream the archive to disk in fixed-size chunks so the whole decompressed
# dump is never held in memory, and let the with-blocks close both handles.
with bz2.BZ2File(f) as archive:
    with open(newfilepath, 'wb') as out:
        while True:
            chunk = archive.read(1024 * 1024)
            if not chunk:
                break
            out.write(chunk)
print("decompression is successful!")
print("")
# Step 3: parse the dump into text with the external bgpdump tool.
# Empty any previous output. Mode 'w' creates the file when it is missing
# (the former 'r+' mode raised IOError in that case).
with open('/root/python-file-download/ribs.txt', 'w'):
    pass
print("emptying analyzing file is successful!")
# Run bgpdump through the shell; the redirect writes its output to ribs.txt.
status = os.system("bgpdump '/root/python-file-download/ribs.bz2' > '/root/python-file-download/ribs.txt' ")
# os.system returns a non-zero status when the command fails or cannot be
# found; report that instead of claiming success unconditionally.
if status != 0:
    raise SystemExit("bgpdump failed with status %d" % status)
print("analyzing file is successful!")
知识补充
URL
Internet上的每一个网页都具有一个唯一的名称标识,通常称之为URL(Uniform Resource Locator, 统一资源定位器)。它是www的统一资源定位标志,简单地说URL就是web地址,俗称“网址”。