在这里写了一个比较简单的爬虫，爬取了湖北省统计局关于居民消费价格指数的一些简单信息，然后写入Excel表格输出。一来是为了复习一下爬虫的简单知识，二来是为了熟悉一下Python操作Excel的技术。
转换成功之后
#http://data.stats.gov.cn/easyquery.htm?cn=G0103
from urllib.request import urlopen
from bs4 import BeautifulSoup
import xlwt
def crawl(url="http://data.hb.stats.cn/CityData.aspx?DataType=65&ReportType=1"):
    """Download a page and return the cell texts of its ``tr-title`` rows.

    Args:
        url: Address of the page to fetch. Defaults to the page this script
            was originally written against.

    Returns:
        A list with one entry per ``<tr class="tr-title">`` element; each
        entry is the list of text contents of that row's ``<td>`` cells.
        The list is empty when the page has no such rows.
    """
    # Use the response as a context manager so the connection is always
    # closed, even if reading or parsing raises.
    with urlopen(url) as response:
        bsObj = BeautifulSoup(response.read(), "html5lib")

    # find() returns None when nothing matches; fall back to an empty header
    # instead of crashing with AttributeError.
    header_row = bsObj.find("tr", {"class": "tr-title"})
    if header_row is None:
        titles = []
    else:
        titles = [cell.getText() for cell in header_row.find_all("td")]
    print(titles)

    # NOTE(review): this is the same selector as the header lookup above, so
    # the header row is also the first entry of the result -- confirm that
    # the data rows really carry the "tr-title" class.
    datas = [
        [cell.getText() for cell in row.find_all("td")]
        for row in bsObj.find_all("tr", {"class": "tr-title"})
    ]
    print(datas)
    return datas
def set_style(name, height, bold=False):
    """Create an xlwt cell style that only customises the font.

    Args:
        name: Font face name, e.g. ``'Times New Roman'``.
        height: Font height in xlwt units (xlwt documents these as
            twentieths of a point, so 220 would be 11 pt -- confirm).
        bold: Whether the font is bold.

    Returns:
        An ``xlwt.XFStyle`` whose font uses the given settings and palette
        colour index 4.
    """
    style = xlwt.XFStyle()  # start from the default style
    font = xlwt.Font()  # font to attach to the style
    font.name = name
    font.bold = bold
    # xlwt spells this attribute "colour_index"; assigning "color_index"
    # only creates an unused attribute and the colour is never applied.
    font.colour_index = 4
    font.height = height
    style.font = font
    return style
def write_excel(filename="data.xls"):
    """Crawl the table data and save it as a legacy ``.xls`` workbook.

    Each row returned by ``crawl()`` becomes one spreadsheet row on a sheet
    named ``show_data``; each cell text becomes one column in that row.

    Args:
        filename: Path of the workbook to write. Defaults to ``data.xls``
            in the current working directory.
    """
    # Create the workbook and its single sheet.
    workbook = xlwt.Workbook(encoding="utf-8")
    data_sheet = workbook.add_sheet("show_data")

    # Build the style once and share it between all cells. Creating a new
    # XFStyle per cell is wasted work and, on large tables, can run into
    # xlwt's limit on the number of distinct styles in one workbook.
    cell_style = set_style('Times New Roman', 220, True)

    for row_index, row in enumerate(crawl()):
        for col_index, item in enumerate(row):
            data_sheet.write(row_index, col_index, item, cell_style)

    workbook.save(filename)
if __name__ == '__main__':
    # Script entry point: crawl the page and write the workbook.
    write_excel()
    # The workbook is saved in the legacy .xls format (not .xlsx), so the
    # success message names that format.
    print("创建xls文件成功")