使用Pyquery 爬取数据存为csv文件
爬取网页:http://www.zuihaodaxue.cn/zuihaodaxuepaiming2018.html
环境:windows+Anaconda
代码如下:
import requests
from pyquery import PyQuery as pq
def get_page(url):
"""发起请求 获得源码"""
r = requests.get(url)
r.encoding = 'utf8'
html = r.text
return html
def parse(text):
"""解析数据 写入文件"""
doc = pq(text)
# 获得每一行的tr标签
tds = doc('table.table tbody tr.alt').items()
for td in tds:
rank = td.find('td:first-child').text() # 排名
name = td.find('div').text() # 大学名称
city = td.find('td:nth-child(3)').text() # 城市
score = td.find('td:nth-child(4)').text() # 总分
with open('college.csv', 'a+', encoding='utf8') as f:
f.write(rank + '\t\t')
f.write(name + '\t\t')
f.write(city + '\t\t')
f.write(score + '\t\t\n')
print("写入完成")
if __name__ == "__main__":
url = "http://www.zuihaodaxue.cn/zuihaodaxuepaiming2018.html"
text = get_page(url)
parse(text)