# 一个简单的文字爬取程序，结合前述 3 篇博客，基本涵盖了一些爬虫基础知识，希望可以互相学习。
import requests
from lxml import etree
def get_url(url='https://share.html5.qq.com/fx/u?r=rBHXbBC', timeout=10):
    """Download a shared article page and save each extracted text fragment.

    The page is fetched with ``requests``, decoded as UTF-8, parsed with
    ``lxml`` and queried with a fixed XPath that selects the text nodes of
    the ``<span>`` elements inside the article paragraphs. Every fragment
    is stripped of surrounding whitespace, printed, and appended to the
    output file via ``save1File``.

    Args:
        url: Page to scrape. Defaults to the originally hard-coded share link.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            4xx/5xx response status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an HTTP error instead of silently parsing an error page.
    response.raise_for_status()
    # The page is served as UTF-8; force it so the Chinese text decodes correctly.
    response.encoding = 'UTF-8'

    tree = etree.HTML(response.text)
    fragments = tree.xpath(
        '//div[@class="item article"]/section/article/p/span/text()'
    )
    for fragment in fragments:
        # Drop surrounding spaces, newlines and similar whitespace.
        text = fragment.strip()
        print(text)
        save1File(text)
def save1File(d, path='F:python//test//爬虫学习//保存文字//datas.txt'):
    """Append one line of text to the output file.

    Args:
        d: Text to store; a trailing newline is added when writing.
        path: Destination file, opened in append mode as UTF-8. Defaults to
            the originally hard-coded location.
            NOTE(review): the default starts with ``F:python`` (no slash
            after the drive letter), which Windows resolves relative to the
            current directory on drive F: -- confirm this is intended.

    Raises:
        OSError: If the file cannot be opened (for example, the target
            directory does not exist).
    """
    print('''保存''')
    with open(path, 'a', encoding='utf-8') as fp:
        fp.write(d + '\n')
# Run the scraper. get_url() saves every extracted line itself, so no
# separate save1File() call is needed here (calling it with no argument
# raised a TypeError because the text parameter is required).
get_url()