一个朋友在做旅游方面的分析,需要把游客所有笔记的文本抓取下来,并保存到一个 txt 文件里。
源码如下:
import requests
from lxml import etree
from bs4 import BeautifulSoup
# Browser-like HTTP request headers: the user-agent string of desktop
# Chrome 77 on 64-bit Windows 10.
# NOTE(review): the visible requests.get(url) call below does not pass these
# headers — confirm whether it should be requests.get(url, headers=headers).
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36',
}
for num in range(1,81,1):
print(num)
url = 'https://you.ctrip.com/searchsite/travels/?query=%e9%81%bf%e6%9a%91%e6%97%85%e6%b8%b8&isAnswered=&isRecommended=&publishDate=365&PageNo='+str(num)
html = requests.get(url).text
txt = etree