#coding=utf-8
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
def _fetch_soup(url, headers, encoding=None, timeout=10):
    """Download *url* and return its body parsed by BeautifulSoup.

    Args:
        url: Absolute URL to fetch.
        headers: HTTP request headers to send.
        encoding: Charset used to decode the body. When ``None`` the
            encoding chosen by ``requests`` is used.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    if encoding is not None:
        # Decode the raw bytes with the known charset instead of
        # round-tripping mis-decoded text through ISO-8859-1 afterwards.
        response.encoding = encoding
    # NOTE(review): the 'xml' parser is kept from the original code although
    # the pages are fetched as .html -- confirm whether 'html.parser' (or
    # 'lxml') was intended before changing it.
    return BeautifulSoup(response.text, 'xml')


def function_get_url(pages=8, output_path='123.txt'):
    """Crawl the news listing pages and append each article to a text file.

    For every listing page ``index_0.html`` .. ``index_<pages-1>.html`` the
    links matching ``ul > li > a[href]`` are followed.  From each linked page
    the text of the first ``<h3>`` and of the first ``<p>`` is appended to
    *output_path*, one after the other, each followed by a newline.

    Args:
        pages: Number of listing pages to visit, starting at index 0.
        output_path: File the extracted text is appended to (UTF-8).

    Raises:
        requests.RequestException: If any HTTP request fails or times out.
    """
    header = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0"}
    # Open the output once and let the context manager flush and close it,
    # instead of leaking a new handle for every article.  An explicit encoding
    # keeps the write from failing on locales that cannot encode Chinese text.
    with open(output_path, 'a', encoding='utf-8') as out:
        for page in range(pages):
            list_url = "http://www.xctmr.com/news/signs/index_" + str(page) + '.html'
            listing = _fetch_soup(list_url, header)
            for link in listing.select('ul > li > a[href]'):
                # Resolve relative hrefs against the listing page; absolute
                # URLs pass through urljoin unchanged.
                url = urljoin(list_url, link['href'])
                print(url)
                # The article text used to be recovered with
                # .encode('iso-8859-1').decode('gbk'), i.e. the body is GBK.
                article = _fetch_soup(url, header, encoding='gbk')
                title = article.find('h3')
                body = article.find('p')
                if title is None or body is None:
                    # Not an article page (e.g. a navigation link): skip it
                    # rather than abort the whole crawl with AttributeError.
                    print('skipped, no <h3>/<p> found: ' + url)
                    continue
                out.write(title.text + '\n' + body.text + '\n')
# Script entry point: start the crawl only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    function_get_url()
# Topic: crawling a website.
# Source-page footer: latest recommended article published on 2023-11-13 14:15:49.