import pandas as pd
import urllib.parse
import urllib.request
from lxml import etree
import requests
import re
import time
import os
import random
def get_zhushi_info(url):
#urls='https://so.gushiwen.org/shiwen2017/ajaxshiwencont.aspx?id=45c396367f59&value=zhu'
proxy = random.choice(proxy_list)
header = random.choice(spider_header)
urlhandle = urllib.request.ProxyHandler({'http': proxy})
opener = urllib.request.build_opener(urlhandle)
urllib.request.install_opener(opener)
req = urllib.request.Request(url)
req.add_header('User-Agent', header)
author_poem = []
response_result = urllib.request.urlopen(req).read()
html = response_result.decode('utf-8')
html = etree.HTML(html)
title = html.xpath(
'//div[@class="main3"]/div[@class="left"]/div[@class="sons"]/div[@c
爬虫之爬取古诗文网站
最新推荐文章于 2024-06-13 11:18:37 发布