```python
#coding:utf-8
import urllib.response,urllib.request
import re
import xlwt
from bs4 import BeautifulSoup
import requests
import gzip
# Precompiled patterns used to pull values out of HTML table markup.
# Every pattern exposes exactly one capture group.

# Digits that sit between `height="23">` and the closing </td>.
findnum = re.compile(
    r'height="23">(\d+)</td>',
)

# Value of a title="..." attribute that follows another quoted attribute
# and closes the tag (presumably the author -- confirm against the page).
findauth = re.compile(
    r'" title="(.*?)">',
)

# Text of an anchor whose opening tag ends with `"tooltip">`.
findname = re.compile(
    r'"tooltip">(.*?)</a>',
)

# Start of a rel="..." attribute value.
# NOTE(review): the three dots are regex wildcards (any three characters
# before the closing quote), not a literal ellipsis -- confirm that is
# intended. MULTILINE only affects ^ and $, which this pattern does not use.
findinq = re.compile(
    r'rel="(.*?)..."',
    flags=re.MULTILINE,
)

# Content of a center-aligned cell, with trailing whitespace left out of
# the capture group.
findtype = re.compile(
    r'<td align="center">(.*?)[\s]*</td>',
)

# Text wrapped in a red <font> element.
findpro = re.compile(
    r'<font color="red">(.*?)</font>',
)

# Digits in a right-aligned, height="23" cell; whitespace is tolerated
# before the closing </td> (presumably a word count -- confirm).
findwords = re.compile(
    r'<td align="right" height="23">(\d+)[\s]*</td>',
)
#获取网页链接
def geturl(baseurl):
headers = {
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:106.0) Gecko/20100101 Firefox/106.0'}
ip = {
'http'
```

爬虫晋江TOP排行
最新推荐文章于 2025-04-28 18:09:42 发布