"""Scrape movie entries from dygod.net list pages and export them to movie.xls.

For every entry on each list page the script collects the link title, four
labelled fields taken from the entry's HTML, and the magnet link found on the
entry's detail page. Entries for which no magnet link is found are not saved.
"""
import re
from urllib.parse import urljoin

import requests
import xlwt
from bs4 import BeautifulSoup

url = "https://www.dygod.net/html/gndy/dyzz/"
# Headers sent with every request.
hd = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36 Edg/115.0.1901.188'
}

# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10

# Columns per spreadsheet row: title, the four labelled fields, magnet link.
COLUMN_COUNT = 6

# List pages to scrape (index_<n>.html); only page 2 by default.
PAGE_NUMBERS = range(2, 3)

# (label printed to the console, pattern capturing the rest of that line).
# NOTE(review): the whitespace inside each label is kept exactly as in the
# original script - confirm it matches the whitespace used in the page markup.
FIELD_PATTERNS = (
    ("◎译 名", re.compile(r"◎译 名(.*)\n")),
    ("◎片 名", re.compile(r"◎片 名(.*)\n")),
    ("◎年 代", re.compile(r"◎年 代(.*)\n")),
    ("◎类 别", re.compile(r"◎类 别(.*)\n")),
)


def _fetch_soup(link):
    """Download *link* and return its HTML parsed with BeautifulSoup.

    Raises:
        requests.RequestException: if the request fails or times out.
    """
    res = requests.get(link, headers=hd, timeout=REQUEST_TIMEOUT)
    # Decode with the encoding detected from the body rather than the
    # one announced in the response headers.
    res.encoding = res.apparent_encoding
    return BeautifulSoup(res.text, "html.parser")


def _first_field_value(pattern, text):
    """Return the first value captured by *pattern* in *text*, or None.

    Ideographic spaces (U+3000) are stripped from the captured value.
    """
    match = pattern.search(text)
    if match is None:
        return None
    return match.group(1).replace('\u3000', '')


def getmanget(linkurl):
    """Return the text of the first link on *linkurl* that mentions "magnet".

    Args:
        linkurl: URL of a movie detail page.

    Returns:
        The string of the first ``<a>`` element whose text contains
        "magnet", or None when the page has no such link.

    Raises:
        requests.RequestException: if the page cannot be downloaded.
    """
    soup = _fetch_soup(linkurl)
    for anchor in soup.find_all("a"):
        if "magnet" in str(anchor.string):
            return anchor.string
    return None


def saveExcle(worksheet, count, lst):
    """Write the items of *lst* into row *count* of *worksheet*.

    At most COLUMN_COUNT items are written, starting at column 0. A shorter
    list fills only the leading columns instead of raising IndexError.

    Args:
        worksheet: xlwt worksheet to write to.
        count: zero-based row index.
        lst: cell values for the row.
    """
    for column, value in enumerate(lst[:COLUMN_COUNT]):
        worksheet.write(count, column, value)


count = 0
total = []  # not used by this script; kept so the module's names are unchanged
workbook = xlwt.Workbook(encoding="utf-8")  # create the workbook object
worksheet = workbook.add_sheet('sheet1')

for page in PAGE_NUMBERS:
    url = "https://www.dygod.net/html/gndy/dyzz/index_" + str(page) + ".html"
    try:
        listing = _fetch_soup(url)
    except requests.RequestException as exc:
        # One unreachable list page should not abort the whole run.
        print("failed to fetch list page", url, exc)
        continue

    entries = listing.find_all(class_="tbspan", style="margin-top:6px")
    for entry in entries:
        anchor = entry.find("a")
        href = anchor.get("href") if anchor is not None else None
        if not href:
            # Without a detail link there is no magnet link to look up.
            continue

        print(anchor.string)
        info = [anchor.string]

        entry_html = str(entry)
        for label, pattern in FIELD_PATTERNS:
            value = _first_field_value(pattern, entry_html)
            if value is None:
                # Keep the column layout fixed when a field is absent.
                info.append("")
                continue
            print(label, value)
            # Only the part before the first "/" is stored.
            info.append(value.split("/")[0])

        # Root-relative hrefs resolve exactly as the former string
        # concatenation did; absolute hrefs are now handled as well.
        linkurl = urljoin(url, href)
        try:
            manget = getmanget(linkurl)
        except requests.RequestException as exc:
            print("failed to fetch detail page", linkurl, exc)
            manget = None

        # Entries without a magnet link are not written to the sheet.
        if manget:
            info.append(str(manget))
            saveExcle(worksheet, count, info)
            count += 1
        print("=" * 100)

workbook.save("movie.xls")
print(count)