import csv
import requests
import re
from urllib.parse import urljoin

# Scrape the "2022必看热片" list on the site's home page, then visit every
# linked detail page and append the captured (name, mag) pair of each page to
# moviePara.csv.

url = 'https://www.dytt89.com'
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
}
# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the script indefinitely.
timeout = 10

# Captures the body of the first <ul> that follows the "2022必看热片" heading.
obj1 = re.compile(r'2022必看热片.*?<ul>(?P<ul>.*?)</ul>', re.S)
# Captures the href of every <li><a href='...'> entry inside that list.
obj2 = re.compile(r"<li><a href='(?P<h>.*?)'", re.S)
# On a detail page: `name` is the <h1> text of the title block, `mag` is the
# anchor text inside the highlighted table cell (presumably the download /
# magnet link -- confirm against the live markup).
obj3 = re.compile(
    r'<div class="title_all"><h1>(?P<name>.*?)</h1></div>'
    r'.*?'
    r'<td style="WORD-WRAP: break-word" bgcolor="#fdfddf"><a href=".*?">(?P<mag>.*?)</a></td>',
    re.S,
)

# Fetch the home page; the response is closed as soon as its text is read.
with requests.get(url=url, headers=headers, timeout=timeout) as resp:
    resp.encoding = 'gb2312'
    home_html = resp.text

# Collect the absolute URL of every detail page in the list section.
link_list = []
for section in obj1.finditer(home_html):
    for item in obj2.finditer(section.group('ul')):
        # urljoin handles relative and absolute hrefs alike, where plain
        # string concatenation would produce a malformed URL.
        link_list.append(urljoin(url, item.group('h')))

# mode='a+' appends, so rows accumulate across runs. The `with` block
# guarantees the file is flushed and closed even if a request fails midway.
with open('moviePara.csv', encoding='utf-8', newline='', mode='a+') as f:
    writer = csv.writer(f)
    for link in link_list:
        with requests.get(url=link, headers=headers, timeout=timeout) as res:
            res.encoding = 'gb2312'
            page_html = res.text
        for match in obj3.finditer(page_html):
            name, mag = match.group('name', 'mag')
            print(name, mag)
            writer.writerow((name, mag))
# Source note: "dygod" -- first published 2022-03-25 13:55:18.