# https://www.dytt89.com/
import requests
from lxml import etree
import os
import csv
import re
# Site root; listing hrefs are site-relative and get appended to this.
BASE_URL = 'https://www.dytt89.com'
# Encoding forced onto every response before its text is read.
PAGE_ENCODING = 'gb2312'
# Destination file for the collected [movie_name, movie_url] rows.
OUTPUT_CSV = 'ddtt.csv'
# Seconds to wait on a request before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10

# Compiled once at import time instead of once per movie page.
# Captures the translated title, original title, year, region and genre
# fields from the "◎"-prefixed description lines of a detail page.
INFO_PATTERN = re.compile(
    r'<br />◎译 名 (?P<yiming>.*?)<br />◎片 名 (?P<name>.*?)'
    r'<br />◎年 代 (?P<year>.*?)<br />◎产 地 (?P<area>.*?)<br />◎类 别 (?P<type>.*?)<br />',
    re.S,
)


def fetch_text(url):
    """Download *url* and return its body decoded with PAGE_ENCODING.

    Raises:
        requests.RequestException: on connection errors or timeouts.
    """
    response = requests.get(url=url, timeout=REQUEST_TIMEOUT)
    response.encoding = PAGE_ENCODING
    return response.text


def build_movie_url(href):
    """Return the absolute detail-page URL for a listing *href*.

    Returns None when *href* contains 'https:', i.e. it is already an
    absolute link; such entries are skipped by the scraper.
    """
    if 'https:' in href:
        return None
    return BASE_URL + href


def find_movie_infos(page_text):
    """Return a list of all INFO_PATTERN matches found in *page_text*.

    A list is returned (rather than the lazy iterator from ``finditer``)
    so that callers can test it for emptiness: an iterator object is
    always truthy, even when it would yield nothing.
    """
    return list(INFO_PATTERN.finditer(page_text))


def print_movie_info(match):
    """Print the full matched text followed by each captured field."""
    print(11111, match.group())
    for field in ('yiming', 'name', 'year', 'area', 'type'):
        print(match.group(field))


def main():
    """Scrape the listing page, print each movie's details, write the CSV.

    For every relative link in the listing, the detail page is fetched and
    its description fields are printed.  One [movie_name, movie_url] row per
    movie is collected and written to OUTPUT_CSV after the loop finishes.
    """
    index_html = etree.HTML(fetch_text(BASE_URL + '/'))
    # etree.HTML returns None for an empty document; treat that as no items.
    items = [] if index_html is None else index_html.xpath(
        '//div[@class="co_content222"]/ul/li')

    rows = []
    for item in items:
        hrefs = item.xpath('./a/@href')
        if not hrefs:
            # <li> without a link: nothing to follow.
            continue
        movie_url = build_movie_url(hrefs[0])
        if movie_url is None:
            continue

        # Prefer the link text; fall back to the title attribute when the
        # anchor has no direct text node.
        texts = item.xpath('./a/text()')
        if texts:
            movie_name = texts[0]
        else:
            print('list index out of range')
            titles = item.xpath('./a/@title')
            movie_name = titles[0] if titles else ''

        print(movie_url)
        try:
            matches = find_movie_infos(fetch_text(movie_url))
        except requests.RequestException as exc:
            # One unreachable detail page must not abort the whole run.
            print(exc)
            matches = []

        if not matches:
            print('没有爪刀')
        for match in matches:
            print_movie_info(match)

        rows.append([movie_name, movie_url])

    with open(OUTPUT_CSV, 'w', encoding='utf8', newline='') as f:
        csv.writer(f).writerows(rows)


if __name__ == '__main__':
    main()
ddtt练习
最新推荐文章于 2024-11-09 21:51:27 发布