import csv
from urllib.parse import urljoin

import requests
from lxml import etree
# Desktop-Chrome User-Agent string sent with every request in this script.
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36'

# HTTP headers passed to requests.get by the fetch helpers below.
headers = {'User-Agent': _USER_AGENT}

# Site root that the hrefs scraped from listing pages are appended to.
url_head = 'https://www.dytt8.net'
def get_onepage(url, timeout=10):
    """Collect the detail-page URLs linked from one listing page.

    Args:
        url: Address of the listing page to download.
        timeout: Seconds ``requests`` waits on the server before raising;
            without a timeout a stalled connection would block forever.

    Returns:
        A list of absolute URLs, one for each non-empty ``href`` of an
        ``<a class="ulink">`` element located inside a ``<table>``.

    Raises:
        requests.RequestException: If the page cannot be fetched
            (including when the timeout expires).
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    page = etree.HTML(response.text)
    # A single XPath query yields a node-set, so an anchor inside nested
    # tables is reported once instead of once per enclosing table.
    hrefs = page.xpath('//table//a[@class="ulink"]/@href')
    # urljoin copes with both site-relative and already-absolute hrefs,
    # where plain concatenation with url_head would build a broken URL.
    return [urljoin(url_head, href) for href in hrefs if href]
def parse_url(url, timeout=10):
    """Scrape the title and download link from one movie detail page.

    Args:
        url: Address of the detail page to download.
        timeout: Seconds ``requests`` waits on the server before raising;
            without a timeout a stalled connection would block forever.

    Returns:
        A dict with two keys: '标题' holds the text of the first
        ``<font color="#07519a">`` element, and '下载地址' holds the anchor
        text from the last ``<td style="WORD-WRAP: break-word">`` cell that
        directly contains an ``<a>``. A value is '' when the corresponding
        element is not present on the page.

    Raises:
        requests.RequestException: If the page cannot be fetched
            (including when the timeout expires).
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    # The page is decoded as GBK; errors='replace' keeps a single stray
    # byte from raising UnicodeDecodeError and discarding the whole page.
    text = response.content.decode('gbk', errors='replace')
    html = etree.HTML(text)

    # Fall back to an empty title instead of raising IndexError when the
    # title element is missing.
    title_nodes = html.xpath('//font[@color="#07519a"]')
    title = title_nodes[0].text if title_nodes else ''

    lianjie = ''
    for cell in html.xpath('//td[@style="WORD-WRAP: break-word"]'):
        anchors = cell.xpath('./a')
        # Skip cells that carry the style but hold no direct <a> child.
        if anchors:
            lianjie = anchors[0].text

    return {'标题': title, '下载地址': lianjie}
if __name__ == '__main__':
    # Crawl listing pages 1-49 and write one CSV row per movie found.
    fieldnames = ['标题', '下载地址']
    # The with-block closes the file even if a request or parse raises.
    # newline='' is what the csv module requires of files it writes to;
    # without it, platforms using \r\n get a blank line after every row.
    with open('电影下载地址.csv', 'w', encoding='utf-8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        for i in range(1, 50):
            page_url = 'https://www.dytt8.net/html/gndy/dyzz/list_23_{}.html'.format(i)
            for url in get_onepage(page_url):
                writer.writerow(parse_url(url))