# -*- coding: utf-8 -*-
"""
Created on Mon Jan 29 10:19:26 2018
@author: dell
"""
#导入模块
import requests#网络请求模块
import re#提取数据
#import time#time.sleep
# Root of the site; detail-page links scraped from a listing are relative to it.
BASE_URL = 'http://www.dytt8.net'
# Listing-page URL; the placeholder receives the page number.
LIST_URL_TEMPLATE = BASE_URL + '/html/gndy/dyzz/list_23_{}.html'
# Listing pages to crawl (1 through 9, as in the original loop).
PAGES = range(1, 10)
# Encoding applied to every downloaded page before reading its text.
PAGE_ENCODING = 'gb2312'
# File that collected download links are appended to, one per line.
OUTPUT_PATH = 'C:\\Users\\dell\\Desktop\\dytt.txt'
# Seconds to wait on a request before giving up, so a stalled server
# cannot hang the crawl forever.
REQUEST_TIMEOUT = 10

# Captures the href of every anchor carrying the "ulink" class on a listing page.
DETAIL_LINK_RE = re.compile(r'<a href="(.*?)" class="ulink')
# Captures the href of an anchor that is immediately closed by </a></td>.
DOWNLOAD_LINK_RE = re.compile(r'<a href="(.*?)">.*?</a></td>')


def extract_detail_links(listing_html):
    """Return the detail-page hrefs found in *listing_html*, in page order.

    Returns an empty list when nothing matches.
    """
    return DETAIL_LINK_RE.findall(listing_html)


def extract_download_link(detail_html):
    """Return the first download href found in *detail_html*, or None.

    The original script indexed ``findall(...)[0]`` and crashed with
    IndexError on any page without a matching anchor; returning None lets
    the caller skip such pages instead.
    """
    match = DOWNLOAD_LINK_RE.search(detail_html)
    return match.group(1) if match else None


def fetch_html(url):
    """Download *url* and return its text decoded as PAGE_ENCODING.

    Returns None (after printing a short notice) when the request fails,
    times out, or answers with an HTTP error status, so that one bad page
    does not abort the whole crawl.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as err:
        print('skipping {}: {}'.format(url, err))
        return None
    # Set the page encoding explicitly before reading .text.
    response.encoding = PAGE_ENCODING
    return response.text


def iter_download_links(pages):
    """Yield one download link per detail page reachable from *pages*.

    *pages* is an iterable of listing page numbers. Pages that cannot be
    fetched, and detail pages without a download link, are skipped.
    """
    for page in pages:
        listing_html = fetch_html(LIST_URL_TEMPLATE.format(page))
        if listing_html is None:
            continue
        for detail_path in extract_detail_links(listing_html):
            detail_html = fetch_html(BASE_URL + detail_path)
            if detail_html is None:
                continue
            link = extract_download_link(detail_html)
            if link is None:
                print('no download link found on {}'.format(BASE_URL + detail_path))
                continue
            yield link


def main():
    """Crawl the listing pages and append every download link to OUTPUT_PATH."""
    # Open the output once for the whole crawl instead of once per link;
    # the context manager still closes (and flushes) it if the crawl is
    # interrupted part-way.
    with open(OUTPUT_PATH, 'a', encoding='utf-8') as out_file:
        for link in iter_download_links(PAGES):
            out_file.write(link + '\n')


if __name__ == '__main__':
    main()
# 爬取某电影网站上电影下载地址的简单爬虫程序
# 最新推荐文章于 2024-05-14 13:17:24 发布