python爬视频网站数据_109-python-某视频网站爬取数据

最新推荐文章于 2021-11-21 22:32:05 发布

weixin_39999586

最新推荐文章于 2021-11-21 22:32:05 发布

阅读量41

点赞数

文章标签： python爬视频网站数据

import re

from urllib.request import urlopen,Request

from multiprocessing import Pool

def get_page(url, pattern):
    """Download *url* and return its source together with *pattern*.

    Args:
        url: Address of the page to fetch.
        pattern: Regular expression to be applied to the page later. It is
            not used here; it is handed back unchanged so the consumer of
            the result receives the pattern and the page in one value.

    Returns:
        A ``(pattern, page_source)`` tuple, where ``page_source`` is the
        response body decoded as UTF-8.
    """
    # Send a browser-like User-Agent instead of urllib's default one.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
    req = Request(url=url, headers=headers)

    # Read inside a ``with`` so the response is closed even if reading or
    # decoding raises, instead of leaving it to the garbage collector.
    with urlopen(req) as resp:
        response = resp.read().decode('utf-8')

    return pattern, response  # (regular expression, page source)

def parse_page(info):
    """Apply a pattern to a page and print one dict per match.

    Args:
        info: A ``(pattern, page_content)`` pair. Each match of the pattern
            must provide at least six capture groups, read in this order:
            index, title, actor, time, and the two parts of the score.

    Returns:
        None. Every match is printed as a dict with the keys ``index``,
        ``title``, ``actor``, ``time`` and ``score``; ``score`` is the fifth
        and sixth groups concatenated.
    """
    pattern, page_content = info

    for item in re.findall(pattern, page_content):
        # Strip surrounding whitespace once per captured field.
        fields = [field.strip() for field in item]
        dic = {
            'index': fields[0],
            'title': fields[1],
            'actor': fields[2],
            'time': fields[3],
            'score': '%s%s' % (fields[4], fields[5]),
        }
        print(dic)

if __name__ == '__main__':
    # NOTE(review): the page this script was recovered from had every HTML
    # tag stripped out of its regex literals, which left an unterminated
    # multi-line string here (plus several equally damaged commented-out
    # variants, now removed). The pattern below is a reconstruction: the
    # board-index / title / movie-item-info pieces and the two numeric score
    # groups survive in the damaged text, while the remaining tag and class
    # names are assumptions about the target markup. It yields the six
    # groups parse_page reads (index, title, actor, time, score integer
    # part, score fraction part). Confirm against the live page before use.
    regex = (
        r'<dd>.*?<.*?class="board-index.*?>(\d+)</i>'
        r'.*?title="(.*?)"'
        r'.*?class="movie-item-info"'
        r'.*?<p class="star">(.*?)</p>'
        r'.*?<p class="releasetime">(.*?)</p>'
        r'.*?<i class="integer">(\d+\.?)</i>'
        r'.*?<i class="fraction">(\d+)</i>'
        r'.*?</dd>'
    )

    # re.S lets '.' match newlines, so one pattern can span a multi-line entry.
    pattern1 = re.compile(regex, re.S)

    # Maps each page to fetch to the pattern used to parse it.
    url_dic = {'http://maoyan.com/board/7': pattern1}

    # The context manager shuts the pool down on exit, including when a
    # fetch fails and .get() re-raises the worker's exception.
    with Pool() as p:
        # Workers run get_page; parse_page receives each result as the callback.
        res_l = [
            p.apply_async(get_page, args=(url, pattern), callback=parse_page)
            for url, pattern in url_dic.items()
        ]

        # Wait for every task before leaving the block; .get() also surfaces
        # any exception raised inside get_page.
        for i in res_l:
            i.get()

weixin_39999586

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
python爬视频网站数据_109-python-某视频网站爬取数据

import re from urllib.request import urlopen, Request from multiprocessing import Pool def get_page(url, pattern): headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firef...
复制链接

扫一扫

评论

被折叠的条评论为什么被折叠?

到【灌水乐园】发言

查看更多评论

添加红包

成就一亿技术人!

hope_wisdom

发出的红包

实付元

使用余额支付

点击重新获取

扫码支付

钱包余额 0

抵扣说明：

1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载，可以购买VIP、付费专栏及课程。