Python 简单爬取峨眉电影网的电影信息

本文仅供交流学习，菜鸟刚学习爬虫，大佬勿喷，转载须告知。

import requests
from bs4 import BeautifulSoup
import re


def getHtml(url):
    """Resolve the listing URL from the site's navigation bar.

    Downloads *url*, locates the element whose class is
    ``"nav navbar-nav li_m-l-4"``, takes the sixth ``<a>`` inside it and
    returns *url* concatenated with that anchor's ``href``.

    Args:
        url: Address of the page that contains the navigation bar.

    Returns:
        ``url + href`` as a string, or ``None`` when the page could not be
        fetched or did not have the expected structure.
    """
    try:
        r = requests.get(url, timeout=30)
        # raise_for_status has to be *called*; the bare attribute reference
        # never checked the HTTP status.
        r.raise_for_status()
        soup = BeautifulSoup(r.text, "html.parser")
        nav = soup.find(class_="nav navbar-nav li_m-l-4")
        anchors = nav.find_all("a")
        href = anchors[5].get("href")
        return url + href
    except (requests.RequestException, AttributeError, IndexError, TypeError) as exc:
        # RequestException: network failure or HTTP error status.
        # AttributeError:   navigation element not found (find() gave None).
        # IndexError:       fewer than six links in the navigation bar.
        # TypeError:        the sixth link has no href (get() gave None).
        print("wrong1", exc)
        return None
def getID(url):
    """Collect the ``href`` values of the first 12 links on a listing page.

    Downloads *url*, decodes it as UTF-8, finds the element with class
    ``"content-img-list"`` and reads the ``href`` attribute of the ``<a>``
    tags inside it.

    Args:
        url: Address of one listing page.

    Returns:
        A list with at most 12 ``href`` values (an entry is ``None`` when an
        anchor has no ``href``), or ``None`` when the page could not be
        fetched or the list container was not found.
    """
    try:
        # A timeout keeps one unresponsive page from hanging the whole crawl.
        r = requests.get(url, timeout=30)
        # raise_for_status has to be *called* to actually check the status.
        r.raise_for_status()
        r.encoding = 'utf-8'
        soup = BeautifulSoup(r.text, "html.parser")
        container = soup.find(class_="content-img-list")
        return [a.get("href") for a in container.find_all("a")[:12]]
    except (requests.RequestException, AttributeError) as exc:
        # RequestException: network failure or HTTP error status.
        # AttributeError:   list container not found (find() gave None).
        print("wrong2", exc)
        return None
def get_movie_details(url):
    """Print the text of the ``"text-list"`` element of a detail page.

    Downloads *url*, decodes it as UTF-8, finds the element with class
    ``"text-list"`` and prints its text content. Failures are reported on
    stdout and otherwise ignored, so a single bad page does not abort the
    caller's loop.

    Args:
        url: Address of one detail page.

    Returns:
        None.
    """
    try:
        # A timeout keeps one unresponsive page from hanging the whole crawl.
        r = requests.get(url, timeout=30)
        # raise_for_status has to be *called* to actually check the status.
        r.raise_for_status()
        r.encoding = 'utf-8'
        soup = BeautifulSoup(r.text, "html.parser")
        print(soup.find(class_="text-list").get_text())
    except (requests.RequestException, AttributeError) as exc:
        # RequestException: network failure or HTTP error status.
        # AttributeError:   "text-list" element not found (find() gave None).
        print("wrong3", exc)
def main(url, first_page=1, last_page=232):
    """Crawl the listing pages and print the details of every linked page.

    Resolves the listing URL with ``getHtml``, then for each page number
    asks ``getID`` for that page's links, passes every link (prefixed with
    *url*) to ``get_movie_details`` and appends the link to the module-level
    ``item_list``, which must exist before this function is called.

    Args:
        url: Site root; used both to find the listing URL and as the prefix
            of every detail-page link.
        first_page: First page number to visit (inclusive).
        last_page: Last page number to visit (inclusive). The default keeps
            the original 1..232 range.
    """
    start_url = getHtml(url)
    if start_url is None:
        # getHtml returns None when it cannot resolve the listing URL;
        # concatenating onto None would raise TypeError.
        return
    for page in range(first_page, last_page + 1):
        category = start_url + "&page={}".format(page)
        links = getID(category)
        if not links:
            # getID returns None on failure; skip this page instead of
            # crashing while iterating over None.
            continue
        for link in links:
            get_movie_details(url + '{}'.format(link))
            item_list.append(link)
        

    

# Script entry point: start the crawl from the site's root URL.
if __name__ =="__main__":
    url = "http://www.emdy.cn/"
    # Module-level list that main() appends every visited link to.
    item_list = []
    main(url)


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Dr.Disrespect

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值