Python爬取第一电影天堂最新电影(5000多部)代码实例(一)

1、实例代码:

#!/usr/bin/env python
#-*- coding: utf-8 -*-
#@Time    : 2020/4/7 16:28
#@Author  : zhangliangliang
#@File    : crawlerDemo3.py
#@Software: PyCharm
from urllib import request

from lxml import etree
import random
import requests,threading,datetime
from bs4 import BeautifulSoup

BASE_URL = "http://www.dytt8.net"

def readFile(path):
    """Read a text file and return its lines as a list.

    Trailing whitespace (including the line terminator) is stripped from
    every line; blank lines are kept as empty strings.

    :param path: path of the file to read.
    :return: list of the file's lines, in file order.
    """
    with open(path, 'r') as handle:
        return [line.rstrip() for line in handle]

def writeFile(path, text):
    """Append ``text`` followed by a newline to the file at ``path``.

    The file is opened in append mode, so it is created when missing and
    existing content is preserved.

    :param path: path of the file to append to.
    :param text: string to write as one line.
    """
    # Pin the encoding to UTF-8 (as truncateFile does) instead of relying on
    # the platform default, so non-ASCII text is written the same way on
    # every machine.
    with open(path, 'a', encoding='utf-8') as f:
        f.write(text)
        f.write('\n')

def truncateFile(path):
    """Empty the file at ``path``, creating it if it does not exist.

    :param path: path of the file to clear.
    """
    # Opening in 'w' mode already discards any existing content; the
    # truncate() call at the start position is kept so behaviour is unchanged.
    handle = open(path, 'w', encoding='utf-8')
    try:
        handle.truncate()
    finally:
        handle.close()

def getHeaders():
    """Build request headers with a randomly chosen User-Agent.

    The candidate User-Agent strings are read from the user_agent.txt config
    file (one candidate per line) on every call. The resulting headers are
    printed before being returned.

    :return: dict with a single 'User-Agent' entry.
    """
    candidates = readFile("/Users/zll/pycharmProjects/studyPython/crawler_poject_base_part1/config/user_agent.txt")
    chosen = {'User-Agent': random.choice(candidates)}
    print(chosen)
    return chosen

def getIp():
    """Pick one random entry from the ip.txt config file.

    The file is read on every call (one candidate per line). The chosen
    entry is printed before being returned.

    :return: the selected line of ip.txt, with trailing whitespace removed.
    """
    candidates = readFile('/Users/zll/pycharmProjects/studyPython/crawler_poject_base_part1/config/ip.txt')
    picked = random.choice(candidates)
    print(picked)
    return picked

def checkip(targeturl, ip):
    """Check whether ``targeturl`` can be fetched through the proxy ``ip``.

    Sends one GET request with a 5 second timeout through the proxy.

    :param targeturl: URL to request through the proxy.
    :param ip: proxy address string appended to the "http://" / "https://"
        prefixes (presumably "host:port" - verify against ip.txt).
    :return: True when the response status code is 200; False for any other
        status or when the request fails.
    """
    headers = getHeaders()  # custom request headers
    # NOTE(review): the "https" entry uses an https:// proxy URL, which asks
    # requests to speak TLS to the proxy itself; the requests docs usually
    # show http:// proxy URLs for both keys - confirm this is intended.
    proxies = {"http": "http://" + ip, "https": "https://" + ip}  # proxy ip
    try:
        response = requests.get(url=targeturl, proxies=proxies, headers=headers, timeout=5)
    except (requests.RequestException, ValueError):
        # Request/connection failures and malformed URLs mean the proxy is
        # unusable. Unlike a bare ``except:``, this lets KeyboardInterrupt
        # and SystemExit propagate so the crawler can still be stopped.
        return False
    return response.status_code == 200



def getProxies(url):
    """Return a proxies mapping for ``url`` if a random proxy passes checkip.

    One candidate is taken from getIp() and tested with checkip(). On
    success the mapping is printed and returned; otherwise None is returned.

    :param url: URL used to test the proxy.
    :return: {'http': 'http://<ip>'} on success, else None.
    """
    candidate = getIp()
    # Guard clause: bail out unless the check returned exactly True.
    if checkip(url, candidate) is not True:
        return None
    proxy_map = {'http': 'http://' + candidate}
    print(proxy_map)
    return proxy_map

#def getProxies(url):


def get_detail_url(url):
    proxies = getProxies(url)
    header = getHeaders()
    try:
        response = requests.get(url, headers=header,proxies=proxies)
    #print(response.content.decode('gbk'))
  • 3
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值