python爬虫背景_python 爬取背景图 简单示例

#coding=utf-8

#Python Version python3

#Description

#python paperwalls.py search keyword download dir eg:python 1.py kaori ../resource/wallpaper

#download_pic 下载图片

#get_download_link 获取下载的链接

#getwallpaper 获取图片 保存到本地

#缺点: 没有用多线程 细节处理不是很好 没有一般通用性

import requests

import re

import os

import sys

# Proxy address — every HTTP(S) request is routed through this local proxy.
proxies = { "http": "http://127.0.0.1:25378", "https": "http://127.0.0.1:25378", }

# Directory wallpapers are saved into (can be overridden by argv[2] in __main__).
download_dir = '../resource/wallpaper/'

# Number of wallpapers downloaded so far (incremented by download_pic).
downloaded_num = 0

# Total number of search hits reported by the site (set by getwallpaper).
total = 0

#下载图片

def download_pic(url, name, pic_type):
    """Download the image at *url* and save it as <download_dir>/<name>.<pic_type>.

    Increments the module-level ``downloaded_num`` counter and prints a
    progress line such as ``[  101/817] 629538.jpg Done!``.

    Args:
        url: direct image URL returned by get_download_link.
        name: wallpaper id, used as the file name.
        pic_type: file extension (e.g. ``jpg``, ``png``).
    """
    global proxies
    global download_dir
    global downloaded_num
    global total

    # Create the download directory on first use.
    if not os.path.exists(download_dir):
        os.makedirs(download_dir)

    # BUG FIX: the original line was the garbled ``requests.get(url, =proxiesproxies)``;
    # the request must pass the configured proxies as a keyword argument.
    r = requests.get(url, proxies=proxies)

    # Request succeeded — bump the downloaded counter.
    downloaded_num += 1

    # Write the image bytes to disk.
    with open('%s/%s.%s' % (download_dir, name, pic_type), 'wb') as f:
        f.write(r.content)

    # Formatted progress report, e.g. [  101/817] 629538.jpg Done!
    print('[{:5d}/{}] {}.{} Done!'.format(downloaded_num, total, name, pic_type))

#下载链接

def get_download_link(wallpaper_id, wallpaper_type, server, user_id):
    """Resolve the real image URL for one wallpaper and download it.

    POSTs the wallpaper's identifying fields to the site's download-link
    endpoint; the response body is the direct image URL, which is handed
    straight to download_pic().
    """
    global proxies

    form = {
        'wallpaper_id': wallpaper_id,
        'type': wallpaper_type,
        'server': server,
        'user_id': user_id,
    }
    response = requests.post(
        'https://wall.alphacoders.com/get_download_link.php',
        data=form,
        proxies=proxies,
    )
    download_pic(response.text, wallpaper_id, wallpaper_type)

#根据关键词获取壁纸

def getwallpaper(keyword):
    """Search wall.alphacoders.com for *keyword* and download every result.

    Walks the paginated search results; for each wallpaper found it calls
    get_download_link(), which resolves the image URL and downloads it.
    Sets the module-level ``total`` from the first results page.
    """
    global proxies
    global total

    # Pattern for the "next page" link.
    # NOTE(review): the original regex literal was destroyed when this article
    # was scraped (its HTML tags were stripped, leaving r"").  This is a
    # reconstruction: group(1) must capture the href of the next-page anchor,
    # which the loop below expects to be '#' on the last page.  Verify against
    # the live page markup.
    p_nextpage = re.compile(r'<a\s+href="([^"]*)"\s+id="next_button"')

    # One match per wallpaper thumbnail: \d+? = digits, \w+? = word chars.
    # The four captured fields are exactly what the download-link endpoint needs.
    p_item = re.compile(r'data-id="(\d+?)" data-type="(\w+?)" data-server="(\w+?)" data-user-id="(\d+?)"')

    # Start on the first results page.
    page_num = 1

    while 1:
        r_page = requests.get(
            'https://wall.alphacoders.com/search.php?search=%s&lang=Chinese&page=%d'
            % (keyword.lower(), page_num),
            proxies=proxies)

        nextpage_link = p_nextpage.search(r_page.text)

        # No pagination marker at all means the search returned nothing.
        if nextpage_link is None:
            print("Sorry, we have no results for your search!")
            break

        if page_num == 1:
            # NOTE(review): reconstructed — the original pattern lost its HTML
            # tags in scraping (only r"\s+?(\d+)(.+?)\s+?" survived).  Group(1)
            # must be the total hit count shown in the results header; confirm
            # against the live page.
            total = int(re.search(r">\s+?(\d+)(.+?)\s+?<", r_page.text).group(1))
            print("the %s wallpaper's total is %d" % (keyword, total))

        # Download every wallpaper listed on this page.
        for wallpaper_id, wallpaper_type, server, user_id in p_item.findall(r_page.text):
            get_download_link(wallpaper_id, wallpaper_type, server, user_id)

        # An href of '#' marks the last page — we are done.
        if nextpage_link.group(1) == '#':
            print("All wallpaper done!")
            break

        page_num += 1

#主函数

# Entry point: validate argv, optionally override the download directory,
# then start crawling for the given keyword.
if __name__ == '__main__':
    argc = len(sys.argv)
    if argc < 2 or argc > 3:
        # Wrong number of arguments — show usage and exit.
        usage_text = "Usage:\n\tpython getwallpaper.py miku [miki_pic]\nFirst param: the name of script\nSecond param: the wallpaper's keyword which you want to search\nThird param: the dir's name where you want to download in, optional, default in ../resource/wallpapers"
        print(usage_text)
    else:
        if argc == 3:
            # Third argument overrides the default download directory.
            download_dir = str(sys.argv[2])
        getwallpaper(str(sys.argv[1]))

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值