Python多线程爬取7160网站美女图片

直接上代码

#coding=UTF-8
from urllib.request import urlopen
from pyquery import PyQuery as pq
import re
import pymongo
import threading
# Shared MongoDB client, connected to localhost:27017 when the module loads.
client=pymongo.MongoClient(host='localhost',port=27017)
# Collection `mutitry47160` in database `taobao`; save_to_mongo writes to it.
table=client.taobao.mutitry47160
# Lock taken by father_link around its update of the shared counter `i`.
lock=threading.Lock()
# Listing-page counter shared by all father_link threads; each call
# increments it before building its listing URL.
i = 1

def save_to_mongo(result):
	"""Insert one scraped record into the module-level Mongo collection.

	Uses ``insert_one`` because ``Collection.insert`` was deprecated in
	PyMongo 3 and removed in PyMongo 4. Failures are printed (together
	with the exception) instead of raised, so a failed write does not
	kill the calling worker thread.

	Args:
		result: dict document to store. PyMongo adds an ``_id`` key to it
			when the insert succeeds.
	"""
	try:
		outcome=table.insert_one(result)
	except Exception as exc:
		# Best-effort write: report what failed and why, then carry on.
		print('存储到Mongo失败',result,exc)
	else:
		if outcome.acknowledged:
			print('存储到Mongo成功')

def download_son(Son_link,l):
	"""Scrape page ``l`` of one gallery and store its image record.

	Builds ``https://www.7160.com<Son_link>index_<l>.html``, loads it as
	GBK, reads the ``src`` and ``alt`` attributes of the image matched by
	``.picsbox.picsboxcenter p a img``, prints the resulting record and
	passes it to ``save_to_mongo``.

	Args:
		Son_link: gallery path as found in the listing page's ``href``.
		l: page index within the gallery.
	"""
	page_url=''.join(('https://www.7160.com',Son_link,'index_',str(l),'.html'))
	page=pq(page_url,encoding='gbk')
	picture=page('.picsbox.picsboxcenter p a img')
	record={'image':picture.attr('src'),'title':picture.attr('alt')}
	print(record)
	save_to_mongo(record)


def father_link():
	"""Crawl one listing page and every gallery linked from it.

	Claims the next listing-page number from the shared counter ``i``,
	loads that listing page, and for each ``.news_bom-left li`` entry:
	stores the gallery's first image via ``save_to_mongo``, reads the
	gallery's page count from its pager, and starts one ``download_son``
	thread for each remaining page (2..page count).
	"""
	global i
	# Take a private copy of the page number while still holding the lock.
	# Reading the global after release would let another thread's increment
	# slip in, so two threads could crawl the same page and skip another.
	# NOTE(review): the counter starts at 1 and is incremented before use,
	# so list_1_1.html is never requested - confirm that is intended.
	with lock:
		i += 1
		page_no=i
	url='https://www.7160.com/rentiyishu/list_1_'+str(page_no)+'.html'
	print('--------------------------------------'+str(page_no)+'--------------------------------------')
	doc=pq(url,encoding='gbk')
	for item in doc('.news_bom-left li').items():
		Son_link=item.find('a').attr('href')
		if not Son_link:
			# Entry without a link: nothing to follow, and concatenating
			# None below would raise TypeError and end this thread.
			continue
		doc2=pq('https://www.7160.com'+Son_link,encoding='gbk')
		cover=doc2('.picsbox.picsboxcenter p a img')
		product={'image':cover.attr('src'),'title':cover.attr('alt')}
		save_to_mongo(product)
		# Read the page count from the first pager link.
		pager_text=doc2('body > div > div.center > div.NEWS > div.picmainer > div.itempage > a:nth-child(1)').text()
		numbers=re.findall(r"\d+\.?\d*",pager_text)
		try:
			page_num=int(numbers[0])
		except (IndexError,ValueError):
			# No number in the pager, or one int() rejects (the pattern
			# also matches forms like "12."): treat as a one-page gallery.
			continue
		print('共%d页,开始爬取'%page_num)
		for l in range(2,page_num+1):
			try:
				threading.Thread(target=download_son,args=(Son_link,l)).start()
			except RuntimeError as exc:
				# Thread.start raises RuntimeError when no new thread can
				# be started; report it and move on to the next gallery
				# instead of swallowing the error silently.
				print('启动下载线程失败',Son_link,l,exc)
				break
def main():
	"""Start 107 ``father_link`` threads, one after another.

	The threads are started but not joined here.
	"""
	for _ in range(107):
		worker=threading.Thread(target=father_link)
		worker.start()

# Run the crawler only when this file is executed as a script, not on import.
if __name__ == '__main__':
	main()
  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值