基于Selenium爬取图片,并将图片链接保存到MySQL

import json
import logging
from bs4 import BeautifulSoup
from selenium import webdriver
import requests
import pymysql
from multiprocessing import Process


def func1(url):
    """Crawl every listing page in ``url`` and store the image links it leads to.

    Each entry of ``url`` is fetched with ``requests`` and decoded as JSON.
    For every item under the ``'list'`` key, the item's ``'playurl'`` is
    appended to the site root and passed to ``get_things`` together with one
    Firefox WebDriver that is shared by the whole run.

    Errors are logged and printed rather than raised: a failing link does not
    stop the remaining links of its page, and a failing page does not stop
    the remaining pages.

    Args:
        url: Iterable of listing-page URLs that return JSON.
    """
    headers={'User-Agent':'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36'}
    driver = webdriver.Firefox()
    link_count = 0
    try:
        # The page counter advances even when the page fails, as before.
        for page_count, page_url in enumerate(url, start=1):
            try:
                page = json.loads(requests.get(page_url, headers=headers).text)
                print('%d个页面完成' % page_count)
                for item in page['list']:
                    try:
                        link_count += 1
                        detail_url = 'https://www.2222zw.com' + item.get('playurl')
                        get_things(detail_url, driver)
                        print('%d个链接完成' % link_count)
                    except Exception as ee:
                        logging.error(ee)
                        print('发生错误%s' % ee)
                    else:
                        logging.info('ok')
            except Exception as e:
                logging.error(e)
                print('发生错误%s' % e)
            else:
                logging.info('just great job')
    finally:
        # Always shut the browser down; otherwise each worker leaves a
        # Firefox process behind when it finishes or fails.
        driver.quit()


def get_things(url,driver):
    """Load ``url`` in the browser and store the source of each post image.

    The rendered page is parsed with BeautifulSoup, and every ``<img>`` that
    is a direct child of the element with id ``postmessage`` has its ``src``
    passed to ``connect_mysql``.

    Args:
        url: Address of the page to open.
        driver: WebDriver instance used to load the page.
    """
    driver.get(url)
    soup = BeautifulSoup(driver.page_source, 'lxml')
    for img in soup.select('#postmessage > img'):
        src = img.get('src')
        # An <img> with no (or an empty) src has no link worth storing;
        # skipping it keeps None/'' rows out of the table.
        if src:
            connect_mysql(src)




def connect_mysql(my_href):
    """Insert one image link into the ``my_href`` column of ``table2``.

    A connection is opened for the single insert and is always closed again,
    so repeated calls do not accumulate open connections.

    Args:
        my_href: Image URL to store.
    """
    setting={'host':'localhost',
             'user':'root',
             'password':'123456',
             'db':'one',
             'charset':'utf8',
             'port':3306
             }
    conn = pymysql.connect(**setting)
    try:
        # The cursor context manager closes the cursor on exit.
        with conn.cursor() as cursor:
            # Parameterized query: the driver escapes the value.
            cursor.execute('insert into table2(my_href)values(%s)', (my_href,))
        conn.commit()
    finally:
        conn.close()




if __name__=='__main__':
    # Entry point of the crawler: build the listing URLs for pages 2..81 and
    # split them across three worker processes.
    logging.basicConfig(filename='logs',format='%(message)s--%(asctime)s')
    url = ['https://www.2222zw.com/html/artlist/toukuizipai/23_{}.json'.format(str(i)) for i in range(2, 82)]
    # The three slices must be disjoint. Handing the first worker the whole
    # list made it repeat the pages of the other two and insert their links
    # a second time.
    workers = [
        Process(target=func1, args=(chunk,))
        for chunk in (url[:40], url[40:60], url[60:])
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()




import pymysql
import requests
class My_images:
    """Download the images whose URLs are stored in ``table2``.

    Files are written to the ``imgs`` directory as ``1.jpg``, ``2.jpg``, ...
    in the order the rows are returned by the query.
    """

    def __init__(self):
        """Open the MySQL connection and a cursor on it."""
        setting={
            'user':'root',
            'port':3306,
            'password':'123456',
            'host':'localhost',
            'db':'one',
            'charset':'utf8'
        }
        # Keep the connection itself so that it can be closed explicitly.
        self.conn = pymysql.connect(**setting)
        self.conn_obj = self.conn.cursor()

    def func1(self):
        """Fetch every stored link and download it, printing 'ok' per image."""
        sql='select my_href from table2'
        self.conn_obj.execute(sql)
        # File numbering starts at 1, matching the previous hand-kept counter.
        for ii, row in enumerate(self.conn_obj.fetchall(), start=1):
            self.func2(row[0], ii)
            print('ok')

    def func2(self,my_href,ii):
        """Download ``my_href`` and save the response body as ``imgs/<ii>.jpg``.

        Args:
            my_href: URL of the image.
            ii: Number used as the file name.
        """
        import os

        # open() does not create missing directories.
        os.makedirs('imgs', exist_ok=True)
        html=requests.get(my_href)
        with open('imgs/{}.jpg'.format(str(ii)),'wb') as f:
            f.write(html.content)

    def close(self):
        """Close the cursor and the database connection."""
        self.conn_obj.close()
        self.conn.close()




# Run the download only when executed as a script. Unguarded module-level
# calls would also run on import, including the re-import performed in child
# processes by multiprocessing's "spawn" start method.
if __name__ == '__main__':
    my=My_images()
    my.func1()




    #


    #
阅读更多
个人分类: python3
上一篇dlib的安装,tensorflow 安装
下一篇语义分类
想对作者说点什么? 我来说一句

没有更多推荐了,返回首页

关闭
关闭
关闭