import json
import logging
from bs4 import BeautifulSoup
from selenium import webdriver
import requests
import pymysql
from multiprocessing import Process
def func1(url):
    """Scrape every JSON listing page in *url* and persist image links.

    For each listing page, every item's ``playurl`` is turned into a full
    page URL and handed to ``get_things`` (which writes image srcs to
    MySQL via ``connect_mysql``).

    Parameters:
        url -- iterable of JSON listing-page URLs.

    Side effects: launches a Firefox WebDriver (always quit on exit),
    performs HTTP requests, prints progress, logs errors.
    """
    driver = webdriver.Firefox()
    links_done = 0
    pages_done = 0
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36'}
    try:
        # NOTE: loop variable renamed — the original `for url in url`
        # shadowed the parameter with each element.
        for page_url in url:
            try:
                pages_done += 1
                html = json.loads(requests.get(page_url, headers=headers).text)
                print('%d个页面完成' % pages_done)
                for item in html['list']:
                    try:
                        links_done += 1
                        my_url = 'https://www.2222zw.com' + item.get('playurl')
                        get_things(my_url, driver)
                        print('%d个链接完成' % links_done)
                    except Exception as ee:
                        logging.error(ee)
                        print('发生错误%s' % ee)
                    else:
                        logging.info('ok')
            except Exception as e:
                logging.error(e)
                print('发生错误%s' % e)
            else:
                logging.info('just great job')
    finally:
        # The original leaked a Firefox process per call; always clean up.
        driver.quit()
def get_things(url, driver):
    """Load *url* in *driver* and store each '#postmessage > img' src.

    Parameters:
        url    -- page URL to render.
        driver -- a live Selenium WebDriver instance.

    Side effects: navigates the browser; each image src is inserted into
    MySQL via ``connect_mysql``.
    """
    driver.get(url)
    soup = BeautifulSoup(driver.page_source, 'lxml')
    # Original used `for my_href in my_href`, shadowing the result list;
    # distinct names keep the loop readable.
    for img in soup.select('#postmessage > img'):
        connect_mysql(img.get('src'))
def connect_mysql(my_href):
    """Insert *my_href* into ``table2(my_href)`` with a parameterized query.

    Opens a fresh connection per call (simple, but slow for bulk inserts —
    consider a shared connection if volume grows). The connection is now
    closed in all cases; the original leaked one socket per insert.
    """
    setting = {
        'host': 'localhost',
        'user': 'root',
        'password': '123456',
        'db': 'one',
        'charset': 'utf8',
        'port': 3306,
    }
    conn = pymysql.connect(**setting)
    try:
        with conn.cursor() as cursor:
            # Parameterized (%s) — pymysql escapes the value for us.
            cursor.execute('insert into table2(my_href)values(%s)', (my_href,))
        conn.commit()
    finally:
        conn.close()
if __name__ == '__main__':
    logging.basicConfig(filename='logs', format='%(message)s--%(asctime)s')
    url = ['https://www.2222zw.com/html/artlist/toukuizipai/23_{}.json'.format(i) for i in range(2, 82)]
    # Split the 80 pages into three NON-overlapping slices. The original
    # passed the full list to p1, so url[40:] was scraped twice (once by
    # p1 and again by p2/p3).
    p1 = Process(target=func1, args=(url[:40],))
    p2 = Process(target=func1, args=(url[40:60],))
    p3 = Process(target=func1, args=(url[60:],))
    workers = (p1, p2, p3)
    for p in workers:
        p.start()
    for p in workers:
        p.join()
#
#
import logging
from bs4 import BeautifulSoup
from selenium import webdriver
import requests
import pymysql
from multiprocessing import Process
def func1(url):
    """Scrape every JSON listing page in *url* and persist image links.

    For each listing page, every item's ``playurl`` is turned into a full
    page URL and handed to ``get_things`` (which writes image srcs to
    MySQL via ``connect_mysql``).

    Parameters:
        url -- iterable of JSON listing-page URLs.

    Side effects: launches a Firefox WebDriver (always quit on exit),
    performs HTTP requests, prints progress, logs errors.
    """
    driver = webdriver.Firefox()
    links_done = 0
    pages_done = 0
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36'}
    try:
        # NOTE: loop variable renamed — the original `for url in url`
        # shadowed the parameter with each element.
        for page_url in url:
            try:
                pages_done += 1
                html = json.loads(requests.get(page_url, headers=headers).text)
                print('%d个页面完成' % pages_done)
                for item in html['list']:
                    try:
                        links_done += 1
                        my_url = 'https://www.2222zw.com' + item.get('playurl')
                        get_things(my_url, driver)
                        print('%d个链接完成' % links_done)
                    except Exception as ee:
                        logging.error(ee)
                        print('发生错误%s' % ee)
                    else:
                        logging.info('ok')
            except Exception as e:
                logging.error(e)
                print('发生错误%s' % e)
            else:
                logging.info('just great job')
    finally:
        # The original leaked a Firefox process per call; always clean up.
        driver.quit()
def get_things(url, driver):
    """Load *url* in *driver* and store each '#postmessage > img' src.

    Parameters:
        url    -- page URL to render.
        driver -- a live Selenium WebDriver instance.

    Side effects: navigates the browser; each image src is inserted into
    MySQL via ``connect_mysql``.
    """
    driver.get(url)
    soup = BeautifulSoup(driver.page_source, 'lxml')
    # Original used `for my_href in my_href`, shadowing the result list;
    # distinct names keep the loop readable.
    for img in soup.select('#postmessage > img'):
        connect_mysql(img.get('src'))
def connect_mysql(my_href):
    """Insert *my_href* into ``table2(my_href)`` with a parameterized query.

    Opens a fresh connection per call (simple, but slow for bulk inserts —
    consider a shared connection if volume grows). The connection is now
    closed in all cases; the original leaked one socket per insert.
    """
    setting = {
        'host': 'localhost',
        'user': 'root',
        'password': '123456',
        'db': 'one',
        'charset': 'utf8',
        'port': 3306,
    }
    conn = pymysql.connect(**setting)
    try:
        with conn.cursor() as cursor:
            # Parameterized (%s) — pymysql escapes the value for us.
            cursor.execute('insert into table2(my_href)values(%s)', (my_href,))
        conn.commit()
    finally:
        conn.close()
if __name__ == '__main__':
    logging.basicConfig(filename='logs', format='%(message)s--%(asctime)s')
    url = ['https://www.2222zw.com/html/artlist/toukuizipai/23_{}.json'.format(i) for i in range(2, 82)]
    # Split the 80 pages into three NON-overlapping slices. The original
    # passed the full list to p1, so url[40:] was scraped twice (once by
    # p1 and again by p2/p3).
    p1 = Process(target=func1, args=(url[:40],))
    p2 = Process(target=func1, args=(url[40:60],))
    p3 = Process(target=func1, args=(url[60:],))
    workers = (p1, p2, p3)
    for p in workers:
        p.start()
    for p in workers:
        p.join()
import pymysql
import requests


class My_images:
    """Read stored image URLs from MySQL and download each to imgs/<n>.jpg.

    NOTE(review): assumes an ``imgs/`` directory already exists next to the
    script — confirm, or create it before running.
    """

    def __init__(self):
        # One connection/cursor held for the object's lifetime.
        setting = {
            'user': 'root',
            'port': 3306,
            'password': '123456',
            'host': 'localhost',
            'db': 'one',
            'charset': 'utf8',
        }
        conn = pymysql.connect(**setting)
        self.conn_obj = conn.cursor()

    def func1(self):
        """Fetch every my_href stored in table2 and download each one."""
        self.conn_obj.execute('select my_href from table2')
        # enumerate replaces the original hand-rolled `ii += 1` counter.
        for ii, row in enumerate(self.conn_obj.fetchall(), start=1):
            self.func2(row[0], ii)
            print('ok')

    def func2(self, my_href, ii):
        """Download *my_href* and write it to imgs/<ii>.jpg."""
        html = requests.get(my_href)
        with open('imgs/{}.jpg'.format(ii), 'wb') as f:
            f.write(html.content)


my = My_images()
my.func1()
#
#