出现的几个问题

import pymongo
from pymongo.mongo_client import MongoClient
import time
import pymysql
import os,urllib
from lxml import etree
import requests
#clienm = MongoClient('localhost',27017)
#mongoDatabase = clienm.test
#clieno = mongoDatabase.Tests

# Module-level MySQL connection; set_mysqldb() writes through it.
# NOTE(review): credentials are hard-coded here - consider moving them to
# configuration or environment variables.
cliend = pymysql.connect(
    host="localhost",
    port=3306,
    user='root',
    passwd='123456',
    db='test',
    charset='utf8',
)
# Despite its name, `conn` is a cursor created from the connection `cliend`.
conn = cliend.cursor()
# Commit issued right after connecting, before any statement has been run.
cliend.commit()


def get_url(url, timeout=10):
    """Collect the detail-page URLs linked from one listing page.

    Fetches ``url``, parses the response as HTML and takes the first
    ``<a href>`` of every ``<li>`` matched by
    ``//div[@class="row"]//ul[2]/li``.

    Args:
        url: Address of the listing page to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives
            up, so a stalled connection cannot hang the crawl forever.

    Returns:
        A list of absolute URLs resolved against ``http://www.24hha.com/``.
        The list is empty when nothing matches.
    """
    # Local import: the file only does ``import urllib``, which does not by
    # itself guarantee that the ``urllib.parse`` submodule is loaded.
    from urllib.parse import urljoin

    base = "http://www.24hha.com/"
    page_source = requests.get(url, timeout=timeout).text
    tree = etree.HTML(page_source)
    # Guard against the parser producing no tree (e.g. an empty body).
    if tree is None:
        return []

    links = []
    for item in tree.xpath('//div[@class="row"]//ul[2]/li'):
        hrefs = item.xpath('./a/@href')
        # Skip list items without a link instead of raising IndexError.
        if not hrefs:
            continue
        # urljoin gives the same result as plain concatenation for simple
        # relative hrefs, and also copes with root-relative ("/x") and
        # already-absolute hrefs.
        links.append(urljoin(base, hrefs[0]))
    return links
def get_pi(urllist, timeout=10):
    """Visit each detail page and hand its first image URL to download_image.

    For every URL in ``urllist`` the page is fetched and parsed, and the
    first value matched by ``//p/img/@src`` is passed to
    ``download_image``.

    Args:
        urllist: Iterable of detail-page URLs (as returned by ``get_url``).
        timeout: Seconds to wait for the server before ``requests`` gives
            up on a page.
    """
    for page_url in urllist:
        page_source = requests.get(page_url, timeout=timeout).text
        tree = etree.HTML(page_source)
        sources = tree.xpath('//p/img/@src') if tree is not None else []
        if not sources:
            # A page without a matching image used to raise IndexError and
            # abort the whole crawl; report it and move on instead.
            print("no image found on {}".format(page_url))
            continue
        download_image(sources[0])
def download_image(url):
    """Record an image URL in MySQL.

    Despite the name, nothing is downloaded: the URL is only forwarded to
    ``set_mysqldb``. The previously commented-out alternatives (saving the
    file under ``D:/book/iyellow`` and storing the URL in MongoDB) and the
    unused ``file_path`` local have been removed.

    Args:
        url: Image URL to store.
    """
    set_mysqldb(url)
#def set_mongodb(url):
#
#    imges = {'images':url}
#    clieno.insert(imges)
def set_mysqldb(url):
    """Insert one image URL into the ``imges`` column of table ``stus``.

    Runs the insert through the module-level cursor ``conn`` and commits it
    on the module-level connection ``cliend``.

    Args:
        url: Image URL (``str``) to store.
    """
    # Pass the value as a query parameter so the driver quotes it. Formatting
    # it into the SQL text broke on URLs containing a double quote and made
    # the statement injectable. The former encode/decode round trip was a
    # no-op for ``str`` and has been dropped.
    sql = 'insert into stus (imges) values (%s)'
    print(sql, url)
    conn.execute(sql, (url,))
    cliend.commit()
    
# Crawl listing pages 2 through 9 and store the first image URL of every
# linked detail page.
# NOTE(review): range(2, 10) skips page 1 - confirm that this is intended.
try:
    for page in range(2, 10):
        url = "http://www.24hha.com/piclist.x?classid=1&page={}".format(page)
        urllist = get_url(url)
        get_pi(urllist)
finally:
    # Release the cursor and the connection even when the crawl aborts.
    conn.close()
    cliend.close()

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值