# Python: download website images matching keywords read from a local file.

def decode_gb2312(data):
    """Parse one whitespace-separated input line into GB2312 percent-encoded
    search parameters.

    Expected line format: ``<ignored> <kw-part-1> <kw-part-2> <category>``
    — fields 1 and 2 are joined into the search keyword, field 3 is the
    search type/category.

    Returns:
        tuple[str, str, str]: ``(encoded_keyword, encoded_category, file_name)``
        where the first two have EVERY GB2312 byte percent-encoded as
        uppercase hex (``%XX``), and ``file_name`` is the raw keyword +
        category, used by the caller as an image-file name prefix.

    Raises:
        IndexError: if the line has fewer than 4 whitespace-separated fields.
        UnicodeEncodeError: if a field cannot be encoded as GB2312.
    """
    fields = data.split()
    key_word = fields[1] + fields[2]
    # Renamed from `type` — the original shadowed the builtin.
    category = fields[3]
    file_name = key_word + category

    def percent_encode(text):
        # Percent-encode every GB2312 byte (uppercase hex). urllib.parse.quote
        # is deliberately NOT used: it would leave ASCII bytes unescaped,
        # changing the URLs the original code produced.
        return ''.join(f'%{b:02X}' for b in text.encode('gb2312'))

    encoded_category = percent_encode(category)
    encoded_key_word = percent_encode(key_word)
    print(encoded_category)
    print(encoded_key_word)
    return encoded_key_word, encoded_category, file_name


def one():
    """Read keyword lines from a local file, search chinawestagr.com for each
    keyword, and download matching crop images into a local folder.

    Each input line is converted by decode_gb2312() into percent-encoded
    GB2312 query parameters for the search URL. Depends on the third-party
    `requests` and `pyquery` (``pq``) names being imported elsewhere in the
    file.
    """
    with open(r"D:\image_keyword", encoding="utf-8-sig") as file:
        for data in file.readlines():
            key_word2, type2, file_name1 = decode_gb2312(data)
            url = 'http://www.chinawestagr.com/bch/searchResult.aspx?type='+type2+'&context='+key_word2
            print(url)
            result = requests.get(url).text

            html = pq(result)
            print(html('title'))
            print(html('a').attr('href'))
            a = html('a').attr('href')
            # Only follow the first link when it points at a crop-detail page.
            if a is not None and 'CropContent' in a:
                href = 'http://www.chinawestagr.com/bch/' + a
                detail = pq(requests.get(href).text)
                imgs = detail('img')
                print(imgs)
                for i, img_el in enumerate(imgs):
                    img = img_el.attrib['src']
                    if 'UploadFiles' not in img:
                        continue
                    img_url = 'http://www.chinawestagr.com/bch/' + img
                    try:
                        # 5-second timeout. Catch RequestException, which also
                        # covers Timeout — the original timeout=5 could raise it
                        # but only ConnectionError was handled.
                        pic = requests.get(img_url, timeout=5)
                    except requests.exceptions.RequestException:
                        # BUG FIX: in the original while-loop, this `continue`
                        # skipped the index increment and looped forever on a
                        # failing image; enumerate() makes continue safe.
                        print('当前图片无法下载')
                        continue
                    # Image name: keyword+category prefix plus the img index.
                    file_name = "D:/spider/citrus_spider/spiderfiles/grapimages/" + file_name1 + str(i) + ".jpg"
                    print(file_name)
                    # `with` guarantees the handle is closed even if the write fails.
                    with open(file_name, 'wb') as fp:
                        fp.write(pic.content)
 if __name__ == '__main__':
         one()
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值