# Website: https://pdf.directindustry-china.cn/pdf/constar-motion-co-ltd-188398.html
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2022-11-11 9:44
# @Author : 陈良兴
# @File : 爬取Constar Motion.py
# @Software : PyCharm
# https://pdf.directindustry-china.cn/pdf-en/constar-motion-co-ltd/constar-high-power-12v24v-coreless-dc-motor-3257n9c2b/188398-994050.html
# https://img.directindustry-china.cn/pdf/repository_di/188398/constar-high-power-12v24v-coreless-dc-motor-3257n9c2b-994050_1mg.jpg
# https://pdf.directindustry-china.cn/pdf-en/constar-motion-co-ltd/constar-high-power-12v24v-coreless-dc-motor-3257n9c2b/188398-994050.html
# 高清图:https://img.directindustry-china.cn/pdf/repository_di/188398/constar-high-power-12v24v-coreless-dc-motor-3257n9c2b-994050_1b.jpg
import os
import re

import requests
# 下载并保存图片
def pic_download(pic_url, pic_name):
    """Download one image and save it as ``./电机/<pic_name>.jpg``.

    The download is best-effort: if the request fails (invalid URL, network
    error, timeout, or an HTTP error status) the image is reported and
    skipped so that the remaining downloads can continue.

    Args:
        pic_url: URL of the image to fetch.
        pic_name: Base file name (without extension) for the saved image.
            Characters that are not legal in file names are replaced
            with ``_``.
    """
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(pic_url, timeout=30)
        # Avoid saving an HTML error page under a .jpg name.
        response.raise_for_status()
    except requests.exceptions.RequestException as err:
        # InvalidURL (the case originally ignored) is a RequestException too.
        print('Skipping %s: %s' % (pic_url, err))
        return

    # The name comes from scraped text; path separators or control characters
    # would make open() fail or write outside the target directory.
    safe_name = re.sub(r'[\\/:*?"<>|\r\n\t]', '_', pic_name)
    os.makedirs('./电机', exist_ok=True)
    path = r'./电机/%s.jpg' % safe_name
    with open(path, 'wb') as fp:
        fp.write(response.content)
def get_detail():
    """Scrape the Constar Motion catalogue page and download its images.

    Fetches the catalogue index page, extracts the ``src`` and ``alt``
    attributes of every ``<img src="..." alt="..." />`` tag with a regular
    expression, and hands each pair to ``pic_download`` with the ``alt``
    text as the file name.

    Raises:
        requests.exceptions.RequestException: If the index page cannot be
            fetched or the server answers with an HTTP error status.
    """
    headers = {
        'user-agent': (
            'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/94.0.4606.71 Mobile Safari/537.36'
        ),
    }
    url = 'https://pdf.directindustry-china.cn/pdf/constar-motion-co-ltd-188398.html'
    response = requests.get(url=url, headers=headers, timeout=30)
    # Fail loudly instead of parsing an error page and finding no images.
    response.raise_for_status()
    page_text = response.content.decode('utf-8')

    pat_link_href = r'<img src="(.*?)" alt="(.*?)" />'
    url_link = re.findall(pat_link_href, page_text, re.S)
    print(url_link)
    for pic_src, pic_alt in url_link:
        # The page embeds the low-resolution "_1mg" image; the "_1b" variant
        # of the same URL is the high-resolution one.
        pic_download(pic_src.replace('_1mg', '_1b'), pic_alt)
if __name__ == '__main__':
    # Run the scraper only when executed as a script, then report completion.
    get_detail()
    print("Download complete!")