import os
import re

import requests
from requests.exceptions import RequestException
#from lxml import etree
def get_one_page(url, timeout=10):
    """Download ``url`` and return its body decoded as UTF-8.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        The response text when the server answers with HTTP 200, otherwise
        ``None`` (non-200 status, or the request raised ``RequestException``,
        which includes timeouts).
    """
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36 Edg/94.0.992.31'
    }
    # Only the network call can raise RequestException, so keep the try small.
    try:
        response = requests.get(url, headers=header, timeout=timeout)
    except RequestException:
        return None
    if response.status_code != 200:
        return None
    # Force UTF-8 instead of relying on the encoding guessed from the headers.
    response.encoding = 'utf-8'
    return response.text
def parse_one_page(html):
    """Extract image URLs from the ``<li>`` entries of an HTML page.

    Args:
        html: Page markup as a string. ``None`` or an empty string is
            accepted and yields an empty list, so the result of a failed
            download can be passed straight in.

    Returns:
        A list with the ``src`` value of the first ``<img>`` tag found after
        each ``<li>``, in document order. The list is also printed.
    """
    if not html:
        # A failed download hands us None; re.findall would raise TypeError.
        return []
    # re.S lets ".*?" span newlines between "<li>" and its "<img ...>" tag.
    pattern = re.compile(r'<li>.*?<img src="(.*?)".*?>', re.S)
    items = pattern.findall(html)
    print(items)
    return items
def write_to_file(items, output_dir='E:\\pyexe\\meinv', timeout=10):
    """Download each URL in ``items`` and save it as ``<n>.jpg``.

    Files are numbered from 1 in the order of ``items``. A URL that cannot
    be downloaded (request error or non-200 status) is reported and skipped;
    its number is left unused so the remaining files keep their positions.

    Args:
        items: Iterable of image URLs.
        output_dir: Directory that receives the files. It is created when it
            does not exist yet.
        timeout: Seconds to wait for each download before giving up.

    Returns:
        None.
    """
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36 Edg/94.0.992.31'
    }
    # Marker printed before downloading starts (kept from the original output).
    print(1)
    # open() below would fail with FileNotFoundError on a missing directory.
    os.makedirs(output_dir, exist_ok=True)
    for index, item in enumerate(items, 1):
        file_name = os.path.join(output_dir, str(index) + '.jpg')
        try:
            response = requests.get(item, headers=header, timeout=timeout)
        except RequestException as error:
            # One bad URL must not abort the remaining downloads.
            print('skipped {}: {}'.format(item, error))
            continue
        if response.status_code != 200:
            # Do not store an error page under a .jpg name.
            print('skipped {}: HTTP {}'.format(item, response.status_code))
            continue
        with open(file_name, 'wb') as f:
            f.write(response.content)
    print('complete')
def main(url='http://m.quantuwang1.com/meinv/taotu/index.html'):
    """Fetch the listing page at ``url`` and save the images it references.

    Args:
        url: Page to scrape; defaults to the listing page the script was
            written for.

    Returns:
        None. When the page cannot be downloaded a message is printed and
        nothing is parsed or saved.
    """
    html = get_one_page(url)
    if html is None:
        # get_one_page signals any failure with None; parsing it would raise.
        print('failed to download ' + url)
        return
    items = parse_one_page(html)
    write_to_file(items)
# Run the scraper only when executed as a script, so importing this module
# does not trigger network requests or file writes.
if __name__ == '__main__':
    main()
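# Scrape photos of models from a gallery listing page.
# (Web-page footer captured with the code: "latest recommended article published 2024-07-23 16:23:28".)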