# Regular expressions (regex)
import requests
import re
from bs4 import BeautifulSoup
import time
# HTTP request headers passed to every requests.get() call in this script.
# The only entry is a desktop Chrome User-Agent string.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36'
    ),
}
def main():
url='https://search.jd.com/Search?keyword=%E6%89%8B%E6%9C%BA&enc=utf-8&wq=%E6%89%8B%E6%9C%BA&pvid=6d9b66b19bd14681a382b642822d3685'
resl = requests.get(url, headers=headers)
if resl.status_code == 200:
resl.encoding = 'utf-8'
txt = resl.text
txt = re.findall(r'<div class="p-img">\s+?<a(.*?)></a>', txt, re.S)
for txt1 in txt:
urls = re.findall(r'//.+?html', txt1, re.S)
urls = 'http:' + urls[0]
info = requests.get(urls, headers=headers)
info = info.text
pinfo &#