1. 代码不会报错,但是保存的 CSV 文件不能用 pd.read_csv() 读取。
import requests
from lxml import etree
import re
import time
import random
import csv
from fontTools.ttLib import TTFont
import base64
import io
import pandas as pd
import urllib3
urllib3.disable_warnings()
def decode_base64(font_face):
    """Build an HTML-entity -> digit lookup table from a base64-encoded font.

    The payload is base64-decoded, parsed with fontTools' ``TTFont``, and the
    font's best cmap (code point -> glyph name) is converted into a dictionary
    keyed by the hexadecimal character reference of each code point.

    Args:
        font_face: Base64 text of the font file, as accepted by
            ``base64.b64decode``.

    Returns:
        dict: Maps keys of the form ``"&#x<hex>;"`` (lowercase hex, no
        padding) to the glyph's numeric suffix minus one, as a decimal string.

    Raises:
        ValueError: If a glyph name, once ``"glyph"`` is removed, is not an
            integer literal.
    """
    font = TTFont(io.BytesIO(base64.b64decode(font_face)))
    best_cmap = font["cmap"].getBestCmap()
    # Glyph names are expected to look like "glyph00007"; the numeric suffix
    # minus one is the value stored for that code point.
    return {
        "&#x{:x};".format(code_point): str(
            int(glyph_name.replace("glyph", "")) - 1
        )
        for code_point, glyph_name in best_cmap.items()
    }
# HTTP headers passed to requests.get() by the scraping loop.
headers = {
    # Browser-like identity string.
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 "
        "Core/1.70.3775.400 QQBrowser/10.6.4208.400"
    ),
    # Ask the server not to keep the connection alive after each response.
    "Connection": "close",
}
# Spreadsheet whose `link` column is iterated by the loop that follows (the
# file name translates roughly to "rentals - all street links").  A raw string
# keeps the Windows-style backslashes literal instead of relying on invalid
# escape sequences such as "\安", which newer Python versions warn about.
url = pd.read_excel(r".\安居客\租房\租房_所有街道链接.xlsx")
# Counter starting at 1; where it is consumed is not visible in this part of
# the file.
count = 1
for i in url.link.tolist():
base_url=i+'p{}/'
page = 1
while page <= 50:
url = base_url.format(page)
try:
res = requests.get(url, headers=headers, verify=False)
content = res.content.decode("utf-8")
font_face = re.findall("charset=utf-8;base64,(.*)'\) format", content)[0]
except Exce