下面展示一些内联代码片。
// A highlighted block
import requests, re ,json
from bs4 import BeautifulSoup
import random
import json
import time
# Pool of desktop browser User-Agent strings; one entry is picked at random
# (see random.choice in maigoo.get_main) and set on the HTTP session headers.
user_agent_list = [
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:73.0) Gecko/20100101 Firefox/73.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.5 Safari/605.1.15',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0',
]
def load_data_from_dict(o, *keys):
    """Look up a value in nested dictionaries by following ``keys`` in order.

    Args:
        o: The outermost object, normally a dict.
        *keys: Successive keys; each one is looked up in the value obtained
            from the previous key.

    Returns:
        The value stored under the final key, or ``None`` when no keys are
        given, when any value reached along the way is falsy (``None``,
        empty container, ``0`` ...), when a key is missing, or when the
        object holding the final key is not a dict.
    """
    current = o
    last_index = len(keys) - 1
    for index, key in enumerate(keys):
        if not current:
            return None
        if index == last_index:
            return current.get(key) if isinstance(current, dict) else None
        # A non-dict intermediate value is carried forward unchanged; it can
        # never satisfy the final lookup, so the overall result is None.
        if isinstance(current, dict):
            current = current.get(key)
    # Only reached when no keys were supplied.
    return None
def write_fun(line):
    """Append ``line`` verbatim to the output file ``maigoo景点4A.csv``.

    The file is created in the current working directory if it does not
    exist. It is opened as UTF-8 explicitly so that the Chinese text written
    by the scraper does not depend on the platform's default encoding.

    Args:
        line: Text to append; the caller supplies the trailing newline.
    """
    # The ``with`` block closes the file, so no explicit close() is needed.
    with open('maigoo景点4A.csv', 'a', encoding='utf-8') as f:
        f.write(line)
class maigoo:
    """Scraper for a paginated maigoo.com listing and its detail pages.

    Each listing page is an HTML table read in groups of three ``<td>``
    cells; the second cell of a group holds the entry name (optionally
    linked to a detail page) and the third holds the city. For linked
    entries the detail page is fetched to extract a description, map
    coordinates and an address. One CSV line per entry is passed to
    ``write_fun`` with the columns:
    name, detail URL, city, description, mapx, mapy, address.
    """

    # Listing endpoint; ``{}`` receives the ``startid`` paging offset.
    LIST_URL = 'https://www.maigoo.com/public/mod/php/getpage.php?action=getpage&dataid=10620592&page=1&templateid=136219&ismobile=0&startid={}&num=20&append=1&numshow=3000&blockac=shenghuo&blockitid=419733'
    # Seconds to wait on any single HTTP request before giving up.
    REQUEST_TIMEOUT = 30

    # Patterns are compiled once instead of once per table row.
    _LINKED_NAME_RE = re.compile('<td class="sch_name"><a href=".*?" target="_blank">(.*?)</a></td>', re.S)
    _LINK_URL_RE = re.compile('<td class="sch_name"><a href="(.*?)" target="_blank">.*?</a></td>', re.S)
    _PLAIN_NAME_RE = re.compile('<td class="sch_name">(.*?)</td>', re.S)
    _CITY_RE = re.compile('<td>(.*?)</td>', re.S)
    _ADDRESS_RE = re.compile('<em class="fcolor bdcolor">所在地/隶属:</em>.*?<span class="c666 dhidden">(.*?)</span>', re.S)
    _COORDS_RE = re.compile('<div class="ditucont" id="allmap" mapx="(.*?)" mapy="(.*?)" provice=".*?"></div>', re.S)

    def __init__(self):
        """Create the HTTP session shared by all requests of this scraper."""
        self.session = requests.session()

    def run(self, keyword):
        """Run the scrape.

        Args:
            keyword: Accepted for interface compatibility; not used.
        """
        self.get_main()

    def get_main(self):
        """Walk every listing page and write one CSV line per table entry.

        A listing page that cannot be fetched is reported and skipped so a
        single network error does not abort the whole run.
        """
        # Send a randomly chosen browser User-Agent with every request.
        self.session.headers.update(
            {'user-agent': random.choice(user_agent_list)}
        )
        # startid offsets 0, 20, ..., 3000.
        for offset in range(0, 3020, 20):
            print(offset)
            url = self.LIST_URL.format(offset)
            print(url)
            try:
                response = self.session.get(url, timeout=self.REQUEST_TIMEOUT)
            except requests.exceptions.RequestException as exc:
                print("request failed for startid={}: {!r}".format(offset, exc))
                continue
            print(response)
            cells = BeautifulSoup(response.content, 'lxml').find_all("td")
            # Stop before an incomplete trailing group of cells.
            for start in range(0, len(cells) - 2, 3):
                line = self._build_line(cells[start + 1], cells[start + 2])
                print(line)
                write_fun(line)

    def _build_line(self, name_cell, city_cell):
        """Return the CSV line for one table entry, fetching its detail page if linked."""
        name_html = str(name_cell)
        url = self._first(self._LINK_URL_RE.findall(name_html))
        # Prefer the link text; entries without a link carry the name directly.
        name = (self._first(self._LINKED_NAME_RE.findall(name_html))
                or self._first(self._PLAIN_NAME_RE.findall(name_html)))
        city = self._first(self._CITY_RE.findall(str(city_cell)))

        page = self._fetch_detail(url) if url else None
        if page is None:
            # No detail page available: keep the listing data, leave the rest blank.
            return self._csv_line([name, url, city, '', '', '', ''])

        feature = page.find('div', attrs={'class': 'sc_tese'})
        description = ' '.join(feature.text.split("\n")) if feature is not None else ''
        map_div = page.find('div', attrs={'id': 'allmap'})
        coords = self._COORDS_RE.findall(str(map_div))
        lon, lat = coords[0] if coords else ('', '')
        address = self._first(self._ADDRESS_RE.findall(str(page)))
        return self._csv_line([name, url, city, description, lon, lat, address])

    def _fetch_detail(self, url):
        """Return the parsed detail page at ``url``, or None if the request fails."""
        try:
            response = self.session.get(url, timeout=self.REQUEST_TIMEOUT)
        except requests.exceptions.RequestException as exc:
            print("detail request failed for {}: {!r}".format(url, exc))
            return None
        return BeautifulSoup(response.content, 'lxml')

    @staticmethod
    def _first(matches):
        """Return the first regex match, or an empty string when there is none."""
        return matches[0] if matches else ''

    @staticmethod
    def _csv_line(fields):
        """Return ``fields`` as one newline-terminated CSV line, quoting where needed."""
        # Imported here so the helper works without changing the file's import block.
        import csv
        import io

        buffer = io.StringIO()
        # Fields containing commas, quotes or line breaks are quoted instead
        # of silently shifting the columns of the row.
        csv.writer(buffer, lineterminator='\n').writerow([str(field) for field in fields])
        return buffer.getvalue()
if __name__ == '__main__':
    # Bind the instance to its own name so the class ``maigoo`` is not
    # rebound to an instance at module level.
    spider = maigoo()
    spider.get_main()