A crawler for www.maigoo.com scenic spots: it collects the name, address, description, and longitude/latitude of every 1A- to 5A-rated attraction.
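Before the full crawler, here is a minimal probe of the listing endpoint it pages through. This is only a sketch: the query-string values are copied from the crawler below, and the three-cells-per-row layout is what the crawler assumes when it parses the response.

# Sketch: fetch one page of the maigoo listing endpoint and inspect the <td> cells.
import requests
from bs4 import BeautifulSoup

url = 'https://www.maigoo.com/public/mod/php/getpage.php?action=getpage&dataid=10620592&page=1&templateid=136219&ismobile=0&startid=0&num=20&append=1&numshow=3000&blockac=shenghuo&blockitid=419733'
resp = requests.get(url, timeout=10)
soup = BeautifulSoup(resp.content, 'lxml')
# Cells come in groups of three per attraction; the second holds the name link
# and the third the city, which is how the crawler below indexes them.
for td in soup.find_all('td')[:6]:
    print(td)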

The full crawler code follows.

# Crawler dependencies
import random
import re
import time

import requests
from bs4 import BeautifulSoup

# Desktop User-Agent strings; one is chosen at random per crawl session.
user_agent_list = [
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:73.0) Gecko/20100101 Firefox/73.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.5 Safari/605.1.15',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0'
]
# Helper for safely reading nested dict values; not used by the crawler below.
def load_data_from_dict(o, *keys):
    oo = o
    for i, key in enumerate(keys):
        if not oo:
            return None
        if i == (len(keys) - 1):
            return oo.get(key) if isinstance(oo, dict) else None
        oo = oo.get(key) if isinstance(oo, dict) else oo

def write_fun(line):
    # Append one CSV row to the output file.
    with open('maigoo景点4A.csv', 'a', encoding='utf-8') as f:
        f.write(line)

class maigoo:
    def __init__(self):
        # One session so headers (User-Agent) persist across requests.
        self.session = requests.session()

    def run(self):
        self.get_main()

    def get_main(self):
        # Pick a random User-Agent for this crawl session.
        self.session.headers.update(
            {'user-agent': random.choice(user_agent_list)}
        )
        # The listing endpoint pages in steps of 20 (startid = 0, 20, ..., 3000).
        for start in range(0, 3020, 20):
            print(start)
            url = 'https://www.maigoo.com/public/mod/php/getpage.php?action=getpage&dataid=10620592&page=1&templateid=136219&ismobile=0&startid={}&num=20&append=1&numshow=3000&blockac=shenghuo&blockitid=419733'.format(start)
            print(url)
            try:
                r = self.session.get(url)
                print(r)
            except requests.exceptions.ChunkedEncodingError:
                print('requests.exceptions.ChunkedEncodingError')
                continue  # skip this listing page rather than reuse a stale response
            time.sleep(1)  # small delay between listing pages
            soup = BeautifulSoup(r.content, 'lxml')


            # Each attraction occupies three <td> cells; the second holds the
            # name link and the third the city.
            tds = soup.find_all('td')
            for i in range(0, len(tds), 3):
                if i + 2 >= len(tds):
                    break
                name_cell = str(tds[i + 1])
                city_cell = str(tds[i + 2])
                # Attraction name and detail-page URL from the name cell.
                pointname = re.findall(
                    '<td class="sch_name"><a href=".*?" target="_blank">(.*?)</a></td>',
                    name_cell, re.S)
                pointurl = re.findall(
                    '<td class="sch_name"><a href="(.*?)" target="_blank">.*?</a></td>',
                    name_cell, re.S)
                cityname = re.findall('<td>(.*?)</td>', city_cell, re.S)
                try:
                    r1 = self.session.get(pointurl[0])
                except Exception:
                    # No detail link (or the request failed): fall back to the plain
                    # name cell and write a row with empty detail fields.
                    pointname = re.findall('<td class="sch_name">(.*?)</td>', name_cell, re.S)
                    line1 = ','.join([
                        pointname[0] if pointname else '', '',
                        cityname[0] if cityname else '', '', '', '', '']) + '\n'
                    print(line1)
                    write_fun(line1)
                    continue
                soup1 = BeautifulSoup(r1.content, 'lxml')

                # Attraction description block, flattened to a single line.
                tese_div = soup1.find('div', attrs={'class': 'sc_tese'})
                sc_tesefin = ' '.join(tese_div.text.split('\n')) if tese_div else ''

                # Address and map coordinates embedded in the detail page.
                addr = re.findall(
                    '<em class="fcolor bdcolor">所在地/隶属:</em>.*?<span class="c666 dhidden">(.*?)</span>',
                    str(soup1), re.S)
                allmap = soup1.find('div', attrs={'id': 'allmap'})
                citylonlat = re.findall(
                    '<div class="ditucont" id="allmap" mapx="(.*?)" mapy="(.*?)" provice=".*?"></div>',
                    str(allmap), re.S)
                lon = citylonlat[0][0] if citylonlat else ''
                lat = citylonlat[0][1] if citylonlat else ''

                # Fall back to empty strings for any field that could not be parsed.
                pointnamefin = pointname[0] if pointname else ''
                pointurlfin = pointurl[0] if pointurl else ''
                citynamefin = cityname[0] if cityname else ''
                addrfin = addr[0] if addr else ''
                line = ','.join([str(pointnamefin), str(pointurlfin), str(citynamefin),
                                 str(sc_tesefin), str(lon), str(lat), str(addrfin)]) + '\n'
                print(line)
                write_fun(line)




if __name__ == '__main__':
    spider = maigoo()
    spider.get_main()
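One caveat on the output: rows are built by joining fields with commas, so a description or address that itself contains a comma will shift the columns. Below is a minimal sketch of a safer writer using the standard csv module; the column names and the write_row helper are illustrative, not part of the script above.

# Sketch: write rows with csv so embedded commas and quotes are escaped.
import csv
import os

FIELDS = ['name', 'detail_url', 'city', 'description', 'lon', 'lat', 'address']

def write_row(row, path='maigoo景点4A.csv'):
    # Append one properly quoted row; write the header once when the file is new.
    new_file = not os.path.exists(path)
    with open(path, 'a', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        if new_file:
            writer.writerow(FIELDS)
        writer.writerow(row)

Replacing write_fun(line) with write_row([pointnamefin, pointurlfin, citynamefin, sc_tesefin, lon, lat, addrfin]) would keep the rest of the crawler unchanged.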
