python爬虫城市数据

示例一

# -*- coding:utf-8 -*-
import csv
import json
import codecs
import os
import string
import sys
import time
import urllib.request
from urllib.parse import quote
from builtins import object, float, range, int, len, open, list, str

class BaiDuPOI(object):
    """Query the Baidu Place search API for one keyword inside one bounding box.

    Args:
        itemy: Search keyword (POI category), e.g. '学校'.
        loc: Bounding box passed verbatim as the ``bounds`` query parameter,
            formatted as 'lat_sw,lng_sw,lat_ne,lng_ne'.
        api_key: Baidu API key (``ak``). When omitted, the module-level
            ``baidu_api`` variable is read each time ``urls()`` is called.
        max_pages: Number of result pages to request (``page_size`` is 20).
    """

    def __init__(self, itemy, loc, api_key=None, max_pages=2):
        self.itemy = itemy
        self.loc = loc
        self.api_key = api_key
        self.max_pages = max_pages

    def urls(self):
        """Return the request URLs, one per result page, in page order."""
        # Fall back to the script-level key so existing two-argument callers
        # keep working unchanged.
        api_key = baidu_api if self.api_key is None else self.api_key
        return [
            'http://api.map.baidu.com/place/v2/search?query=' + self.itemy
            + '&bounds=' + self.loc
            + '&page_size=20&page_num=' + str(page)
            + '&output=json&ak=' + api_key
            for page in range(self.max_pages)
        ]

    def baidu_search(self):
        """Fetch every URL from ``urls()`` and return the parsed JSON payloads.

        Returns:
            A list with one parsed JSON payload per requested page, in page
            order.
        """
        data = []
        for url in self.urls():
            # Every printable ASCII character is marked safe, so only the
            # non-ASCII keyword gets percent-encoded.
            s = quote(url, safe=string.printable)
            # The context manager closes the HTTP response even if decoding
            # or parsing fails.
            with urllib.request.urlopen(s) as response:
                json_obj = response.read().decode('utf-8')
            data.append(json.loads(json_obj))
            time.sleep(1)  # pause one second between requests
        return data

class LocaDiv(object):
    """Split one bounding box into a grid of smaller bounding boxes.

    Args:
        loc_all: Bounding box as 'lat_sw,lng_sw,lat_ne,lng_ne'.
        lat_step: Grid spacing along latitude, in coordinate units.
        lng_step: Grid spacing along longitude, in coordinate units.

    Raises:
        ValueError: If a step is not positive, or (when a method is called)
            if ``loc_all`` does not contain exactly four comma-separated
            numbers.
    """

    def __init__(self, loc_all, lat_step=0.2, lng_step=0.3):
        if lat_step <= 0 or lng_step <= 0:
            raise ValueError('lat_step and lng_step must be positive')
        self.loc_all = loc_all
        self.lat_step = lat_step
        self.lng_step = lng_step

    def _bounds(self):
        """Parse ``loc_all`` into [lat_sw, lng_sw, lat_ne, lng_ne] floats."""
        parts = self.loc_all.split(',')
        if len(parts) != 4:
            raise ValueError(
                "loc_all must be 'lat_sw,lng_sw,lat_ne,lng_ne', got %r"
                % (self.loc_all,)
            )
        return [float(part) for part in parts]

    @staticmethod
    def _grid_points(start, stop, step):
        """Return start, start+step, ... followed by ``stop`` itself.

        The last interval absorbs any remainder, so it may be wider than
        ``step``.
        """
        # The 0.0001 slack stops float rounding from dropping a point when
        # the span is an exact multiple of the step. max(1, ...) guarantees
        # that ``start`` is always present, so a region narrower than one
        # step still produces a cell.
        count = max(1, int((stop - start + 0.0001) / step))
        points = [start + step * i for i in range(count)]
        points.append(stop)
        return points

    def lat_all(self):
        """Return the latitude grid lines from the SW to the NE corner."""
        lat_sw, _, lat_ne, _ = self._bounds()
        return self._grid_points(lat_sw, lat_ne, self.lat_step)

    def lng_all(self):
        """Return the longitude grid lines from the SW to the NE corner."""
        _, lng_sw, _, lng_ne = self._bounds()
        return self._grid_points(lng_sw, lng_ne, self.lng_step)

    def ls_com(self):
        """Return every grid vertex as a 'lat,lng' string, latitude-major."""
        lngs = [str(lng) for lng in self.lng_all()]
        return [
            str(lat) + ',' + lng
            for lat in self.lat_all()
            for lng in lngs
        ]

    def ls_row(self):
        """Return every grid cell as 'lat_sw,lng_sw,lat_ne,lng_ne'.

        Cells are ordered row by row: latitude outer, longitude inner.
        """
        lats = [str(lat) for lat in self.lat_all()]
        lngs = [str(lng) for lng in self.lng_all()]
        # Pair each grid line with the next one directly, rather than
        # indexing into the flattened vertex list. That indexing used the
        # latitude count as the row stride and broke on non-square grids.
        return [
            ','.join((lat_lo, lng_lo, lat_hi, lng_hi))
            for lat_lo, lat_hi in zip(lats, lats[1:])
            for lng_lo, lng_hi in zip(lngs, lngs[1:])
        ]

if __name__ == '__main__':
    # Baidu API key (ak). Kept as a module-level name because
    # BaiDuPOI.urls() reads it.
    baidu_api = "*********************"  # fill in your Baidu API ak here
    print("开始爬取数据,请稍等...")
    start_time = time.time()
    loc = LocaDiv('31.69,120.54,32.65,121.95')
    locs_to_use = loc.ls_row()

    # newline='' is what the csv module expects (it avoids blank rows on
    # Windows). An explicit UTF-8 encoding keeps Chinese POI names from
    # depending on the platform's default codec.
    with open('NTPOI.csv', 'a+', newline='', encoding='utf-8') as doc:
        writer = csv.writer(doc)
        for loc_to_use in locs_to_use:
            par = BaiDuPOI('学校', loc_to_use)  # change the category to crawl here
            for page in par.baidu_search():
                # An error payload has no "results" key; treat it as an
                # empty page instead of crashing mid-crawl.
                for poi in page.get("results", []):
                    writer.writerow(list(poi.values()))

    end_time = time.time()
    print("学校爬取完毕,用时%.2f秒" % (end_time - start_time))
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94
  • 95
  • 96
  • 97
  • 98
  • 99
  • 100
  • 101
  • 102
  • 103
  • 104
  • 105
  • 106
  • 107
  • 108
  • 109
  • 110
  • 111
  • 112
  • 113
  • 114
  • 115
  • 116
  • 117
  • 118
  • 119
  • 120
  • 121
  • 122
  • 123
  • 124
  • 125
  • 126
  • 127

示例二

# 提取城市的POI点信息并将其保存至CSV
import csv
import json
import string
import urllib
import urllib.request
from urllib.parse import quote

# Region to crawl. Corners are stored as [lng, lat] in the Baidu coordinate
# system.
left_bottom = [120.89, 31.83]  # bottom-left (south-west) corner
right_top = [121.40, 32.47]  # top-right (north-east) corner
part_n = 8  # divisions per axis, i.e. part_n x part_n slices
url0 = 'http://api.map.baidu.com/place/v2/search?'
x_item = (right_top[0] - left_bottom[0]) / part_n  # slice width (lng)
y_item = (right_top[1] - left_bottom[1]) / part_n  # slice height (lat)
query = '学校'  # search keyword
ak = 'OGTkT2pv********************mm9mek'  # Baidu Maps API key
n = 0  # slice counter

# newline="" is what the csv module expects; the context manager closes the
# file even if a request fails part-way through the crawl.
with open("baidu.csv", "a+", encoding="utf-8", newline="") as datacsv:
    csvwriter = csv.writer(datacsv, dialect="excel")

    for i in range(part_n):
        for j in range(part_n):
            # Bottom-left corner of this slice.
            left_bottom_part = [
                left_bottom[0] + i * x_item,
                left_bottom[1] + j * y_item,
            ]
            # Top-right corner of this slice: exactly one step beyond its
            # bottom-left corner, so slices tile the region without
            # overlapping.
            right_top_part = [
                left_bottom[0] + (i + 1) * x_item,
                left_bottom[1] + (j + 1) * y_item,
            ]
            for k in range(20):
                # bounds is ordered lat,lng,lat,lng, hence index 1 before 0.
                url = (
                    url0 + 'query=' + query
                    + '&page_size=20&page_num=' + str(k)
                    + '&scope=1&bounds='
                    + str(left_bottom_part[1]) + ',' + str(left_bottom_part[0])
                    + ',' + str(right_top_part[1]) + ',' + str(right_top_part[0])
                    + '&output=json&ak=' + ak
                )
                # Every printable ASCII character is marked safe, so only
                # the non-ASCII keyword gets percent-encoded.
                s = quote(url, safe=string.printable)
                with urllib.request.urlopen(s) as data:
                    hjson = json.loads(data.read().decode('utf-8'))
                if hjson.get('message') != 'ok':
                    continue  # skip a failed page, as before
                results = hjson.get('results', [])
                if not results:
                    break  # an empty page means later pages are empty too
                for poi in results:  # write each returned POI as one row
                    csvwriter.writerow(list(poi.values()))
            n += 1
            print('第', str(n), '个切片入库成功')

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35

如图:

在这里插入图片描述
在这里插入图片描述

                                </div><div><div></div></div>
            <link href="https://csdnimg.cn/release/phoenix/mdeditor/markdown_views-60ecaf1f42.css" rel="stylesheet">
                            </div>
        </article>
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值