Python 爬取 2018 年省市区数据并导出到 Excel

工作中需要用到省市区信息,于是写了下面这个爬虫脚本。

import requests
from bs4 import BeautifulSoup
import time
import random
import xlsxwriter
# Entry page of the 2018 statistical division-code listing.
url = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2018/index.html"

# 1 until the first page has been processed by demo(), 0 afterwards.
count = 1

# Parallel output columns: names go to fin_result, codes go to result.
fin_result, result = [], []

# Header row written at the top of the exported sheet.
tag_name = [
    '统计用区划代码',
    '名称',
]

# Queues of page URLs collected for the next crawl levels.
citytrHref, countytrHref = [], []
def demo(url,test,ok):
    """Fetch one listing page and record the codes/names linked from it.

    The page at ``url`` is downloaded, decoded as GBK, and every element
    whose CSS class is ``test`` is scanned cell by cell for ``<a>`` links.
    Findings are accumulated in module-level lists:

    * On the first call (module-level ``count`` is 1) every link text is
      appended to ``fin_result`` together with the placeholder code
      ``'000000000000'`` in ``result``, and the link target of every cell
      that contains a link is appended to ``citytrHref``.
    * On later calls an all-digit link text is appended to ``result`` and
      any other link text to ``fin_result``.
    * When ``ok`` is ``'isok'``, the last link target found in each
      matching row is appended to ``countytrHref``.

    ``count`` is set to 0 before returning, and the function then pauses
    for a random 0-8 seconds so successive requests are spaced out.

    Args:
        url: Absolute URL of the page to fetch.
        test: CSS class of the rows to scan (e.g. ``'provincetr'``).
        ok: Pass ``'isok'`` to queue each row's link in ``countytrHref``;
            any other value disables that.

    Raises:
        requests.RequestException: If the page cannot be fetched within
            the timeout.
    """
    global count
    from urllib.parse import urljoin

    # A timeout keeps one stalled connection from hanging the whole crawl.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.content.decode('gbk'), 'lxml')
    # attrs must be a dict; the former set literal {'class', test} only
    # worked by accident and also matched elements of class "class".
    rows = soup.find_all(attrs={'class': test})
    first_page = count == 1

    for tr in rows:
        row_href = None
        for td in tr.find_all('td'):
            cell_href = None
            for link in td.find_all('a'):
                name = link.text
                # Resolve against the page that was actually fetched, so
                # relative links are correct at every nesting depth.
                cell_href = urljoin(url, link['href'])
                if first_page:
                    result.append('000000000000')
                    fin_result.append(name)
                elif name.isdigit():
                    result.append(name)
                else:
                    fin_result.append(name)
                print(name)
            if cell_href is None:
                # No link in this cell: do not re-queue a stale URL.
                continue
            row_href = cell_href
            if first_page:
                citytrHref.append(cell_href)
        if ok == 'isok' and row_href is not None:
            countytrHref.append(row_href)

    count = 0
    # Throttle once per HTTP request rather than once per parsed link.
    time.sleep(random.random() * 8)
def save_excel(fin_result,result,tag_name,file_name,directory=None):  # store the scraped data in an Excel workbook
    """Write the scraped codes and names to an ``.xlsx`` workbook.

    Row 1 receives the headers from ``tag_name``. Starting at row 2,
    column A receives ``result`` and column B receives ``fin_result``.
    The data is written even when the lists hold two or fewer entries;
    the previous loop skipped the write entirely in that case.

    Args:
        fin_result: Values written down column B (names).
        result: Values written down column A (codes).
        tag_name: Header cells written across row 1.
        file_name: Workbook name without extension.
        directory: Folder to save into. When omitted, the original
            hard-coded desktop folder is used.
    """
    import os

    # xlsxwriter only produces the xlsx format, so the extension is .xlsx;
    # a .xls name makes Excel warn about a format/extension mismatch.
    if directory is None:
        path = r'C:\Users\m1769\Desktop\%s.xlsx' % file_name  # historical default: the desktop
    else:
        path = os.path.join(directory, '%s.xlsx' % file_name)

    # The context manager closes (and thereby saves) the workbook even if
    # one of the writes raises.
    with xlsxwriter.Workbook(path) as book:
        sheet = book.add_worksheet()
        sheet.write_row('A1', tag_name)
        sheet.write_column('A2', result)
        sheet.write_column('B2', fin_result)
if __name__ == '__main__':
    # Level 1: the index page; this fills citytrHref.
    demo(url, 'provincetr', '')

    # Level 2: every queued page from level 1; 'isok' fills countytrHref.
    # Iterate over a snapshot so the number of visits is fixed up front.
    for city_page in list(citytrHref):
        demo(city_page, 'citytr', 'isok')

    # Level 3: every queued page from level 2.
    for county_page in list(countytrHref):
        demo(county_page, 'countytr', '')

    save_excel(fin_result, result, tag_name, '2018城市信息')
    print('导出完成')

©️2020 CSDN 皮肤主题: 大白 设计师: CSDN官方博客 返回首页
实付0元
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、C币套餐、付费专栏及课程。

余额充值