首先从国家统计局爬出2017年中国省市县乡4级行政区划
import requests
from bs4 import BeautifulSoup as bs
def get_rslt(abc, timeout=10):
    """Download one page of the 2017 division-code listing and return its links.

    Args:
        abc: Page path relative to the 2017 listing root, e.g. 'index.html'.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the crawl indefinitely.

    Returns:
        A list of ``[href, href[:2], first_child]`` entries, one per ``<a>``
        tag that carries an ``href``, in document order, with the last
        anchor of the page dropped.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    base_url = 'http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2017/'
    url = base_url + abc
    kv = {'user-agent': 'Mozilla/5.0'}
    r = requests.get(url, headers=kv, timeout=timeout)
    r.raise_for_status()
    # Let requests guess the charset from the body instead of the headers.
    r.encoding = r.apparent_encoding
    soup = bs(r.text, 'lxml')
    # href=True skips anchors without a link target, which would otherwise
    # raise KeyError on link['href'].
    rslt = [
        [link['href'], link['href'][:2], link.contents[0]]
        for link in soup.find_all('a', href=True)
    ]
    # The final anchor is discarded -- presumably a footer link that is not
    # part of the listing (TODO confirm against the live page).
    return rslt[:-1]
# Crawl the index page: remember every province page link and map the first
# two characters of that link (used as the province code) to the link text.
province_html = []
province_dic = {}
for page_href, province_code, province_name in get_rslt('index.html'):
    province_html.append(page_href)
    province_dic[province_code] = province_name
# Crawl every province page. The anchors are consumed in pairs: the even
# positions supply the code text, the odd positions the name text.
city_html = []
city_dic = {}
for province_page in province_html:
    province_key = province_page.split('.')[0]
    rows = get_rslt(province_page)
    hrefs = [row[0] for row in rows]
    texts = [row[2] for row in rows]
    # First four characters of the code text -> name, per province.
    city_dic[province_key] = {
        code[:4]: name for code, name in zip(texts[0::2], texts[1::2])
    }
    # Keep one link per pair (the odd-position one) for the next level.
    city_html.extend(hrefs[1::2])
# Crawl every city page collected above. Anchors are again consumed in
# pairs: even positions supply the code text, odd positions the name text.
county_html = []
county_dic = {}
for city_page in city_html:
    # Characters 3..6 of the link are used as the key for this city.
    city_key = city_page[3:7]
    rows = get_rslt(city_page)
    hrefs = [row[0] for row in rows]
    texts = [row[2] for row in rows]
    # First six characters of the code text -> name, per city.
    county_dic[city_key] = {
        code[:6]: name for code, name in zip(texts[0::2], texts[1::2])
    }
    # Keep one link per pair (the odd-position one) for the next level.
    county_html.extend(hrefs[1::2])
# Container for the county key -> {town code: town name} mapping; presumably
# meant to collect the per-batch dictionaries returned by abc() (same shape)
# -- TODO confirm.
town_dic = {}
def abc(htmllist):
    """Crawl a batch of county pages and return their town listings.

    Args:
        htmllist: County page links as collected in ``county_html``.

    Returns:
        A dict keyed by characters 3..8 of each link, whose values map the
        first nine characters of each code text to the matching name text.
        One progress line is printed per page.
    """
    towns_by_county = {}
    for page in htmllist:
        # The page path is rebuilt by prefixing characters 3..4 of the link
        # as an extra directory level.
        page_path = page[3:5] + '/' + page
        county_key = page[3:9]
        print(county_key, page_path)
        texts = [row[2] for row in get_rslt(page_path)]
        # Even positions hold the code text, odd positions the name text.
        towns_by_county[county_key] = {
            code[:9]: name for code, name in zip(texts[0::2], texts[1::2])
        }
    return towns_by_county
# Crawl the county pages in batches of 500 links; the last batch takes
# everything from index 2500 onwards.
county_html1 = county_html[:500]
county_html2 = county_html[500:1000]
county_html3 = county_html[1000:1500]
county_html4 = county_html[1500:2000]
county_html5 = county_html[2000:2500]
county_html6 = county_html[2500:]
town1 = abc(county_html1)
town2 = abc(county_html2)
town3 = abc(county_html3)
town4 = abc(county_html4)
town5 = abc(county_html5)
town6 = abc(county_html6)
# Merge the per-batch results into town_dic, which was declared empty above
# and otherwise never receives any data.
for _batch in (town1, town2, town3, town4, town5, town6):
    town_dic.update(_batch)
结果保存在a.py中,过程略过。因为乡镇太多,爬起来有点卡,所以分了6段爬。
#!/usr/bin/env python
#coding: utf-8
from PyQt5.QtGui import *
from PyQt5.QtCore import *
from PyQt5.QtWidgets import *
from PyQt5 import QtWidgets
import a
import sys
class Address(QWidget):
    """Cascading province / city / county / town picker.

    Four combo boxes are chained through ``currentTextChanged``: changing one
    level repopulates the level below it. The selected town's code is shown
    in the ``id`` line edit and the concatenated names in ``location``.
    The lookup tables come from module ``a``.
    """

    def __init__(self):
        super(Address, self).__init__()
        self.province_list = a.province_dic
        self.city_list = a.city_dic
        self.county_list = a.county_dic
        self.town_list = a.town_dic
        self.set_ui()

    def set_ui(self):
        """Create the widgets, wire the cascade and build the grid layout."""
        self.province_label = QLabel("province")
        self.province = QComboBox()
        self.city_label = QLabel("city")
        self.city = QComboBox()
        self.county_label = QLabel("county")
        self.county = QComboBox()
        self.town_label = QLabel("town")
        self.town = QComboBox()
        self.id_label = QLabel('id')
        self.id = QLineEdit()
        self.location_label = QLabel("address")
        self.location = QLineEdit()

        # Connect the slots before adding any province so that inserting the
        # first item already fills the lower levels through the cascade.
        self.province.currentTextChanged.connect(self.choose_city)
        self.city.currentTextChanged.connect(self.choose_county)
        self.county.currentTextChanged.connect(self.choose_town)
        self.town.currentTextChanged.connect(self.show_code)
        for code, name in self.province_list.items():
            self.province.addItem(name, QVariant(code))

        # NOTE(review): the attribute name ``layout`` shadows the inherited
        # QWidget.layout() method on this instance; kept because code outside
        # this class may read the attribute.
        self.layout = QGridLayout()
        self.toplayout = QVBoxLayout()
        self.layout.addWidget(self.province_label, 0, 0)
        self.layout.addWidget(self.province, 0, 1)
        self.layout.addWidget(self.city_label, 0, 2)
        self.layout.addWidget(self.city, 0, 3)
        self.layout.addWidget(self.county_label, 1, 0)
        self.layout.addWidget(self.county, 1, 1)
        self.layout.addWidget(self.town_label, 1, 2)
        self.layout.addWidget(self.town, 1, 3)
        self.layout.addWidget(self.id_label, 2, 0)
        self.layout.addWidget(self.id, 2, 1, 1, 3)
        self.layout.addWidget(self.location_label, 3, 0)
        self.layout.addWidget(self.location, 3, 1, 1, 3)
        self.a = QWidget()
        self.a.setLayout(self.layout)
        self.toplayout.addWidget(self.a)
        self.setLayout(self.toplayout)

    def choose_city(self):
        """Repopulate the city box for the currently selected province."""
        province_id = self.province.currentData()
        self.city.clear()
        self.county.clear()
        self.town.clear()
        # Same empty-selection guard as choose_county / choose_town; an
        # unknown code leaves the box empty instead of raising KeyError.
        if province_id:
            for code, name in self.city_list.get(province_id, {}).items():
                self.city.addItem(name, QVariant(code))

    def choose_county(self):
        """Repopulate the county box for the currently selected city."""
        city_id = self.city.currentData()
        self.county.clear()
        if city_id:
            for code, name in self.county_list.get(city_id, {}).items():
                self.county.addItem(name, QVariant(code))

    def choose_town(self):
        """Repopulate the town box for the currently selected county."""
        self.town.clear()
        county_id = self.county.currentData()
        if county_id:
            for code, name in self.town_list.get(county_id, {}).items():
                self.town.addItem(name, QVariant(code))

    def show_code(self):
        """Show the selected town's code and the concatenated address text."""
        # currentData() is None while the town box is empty (this slot also
        # fires when the box is cleared); setText() only accepts a str.
        self.id.setText(self.town.currentData() or "")
        address_detail = (
            self.province.currentText()
            + self.city.currentText()
            + self.county.currentText()
            + self.town.currentText()
        )
        self.location.setText(address_detail)
# Stand-alone demo: show a single Address picker and run the Qt event loop
# until the window is closed.
if __name__ == "__main__":
    qt_app = QApplication(sys.argv)
    picker_window = Address()
    picker_window.show()
    exit_code = qt_app.exec_()
    sys.exit(exit_code)
其他代码引用时,直接从 address 模块导入并创建 Address 类的实例就可以了
from PyQt5.QtGui import *
from PyQt5.QtCore import *
from PyQt5.QtWidgets import *
from PyQt5 import QtWidgets
import sys
from address import *
class Regist(QWidget):
    """Demo form that stacks two Address pickers above a submit button."""

    def __init__(self):
        super(Regist, self).__init__()
        self.set_ui()

    def set_ui(self):
        """Create both address pickers and the button, then lay them out."""
        # NOTE(review): the attribute name ``layout`` shadows the inherited
        # QWidget.layout() method on this instance; kept because code outside
        # this class may read the attribute.
        self.layout = QVBoxLayout()

        picker_a = Address()
        picker_a.location_label.setText("location_a")
        # Keep a handle on each picker's address line edit for later reading.
        self.address_a = picker_a.location

        picker_b = Address()
        picker_b.location_label.setText("location_b")
        self.address_b = picker_b.location

        # Label fixed: the original read 'sumbit'.
        self.commit_bnt = QPushButton('submit')
        self.commit_bnt.clicked.connect(self.submit)

        self.layout.addWidget(picker_a)
        self.layout.addWidget(picker_b)
        self.layout.addWidget(self.commit_bnt)
        self.setLayout(self.layout)

    def loc_b(self):
        """Placeholder; currently does nothing."""
        pass

    def submit(self):
        """Slot for the button's ``clicked`` signal; currently does nothing."""
        pass
# Stand-alone demo: show the Regist form and run the Qt event loop until the
# window is closed.
if __name__ == "__main__":
    qt_app = QApplication(sys.argv)
    regist_window = Regist()
    regist_window.show()
    exit_code = qt_app.exec_()
    sys.exit(exit_code)