百度地图信息采集器-----含UI
界面展示:
数据展示:
技术点:
- requests — 用于对页面发起请求
- threading — 线程库,通过多线程提升页面爬取的速度
- os — 创建文件以及打开指定文件
- xlwt — 将数据写入excel 表中
- pyqt5 — 解决了界面的UI需求
- pyinstaller — 进行打包成exe程序
UI界面部分代码:
class FormLayout(QtWidgets.QWidget):
    """Main window of the Baidu Map collector.

    Holds the search controls (province, keyword field, output folder), a
    three-column result table and the start/open buttons, and connects the
    button signals to their handlers.
    """

    def __init__(self):
        super().__init__()

        # Route right-clicks to a custom context-menu handler.
        self.setContextMenuPolicy(Qt.CustomContextMenu)
        self.customContextMenuRequested.connect(self.rightMenuShow)

        # Window title, icon and initial size.
        self.setWindowTitle('百度地图采集')
        self.setWindowIcon(QIcon('bitbug_favicon.ico'))
        self.resize(1100, 770)

        # Input controls.
        self.province = QComboBox()  # province selector
        self.district = QComboBox()  # district selector (not placed in any layout here)
        self.page_num = QLineEdit()  # placed in the form under the keyword label
        self.page_num.resize(20, 10)
        self.loginBtn = QPushButton("选择存储路径")  # opens the folder picker
        self.edit = QLineEdit()  # shows the chosen folder
        self.edit.resize(20, 10)

        # Action buttons.
        self.save_Btn = QPushButton("打开存储文件")
        self.start_Btn = QPushButton("开始采集")

        # Result table: three labelled columns stretched across the full width.
        self.model = QStandardItemModel(0, 3)
        self.model.setHorizontalHeaderLabels(['标题', '地址', '手机号'])
        self.tableView = QTableView()
        self.tableView.setModel(self.model)
        table_header = self.tableView.horizontalHeader()
        table_header.setStretchLastSection(True)
        table_header.setSectionResizeMode(QHeaderView.Stretch)

        # One form layout per control so each can be positioned on its own.
        self.gridlayout1 = QFormLayout()
        self.gridlayout4 = QFormLayout()
        self.gridlayout8 = QFormLayout()
        self.gridlayout9 = QFormLayout()
        self.gridlayout10 = QFormLayout()
        self.gridlayout11 = QFormLayout()

        # Container layouts. Note that ``vbox`` is horizontal despite its name.
        self.hbox = QHBoxLayout()
        self.vbox = QHBoxLayout()
        # Switching this to QHBoxLayout would place hbox and vbox side by side
        # instead of stacking them.
        self.vlayout = QVBoxLayout()

        # Button wiring.
        self.loginBtn.clicked.connect(self.msg)  # choose output folder
        self.save_Btn.clicked.connect(lambda: self.processing_data())  # open stored file
        self.start_Btn.clicked.connect(self.start_login)  # start collecting

        self.initui()
        self.thread = None  # worker thread placeholder

    def initui(self):
        """Populate the widgets, assemble the layouts and show the disclaimer."""
        provinces = [
            '全国', '热门城市', '安徽', '福建', '广东', '广西', '贵州', '甘肃', '海南', '河南',
            '黑龙江', '湖北', '湖南', '河北', '江苏', '江西', '吉林', '辽宁', '宁夏', '内蒙古',
            '青海', '山东', '山西', '陕西', '四川', '新疆', '西藏', '云南', '浙江',
            '澳门特别行政区', '香港特别行政区',
        ]
        self.province.addItems(provinces)

        # Labelled rows.
        self.gridlayout1.addRow("省份", self.province)
        self.gridlayout4.addRow("采集的关键词", self.page_num)

        # Unlabelled rows: one widget per form layout.
        for form, widget in (
            (self.gridlayout8, self.loginBtn),
            (self.gridlayout9, self.edit),
            (self.gridlayout10, self.save_Btn),
            (self.gridlayout11, self.start_Btn),
        ):
            form.addRow(widget)

        # Top row: province, keyword, folder button, folder path.
        for form in (self.gridlayout1, self.gridlayout4, self.gridlayout8, self.gridlayout9):
            self.hbox.addLayout(form)

        # Bottom row: start button first, then the open-file button.
        for form in (self.gridlayout11, self.gridlayout10):
            self.vbox.addLayout(form)

        # Overall arrangement: controls, table, buttons.
        self.vlayout.addLayout(self.hbox)
        self.vlayout.addWidget(self.tableView)
        self.vlayout.addLayout(self.vbox)
        self.setLayout(self.vlayout)

        # Disclaimer dialog.
        QMessageBox.about(self, "声明:", "此软件为交流学习,不做任何商业牟利行为...")

    def msg(self):
        """Let the user pick a folder, show it in the path field and store it in the global ``path_``."""
        global path_
        chosen_dir = QFileDialog.getExistingDirectory(self, "选择文件夹", "/")
        self.edit.setText(chosen_dir)
        path_ = chosen_dir
代码部分:
class Hot_City_Coordinates:
    """Collect Baidu Map search results for the "hot cities" list.

    ``hot_city`` queries the nationwide search endpoint, then starts one worker
    thread per returned city; each worker (``get_city_data``) downloads that
    city's results and hands them to ``get_save`` through ``self.content_list``.
    """

    # Seconds to wait for a response; without a timeout requests can block forever.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36',
            'Cookie': 'BIDUPSID=708E39B6F293AF8D7B18BD51D6E15C42; PSTM=1596811009; BAIDUID=708E39B6F293AF8D297129216C3D6397:FG=1; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; MCITY=-%3A; delPer=0; PSINO=5; H_PS_PSSID=32294_1456_32300_32380_32327_31660_32351_32046_32398_32116_32525_32482_22160',
        }
        self.content_list = []  # records of the city currently being saved
        self.content_list_ui = []  # every record collected so far, for the UI
        # Serialises publish -> save -> clear so parallel city workers cannot
        # mix their records in content_list or clear it before it is saved.
        self._save_lock = threading.Lock()

    def initialize(self, path_):
        """Make sure the directory ``path_`` exists.

        Returns:
            ``path_`` when the directory exists or was created, ``None`` when it
            could not be created (the error is printed).
        """
        try:
            # exist_ok avoids the check-then-create race between worker threads;
            # makedirs also creates missing parent directories.
            os.makedirs(path_, exist_ok=True)
        except (OSError, TypeError, ValueError) as e:
            print(e)
            return None
        return path_

    def hot_city(self, map_pro, content, _path):
        """Search ``content`` nationwide and start one collector thread per hot city.

        Args:
            map_pro: Region chosen in the UI; only '热门城市' is handled here.
            content: Search keyword.
            _path: Base output directory; results go to ``_path/<map_pro>-<content>``.
        """
        country_url = 'https://map.baidu.com/?newmap=1&reqflag=pcmap&biz=1&from=webmap&da_par=after_baidu&pcevaname=pc4.1&qt=s&da_src=searchBox.button&wd=' + str(
            content) + '&c=1&src=0&wd2=&pn=0&sug=0&l=5&b=(8661331.22,1425243.879999999;14551379.22,7913307.879999999)&from=webmap&biz_forward={%22scaler%22:2,%22styles%22:%22pl%22}&sug_forward=&auth=vQPQf98AKyG0bHBMP8U5%3DyJ31HF0NUV5uxHTNVEERVRtxjhNwzWWvy1uVt1GgvPUDZYOYIZuVtcvY1SGpuEt2gz4yYxGccZcuVtPWv3GuBRtal73hJUvhgMZSguxzBEHLNRTVtcEWe1GD8zv7u%40ZPuzztgwBzvf0wd0vyIUO7AMSFOukoPPB16A2mmiJLL'
        print(country_url)
        # NOTE(review): verify=False disables TLS certificate checking and is not
        # used by get_city_data; confirm whether it is really required.
        response = requests.get(country_url, headers=self.headers, verify=False,
                                timeout=self.REQUEST_TIMEOUT)
        json_con = response.json()

        # Build a name -> coordinates mapping from the module-level table
        # (first line skipped; each remaining line is "<coords> <name>").
        split_result = Provinces_cities_coordinates.split('\n')
        city_dict = {x.split(' ')[1]: x.split(' ')[0] for x in split_result[1:]}
        print(city_dict)

        mkdir_path = _path + '/' + map_pro + '-' + content
        province_path = self.initialize(mkdir_path)
        if province_path is None:
            # Nowhere to store results; do not start workers with a missing path.
            print('无法创建存储目录:', mkdir_path)
            return

        if map_pro != '热门城市':
            return

        auth = json_con['result']['auth']  # same for every city, so read it once
        for x in json_con['content']:
            hot_city_code = x['code']
            hot_city_geo = x['geo']
            hot_city_name = x['name']
            city_ur1 = f'https://map.baidu.com/?newmap=1&reqflag=pcmap&biz=1&from=webmap&da_par=after_baidu&pcevaname=pc4.1&qt=spot&from=webmap&c={hot_city_code}&wd={content}&wd2=&pn=0&nn=0&db=0&sug=0&addr=0&&da_src=pcmappg.poi.page&on_gel=1&src=7&gr=3&l=5&rn=50&tn=B_NORMAL_MAP&auth={auth}&ie=utf-8&b=({hot_city_geo})'
            print('url', city_ur1)
            t = threading.Thread(target=self.get_city_data,
                                 args=(city_ur1, hot_city_name, map_pro, province_path, content))
            t.start()

    def get_city_data(self, city_ur1, city_name, map_pro, _path, content):
        """Download one city's results and pass them to ``get_save``.

        Runs in a worker thread. Any failure is printed and ends this city's
        collection without affecting the other workers.
        """
        try:
            city_all = requests.get(city_ur1, headers=self.headers,
                                    timeout=self.REQUEST_TIMEOUT)
            city_json = city_all.json()
            # Build the records locally first so a malformed entry cannot leave
            # half a city's data in the shared lists.
            records = [
                {
                    'middle_name': city['name'],  # entry name
                    'middle_address': city['addr'],  # entry address
                    'middle_tel': city.get('tel', '暂无电话'),  # phone, if any
                }
                for city in city_json['content']
            ]
        except Exception as e:
            print(e)
            return

        print(city_name, '开始存入')
        with self._save_lock:
            self.content_list_ui.extend(records)
            self.content_list.extend(records)
            try:
                # get_save is defined elsewhere and receives no data argument, so
                # it presumably reads self.content_list. It is called in this
                # worker, under the lock, so the list holds exactly this city's
                # records until the save has finished.
                self.get_save(map_pro, city_name, content, _path)
            except Exception as e:
                print(e)
            finally:
                self.content_list.clear()
关于程序打包
- 下载:
pip install pyinstaller
- cd 进入要打包的文件所在路径
- 输入:
pyinstaller -F 你的文件名路径.py
- -i 给应用程序添加图标
- -F 指定打包后只生成一个exe格式的文件
如想要加程序图标:pyinstaller -F -i ./你的图片.ico 你的文件.py
如果打包失败并弹出下面的错误,可能是因为你的 PyQt5 版本过高或过低,将版本调整至 5.9.x 版本就好
- 最后需要注意的是,需要将exe程序和图片放置在同一个路径里,才可以显示图片;exe程序在打包好的dist文件夹里面
至此 文章结束
欢迎来技术交流 若有爬虫需求也可以联系我
QQ:978662809