from tkinter import * # 导入窗口控件
import requests
from lxml import etree
import tkinter
from tkinter import ttk
from bs4 import BeautifulSoup
import webbrowser # 调用浏览器打开网页
from tkinter import messagebox # 弹出提示框
import ast
from openpyxl import Workbook
from pandas import DataFrame
import pandas as pd
#爬取排污许可证详细信息并写入EXCEL
def _split_permit_summary(summary):
    """Split the permit summary paragraph into its four field values.

    After all whitespace is removed, the text is expected to look like
    ``<label>:<address>行业类别:<category>所在地区:<region>发证机关:<authority>``,
    i.e. every value is directly followed by the label of the next field.

    NOTE(review): only the ASCII ':' is treated as a separator, exactly as in
    the original code — confirm the live page does not use a full-width colon.

    Args:
        summary: Raw text of the summary paragraph.

    Returns:
        A tuple ``(address, category, region, authority)`` of strings.

    Raises:
        ValueError: If the text does not contain the four expected
            ':'-separated fields.
    """
    # str.split() with no argument splits on every whitespace run, so joining
    # the pieces back together strips all padding between the fields.
    compact = "".join(summary.split())
    parts = compact.split(":", 4)
    if len(parts) != 5:
        raise ValueError(
            "unexpected permit summary layout, expected 4 ':'-separated "
            "fields but found {}: {!r}".format(len(parts) - 1, compact)
        )
    _, address_chunk, category_chunk, region_chunk, authority = parts

    # Each chunk still ends with the label of the following field. partition()
    # keeps the whole chunk when that label is missing, whereas slicing with a
    # failed find() (-1) would silently drop the value's last character.
    address = address_chunk.partition("行业类别")[0]
    category = category_chunk.partition("所在地区")[0]
    region = region_chunk.partition("发证机关")[0]
    return address, category, region, authority


def download_song():
    """Fetch one permit-disclosure page and put its summary fields in a workbook.

    Requests the detail page of a single, hard-coded permit record, extracts
    the bold green summary paragraph, and writes a header row plus one data
    row (address, industry category, region, issuing authority) into the
    active sheet of a new workbook.

    Nothing is written to disk; call ``save(path)`` on the returned workbook
    to persist it. The return value is ignored when this function is used as
    a Tk button command.

    Returns:
        The populated in-memory openpyxl ``Workbook``.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-success HTTP status.
        ValueError: If the summary paragraph is missing or has an unexpected
            layout.
    """
    url = "http://permit.mee.gov.cn/permitExt/xkgkAction!xkgk.action"
    # These are query-string parameters, not HTTP headers; letting requests
    # encode them yields the same URL the original built by concatenation.
    query = {
        "xkgk": "getxxgkContent",
        "dataid": "9339c790f8694f878b81e6fdc864be69",
    }
    # A timeout keeps a stalled server from blocking the GUI callback forever.
    # NOTE(review): 30 seconds is an arbitrary choice — tune as needed.
    response = requests.get(url, params=query, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    summary_tag = soup.find(
        'p', style="font-weight: bold;color: green;font-size: 14px;")
    if summary_tag is None:
        raise ValueError("permit summary paragraph not found in the page")
    address, category, region, authority = _split_permit_summary(
        summary_tag.text)

    wb = Workbook()
    ws = wb.active
    # On a fresh sheet the first append() fills row 1 and the second row 2,
    # i.e. cells A1:D1 (headers) and A2:D2 (values).
    ws.append(['生产经营场所地址', '行业类别', '所在地区', '发证机关'])
    ws.append([address, category, region, authority])
    return wb
# Build the main window.
root = tkinter.Tk()
root.title("label-test")
# Geometry string is "WIDTHxHEIGHT+X+Y": a 200x200 window whose top-left
# corner sits at screen offset (600, 300).
root.geometry("200x200+600+300")

# The single control: clicking it runs download_song.
button = tkinter.Button(
    root,
    text="开始下载",
    font=('华文行楷', 20),
    command=download_song,
)
# Grid row 5, column 0, anchored to the west (left) edge of the cell.
button.grid(row=5, column=0, sticky=tkinter.W)

# Disabled exit button, kept for reference:
# button1 = Button(root, text="退出", font=('华文行楷', 10), command=root.quit)
# button1.grid(row=5, column=1, sticky=W)

# Hand control to Tk's event loop; returns once the window is closed.
root.mainloop()
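# Scraping detailed enterprise information from pollutant discharge permits (test 1)
# Web-page residue: "Latest recommended article published on 2021-06-23 22:34:51"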