首先建表
CREATE TABLE `base_company` (
`id` int(10) unsigned NOT NULL AUTO_INCREMENT COMMENT '自增id',
`company_id` int(11) unsigned DEFAULT NULL COMMENT '公司',
`country_id` int(11) unsigned DEFAULT NULL COMMENT '国家',
`company` varchar(255) DEFAULT NULL COMMENT '公司名称',
`company_code` varchar(255) DEFAULT NULL COMMENT '公司社会信用代码',
`apply_date` date DEFAULT '2001-01-01' COMMENT '默认''2001-01-01''',
`abort_apply_date` date DEFAULT '2001-01-01' COMMENT '默认''2001-01-01''',
PRIMARY KEY (`id`) USING BTREE,
UNIQUE KEY `company` (`company`)
) ENGINE=InnoDB AUTO_INCREMENT=133 DEFAULT CHARSET=utf8 ROW_FORMAT=COMPACT COMMENT='公司代码表';
pymysql操作数据库
import pymysql
from config.mysqlconfig import *
from config.import sql_cfg
s = sql_ac()
def sql_database(sql,datainfo=s):
"""
#数据库连接和执行sql语句
:param sql: 需要执行的sql语句
:param datainfo: 数据库连接信息 host、user、password、port、database
:return: 只有sql是查询语句的时候才会return查询出来的结果数据
"""
# print(datainfo) #查看连接信息
conn = pymysql.connect(host=datainfo['host'],user=datainfo['user'],password=datainfo['pwd'],port=datainfo['port'],database=datainfo['database'],charset='utf8')
cur = conn.cursor() #创建游标
#执行sql ---确定是什么sql --是查询还是删除还是修改还是插入
sql = sql.lower() #将字符串中所有字母都转换成小写
if sql.startswith('select') == True: #判断sql是否是查询操作
# print('执行查询操作!')
try:
cur.execute(sql)
data = cur.fetchall() #获取查询出来的数据
return data
except Exception as e:
print('你的sql出错了:{}'.format(e))
elif sql.startswith('delete') == True: #判断sql是否是删除操作
print('执行删除操作!')
try:
cur.execute(sql)
except Exception as e:
print('你的sql出错了:{}'.format(e))
else:
print('执行删除操作成功')
elif sql.startswith('update') == True: #判断sql是否是修改数据操作
print('执行修改数据操作')
try:
cur.execute(sql)
except Exception as e:
print('你的sql出错了:{}'.format(e))
else:
print('数据修改成功')
elif sql.startswith('insert') == True: #判断sql是否是插入数据操作
print('执行插入数据操作')
try:
cur.execute(sql)
except Exception as e:
print('你的sql出错了:{}'.format(e))
else:
print('数据插入成功')
conn.commit()#使用pymysql操作数据库时,增删改与查询是有区别的,在增删改操作时一定要记得conn.commit(),提交当前事务。
cur.close() #关闭游标
conn.close()#关闭数据库连接
#本地环境数据库
s1 = sql_accsit()
def sql_databasesit(sql, datainfo=s1):
"""
#数据库连接和执行sql语句
:param sql: 需要执行的sql语句
:param datainfo: 数据库连接信息 host、user、password、port、database
:return: 只有sql是查询语句的时候才会return查询出来的结果数据
"""
print(datainfo)
conn = pymysql.connect(host=datainfo['host'], user=datainfo['user'], password=datainfo['pwd'],
port=datainfo['port'], database=datainfo['database'], charset='utf8')
cur = conn.cursor() # 创建游标
# 执行sql ---确定是什么sql --是查询还是删除还是修改还是插入
sql = sql.lower() # 将字符串中所有字母都转换成小写
if sql.startswith('select') == True: # 判断sql是否是查询操作
print('执行查询操作!')
try:
cur.execute(sql)
data = cur.fetchall() # 获取查询出来的数据
return data
except Exception as e:
print('你的sql出错了:{}'.format(e))
elif sql.startswith('delete') == True: # 判断sql是否是删除操作
print('执行删除操作!')
try:
cur.execute(sql)
except Exception as e:
print('你的sql出错了:{}'.format(e))
else:
print('执行删除操作成功')
elif sql.startswith('update') == True: # 判断sql是否是修改数据操作
print('执行修改数据操作')
try:
cur.execute(sql)
except Exception as e:
print('你的sql出错了:{}'.format(e))
else:
print('数据修改成功')
elif sql.startswith('insert') == True: # 判断sql是否是插入数据操作
print('执行插入数据操作')
try:
cur.execute(sql)
except Exception as e:
print('你的sql出错了:{}'.format(e))
else:
print('数据插入成功')
conn.commit() # 使用pymysql操作数据库时,增删改与查询是有区别的,在增删改操作时一定要记得conn.commit(),提交当前事务。
cur.close() # 关闭游标
conn.close() # 关闭数据库连接
代码如下
import re
import time
import requests
import selenium.webdriver.chrome
from selenium.webdriver.common.by import By
from account.config.ac_config.mysqlconfig import *
from account.common.acpymysql import *
s=sql_sit()
from faker import Faker
#初始化,设置locale为中文;默认是英文
fake = Faker(locale = 'zh_CN')
city=fake.city()
# url="https://www.tianyancha.com/search?key=%E6%B9%96%E5%8D%97%E4%BA%92%E8%81%94%E7%BD%91&sessionNo=1658236710.92335029"
url='https://www.tianyancha.com/search?key={}互联网&sessionNo=1666322342.06145143&pageNum=4'.format(city)
header={
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3883.400 QQBrowser/10.8.4559.400',
}
resp=requests.get(url,headers=header,verify=True)
page_content=resp.text
obj=re.compile(r'<div class="index_name__qEdWi">.*?<span><em>(?P<name>.*?)</span>',re.S)
# obj=re.compile(r'<div class="index_name__qEdWi">.*?<span><em>(?P<name>.*?)</em>(?P<sname>.*?)<em>(?P<hname>.*?)</em>(?P<cname>.*?)</span>',re.S)
result=obj.finditer(page_content)
listcom=[]
for it in result:
# print(it.group("name"))
name=it.group('name')
# print(
# str(name.split('</em>')).split('<em>')
# )
# print(it.group("name")+it.group("sname")+it.group("hname")+it.group("cname"))
# print(str(name.split('</em>')).split('<em>'))
res = re.findall('[\u4e00-\u9fa5]', name) #u4e00-u9fa5 (中文) u0800-u4e00 (日文) uac00-ud7ff(韩文)
# print(res)
a=''.join(res)
# print(''.join(res))
print(a)
listcom.append(a)
# 获取表最大的id,插入数据库需要id
print(listcom)
print(len(listcom))
sqlmaxid='SELECT MAX(id) FROM base_company;'
# 假如没有数据,先插入一条数据
try:
k=int(sql_database(sqlmaxid,s)[0][0])
print(k)
except:
sqlinsetid="INSERT INTO base_company (id,company) VALUES (1,'武汉市友奇人力资源有限公司');"
print(sqlinsetid)
sql_database(sqlinsetid, datainfo=s)
try:
# 把获取到的公司名称插入到数据库
for i in range(len(listcom)):
i += 1
k += 1
print(i)
sql = 'insert into base_company (id,company)values({},\'{}\') '.format(k, listcom[i - 1]) # 加入引号插入数据库
print(sql)
sql_database(sql, datainfo=s)
except:
print('插入数据库报错了')
#通过公司名称查询公司,获取到信用代码
url_tyc='https://www.tianyancha.com/'
header={
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3883.400 QQBrowser/10.8.4559.400',
}
sqlc='SELECT company from base_company WHERE company_code is NULL;'
company=list(sql_database(sqlc,s))
print('公司一共有{}'.format(company))
def companycodedef(companyname):
driver = selenium.webdriver.Chrome()
driver.get(url_tyc)
driver.maximize_window()
# driver.set_window_size('10','1')
driver.implicitly_wait(30)
driver.find_element(By.CSS_SELECTOR,'#page-container > div:nth-child(1)> div > div.container.text-center > div:nth-child(2)> div:nth-child(2)> div > input').send_keys(companyname)
driver.find_element(By.CSS_SELECTOR,
'#page-container > div:nth-child(1)> div > div.container.text-center > div:nth-child(2)> div:nth-child(2)> button > span').click()
driver.find_element(By.XPATH,
'/html/body/div[1]/div[2]/div/div[2]/section/main/div[2]/div[2]/div[1]/div/div[2]/div[2]/div[1]/div[1]/a/span/em').click()
hd = driver.window_handles # 获取当前浏览器上的所有句柄
driver.switch_to.window(hd[-1])
page = driver.page_source
# ","taxNumber":" ","regCapitalCurrency":"人民币"
obj = re.compile(r'"creditCode":"(?P<name>.*?)","fromTime":', re.S)
result = obj.finditer(page)
time.sleep(4)
# print(page)
print(result)
for it in result:
# print(it.group("name"))
name = it.group('name')
print(name)
name=name.upper()
# 'UPDATE base_company SET company_code=123 WHERE company='宜昌市瀚益投资开发有限公司'; '
sql = "UPDATE base_company SET company_code='{}' WHERE company='{}'; ".format(name, companyname) # 加入引号插入数据库
print(sql)
sql_database(sql, datainfo=s)
# res = re.findall(name)
# print(res)
# print(resp.text)
driver.quit()
try:
# 把所有公司都去拉取信用代码
print(company)
lista=[]
for i in range(len(company)):
popc=company.pop(0)
print(popc)
companyname=list(popc)[0]
companycodedef(companyname)
lista.append(list(popc))
print(lista)
except:
print('报错了,可能公司数据数据为None') #把公司为null或者错误数据删除
sqldel="delete from base_company WHERE company not LIKE '%公司%' OR company is NULL;"
print(sqldel)
sql_database(sqldel, datainfo=s)
# 社会信用代码为空
# def