# Read the company names
# Stage 1: load the company names from an Excel sheet, search each one,
# and collect the ids and company names that get_id() returns.
from urllib.parse import quote  # local import: the file's import header is not visible in this section

path = "C:/Users/wxx/Desktop/公司名称.xlsx"  # the company names are kept in an Excel workbook
df = pd.read_excel(path)
df1 = df['公司名称'].values  # values of the "公司名称" (company name) column
print(df1)

ids = []
companys = []
for key in df1:
    print(key)  # company name currently being searched
    # Percent-encode the name so characters such as '&', '#' or spaces
    # cannot break or truncate the query string.
    url = 'https://www.qichacha.com/search?key=' + quote(key)
    found_ids, company = get_id(url)
    ids.append(found_ids)
    companys.append(company)
    # Throttle the requests to lower the risk of being blocked
    # (the alternative noted by the original author is to use proxy IPs).
    time.sleep(3.44 + random.random())
print(companys)

# Per the original author's note, ids is a two-dimensional list, so it is
# flattened by one level here.
# NOTE(review): the next stage pairs idsshape[i] with companys[i]; that only
# lines up if get_id() yields exactly one id per company — confirm.
idsshape = [single_id for found in ids for single_id in found]
print(idsshape)
# Stage 2: for every company found above, fetch its competitor records and
# write them to a tab-separated text report, one file per company.
from urllib.parse import quote  # local import: the file's import header is not visible in this section

messages = []  # original note: stores company patent information; nothing in this section fills it

for i, company in enumerate(companys):
    company_id = idsshape[i]
    # Percent-encode the company name so it is a valid query-string value.
    url = ('https://www.qichacha.com/company_getinfos?unique=' + company_id
           + '&companyname=' + quote(company) + '&p=' + '&tab=report')
    message = get_message(url)
    companyname = df1[i]
    txtpath = 'C:/Users/wxx/Desktop/竞品公司/' + companyname + '竞品.txt'

    # Open the report once with an explicit UTF-8 encoding. With the locale
    # default encoding, scraped text containing characters outside that code
    # page raised UnicodeEncodeError and the introduction column was dropped.
    # errors="replace" keeps a malformed character from aborting the run.
    with open(txtpath, "w", encoding="utf-8", errors="replace") as f:
        # Header columns, in the same order as the row fields written below.
        f.write('\t\t'.join(
            ['竞争公司', '融资状态', '市值', '日期', '地点', '公司名称', '业务介绍']
        ))
        for row in message:
            # Each record is read by position: 0 name, 1 status, 2 date,
            # 3 location, 4 introduction, 5 company name.
            number = jingpin()
            number.name = row[0]
            number.status = row[1]
            number.date = row[2]
            number.location = row[3]
            number.introduction = row[4]
            number.comname = row[5]

            # Only companies with status 'IPO' or '新三板' get a market-value lookup.
            if number.status in ('IPO', '新三板'):
                url1 = 'http://so.eastmoney.com/web/s?keyword=' + quote(number.name)
                page = get_shizhi(url1)
                print(page)
                number.value = page
            else:
                number.value = '无'

            # The first element of the lookup result is written as the value.
            # NOTE(review): get_shizhi() is not visible here; this presumes it
            # returns a sequence. An empty result is reported as '无' rather
            # than crashing the whole run with IndexError.
            try:
                value_text = str(number.value[0])
            except IndexError:
                value_text = '无'

            f.write('\n' + '\t\t'.join([
                number.name,
                number.status,
                value_text,
                number.date,
                number.location,
                number.comname,
                number.introduction,
            ]))

    print(companyname + '竞品公司收集完成!')
    # Pause between companies to keep the request rate low.
    time.sleep(2.56 + random.random())