这段代码在本地 Python 环境中可以正常运行。
部署到 Tomcat 后发现运行不了,于是加了几处输出,定位是在哪里出了问题。
结果只输出了"1";"2"和"3"没有输出,说明
stop = [line.strip() for line in open('stopwords.txt', encoding='gbk').readlines()] 这一行出了问题。这段代码主要就是读取 stopwords.txt 里的内容,所以问题应该出在这里;但本地 Python 里能运行、上了服务器就不行,我想了下应该是相对路径出了问题——stopwords.txt 在 Tomcat 里的实际路径是 E:\DeveloperKits\apache-tomcat-8.0.53\webapps\jobAnalysis\WEB-INF\classes
于是我把路径改成 Tomcat 下的绝对路径,
再运行发现可以了。
import jieba
import jieba.analyse
import matplotlib.pyplot as plt
import pandas as pd  # fixed: "from pandas import pandas as pd" is not a valid pandas import
from pymongo import MongoClient
from pyecharts import WordCloud
import os

# jieba.initialize()

# Deployed (Tomcat) paths: all data files live inside the webapp deployed
# under $CATALINA_HOME, so both paths below are derived from that variable.
env_dist = os.environ
tomcatpath = env_dist.get('CATALINA_HOME')  # Tomcat home from the environment
if tomcatpath is None:
    # Fail early with a clear message instead of a confusing TypeError when
    # concatenating None with a string below.
    raise RuntimeError('CATALINA_HOME is not set; cannot locate the webapp paths')
# Directory holding stopwords.txt (WEB-INF/classes of the deployed webapp).
stop_txt_path = os.path.join(tomcatpath, 'webapps', 'jobAnalysis', 'WEB-INF', 'classes') + os.path.sep
# Output directory for the generated charts and HTML pages.
tomcatpath = os.path.join(tomcatpath, 'webapps', 'jobAnalysis', 'results') + os.path.sep
print(tomcatpath)
print(stop_txt_path)
'''
数据清洗
'''
def dataFilter( df ):
    """Clean the raw job-postings DataFrame.

    - drops the Mongo ``_id`` column
    - removes internship (实习) positions
    - replaces the textual ``salary`` column (e.g. ``'15k-20k'`` or
      ``'20k以上'``) with a numeric average salary in yuan

    The DataFrame is mutated in place and also returned.
    """
    del df["_id"]
    # 剔除实习岗位 (drop internship rows)
    df.drop(df[df['positionName'].str.contains('实习')].index, inplace=True)
    avgSalarys = []
    for s in df['salary']:
        # Normalise case so '15K-20K' parses the same as '15k-20k'
        # (the original sliced with find('k') == -1 on uppercase input,
        # silently producing a garbage number).
        s = s.lower()
        if s.find('以上') >= 0:
            # Open-ended range like '20k以上': use the stated lower bound.
            avgSalary = int(s[0:s.index('k')] + "000")
        else:
            arrs = s.split("-")
            lowStr = arrs[0]   # e.g. '15k' from '15k-20k'
            highStr = arrs[1]  # e.g. '20k'
            lowSalary = int(lowStr[0:lowStr.find("k")] + "000")
            highSalary = int(highStr[0:highStr.find("k")] + "000")
            avgSalary = (highSalary + lowSalary) / 2
        avgSalarys.append(avgSalary)
    df['salary'] = avgSalarys
    return df
'''
绘制薪资直方图
'''
def showPlot_hist(city, position, citypinying, df):
    """Draw a histogram of the (numeric) salary column and save it as a JPG
    in the Tomcat results directory (module-level ``tomcatpath``)."""
    # Global matplotlib setup so Chinese text renders correctly.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    plt.hist(df['salary'])
    plt.xlabel('salary')
    plt.ylabel('num')

    chart_title = citypinying+'_'+position+'_salary hist'
    plt.title(chart_title)
    out_file = tomcatpath + citypinying+'_'+position+'_salary.jpg'
    plt.savefig(out_file)
    plt.close()
    print('salary success')
'''
地区分布饼图
'''
def showPlot_pie(city,position,citypinying,df):
    """Render a pyecharts pie chart of company counts per district and save
    it as an HTML page under the module-level ``tomcatpath``.

    ``city``/``position`` appear in the chart title; ``citypinying`` names
    the output file.
    """
    from pyecharts import Pie
    from collections import Counter

    pie = Pie(city+'招聘'+position+'岗位公司分布情况分析',title_pos='bottom',width=800)
    # Count companies per district.  Counter preserves first-seen insertion
    # order like the original hand-rolled dict, so the chart is unchanged.
    districts = Counter(df['district'])
    pie.add('公司分布',districts.keys(),districts.values(),center=[25,50],is_random=True,radius=[30,75],rosetype='radius')
    pie.render(tomcatpath + citypinying+"_"+position+"_pie.html")
    print('districts success')
'''
学历要求
'''
def showPlot_bar(city,position,citypinying,df):
    """Render a pyecharts bar chart of education requirements and save it
    as an HTML page under the module-level ``tomcatpath``.

    Bug fix: the original initialised a first-seen category to 0 and only
    incremented on later occurrences, so every category was undercounted
    by one (inconsistent with the counting in ``showWordCloud``).
    """
    from pyecharts import Bar
    from collections import Counter

    # Count each education requirement (first-seen order is preserved).
    counts = Counter(df['education'])
    index = list(counts.keys())
    num = [counts[i] for i in index]

    bar = Bar('学历要求')
    bar.add('学历要求',index,num,mark_line=['min','max'])
    bar.render(tomcatpath + citypinying+'_'+position+'_bar.html')
    print('education success')
'''
福利待遇词云
'''
def showWordCloud(city,position,citypinying,df):
    """Build a word cloud from the ``positionAdvantage`` (benefits) texts
    and save it as an HTML page under the module-level ``tomcatpath``.

    Tokenizes with jieba and filters out the stop words read from
    ``stop_txt_path + 'stopwords.txt'`` (GBK encoded).
    """
    from collections import Counter

    # join() instead of quadratic string += in a loop.
    text = ''.join(line + '\n' for line in df['positionAdvantage'])
    print('1')
    # Use a context manager so the file handle is always closed (the
    # original leaked it), and a set so each membership test is O(1)
    # instead of scanning a list per token.
    with open(stop_txt_path+'stopwords.txt','r',encoding='gbk') as f:
        stop = {line.strip() for line in f}
    print('2')
    # Tokenize and count the words that are not stop words.
    counts = Counter(w for w in jieba.cut(text) if w not in stop)
    index = list(counts.keys())
    num = [counts[i] for i in index]
    word_cloud = WordCloud(width=1700,height=1000)
    word_cloud.add('',index,num,word_size_range=[15,200],shape='cardioid')
    word_cloud.render(tomcatpath + citypinying+'_'+position+'_wordcloud.html')
    print('wordcloud success')
'''
从mongoz中读取数据
'''
def readMongoData(position,citypinying):
    """Load every document from the local MongoDB collection
    ``lagou.<citypinying>_<position>`` into a pandas DataFrame.

    The client is used as a context manager so the connection is closed
    once the documents have been read (the original never closed it).
    """
    with MongoClient('127.0.0.1',27017) as client:
        db = client['lagou']
        collection = db[citypinying+"_"+position]
        data = pd.DataFrame(list(collection.find()))
    return data
import sys

if __name__=="__main__":
    # Defaults used when the script is run without CLI arguments.
    city="上海"
    position = "python"
    citypinying = "shanghai"
    # Expected CLI: script.py <city> <position> <citypinying>.
    # Bug fix: the original tested len(sys.argv) > 2 but then read
    # sys.argv[3], raising IndexError when exactly two args were passed;
    # all three arguments must be present before any are used.
    if len(sys.argv)>3:
        city=sys.argv[1]
        position=sys.argv[2]
        citypinying=sys.argv[3]
    df = readMongoData(position,citypinying)
    df = dataFilter(df)
    showPlot_hist(city, position, citypinying, df)
    showPlot_pie(city, position, citypinying, df)
    showPlot_bar(city, position, citypinying, df)
    showWordCloud(city, position, citypinying, df)