问题描述:
执行以下代码构成的.py文件:
import numpy as np
import pandas as pd
from gensim.corpora import Dictionary
from gensim.models import CoherenceModel
def CalcTopicsCoherence(topics, texts, dictionary, corpus):
def TimeInterval(oldtime,title=0):
import time
newtime=time.time()
if oldtime!=0: print('runtime:%.2f '%(newtime-oldtime),'epoch:',title)
return newtime
oldtime=TimeInterval(0,0)
cm = CoherenceModel(topics=topics, texts=texts, corpus=corpus, dictionary=dictionary, coherence='u_mass')
um_per_topic=cm.get_coherence_per_topic()
oldtime=TimeInterval(oldtime,1)
cm = CoherenceModel(topics=topics, texts=texts, corpus=corpus, dictionary=dictionary, coherence='c_v')
cv_per_topic=cm.get_coherence_per_topic()
oldtime=TimeInterval(oldtime,2)
cm = CoherenceModel(topics=topics, texts=texts, corpus=corpus, dictionary=dictionary, coherence='c_npmi')
npmi_per_topic=cm.get_coherence_per_topic()
oldtime=TimeInterval(oldtime,3)
df=pd.DataFrame({'um_per_topic':um_per_topic, 'cv_per_topic':cv_per_topic, 'npmi_per_topic':npmi_per_topic})
print(df.corr(method='pearson'))
print(um_per_topic,'\r\n',cv_per_topic,'\r\n',npmi_per_topic,'\r\n','-'*50)
return (um_per_topic,cv_per_topic,npmi_per_topic)
texts_oringinal=[['喜欢','电视机','声音','语音','简单' ],
['特别','色彩','投屏','好用','海信' ],
['性价比','高','流畅','挺快','正好' ],
['屏幕','音效','价格','实惠','音质' ],
['送货','太','到货','时间','一个月' ],
['遥控器','手机','想','下载','小孩' ],
['电视机','小米','家里','态度','简单' ],
['单','到货','做工','功能强大','精细' ],
['速度','运行','清晰','声音','流畅' ],
['速度','运行','画质','系统','售后服务' ],
['海信','品牌','值得','购买','信赖' ],
['东西','便宜','推荐','海信','真的' ],
['京东','挺','质量','物流','服务' ],
['挺','55','寸','大气','购买' ],
['性价比','高','寸','55','画面' ],
['性价比','高','感觉','客厅','语音' ],
['品牌','值得','没','信赖','下单' ],
['功能','合适','55','寸','漂亮' ],
['清晰','画面','没','服务态度','舒服' ],
['语音','识别','手机','海信','想' ],
['安装','快','师傅','物流','齐全' ],
['京东','清晰','55','寸','大气' ],
['京东','快','特别','物流','语音' ],
['买','海信','满意','品牌','值得' ],
['特别','合适','色彩','到货','还好' ],
['海信','品牌','大气','值得','挺' ],
['音质','遥控器','超值','边框','手机' ],
['送货','太','发货','时间','海信' ],
['特别','喜欢','电视机','小米','快递' ],
['东西','实惠','推荐','真的','海信' ],
['海信','品牌','合适','55','寸' ],
['清晰','特别','喜欢','画面','下单' ],
['速度','运行','功能','特别','合适' ],
['电视','质量','大气','小米','东西' ],
['电视','不错','买','满意','家里' ],
['安装','速度','运行','京东','清晰' ],
['价格','实惠','挺快','好用','海信' ],
['喜欢','感觉','语音','音质','画质' ],
['安装','运行','师傅','性价比','高' ],
['下单','音质','看着','速度快','舒服' ],
['不错','看着','操作','上网','蛮高' ],
['京东','售后服务','安装费','周到','太贵' ],
['合适','漂亮','同步','海信','师傅' ],
['快递','语音','赞','小哥','识别' ],
['不错','看着','美观','中','送' ],
['服务态度','快递','赞','操作','小哥' ],
['速度','运行','买','清晰','满意' ],
['东西','实惠','便宜','挺快','支持' ],
['买','满意','十二','双十','一买' ],
['挺','美观','中','试用','海信' ],
['买','海信','京东','满意','品牌' ],
['安装','快','到货','齐全','一个月' ],
['安装','快','挺','很快','送货' ],
['品牌','值得','没','信赖','送' ],
['舒服','海信','感觉','师傅','喜欢' ],
['品牌','值得','信赖','下单','速度快' ],
['安装','同步','快','师傅','服务' ],
['海信','品牌','送货','值得','服务态度' ],
['快递','赞','小哥','到货','一个月' ],
['送货','大气','高清','太','时间' ],
['画面','音质','操作','超值','上网' ],
['下单','看着','速度快','送到','小时' ],
['清晰','画质','系统','海信','同步' ],
['品牌','值得','没','信赖','拥有' ],
['价格','实惠','十二','双十','海信' ],
['服务','没','挺快','送','海信' ],
['京东','物流','支持','售后服务','安装费' ],
['性价比','高','正好','上网','海信' ],
['速度','运行','画质','支持','系统' ],
['电视','不错','清晰','很快','送货' ],
['电视','不错','快','很快','质量' ],
['物流','挺快','支持','国产','海信' ],
['性价比','高','大气','语音','高清' ],
['屏幕','音效','效果','功能','语音' ],
['画面','快递','舒服','操作','上网' ],
['速度','功能','很快','送货','合适' ],
['不错','清晰','画面','服务态度','便宜' ],
['清晰','挺','大气','画面','美观' ],
['电视','不错','清晰','质量','真的' ],
['外观','屏幕','外形','音效','效果' ],
['速度','运行','挺','音质','美观' ],
['京东','质量','服务','真的','小米' ],
['价格','小米','实惠','清晰度','做工' ],
['安装','快','齐全','海信','同步' ],
['服务态度','下单','速度快','送到','小时' ],
['外观','屏幕','外形','音效','尺寸' ],
['性价比','高','正好','遥控器','海信' ],
['功能','送货','合适','漂亮','太' ],
['价格','电视机','实惠','便宜','推荐' ],
['挺','大气','推荐','高清','超薄' ],
['功能','品牌','合适','值得','漂亮' ],
['买','满意','海信','喜欢','师傅' ],
['京东','服务','下单','速度快','送到' ],
['挺','大气','高清','超薄','朋友' ],
['买','满意','价格','实惠','好用' ],
['很快','送货','太','时间','海信' ],
['性价比','高','快递','赞','挺快' ],
['安装','不错','快','物流','服务' ],
['价格','电视机','清晰度','操作','海信' ],
['很快','发货','真的','海信','喜欢' ],
['特别','高','电视机','清晰度','色彩' ],
['没','东西','便宜','推荐','舒服' ],
['买','满意','电视机','清晰度','海信' ],
['买','满意','舒服','海信','喜欢' ],
['挺','画面','音质','超值','海信' ],
['买','满意','价格','海信','师傅' ],
['屏幕','音效','时尚','同步','海信' ],
['电视','大气','小米','高清','超薄' ],
['安装','服务','送到','同步','海信' ],
['安装','京东','服务','海信','同步' ],
['效果','功能','合适','漂亮','同步' ],
['快递','赞','小哥','送','海信' ],
['买','满意','性价比','高','挺快' ],
['清晰','很快','画面','服务态度','海信' ],
['电视','清晰','特别','质量','小米' ],
['不错','性价比','看着','海信','同步' ],
['京东','服务','送到','海信','师傅' ],
['速度','运行','系统','同步','海信' ],
['京东','很快','品牌','送货','物流' ],
['速度','运行','画质','系统','海信' ],
['电视','质量','小米','画质','海信' ],
['电视','速度','不错','运行','清晰' ],
['电视','质量','小米','画质','舒服' ],
['速度','运行','电视机','操作','系统' ],
['质量','快递','看着','赞','小哥' ],
['外观','速度','屏幕','外形','运行' ],
['海信','品牌','价格','值得','画面' ],
['安装','电视','不错','快','师傅' ],
['电视','不错','师傅','安装','挺' ],
['安装','京东','师傅','特别','55' ],
['功能','京东','快','合适','价格' ],
['外观','外形','效果','尺寸','大小' ],
['发货','真的','包装','喜欢','购物' ],
['外观','安装','屏幕','外形','音效' ],
['安装','外观','屏幕','外形','音效' ],
['电视','不错','师傅','很快','质量' ],
['买','京东','满意','质量','没' ],
['安装','快','师傅','服务','物流' ],
['电视','不错','海信','清晰','很快' ],
['电视','不错','京东','价格','高' ],
['电视','速度','不错','运行','功能' ],
['电视','买','不错','京东','满意' ],
['外观','外形','尺寸','大小','合适' ],
['电视','不错','安装','清晰','师傅' ],
['质量','发货','海信','喜欢','购物' ],
['电视','买','海信','京东','不错' ],
['电视','买','海信','不错','京东' ],
['安装','电视','外观','速度','不错' ],
['安装','外观','速度','屏幕','外形' ]]
print('len texts_oringinal: ',len(texts_oringinal))
topics_random_gen = [['高','购买','信赖','感觉','时间','品牌'],
['语音','性价比','色彩','舒服','55','小米'],
['漂亮','寸','太','下单','画面','流畅'],
['性价比','寸','家里','漂亮','小米','售后服务'],
['购买','音质','东西','音效','合适','快'],
['购买','画面','合适','清晰','时间','遥控器'],
['售后服务','挺快','送货','京东','小孩','品牌'],
['画质','发货','没','性价比','买','喜欢'],
['声音','京东','运行','便宜','发货','没'],
['物流','推荐','流畅','质量','时间','边框'],
['没','挺快','师傅','到货','功能','小米'],
['55','特别','海信','性价比','感觉','超值'],
['购买','寸','简单','到货','满意','高'],
['特别','速度','信赖','流畅','购买','安装'],
['下单','流畅','真的','大气','海信','画面'],
['边框','品牌','服务','速度','超值','京东'],
['推荐','品牌','没','系统','功能','遥控器'],
['送货','清晰','客厅','音效','品牌','快'],
['值得','色彩','师傅','京东','性价比','边框'],
['购买','小米','舒服','边框','信赖','小孩'],
['海信','高','画质','大气','流畅','性价比'],
['太','时间','屏幕','太','寸','东西'],
['真的','物流','时间','送货','家里','值得'],
['京东','边框','合适','挺','合适','单'],
['到货','遥控器','高','师傅','物流','信赖'],
['清晰','安装','遥控器','售后服务','值得','海信'],
['高','态度','家里','喜欢','品牌','寸'],
['东西','寸','购买','屏幕','55','流畅'],
['电视机','速度','到货','运行','正好','超值'],
['寸','下载','没','服务态度','漂亮','清晰'],
['运行','购买','超值','信赖','到货','真的'],
['手机','品牌','下单','质量','性价比','师傅'],
['特别','单','流畅','挺快','流畅','挺'],
['屏幕','品牌','感觉','单','太','海信'],
['信赖','京东','55','电视机','值得','精细'],
['55','售后服务','寸','正好','东西','语音'],
['性价比','精细','价格','东西','想','55'],
['寸','识别','电视机','单','简单','海信'],
['遥控器','语音','精细','到货','性价比','功能'],
['到货','发货','品牌','速度','画质','品牌'],
['下载','大气','京东','大气','发货','服务态度'],
['屏幕','音质','师傅','遥控器','时间','功能强大'],
['便宜','没','家里','高','大气','信赖'],
['55','信赖','画面','流畅','海信','信赖'],
['大气','55','送货','挺','大气','性价比'],
['画面','品牌','信赖','手机','特别','做工'],
['寸','便宜','简单','家里','超值','挺'],
['东西','运行','清晰','清晰','55','速度'],
['边框','功能','55','单','客厅','安装'],
['发货','电视机','55','东西','舒服','安装']]
print('len topics_random_gen: ',len(topics_random_gen))
dictionary = Dictionary(texts_oringinal)
corpus = [dictionary.doc2bow(text) for text in texts_oringinal]
rdm=CalcTopicsCoherence(topics_random_gen[:50],texts_oringinal,dictionary,corpus) #从语料库的词汇中随机生成的主题,C_V与其他指标的相关系数为正
orn=CalcTopicsCoherence(texts_oringinal[:50],texts_oringinal,dictionary,corpus) #选取语料库中的原文主题,C_V与其他指标的相关系数为负
报错,运行及错误信息:
Microsoft Windows [版本 10.0.19045.3324]
(c) Microsoft Corporation。保留所有权利。
F:\workplace\temp_edit_py>d:/anaconda3/Scripts/activate
(base) F:\workplace\temp_edit_py>conda activate py3701
(py3701) F:\workplace\temp_edit_py>d:/anaconda3/envs/py3701/python.exe f:/ViewReference/CodeView/tmptest.py
len texts_oringinal: 149
len topics_random_gen: 50
runtime:0.01 epoch: 1
len texts_oringinal: 149
len topics_random_gen: 50
runtime:0.01 epoch: 1
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "d:\anaconda3\envs\py3701\lib\multiprocessing\spawn.py", line 105, in spawn_main
exitcode = _main(fd)
File "d:\anaconda3\envs\py3701\lib\multiprocessing\spawn.py", line 114, in _main
prepare(preparation_data)
File "d:\anaconda3\envs\py3701\lib\multiprocessing\spawn.py", line 225, in prepare
_fixup_main_from_path(data['init_main_from_path'])
File "d:\anaconda3\envs\py3701\lib\multiprocessing\spawn.py", line 277, in _fixup_main_from_path
run_name="__mp_main__")
File "d:\anaconda3\envs\py3701\lib\runpy.py", line 263, in run_path
pkg_name=pkg_name, script_name=fname)
File "d:\anaconda3\envs\py3701\lib\runpy.py", line 96, in _run_module_code
mod_name, mod_spec, pkg_name, script_name)
File "d:\anaconda3\envs\py3701\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "f:\ViewReference\CodeView\tmptest.py", line 240, in <module>
rdm=CalcTopicsCoherence(topics_random_gen[:50],texts_oringinal,dictionary,corpus) #从语料库的词汇中随机生成的主题,C_V与其他指标的相关系数为正
File "f:\ViewReference\CodeView\tmptest.py", line 20, in CalcTopicsCoherence
cv_per_topic=cm.get_coherence_per_topic()
File "d:\anaconda3\envs\py3701\lib\site-packages\gensim\models\coherencemodel.py", line 572, in get_coherence_per_topic
self.estimate_probabilities(segmented_topics)
File "d:\anaconda3\envs\py3701\lib\site-packages\gensim\models\coherencemodel.py", line 544, in estimate_probabilities
self._accumulator = self.measure.prob(**kwargs)
File "d:\anaconda3\envs\py3701\lib\site-packages\gensim\topic_coherence\probability_estimation.py", line 156, in p_boolean_sliding_window
return accumulator.accumulate(texts, window_size)
File "d:\anaconda3\envs\py3701\lib\site-packages\gensim\topic_coherence\text_analysis.py", line 444, in accumulate
workers, input_q, output_q = self.start_workers(window_size)
len texts_oringinal: 149
len topics_random_gen: 50
File "d:\anaconda3\envs\py3701\lib\site-packages\gensim\topic_coherence\text_analysis.py", line 478, in start_workers
worker.start()
File "d:\anaconda3\envs\py3701\lib\multiprocessing\process.py", line 112, in start
self._popen = self._Popen(self)
File "d:\anaconda3\envs\py3701\lib\multiprocessing\context.py", line 223, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "d:\anaconda3\envs\py3701\lib\multiprocessing\context.py", line 322, in _Popen
return Popen(process_obj)
File "d:\anaconda3\envs\py3701\lib\multiprocessing\popen_spawn_win32.py", line 46, in __init__
prep_data = spawn.get_preparation_data(process_obj._name)
File "d:\anaconda3\envs\py3701\lib\multiprocessing\spawn.py", line 143, in get_preparation_data
_check_not_importing_main()
File "d:\anaconda3\envs\py3701\lib\multiprocessing\spawn.py", line 136, in _check_not_importing_main
is not going to be frozen to produce an executable.''')
RuntimeError:
An attempt has been made to start a new process before the
current process has finished its bootstrapping phase.
This probably means that you are not using fork to start your
child processes and you have forgotten to use the proper idiom
in the main module:
if __name__ == '__main__':
freeze_support()
...
The "freeze_support()" line can be omitted if the program
is not going to be frozen to produce an executable.
runtime:0.05 epoch: 1
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "d:\anaconda3\envs\py3701\lib\multiprocessing\spawn.py", line 105, in spawn_main
exitcode = _main(fd)
File "d:\anaconda3\envs\py3701\lib\multiprocessing\spawn.py", line 114, in _main
prepare(preparation_data)
File "d:\anaconda3\envs\py3701\lib\multiprocessing\spawn.py", line 225, in prepare
_fixup_main_from_path(data['init_main_from_path'])
File "d:\anaconda3\envs\py3701\lib\multiprocessing\spawn.py", line 277, in _fixup_main_from_path
run_name="__mp_main__")
File "d:\anaconda3\envs\py3701\lib\runpy.py", line 263, in run_path
pkg_name=pkg_name, script_name=fname)
File "d:\anaconda3\envs\py3701\lib\runpy.py", line 96, in _run_module_code
mod_name, mod_spec, pkg_name, script_name)
File "d:\anaconda3\envs\py3701\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "f:\ViewReference\CodeView\tmptest.py", line 240, in <module>
rdm=CalcTopicsCoherence(topics_random_gen[:50],texts_oringinal,dictionary,corpus) #从语料库的词汇中随机生成的主题,C_V与其他指标的相关系数为正
File "f:\ViewReference\CodeView\tmptest.py", line 20, in CalcTopicsCoherence
cv_per_topic=cm.get_coherence_per_topic()
File "d:\anaconda3\envs\py3701\lib\site-packages\gensim\models\coherencemodel.py", line 572, in get_coherence_per_topic
self.estimate_probabilities(segmented_topics)
File "d:\anaconda3\envs\py3701\lib\site-packages\gensim\models\coherencemodel.py", line 544, in estimate_probabilities
self._accumulator = self.measure.prob(**kwargs)
File "d:\anaconda3\envs\py3701\lib\site-packages\gensim\topic_coherence\probability_estimation.py", line 156, in p_boolean_sliding_window
return accumulator.accumulate(texts, window_size)
File "d:\anaconda3\envs\py3701\lib\site-packages\gensim\topic_coherence\text_analysis.py", line 444, in accumulate
workers, input_q, output_q = self.start_workers(window_size)
File "d:\anaconda3\envs\py3701\lib\site-packages\gensim\topic_coherence\text_analysis.py", line 478, in start_workers
worker.start()
File "d:\anaconda3\envs\py3701\lib\multiprocessing\process.py", line 112, in start
self._popen = self._Popen(self)
File "d:\anaconda3\envs\py3701\lib\multiprocessing\context.py", line 223, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "d:\anaconda3\envs\py3701\lib\multiprocessing\context.py", line 322, in _Popen
return Popen(process_obj)
File "d:\anaconda3\envs\py3701\lib\multiprocessing\popen_spawn_win32.py", line 46, in __init__
prep_data = spawn.get_preparation_data(process_obj._name)
File "d:\anaconda3\envs\py3701\lib\multiprocessing\spawn.py", line 143, in get_preparation_data
_check_not_importing_main()
File "d:\anaconda3\envs\py3701\lib\multiprocessing\spawn.py", line 136, in _check_not_importing_main
is not going to be frozen to produce an executable.''')
RuntimeError:
An attempt has been made to start a new process before the
current process has finished its bootstrapping phase.
This probably means that you are not using fork to start your
child processes and you have forgotten to use the proper idiom
in the main module:
if __name__ == '__main__':
freeze_support()
...
The "freeze_support()" line can be omitted if the program
is not going to be frozen to produce an executable.
len texts_oringinal: 149
len topics_random_gen: 50
runtime:0.01 epoch: 1
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "d:\anaconda3\envs\py3701\lib\multiprocessing\spawn.py", line 105, in spawn_main
exitcode = _main(fd)
File "d:\anaconda3\envs\py3701\lib\multiprocessing\spawn.py", line 114, in _main
prepare(preparation_data)
File "d:\anaconda3\envs\py3701\lib\multiprocessing\spawn.py", line 225, in prepare
_fixup_main_from_path(data['init_main_from_path'])
File "d:\anaconda3\envs\py3701\lib\multiprocessing\spawn.py", line 277, in _fixup_main_from_path
run_name="__mp_main__")
File "d:\anaconda3\envs\py3701\lib\runpy.py", line 263, in run_path
pkg_name=pkg_name, script_name=fname)
File "d:\anaconda3\envs\py3701\lib\runpy.py", line 96, in _run_module_code
mod_name, mod_spec, pkg_name, script_name)
File "d:\anaconda3\envs\py3701\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "f:\ViewReference\CodeView\tmptest.py", line 240, in <module>
rdm=CalcTopicsCoherence(topics_random_gen[:50],texts_oringinal,dictionary,corpus) #从语料库的词汇中随机生成的主题,C_V与其他指标的相关系数为正
File "f:\ViewReference\CodeView\tmptest.py", line 20, in CalcTopicsCoherence
cv_per_topic=cm.get_coherence_per_topic()
File "d:\anaconda3\envs\py3701\lib\site-packages\gensim\models\coherencemodel.py", line 572, in get_coherence_per_topic
self.estimate_probabilities(segmented_topics)
File "d:\anaconda3\envs\py3701\lib\site-packages\gensim\models\coherencemodel.py", line 544, in estimate_probabilities
self._accumulator = self.measure.prob(**kwargs)
File "d:\anaconda3\envs\py3701\lib\site-packages\gensim\topic_coherence\probability_estimation.py", line 156, in p_boolean_sliding_window
return accumulator.accumulate(texts, window_size)
File "d:\anaconda3\envs\py3701\lib\site-packages\gensim\topic_coherence\text_analysis.py", line 444, in accumulate
workers, input_q, output_q = self.start_workers(window_size)
File "d:\anaconda3\envs\py3701\lib\site-packages\gensim\topic_coherence\text_analysis.py", line 478, in start_workers
worker.start()
File "d:\anaconda3\envs\py3701\lib\multiprocessing\process.py", line 112, in start
self._popen = self._Popen(self)
File "d:\anaconda3\envs\py3701\lib\multiprocessing\context.py", line 223, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "d:\anaconda3\envs\py3701\lib\multiprocessing\context.py", line 322, in _Popen
return Popen(process_obj)
File "d:\anaconda3\envs\py3701\lib\multiprocessing\popen_spawn_win32.py", line 46, in __init__
prep_data = spawn.get_preparation_data(process_obj._name)
File "d:\anaconda3\envs\py3701\lib\multiprocessing\spawn.py", line 143, in get_preparation_data
_check_not_importing_main()
File "d:\anaconda3\envs\py3701\lib\multiprocessing\spawn.py", line 136, in _check_not_importing_main
is not going to be frozen to produce an executable.''')
RuntimeError:
An attempt has been made to start a new process before the
current process has finished its bootstrapping phase.
This probably means that you are not using fork to start your
child processes and you have forgotten to use the proper idiom
in the main module:
if __name__ == '__main__':
freeze_support()
...
The "freeze_support()" line can be omitted if the program
is not going to be frozen to produce an executable.
分析:
多进程需要在main函数中运行
解决方法1:
加main函数,在main中调用方法
def main():
...需要执行的代码...
if __name__ == '__main__'
main()
解决方法2:
num_workers改为0,单进程加载(没试过)
参考:
https://blog.csdn.net/qq_43580193/article/details/105924104/
https://www.cnblogs.com/carmen-019/p/15077749.html
https://www.codenong.com/34223502/