# Demo inputs: the word to highlight and the sentence to search.
w = '比赛'
t = '比赛开始没多久就结束了比赛,现在没有比赛'


def replace_color(text, word):
    """Return *text* with every occurrence of *word* wrapped in ANSI red.

    Occurrences are located by scanning start positions from right to left,
    so when two candidate matches overlap the right-most one wins.

    Args:
        text: The string to search.
        word: The substring to highlight.

    Returns:
        A new string in which each match is surrounded by the escape
        sequences ``'\\033[031m'`` and ``'\\033[0m'``. When *word* is empty
        or does not occur, *text* is returned unchanged.
    """
    if not word:
        # An empty needle "matches" at every index; highlighting nothing
        # would only litter the output with escape codes.
        return text

    highlighted = '\033[031m' + word + '\033[0m'  # red
    width = len(word)
    # Walk backwards: splicing at position `start` only shifts characters to
    # its right, so the start positions still to be visited stay valid.
    for start in range(len(text) - width, -1, -1):
        if text.startswith(word, start):
            text = text[:start] + highlighted + text[start + width:]
    return text


print(t)
print(replace_color(t, w))
# 单个高亮 (highlight one occurrence at a time)
from jieba import tokenize
# Demo inputs: the sentence to scan and the entity to highlight.
text = '我用小米手机订购了一袋小米'
entity = '小米'


def replace_color(word):
    """Return *word* wrapped in the ANSI escape codes '\\033[033m' ... '\\033[0m'."""
    return '\033[033m' + word + '\033[0m'


def replace_word(sentence, word, head, tail):
    """Return *sentence* with the slice ``[head:tail]`` replaced by *word*."""
    return sentence[:head] + word + sentence[tail:]


# tokenize() yields (word, start, end) triples; print one copy of the sentence
# per matching token, with only that token highlighted.
for word, head, tail in tokenize(text):
    if word == entity:
        print(replace_word(text, replace_color(word), head, tail))
from xlsxwriter.workbook import Workbook
import re
# Create the Excel workbook, a worksheet, and the highlight format.
workbook = Workbook('a.xlsx')
worksheet = workbook.add_worksheet()
color = workbook.add_format({'color': 'red', 'bold': True})

# Highlight dates: the capturing group makes split() keep the matched runs
# (two or more digits / 年 / 月 / 日 characters) in the resulting list.
rc = re.compile(r'([0-9年月日]{2,})')
sentence = '小洪和小黄2020年1月12日母校初见。1月26日长烟落日孤城闭,2月9日神仙眷侣云比心'

# Build the argument list for write_rich_string(): plain fragments as-is,
# date fragments preceded by the format that should apply to them.
format_ls = []
for fragment in rc.split(sentence):
    if not fragment:
        # split() emits '' when the sentence starts/ends with a date or two
        # dates are adjacent; write_rich_string() rejects empty strings.
        continue
    if rc.fullmatch(fragment):
        format_ls.append(color)  # Prefix the word with the format
    format_ls.append(fragment)
print(format_ls)

# Write the rich string into a cell (zero-based row 2, column 1).
row, col = 2, 1
worksheet.write_rich_string(row, col, *format_ls)
workbook.close()
# xlwings
from pandas import DataFrame
from jieba import tokenize
from xlwings import App
def ner(text, entities=frozenset({'小米', '苹果'})):
    """Yield one record per recognised entity occurrence in *text*.

    The text is split into clauses on ',' and each clause is tokenized with
    ``tokenize``; every token that is a member of *entities* produces a record.

    Args:
        text: The sentence to scan.
        entities: Words treated as entities. Defaults to the two product
            names this demo looks for.

    Yields:
        ``(text, marked_clause, word)`` tuples, where *marked_clause* is the
        clause containing the token with that token wrapped in 【】.
    """
    for clause in text.split(','):  # split the sentence into clauses
        # tokenize() yields (word, start, end) with offsets into `clause`.
        for word, head, tail in tokenize(clause):
            if word in entities:  # entity hit
                yield (
                    text,
                    clause[:head] + '【' + word + '】' + clause[tail:],
                    word,
                )


def lss2excel(ls_of_ls, columns, fname):
    """Write a list of rows to the Excel file *fname* via a pandas DataFrame.

    Args:
        ls_of_ls: Iterable of rows (each row a sequence of cell values).
        columns: Column headers, one per row element.
        fname: Destination path; the DataFrame index is not written.
    """
    DataFrame(ls_of_ls, columns=columns).to_excel(fname, index=False)


def _merge_equal_runs(sheet, last_row_index):
    """Merge each run of vertically adjacent equal values in column A.

    Scanning starts at row 2 (row 1 is treated as the header row) and always
    finishes in a single pass over the rows.
    """
    run_start = 2
    if run_start >= last_row_index:
        return  # fewer than two rows below the header: nothing to merge

    run_value = sheet.cells(run_start, 1).value
    # Look one row past the region so the final run is closed by the cell
    # below it. If that cell equals the run value (e.g. both are empty) the
    # trailing run is simply left unmerged.
    for row in range(run_start + 1, last_row_index + 2):
        value = sheet.cells(row, 1).value
        if value != run_value:
            cells = sheet.range('A{}:A{}'.format(run_start, row - 1)).api
            cells.MergeCells = True  # merge the run into one cell
            cells.WrapText = True  # wrap text inside the merged cell
            run_start, run_value = row, value


def merge_cells(fname):
    """Format every sheet of the workbook *fname* and merge repeated cells.

    For each sheet, the region around A1 gets a fixed column width, font
    size, wrapping and alignment; runs of equal values in column A are
    merged; and A1:C1 gets a background colour. The workbook is then saved
    in place.

    Formatting errors are printed in red and do not prevent the save. The
    Excel application started here is always shut down, even when opening
    or saving the workbook raises.

    Args:
        fname: Path of the workbook to open, modify and save.
    """
    # Start a hidden Excel instance without an initial empty workbook.
    app = App(add_book=False, visible=False)
    try:
        # Turn off Excel alerts while editing.
        app.display_alerts = False
        book = app.books.open(fname)
        try:
            for sheet in book.sheets:
                # Contiguous block of cells around A1.
                current_region = sheet.cells(1, 1).current_region
                current_region.column_width = 12  # column width
                current_region.api.Font.Size = 9  # font size
                current_region.api.WrapText = True  # wrap text
                # Excel enumeration values: 1 is xlHAlignGeneral,
                # -4160 is xlVAlignTop.
                current_region.api.HorizontalAlignment = 1
                current_region.api.VerticalAlignment = -4160
                # Row number of the region's last cell bounds the merge scan.
                _merge_equal_runs(sheet, current_region.last_cell.row)
                # Header background colour (65535 is yellow in Excel's
                # BGR colour encoding).
                sheet.range('A1:C1').api.Interior.Color = 65535
        except Exception as e:
            # Best effort: report the problem and still save what was done.
            print('\033[031m{}\033[0m'.format(e))
        # Re-enable alerts, then save.
        app.display_alerts = True
        book.save()
    finally:
        # Always close Excel so no hidden process is left behind.
        app.quit()
# Demo driver: extract entity rows from sample sentences, export them to an
# Excel file, then post-process that file with merge_cells().
fname = 'phone.xlsx'
fields = ['text', 'clause', 'word']
texts = [
    '买小米机,送了袋小米和苹果',
    '诺基亚',
    '买华为送苹果',
]
# Flatten the rows produced for each sentence into a single list.
ls_of_ls = [row for sentence in texts for row in ner(sentence)]
lss2excel(ls_of_ls, fields, fname)
merge_cells(fname)