Python高亮文本中的关键词

最新推荐文章于 2024-08-10 03:56:56 发布

小基基o_O

最新推荐文章于 2024-08-10 03:56:56 发布

阅读量6.6k

点赞数 7

分类专栏： Python可视化

本文链接：https://blog.csdn.net/Yellow_python/article/details/100516921

版权

Python可视化专栏收录该内容

15 篇文章 1 订阅

订阅专栏

文章目录

print
存html
存excel
- xlsxwriter
- xlwings

print

全部高亮

w = '比赛'
t = '比赛开始没多久就结束了比赛，现在没有比赛'


def replace_color(text, word):
    """Return *text* with occurrences of *word* wrapped in ANSI red codes.

    The text is scanned from the last possible start position down to 0,
    so when two candidate matches overlap the right-most one is the one
    that gets highlighted.
    """
    highlighted = ''.join(('\033[031m', word, '\033[0m'))  # red
    width = len(word)
    start = len(text) - width
    # Walking right-to-left keeps the not-yet-visited indices valid even
    # though each replacement makes the string longer.
    while start >= 0:
        end = start + width
        if text[start:end] == word:
            text = text[:start] + highlighted + text[end:]
        start -= 1
    return text


print(t)
print(replace_color(t, w))

单个高亮

from jieba import tokenize

text = '我用小米手机订购了一袋小米'
entity = '小米'


def replace_color(word):
    """Wrap *word* in ANSI escape codes (SGR 33, shown as yellow on most terminals)."""
    return '\033[033m' + word + '\033[0m'


def replace_word(sentence, word, head, tail):
    """Return *sentence* with the slice [head:tail] replaced by *word*."""
    return sentence[:head] + word + sentence[tail:]


# Print the sentence once per token that equals the entity, colouring only
# that token, so each printed line highlights a single occurrence.
for word, head, tail in tokenize(text):
    if word != entity:
        continue
    word = replace_color(word)
    print(replace_word(text, word, head, tail))

mysql高亮查询

def highlight(self, field, table, keyword, n=99):
    """Print values of *field* that contain *keyword*, with the keyword in red.

    Builds a ``SELECT ... WHERE INSTR(field, keyword) > 0`` statement, hands
    it to ``self.fetchone(sql, n)`` and prints the first column of every
    returned row after wrapping matches of *keyword* in ANSI red codes.

    :param field: column name to select and search
    :param table: table name to query
    :param keyword: substring to search for and highlight
    :param n: passed through to ``self.fetchone`` (presumably a row limit;
        confirm against that helper)
    """
    # NOTE(review): field, table and keyword are interpolated into the SQL
    # text unescaped; a keyword containing a quote breaks (or alters) the
    # statement. Only pass trusted values until fetchone accepts parameters.
    sql = "SELECT %s FROM %s WHERE INSTR(%s,'%s')>0;" % (field, table, field, keyword)
    for row in self.fetchone(sql, n):
        text = row[0]
        marked = ''.join(('\033[031m', keyword, '\033[0m'))  # red
        width = len(keyword)
        start = len(text) - width
        # Scan right-to-left so earlier indices stay valid while text grows.
        while start >= 0:
            end = start + width
            if text[start:end] == keyword:
                text = text[:start] + marked + text[end:]
            start -= 1
        print(text)

存html

py文件

def replace_html_tag(text, word):
    """Return *text* with occurrences of *word* wrapped in a red ``<font>`` tag.

    The text is scanned from the last possible start position down to 0,
    so for overlapping candidates the right-most match is the one wrapped.
    Neither *text* nor *word* is HTML-escaped.
    """
    wrapped = ''.join(('<font color="red">', word, '</font>'))
    width = len(word)
    start = len(text) - width
    # Right-to-left keeps the remaining indices valid as the string grows.
    while start >= 0:
        end = start + width
        if text[start:end] == word:
            text = text[:start] + wrapped + text[end:]
        start -= 1
    return text


def save_html(ls_of_ls, prefix):
    """Write a list of rows to ``<prefix>.html`` as a bordered HTML table.

    Each inner sequence becomes one ``<tr>``; each item is formatted with
    ``str.format`` into its own ``<td>``. Items are written as-is (no HTML
    escaping), so markup inside an item is rendered by the browser.
    """
    cell = '<td><font size="4">{}</font></td>'
    with open(prefix + '.html', 'w', encoding='utf-8') as f:
        f.write('<html><head><meta charset="UTF-8"></head><body><table border="1">\n')
        for row in ls_of_ls:
            cells = ''.join(cell.format(item) for item in row)
            f.write('<tr>' + cells + '</tr>\n')
        f.write('</table></body></html>')


texts = ['深扣菊花舔指笑', '菊花菊花一闪闪', '接天莲叶无穷碧', '硬日菊花别样红']
word = '菊花'

# One table row per text: the keyword, then the text with the keyword marked.
ls_of_ls = [[word, replace_html_tag(text, word)] for text in texts]
# The keyword doubles as the output file name prefix.
save_html(ls_of_ls, word)

生成的html代码

<html><head><meta charset="UTF-8"></head><body><table border="1">
<tr><td><font size="4">菊花</font></td><td><font size="4">深扣<font color="red">菊花</font>舔指笑</font></td></tr>
<tr><td><font size="4">菊花</font></td><td><font size="4"><font color="red">菊花</font><font color="red">菊花</font>一闪闪</font></td></tr>
<tr><td><font size="4">菊花</font></td><td><font size="4">接天莲叶无穷碧</font></td></tr>
<tr><td><font size="4">菊花</font></td><td><font size="4">硬日<font color="red">菊花</font>别样红</font></td></tr>
</table></body></html>

html展示

菊花	深扣菊花舔指笑
菊花	菊花菊花一闪闪
菊花	接天莲叶无穷碧
菊花	硬日菊花别样红

存excel

xlsxwriter

from xlsxwriter.workbook import Workbook
import re

# Create the workbook, one worksheet, and the format used for highlighted runs
workbook = Workbook('a.xlsx')
worksheet = workbook.add_worksheet()
color = workbook.add_format({'color': 'red', 'bold': True})

# Highlight dates: runs of two or more digits / 年 / 月 / 日 characters
rc = re.compile('([0-9年月日]{2,})')
sentence = '小洪和小黄2020年1月12日母校初见。1月26日长烟落日孤城闭，2月9日神仙眷侣云比心'
# Splitting on a capturing group keeps the matched runs in the result, so the
# list alternates plain text and date fragments.
format_ls = []
for fragment in rc.split(sentence):
    if rc.fullmatch(fragment):
        format_ls.append(color)  # Prefix the word with the format
    format_ls.append(fragment)
print(format_ls)

# Write the rich string into a single cell
row, col = 2, 1
worksheet.write_rich_string(row, col, *format_ls)
workbook.close()

xlwings

from pandas import DataFrame
from jieba import tokenize
from xlwings import App

def ner(text, entities=frozenset({'小米', '苹果'})):
    """Yield one record per entity token found in *text*.

    The text is split into clauses on the full-width comma, each clause is
    tokenized, and every token that is a member of *entities* produces a
    tuple ``(text, marked_clause, word)`` where ``marked_clause`` is the
    clause with that single token wrapped in 【】.

    :param text: the sentence to scan
    :param entities: collection of words treated as entities; defaults to
        the two words that were previously hard-coded, so existing
        ``ner(text)`` calls behave as before
    """
    for clause in text.split('，'):  # split into clauses
        # Each token comes with its start/end offsets inside the clause.
        for word, head, tail in tokenize(clause):
            if word in entities:
                yield (
                    text,
                    clause[:head] + '【' + word + '】' + clause[tail:],
                    word,
                )

def lss2excel(ls_of_ls, columns, fname):
    """Write the rows in *ls_of_ls* to the Excel file *fname*.

    *columns* supplies the header names; the DataFrame index is not written.
    """
    frame = DataFrame(ls_of_ls, columns=columns)
    frame.to_excel(fname, index=False)

def _merge_first_column_runs(sheet, last_row_index):
    """Merge each run of consecutive equal values in column A, from row 2 down."""
    if last_row_index <= 2:
        return  # at most one data row: nothing to merge
    start = 2
    # The scan goes one row past the region so that the row after the data
    # (presumably empty) closes the final run. A single pass is enough and,
    # unlike a retry loop, cannot spin forever when the trailing run equals
    # that extra row (e.g. blank cells at the bottom of column A).
    for row in range(3, last_row_index + 2):
        if sheet.cells(start, 1).value != sheet.cells(row, 1).value:
            cells = sheet.range('A{}:A{}'.format(start, row - 1)).api
            cells.MergeCells = True  # merge the run
            cells.WrapText = True  # wrap text inside the merged cell
            start = row


def merge_cells(fname):
    """Format every sheet of the workbook *fname* and merge equal cells in column A.

    For each sheet the region around A1 gets a fixed column width, font
    size, wrapping and alignment; consecutive equal values in column A
    (from row 2) are merged; A1:C1 gets a background colour. Errors raised
    while formatting are printed in red and the workbook is still saved.
    The Excel application is always closed, even if opening or saving fails.

    :param fname: path of the workbook to modify in place
    """
    # Start a hidden Excel instance without an empty workbook.
    app = App(add_book=False, visible=False)
    try:
        # Suppress Excel prompts while working.
        app.display_alerts = False
        book = app.books.open(fname)
        try:
            for sheet in book.sheets:
                # Contiguous block of cells around A1.
                current_region = sheet.cells(1, 1).current_region
                current_region.column_width = 12  # column width
                current_region.api.Font.Size = 9  # font size
                current_region.api.WrapText = True  # wrap text
                # NOTE(review): the original comments on the next two lines
                # were swapped. In Excel's enumerations 1 is xlHAlignGeneral
                # (left would be -4131) and -4160 is xlVAlignTop; confirm
                # whether left alignment was intended.
                current_region.api.HorizontalAlignment = 1  # horizontal: general
                current_region.api.VerticalAlignment = -4160  # vertical: top
                # Row of the region's last cell.
                last_row_index = current_region.last_cell.row
                _merge_first_column_runs(sheet, last_row_index)
                # Background colour of the header cells (65535 is yellow).
                sheet.range('A1:C1').api.Interior.Color = 65535
        except Exception as e:
            # Best effort: report the problem and still save what was done.
            print('\033[031m{}\033[0m'.format(e))
        # Re-enable prompts before saving.
        app.display_alerts = True
        book.save()
    finally:
        # Always shut Excel down so no hidden process is left behind.
        app.quit()

texts = ['买小米机，送了袋小米和苹果', '诺基亚', '买华为送苹果']
fields = ['text', 'clause', 'word']
fname = 'phone.xlsx'

# Collect every record produced for every text into one flat list of rows.
ls_of_ls = []
for sentence in texts:
    ls_of_ls.extend(ner(sentence))

# Write the rows to Excel, then format the file and merge repeated texts.
lss2excel(ls_of_ls, fields, fname)
merge_cells(fname)