加亮搜索关键字,摘要出关键字的上下文

# -*- encoding:utf-8 -*-
import re

def get_pattern(query):
    """Build a case-insensitive regex source matching any keyword in *query*.

    The query is split on whitespace.  Leading and trailing ``*``, ``?`` and
    ``+`` characters (search wildcards that would otherwise act as regex
    quantifiers) are removed from each word, and what is left is escaped so
    that it is matched literally.

    :param query: the raw search string.
    :returns: a pattern string usable with ``re.search`` / ``re.sub``.  When
        the query contains no usable keyword, the returned pattern never
        matches anything.
    """
    keywords = []
    for word in re.split(r'\s+', query):
        keyword = word.strip('*?+')
        if keyword:
            # Escape so user input such as "(" or "a.b" cannot break or
            # alter the regular expression.
            keywords.append(re.escape(keyword))

    if not keywords:
        # An empty alternation would match the empty string at every
        # position; an empty negative lookahead can never match.
        return '(?!)'

    return '(?i)%s' % '|'.join(keywords)

def highlight(text, query):
    """Wrap each occurrence of a query keyword in *text* with an ``<em>`` tag.

    Matches are found with the pattern produced by ``get_pattern(query)``
    and each one is replaced by ``<em class="hL">match</em>``.

    :param text: the text to mark up; returned unchanged when empty/``None``.
    :param query: the search string; ``None`` or blank means no highlighting.
    :returns: *text* with the matched keywords wrapped in ``<em>`` tags.

    NOTE(review): *text* is not HTML-escaped here -- presumably the caller
    handles escaping; confirm before rendering untrusted content.
    """
    query = (query or '').strip()
    if not (text and query):
        return text

    def wrap_match(matchobj):
        matched = matchobj.group(0)
        if not matched:
            # A zero-length match would inject an empty tag pair at every
            # position of the text; leave such positions untouched.
            return matched
        return '<em class="hL">' + matched + '</em>'

    return re.sub(get_pattern(query), wrap_match, text)

def get_summary_text(obj, query, searchable_text):
    """Return a highlighted excerpt of *searchable_text* around the first hit.

    Bytes-like input is decoded as UTF-8, falling back to GB18030; text that
    is already decoded is used as is.  Texts of at most 100 characters are
    highlighted whole.  Longer texts are cut down to the first match plus up
    to 50 characters of context on either side, with ``'... '`` / ``' ...'``
    marking the sides where text was actually cut off.

    :param obj: unused by this function; kept for interface compatibility.
    :param query: the search string.
    :param searchable_text: the text (bytes-like or text) to summarise.
    :returns: the highlighted excerpt, or ``''`` when either argument is
        empty, the text cannot be decoded, or no keyword matches.
    """
    if not (query and searchable_text):
        return ''

    if isinstance(searchable_text, (bytes, bytearray)):
        for encoding in ('utf-8', 'gb18030'):
            try:
                searchable_text = searchable_text.decode(encoding)
                break
            except UnicodeDecodeError:
                continue
        else:
            # Neither encoding could decode the data.
            return ''
    elif not isinstance(searchable_text, type(u'')):
        # Not text at all; there is nothing to summarise.
        return ''

    m = re.search(get_pattern(query), searchable_text)
    if m is None:
        return ''

    context = 50
    if len(searchable_text) <= 2 * context:
        return highlight(searchable_text, query)

    start, end = m.span()
    pieces = []

    # Leading context: only announce a cut when something is really dropped.
    if start - context > 0:
        pieces.append('... ')
        pieces.append(searchable_text[start - context:start])
    else:
        pieces.append(searchable_text[:start])

    pieces.append(m.group(0))

    # Trailing context: a tail of exactly `context` characters fits whole,
    # so it must not be followed by an ellipsis.
    tail = searchable_text[end:]
    if len(tail) <= context:
        pieces.append(tail)
    else:
        pieces.append(tail[:context])
        pieces.append(' ...')

    return highlight(''.join(pieces), query)

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值