参考链接 http://www.cnblogs.com/yjf512/p/3598332.html
/usr/local/coreseek/etc
进入配置文件 csf.conf 所在的目录(即上一行的路径;注意 Coreseek 默认的配置文件名是 csft.conf,请确认文件名是否写对)
# whether to strip HTML tags from incoming documents
# known values are 0 (do not strip) and 1 (do strip)
# optional, default is 0
# strip HTML tags from the documents (stripping is enabled here)
html_strip = 1
# what HTML attributes to index if stripping HTML
# optional, default is empty (do not index anything)
# keep (index) the listed tag attributes: alt and title on img, title on a
html_index_attrs = img=alt,title; a=title;
# what HTML elements contents to strip
# optional, default is empty (do not strip element contents)
# removes the contents of the listed elements, not only the tags themselves
# left commented out here, so the default above applies
# html_remove_elements = style, script