保留用于设置文本格式的HTML标记,并转义纯文本中的非HTML标记(将纯文本转换为HTML)

我需要子类化 HTML::WhiteListSanitizer,使其转义(而不是删除)不在白名单中的标记(通过修改 process_node);对 HTML::Node 打猴子补丁,避免把标记名称转换为小写;并对 HTML::Text 打猴子补丁,以应用 wbr 断词拆分:

class Text2HTML

# Converts +text+ to HTML in three passes: +simple_format+, then +auto_link+
# (called with :all and :target => '_blank'), then NonHTMLEscaper.sanitize.
# Returns whatever the sanitizer returns.
def self.convert(text)
  formatted = simple_format(text)
  linked = auto_link(formatted, :all, :target => '_blank')
  NonHTMLEscaper.sanitize(linked)
end

# based on http://www.ruby-forum.com/topic/87492

# Inserts a <wbr /> break opportunity after every +len+ consecutive
# non-whitespace characters so that long unbroken words can wrap.
#
# str - the String to process.
# len - number of characters per chunk (default 10); interpolated into a
#       regexp quantifier, so it is expected to be a positive integer.
#
# Returns a new String. Whitespace runs are preserved exactly as they were.
def self.wbr_split(str, len = 10)
  fragment = /.{#{len}}/

  # Splitting on a captured group keeps the whitespace separators in the
  # resulting array, so joining the pieces restores the original spacing.
  str.split(/(\s+)/).map { |piece|
    # '\0' re-emits the matched chunk; the <wbr /> suffix is the actual
    # break marker (without it the substitution would be a no-op).
    (piece =~ /\s/) ? piece : piece.gsub(fragment, '\0<wbr />')
  }.join
end

protected

extend ActionView::Helpers::TagHelper

extend ActionView::Helpers::TextHelper

extend ActionView::Helpers::UrlHelper

class NonHTMLEscaper < HTML::WhiteListSanitizer

self.allowed_tags << 'wbr'

# Class-level shortcut: builds a fresh sanitizer instance and forwards all
# arguments to its #sanitize, returning that call's result.
def self.sanitize(*args)
  sanitizer = new
  sanitizer.sanitize(*args)
end

protected

# Copy, just to reference this Node definition

# Tokenizes +text+ and runs every token through #process_node.
#
# text    - the String handed to HTML::Tokenizer.
# options - Hash; :parent is reset to an empty Array, while :attributes and
#           :tags fall back to allowed_attributes / allowed_tags when unset.
#
# Returns the Array that #process_node appended to.
def tokenize(text, options)
  options[:parent] = []
  options[:attributes] ||= allowed_attributes
  options[:tags] ||= allowed_tags

  source = HTML::Tokenizer.new(text)
  processed = []

  # Node resolves to the nested Node class, so its parse is used here.
  while (token = source.next)
    parsed = Node.parse(nil, 0, 0, token, false)
    process_node(parsed, processed, options)
  end

  processed
end

# gsub <> instead of returning nil

# Appends the sanitized form of +node+ to +result+.
#
# node    - an HTML::Tag, or any other node (handled via #to_s).
# result  - Array collecting the output pieces.
# options - Hash with :parent (stack of open tag names, innermost first)
#           and :tags (collection of allowed tag names).
#
# For a tag: the :parent stack is updated, process_attributes_for is called,
# and then either the node itself is appended (allowed tag) or its text with
# the angle brackets escaped as entities, so it is shown literally.
# For any other node: its text is appended, or nil when the innermost open
# tag is listed in bad_tags.
def process_node(node, result, options)
  result << case node
            when HTML::Tag
              if node.closing == :close
                options[:parent].shift
              else
                options[:parent].unshift node.name
              end

              process_attributes_for node, options

              if options[:tags].include?(node.name)
                node
              else
                # Escape rather than drop the tag so it stays visible as text.
                node.to_s.gsub('<', '&lt;').gsub('>', '&gt;')
              end
            else
              bad_tags.include?(options[:parent].first) ? nil : node.to_s
            end
end

# Text node whose @content is replaced, right after the parent constructor
# runs, with the result of Text2HTML.wbr_split on the given content.
class Text < HTML::Text
  def initialize(parent, line, pos, content)
    # Bare super forwards the same four arguments unchanged.
    super
    @content = Text2HTML.wbr_split(content)
  end
end

# remove tag/attributes downcases and reference this Text

# Node subclass whose parse builds the enclosing class's Text for text
# content and leaves tag and attribute names in their original case
# (nothing in this method downcases them).
class Node < HTML::Node
  # Parses one token into a node.
  #
  # parent, line, pos - passed through unchanged to the node constructors.
  # content           - the raw token String.
  # strict            - when true, malformed markup raises a RuntimeError;
  #                     when false, the parser recovers as best it can.
  #
  # Returns a Text, an HTML::CDATA or an HTML::Tag.
  def self.parse(parent, line, pos, content, strict = true)
    # Anything that does not start with "<" followed by a non-space
    # character is plain text.
    if content !~ /^<\S/
      Text.new(parent, line, pos, content)
    else
      scanner = StringScanner.new(content)

      unless scanner.skip(/</)
        if strict
          raise "expected <"
        else
          return Text.new(parent, line, pos, content)
        end
      end

      if scanner.skip(/!\[CDATA\[/)
        unless scanner.skip_until(/\]\]>/)
          if strict
            raise "expected ]]> (got #{scanner.rest.inspect} for #{content})"
          else
            scanner.skip_until(/\Z/)
          end
        end

        # pre_match still carries the opening marker; strip it.
        return HTML::CDATA.new(parent, line, pos, scanner.pre_match.gsub(/<!\[CDATA\[/, ''))
      end

      closing = (scanner.scan(/\//) ? :close : nil)
      return Text.new(parent, line, pos, content) unless name = scanner.scan(/[^\s!>\/]+/)

      # Attributes are only read for tags that are not closing tags.
      unless closing
        scanner.skip(/\s*/)
        attributes = {}

        while attr = scanner.scan(/[-\w:]+/)
          # An attribute without "=value" is recorded as true.
          value = true

          if scanner.scan(/\s*=\s*/)
            if delim = scanner.scan(/['"]/)
              value = ""
              # Consume up to the matching quote; a backslash keeps the
              # character that follows it as part of the value.
              while text = scanner.scan(/[^#{delim}\\]+|./)
                case text
                when "\\"
                  value << text
                  value << scanner.getch
                when delim
                  break
                else
                  value << text
                end
              end
            else
              value = scanner.scan(/[^\s>\/]+/)
            end
          end

          attributes[attr] = value
          scanner.skip(/\s*/)
        end

        closing = (scanner.scan(/\//) ? :self : nil)
      end

      unless scanner.scan(/\s*>/)
        if strict
          raise "expected > (got #{scanner.rest.inspect} for #{content}, #{attributes.inspect})"
        else
          # throw away all text until we find what we're looking for
          scanner.skip_until(/>/) or scanner.terminate
        end
      end

      HTML::Tag.new(parent, line, pos, name, attributes, closing)
    end
  end
end

end

end

end

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值