一、XSS过滤
像在KindEditor里,富文本编辑框,写入类似“<script>alert(123)</script>
”,编辑框会给过滤掉。但如果选择源码编辑,写入如上代码,就过滤不了了。实现了XSS攻击。
1、解决办法:对特殊字符进行处理。
pip3 install beautifulsoup4
,这个模块会生成对象,find方法找指定的标签
from bs4 import BeautifulSoup
# HTML fragment that needs XSS filtering.  The note lives out here on purpose:
# anything written inside the triple-quoted literal becomes part of the
# document that gets parsed.
content = """
<p class='c1' id='i1'>
asdfaa<span style="font-family:NSimSun;">sdf<a>a</a>sdf</span>sdf
</p>
<p>
<strong class='c2' id='i2'>asdf</strong>
<script>alert(123)</script>
</p>
<h2>
asdf
</h2>
"""

# 'html.parser' names the parser BeautifulSoup should use.
soup = BeautifulSoup(content, 'html.parser')

# Neutralise every <script> element, not just the first one; looping also
# avoids an AttributeError when the fragment contains no <script> at all.
for tag in soup.find_all('script'):
    # hidden=True combined with clear() removes both the tag and its
    # contents; with hidden left False only the contents are removed.
    tag.hidden = True
    tag.clear()

# Remove a single attribute from a tag.
span = soup.find('span')
if span is not None:
    # pop() with a default does nothing when the attribute is absent,
    # where `del` would raise KeyError.
    span.attrs.pop('style', None)
2、另外一种方法通过白名单实现:
from bs4 import BeautifulSoup

# A tag-only whitelist would look like: tags = {'p', 'strong'}
# Here each allowed tag also lists the attributes it may keep.
tags = {
    'p': ['class'],
    'strong': ['id'],
}

soup = BeautifulSoup(content, 'html.parser')
for element in soup.find_all():
    # Tags outside the whitelist are hidden and emptied, then skipped.
    if element.name not in tags:
        element.hidden = True
        element.clear()
        continue

    # Attributes permitted for this tag, e.g. ['class'] for <p>.
    permitted = tags[element.name]
    # Collect the offending names first so the attribute dict is never
    # modified while it is being iterated.
    rejected = [name for name in element.attrs if name not in permitted]
    for name in rejected:
        del element.attrs[name]

content = soup.decode()
print(content)
这里需要注意的点:删除dict的key值时:
dic = {'k1': 'v1', 'k2': 'v2', 'k3': 'v3'}

# Wrong: deleting from a dict while iterating over it.  The iterator notices
# that the dict changed size and raises RuntimeError on the next step.
# Never remove elements from a container while iterating over that container.
try:
    for k, v in dic.items():
        if k == 'k2':
            del dic[k]
except RuntimeError as exc:
    print(exc)  # "dictionary changed size during iteration"

# Correct: iterate over a snapshot of the keys.  In Python 3 dict.keys()
# returns a live view of the dict, so list() is needed to copy it.  Iterate
# the keys themselves: unpacking each key as `k, v` would split the
# two-character key 'k2' into 'k' and '2', and nothing would be deleted.
dic = {'k1': 'v1', 'k2': 'v2', 'k3': 'v3'}
for k in list(dic):
    if k == 'k2':
        del dic[k]
完整示例:
#!/usr/bin/env python
# -*- coding:utf-8 -*-
from bs4 import BeautifulSoup
class XSSFilter(object):
    """Whitelist-based HTML sanitiser built on BeautifulSoup.

    The class is a singleton: every ``XSSFilter()`` call returns the same
    object.  Only tags listed in ``valid_tags`` survive ``process``, and
    each surviving tag keeps only the attributes listed for it.
    """

    __instance = None

    # URL schemes accepted in URL-valued attributes.  Values with any other
    # scheme (``javascript:``, ``data:``, ``vbscript:`` ...) are dropped.
    _SAFE_SCHEMES = frozenset(['http', 'https', 'mailto', 'ftp', 'tel'])
    # Whitelisted attributes whose value is interpreted as a URL.
    _URL_ATTRS = frozenset(['href', 'src'])

    def __init__(self):
        # XSS whitelist: tag name -> attribute names allowed on that tag.
        self.valid_tags = {
            "font": ['color', 'size', 'face', 'style'],
            'b': [],
            'div': [],
            "span": [],
            "table": [
                'border', 'cellspacing', 'cellpadding'
            ],
            'th': [
                'colspan', 'rowspan'
            ],
            'td': [
                'colspan', 'rowspan'
            ],
            "a": ['href', 'target', 'name'],
            "img": ['src', 'alt', 'title'],
            'p': [
                'align'
            ],
            "pre": ['class'],
            "hr": ['class'],
            'strong': []
        }

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating it on first use.

        :param cls: class being instantiated
        :param args: accepted for signature compatibility, ignored
        :param kwargs: accepted for signature compatibility, ignored
        :return: the single shared instance
        """
        if cls.__instance is None:
            # object.__new__ takes only the class; forwarding *args/**kwargs
            # raises TypeError on Python 3.
            cls.__instance = object.__new__(cls)
        return cls.__instance

    @staticmethod
    def _is_safe_url(value):
        """Return True when *value* is a relative URL or uses a safe scheme."""
        if isinstance(value, (list, tuple)):
            value = ' '.join(value)
        # Drop whitespace and control characters before looking for the
        # scheme, so "java\tscript:" cannot slip past the check.
        compact = ''.join(
            ch for ch in str(value) if ch > ' ' and ch != '\x7f'
        ).lower()
        scheme, colon, _ = compact.partition(':')
        if not colon:
            return True  # no scheme at all: relative URL
        if any(ch in scheme for ch in '/?#'):
            return True  # the colon belongs to the path/query/fragment
        return scheme in XSSFilter._SAFE_SCHEMES

    def process(self, content):
        """Sanitise an HTML string and return the filtered markup.

        :param content: untrusted HTML text
        :return: HTML text containing only whitelisted tags and attributes
        """
        soup = BeautifulSoup(content, 'html.parser')
        # Visit every tag in the document.
        for tag in soup.find_all(recursive=True):
            # Tag not in the whitelist: never render the tag itself.
            if tag.name not in self.valid_tags:
                tag.hidden = True
                # <html>/<body> are only hidden, so their children stay in
                # the tree and are filtered on their own turn; every other
                # disallowed tag is emptied as well.
                if tag.name not in ['html', 'body']:
                    tag.clear()
                continue
            # Attribute whitelist for the current tag.
            attr_rules = self.valid_tags[tag.name]
            # Iterate over a copy of the keys because attributes are
            # deleted inside the loop.
            for key in list(tag.attrs):
                if key not in attr_rules:
                    del tag[key]
                elif key in self._URL_ATTRS and not self._is_safe_url(tag[key]):
                    # A whitelisted href/src must not carry a script URL.
                    del tag[key]
        return soup.decode()
if __name__ == '__main__':
    # Demo input: contains disallowed attributes (class, style, id) and a
    # <script> element for the filter to deal with.
    html = """<p class="title">
<b>The Dormouse's story</b>
</p>
<p class="story">
<div name='root'>
Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister c1" style='color:red;background-color:green;' id="link1"><!-- Elsie --></a>
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tilffffffffffffflie</a>;
and they lived at the bottom of a well.
<script>alert(123)</script>
</div>
</p>
<p class="story">...</p>"""
    # Run the sample through the filter and show the sanitised markup.
    sanitizer = XSSFilter()
    print(sanitizer.process(html))
二、单例模式
一个类,创建三个对象占得内存多还是创建一个对象占得多?
像上面的每次过滤都需要创建一个对象,白名单是不变的。所以只用一个对象做这个操作,就是单例模式
class Foo(object):
    """Minimal singleton: every ``Foo()`` call returns the same object."""

    # The one shared instance; None until the first Foo() call.
    instance = None

    def __init__(self):
        # __init__ runs on every Foo() call, re-initialising the shared
        # instance each time.
        self.name = 'alex'

    def __new__(cls, *args, **kwargs):
        """Create the instance on first use and return it afterwards."""
        if Foo.instance is None:
            # object.__new__ takes only the class; forwarding *args/**kwargs
            # raises TypeError on Python 3.
            Foo.instance = object.__new__(cls)
        return Foo.instance


obj1 = Foo()
obj2 = Foo()
# Both ids are equal because both names refer to the same object.
print(id(obj1), id(obj2))
转载请务必保留此出处:http://blog.csdn.net/fgf00/article/details/60134384