执行pyshark报错的解决方法 lxml.etree.XMLSyntaxError: Input is not proper UTF-8, indicate encoding !

该博客介绍了使用pyshark库解析pcap文件时遇到的非UTF-8编码错误,以及通过修改代码实现正确解码的方法。文章展示了修复后的代码执行过程,打印出TLS协议中不同类型的报文,如ServerHello、ClientKeyExchange等,并统计了各类型报文的数量。
摘要由CSDN通过智能技术生成

python代码

import pyshark
from collections import Counter

# Open the capture file and keep only the TLS records of interest.
# The display filter selects handshake types 2, 1, 12, 16 and 4 (ServerHello,
# ClientHello, ServerKeyExchange, ClientKeyExchange and NewSessionTicket in the
# TLS handshake-type registry) plus record content type 20 (ChangeCipherSpec),
# restricted to record version 0x0303 (TLS 1.2).
cap = pyshark.FileCapture(
    r'E:\test.pcap',
    display_filter=(
        '(((((((tls.handshake.type == 2) '
        '|| (tls.handshake.type == 1)) '
        '|| (tls.record.content_type == 20)) '
        '|| (tls.handshake.type == 12)) '
        '|| (tls.handshake.type == 16)) '
        '|| (tls.handshake.type == 4))) '
        '&& (tls.record.version == 0x0303)'
    ),
)

# Every TLS record description seen so far, in capture order (duplicates kept).
signal_names_all = []
# The first occurrence of each distinct TLS record description, in capture order.
signal_names_unique = []


def print_conversation_header(pkt) -> None:
    """Print a one-line summary of ``pkt`` and record its TLS record description.

    The summary contains the packet number, sniff time, transport protocol,
    source ``address:port``, destination ``address:port`` and the value of
    ``pkt.tls.record``.

    Side effects:
        Appends the record description to the module-level
        ``signal_names_all`` list, and to ``signal_names_unique`` the first
        time that description is seen.

    Args:
        pkt: A packet object exposing ``number``, ``sniff_time``,
            ``transport_layer``, ``ip``, ``tls`` and item access by
            transport-layer name (as pyshark packets do).

    A packet missing any of those attributes (for example one without an
    ``ip`` or ``tls`` layer) is not recorded; the ``AttributeError`` message
    is printed instead.
    """
    try:
        number = pkt.number
        sniff_time = pkt.sniff_time
        protocol = pkt.transport_layer
        src_addr = pkt.ip.src
        src_port = pkt[protocol].srcport
        dst_addr = pkt.ip.dst
        dst_port = pkt[protocol].dstport
        signal_name = pkt.tls.record
    except AttributeError as e:
        # Best effort: report the missing layer/field and skip this packet.
        print(e)
        return

    print('%s  %s  %s  %s:%s --> %s:%s  %s' % (
        number, sniff_time, protocol, src_addr, src_port, dst_addr, dst_port, signal_name))

    # The full history always grows; the unique list only on first sight.
    signal_names_all.append(signal_name)
    if signal_name not in signal_names_unique:
        signal_names_unique.append(signal_name)


def _message_name(record):
    """Return the message-name field of a TLS record description.

    Descriptions are split on ':' (at most three splits) and the third field
    is returned, leading space included. A description with fewer than three
    fields yields its last field instead of raising ``IndexError``.
    """
    parts = record.split(':', 3)
    return parts[2] if len(parts) > 2 else parts[-1]


# Run the callback over every packet in the capture; this fills
# signal_names_all and signal_names_unique.
cap.apply_on_packets(print_conversation_header, timeout=100)

# Statistics over every recorded message, duplicates included.
signal_names_all_condense = [_message_name(name) for name in signal_names_all]
print('signal_names_all_condense:', signal_names_all_condense)
signal_names_all_condense_len = len(signal_names_all_condense)
print('signal_names_all_length:', signal_names_all_condense_len)
print('signal_names_all_count:', Counter(signal_names_all_condense).most_common(7))

# Two different full descriptions can share the same message name, so the
# condensed names are de-duplicated again here (dict.fromkeys keeps the
# first-seen order).
signal_names_unique_condense = list(
    dict.fromkeys(_message_name(name) for name in signal_names_unique))
print('signal_names_unique_condense:', signal_names_unique_condense)
print('signal_names_unique_length:', len(signal_names_unique_condense))


报错信息

Traceback (most recent call last):
  File "E:\project\pythonProject\main.py", line 20, in <module>
    cap.apply_on_packets(print_conversation_header, timeout=1000, )
  File "C:\Users\Administrator\AppData\Local\Programs\Python\Python39\lib\site-packages\pyshark\capture\capture.py", line 274, in apply_on_packets
    return self.eventloop.run_until_complete(coro)
  File "C:\Users\Administrator\AppData\Local\Programs\Python\Python39\lib\asyncio\base_events.py", line 642, in run_until_complete
    return future.result()
  File "C:\Users\Administrator\AppData\Local\Programs\Python\Python39\lib\asyncio\tasks.py", line 478, in wait_for
    return fut.result()
  File "C:\Users\Administrator\AppData\Local\Programs\Python\Python39\lib\site-packages\pyshark\capture\capture.py", line 285, in packets_from_tshark
    await self._go_through_packets_from_fd(tshark_process.stdout, packet_callback, packet_count=packet_count)
  File "C:\Users\Administrator\AppData\Local\Programs\Python\Python39\lib\site-packages\pyshark\capture\capture.py", line 301, in _go_through_packets_from_fd
    packet, data = await self._get_packet_from_stream(fd, data, got_first_packet=packets_captured > 0,
  File "C:\Users\Administrator\AppData\Local\Programs\Python\Python39\lib\site-packages\pyshark\capture\capture.py", line 360, in _get_packet_from_stream
    packet = packet_from_xml_packet(packet, psml_structure=psml_structure)
  File "C:\Users\Administrator\AppData\Local\Programs\Python\Python39\lib\site-packages\pyshark\tshark\tshark_xml.py", line 26, in packet_from_xml_packet
    xml_pkt = lxml.objectify.fromstring(xml_pkt, parser)
  File "src\lxml\objectify.pyx", line 1808, in lxml.objectify.fromstring
  File "src\lxml\etree.pyx", line 3237, in lxml.etree.fromstring
  File "src\lxml\parser.pxi", line 1896, in lxml.etree._parseMemoryDocument
  File "src\lxml\parser.pxi", line 1784, in lxml.etree._parseDoc
  File "src\lxml\parser.pxi", line 1141, in lxml.etree._BaseParser._parseDoc
  File "src\lxml\parser.pxi", line 615, in lxml.etree._ParserContext._handleParseResultDoc
  File "src\lxml\parser.pxi", line 725, in lxml.etree._handleParseResult
  File "src\lxml\parser.pxi", line 654, in lxml.etree._raiseParseError
  File "<string>", line 145
lxml.etree.XMLSyntaxError: Input is not proper UTF-8, indicate encoding !
Bytes: 0xE5 0x2C 0x69 0x64, line 145, column 213

解决方法

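参考 pyshark 的这个提交:https://github.com/KimiNewt/pyshark/pull/479/commits/81cc27e8f88cd7fe9f7a25579048f44ac5d9c69f

按照该提交修改本地 site-packages\pyshark\tshark\tshark_xml.py 中的 packet_from_xml_packet 函数(即上面报错堆栈中出错的位置):在调用 lxml.objectify.fromstring 之前,先用 decode(errors='ignore') 解码 tshark 输出并丢弃无法解码的字节。修改后的代码如下: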

# Excerpt from the body of pyshark's packet_from_xml_packet (tshark_xml.py),
# shown with the one-line fix applied; the enclosing function header is not
# part of this excerpt.
if not isinstance(xml_pkt, lxml.objectify.ObjectifiedElement):
    parser = lxml.objectify.makeparser(huge_tree=True, recover=True)
    # Decode before parsing and drop any bytes that cannot be decoded, so lxml
    # no longer rejects the packet with "Input is not proper UTF-8".
    # NOTE(review): assumes xml_pkt is a bytes object at this point - confirm.
    xml_pkt = xml_pkt.decode(errors='ignore') # newly added line (the fix)
    xml_pkt = lxml.objectify.fromstring(xml_pkt, parser)
if psml_structure:
    return _packet_from_psml_packet(xml_pkt, psml_structure)

在这里插入图片描述

执行成功后的输出

C:\Users\Administrator\AppData\Local\Programs\Python\Python39\python.exe E:/project/pythonProject/main.py
43  2021-06-05 12:20:38.826781  TCP  50.116.10.209:443 --> 192.168.31.211:60979  TLSv1.2 Record Layer: Handshake Protocol: Server Hello
79  2021-06-05 12:20:39.110780  TCP  61.136.167.251:443 --> 192.168.31.211:60984  TLSv1.2 Record Layer: Handshake Protocol: Server Hello
83  2021-06-05 12:20:39.112349  TCP  61.136.167.251:443 --> 192.168.31.211:60984  TLSv1.2 Record Layer: Handshake Protocol: Certificate
85  2021-06-05 12:20:39.112814  TCP  61.136.167.251:443 --> 192.168.31.211:60982  TLSv1.2 Record Layer: Handshake Protocol: Server Hello
89  2021-06-05 12:20:39.114782  TCP  61.136.167.251:443 --> 192.168.31.211:60982  TLSv1.2 Record Layer: Handshake Protocol: Certificate
91  2021-06-05 12:20:39.116997  TCP  192.168.31.211:60984 --> 61.136.167.251:443  TLSv1.2 Record Layer: Handshake Protocol: Client Key Exchange
92  2021-06-05 12:20:39.120964  TCP  192.168.31.211:60982 --> 61.136.167.251:443  TLSv1.2 Record Layer: Handshake Protocol: Client Key Exchange
93  2021-06-05 12:20:39.128045  TCP  61.136.167.251:443 --> 192.168.31.211:60984  TLSv1.2 Record Layer: Change Cipher Spec Protocol: Change Cipher Spec

##############此处省略很多行#####################

3758  2021-06-05 12:20:52.363707  TCP  39.107.11.210:443 --> 192.168.31.211:61084  TLSv1.2 Record Layer: Handshake Protocol: Server Hello
3760  2021-06-05 12:20:52.363707  TCP  39.107.11.210:443 --> 192.168.31.211:61084  TLSv1.2 Record Layer: Handshake Protocol: Certificate
3762  2021-06-05 12:20:52.367089  TCP  192.168.31.211:61083 --> 39.107.11.210:443  TLSv1.2 Record Layer: Handshake Protocol: Client Key Exchange
3764  2021-06-05 12:20:52.374333  TCP  192.168.31.211:61084 --> 39.107.11.210:443  TLSv1.2 Record Layer: Handshake Protocol: Client Key Exchange
3765  2021-06-05 12:20:52.393812  TCP  39.107.11.210:443 --> 192.168.31.211:61083  TLSv1.2 Record Layer: Handshake Protocol: New Session Ticket
3772  2021-06-05 12:20:52.404383  TCP  39.107.11.210:443 --> 192.168.31.211:61084  TLSv1.2 Record Layer: Handshake Protocol: New Session Ticket
signal_names_all_condense: [' Server Hello', ' Server Hello', ' Certificate', ' Server Hello', ' Certificate', ' Client Key Exchange', ' Client Key Exchange', ' Change Cipher Spec', ' Change Cipher Spec', ' Multiple Handshake Messages', ' Multiple Handshake Messages', ' Server Hello', ' Certificate', ' Client Key Exchange', ' Client Key Exchange', ' Change Cipher Spec', ' Client Key Exchange', ' Change Cipher Spec', ' Change Cipher Spec', ' Server Hello', ' Server Key Exchange', ' Client Key Exchange', ' New Session Ticket', ' Change Cipher Spec', ' Server Hello', ' Change Cipher Spec', ' Change Cipher Spec', ' Server Hello', ' Change Cipher Spec', ' Change Cipher Spec', ' Server Hello', ' Change Cipher Spec', ' Server Hello', ' Certificate', ' Client Key Exchange', ' New Session Ticket', ' Server Hello', ' Certificate', ' Client Key Exchange', ' Server Hello', ' Certificate', ' Client Key Exchange', ' Server Hello', ' Certificate', ' Client Key Exchange', ' Server Hello', ' Certificate', ' Client Key Exchange', ' Server Hello', ' Certificate', ' Client Key Exchange', ' Server Hello', ' Certificate', ' New Session Ticket', ' New Session Ticket', ' Client Key Exchange', ' New Session Ticket', ' New Session Ticket', ' New Session Ticket', ' Server Hello', ' Certificate', ' Client Key Exchange', ' Server Hello', ' Certificate', ' Client Key Exchange', ' Server Hello', ' Certificate', ' Client Key Exchange', ' New Session Ticket', ' Server Hello', ' Certificate', ' Client Key Exchange', ' New Session Ticket', ' New Session Ticket', ' New Session Ticket', ' Server Hello', ' Certificate', ' Client Key Exchange', ' Server Hello', ' Certificate', ' New Session Ticket', ' Client Key Exchange', ' Server Hello', ' Certificate', ' Client Key Exchange', ' Server Hello', ' Server Key Exchange', ' Client Key Exchange', ' Server Hello', ' Certificate', ' Client Key Exchange', ' New Session Ticket', ' New Session Ticket', ' New Session Ticket', ' New Session Ticket', ' New Session 
Ticket', ' Server Hello', ' Certificate', ' Server Hello', ' Certificate', ' Server Hello', ' Certificate', ' Server Hello', ' Certificate', ' Server Hello', ' Certificate', ' Server Hello', ' Certificate', ' Server Hello', ' Certificate', ' Server Hello', ' Certificate', ' Client Key Exchange', ' Server Hello', ' Certificate', ' Client Key Exchange', ' Server Hello', ' Certificate', ' Server Hello', ' Certificate', ' Client Key Exchange', ' Server Hello', ' Certificate', ' Client Key Exchange', ' New Session Ticket', ' New Session Ticket', ' Client Key Exchange', ' New Session Ticket', ' New Session Ticket', ' New Session Ticket', ' Server Hello', ' Server Key Exchange', ' Client Key Exchange', ' New Session Ticket', ' Server Hello', ' Server Hello', ' Server Key Exchange', ' Client Key Exchange', ' Change Cipher Spec', ' New Session Ticket', ' Change Cipher Spec', ' Server Hello', ' Certificate', ' Client Key Exchange', ' Server Hello', ' Certificate', ' Client Key Exchange', ' New Session Ticket', ' New Session Ticket', ' Server Hello', ' Server Key Exchange', ' Client Key Exchange', ' New Session Ticket', ' Change Cipher Spec', ' Server Hello', ' Certificate', ' Client Key Exchange', ' Server Hello', ' Certificate', ' Client Key Exchange', ' New Session Ticket', ' New Session Ticket', ' Server Hello', ' Server Hello', ' Certificate', ' Client Key Exchange', ' New Session Ticket', ' Server Hello', ' Certificate', ' Server Hello', ' Certificate', ' Client Key Exchange', ' Client Key Exchange', ' New Session Ticket', ' New Session Ticket', ' Server Hello', ' Server Key Exchange', ' Client Key Exchange', ' New Session Ticket', ' Change Cipher Spec', ' Server Hello', ' Server Key Exchange', ' Client Key Exchange', ' New Session Ticket', ' Change Cipher Spec', ' Server Hello', ' Certificate', ' Client Key Exchange', ' Server Hello', ' Certificate', ' Client Key Exchange', ' New Session Ticket', ' New Session Ticket', ' Change Cipher Spec', ' Server Hello', ' 
Certificate', ' Client Key Exchange', ' New Session Ticket', ' Server Hello', ' Certificate', ' Client Key Exchange', ' Server Hello', ' Certificate', ' Client Key Exchange', ' New Session Ticket', ' New Session Ticket', ' Server Hello', ' Certificate', ' Client Key Exchange', ' Server Hello', ' Certificate', ' Client Key Exchange', ' New Session Ticket', ' New Session Ticket', ' Server Hello', ' Server Hello', ' Change Cipher Spec', ' Change Cipher Spec', ' Server Hello', ' Certificate', ' Server Hello', ' Certificate', ' Client Key Exchange', ' Client Key Exchange', ' New Session Ticket', ' New Session Ticket', ' Server Hello', ' Change Cipher Spec', ' Server Hello', ' Server Hello', ' Change Cipher Spec', ' Change Cipher Spec', ' Server Hello', ' Change Cipher Spec', ' Server Hello', ' Server Hello', ' Change Cipher Spec', ' Change Cipher Spec', ' Server Hello', ' Change Cipher Spec', ' Server Hello', ' Change Cipher Spec', ' Server Hello', ' Certificate', ' Client Key Exchange', ' Server Hello', ' Certificate', ' Client Key Exchange', ' New Session Ticket', ' New Session Ticket', ' Server Hello', ' Certificate', ' Client Key Exchange', ' New Session Ticket', ' Server Hello', ' Server Key Exchange', ' Client Key Exchange', ' New Session Ticket', ' Change Cipher Spec', ' Server Hello', ' Certificate', ' Server Hello', ' Certificate', ' Client Key Exchange', ' Client Key Exchange', ' New Session Ticket', ' New Session Ticket']
signal_names_all_length: 267
signal_names_all_count: [(' Server Hello', 75), (' Client Key Exchange', 54), (' Certificate', 51), (' New Session Ticket', 49), (' Change Cipher Spec', 28), (' Server Key Exchange', 8), (' Multiple Handshake Messages', 2)]
signal_names_unique_condense: [' Server Hello', ' Certificate', ' Client Key Exchange', ' Change Cipher Spec', ' Multiple Handshake Messages', ' Server Key Exchange', ' New Session Ticket', ' Change Cipher Spec']
signal_names_unique_length: 8

Process finished with exit code 0
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值