画饼状图
% % time
plt. figure( figsize= ( 5 , 5 ) )
plt. axis( 'equal' )
label = [ 'DV' , 'unknown' , 'OV' , 'EV' ]
explode= [ 0.01 , 0.01 , 0.01 , 0.01 ]
values = [ certlevel[ 'DV' ] , certlevel[ 'UnknownValidationLevel' ] , certlevel[ 'OV' ] , certlevel[ 'EV' ] ]
plt. pie( values, labels= label, explode= explode, labeldistance= 1.1 , autopct= '%1.1f%%' , shadow= False , startangle= 90 , pctdistance= 0.6 )
plt. legend( loc= 'upper left' , bbox_to_anchor= ( - 0.1 , 1 ) )
plt. title( 'certlevel-stat' )
plt. savefig( './certlevel-stat.png' )
plt. show( )
根据特定条件查询dataframe
# Count the non-null `ip` values among rows where tlshave, certhave or
# ocsphave equals True.
num = test.query(
    'tlshave == True | certhave == True | ocsphave == True'
)['ip'].count()
# Boolean-mask selection: keep only the rows whose certvalid equals True.
Cert_valid = ct_conntrue.loc[ct_conntrue['certvalid'] == True]
打印信息
# Print the total, the valid count and the valid share as a percentage.
# The ratio falls back to 0.0 when the total is 0 so an empty data set does not
# raise ZeroDivisionError.
print(
    '通过cert传递sct的{}个域名中,有效的sct有{},占比{:.2%}'.format(
        sct_cert_num,
        sct_cert_valid_num,
        sct_cert_valid_num / sct_cert_num if sct_cert_num else 0.0,
    )
)
series使用string操作
# Split every string of the column on ';' and collect the pieces into a plain
# Python list (one inner list per row).
sct_list = sct_cert.loc[:, 'sctcert_valid'].str.split(";").tolist()
# Keep rows whose connerror does NOT contain "fallback". Comparing with
# `== False` also drops rows where connerror is null, because str.contains
# yields NaN for them and NaN == False is False.
errs = scsv_raw.loc[scsv_raw['connerror'].str.contains("fallback") == False]
列表转series
# Wrap the existing `temp` object in a pandas Series (default integer index).
sct_tls_verify = pd.Series(data=temp)
统计series的值
# Frequency of each distinct value in the have_sct column (nulls are excluded
# by default); the result is displayed as the cell output.
test.loc[:, 'have_sct'].value_counts()
合并series
# Stack the three objects end to end and renumber the result 0..n-1 instead of
# keeping the original index labels.
sct_log = pd.concat(
    objs=[sct_cert_log, sct_tls_log, sct_ocsp_log],
    axis='index',
    ignore_index=True,
)
列表去重
# De-duplicate via a set; the order of the resulting list is not guaranteed.
uniq_item = [*set(item)]
读写csv文件
# Read only the two needed columns from the raw CSV.
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0;
# `on_bad_lines="warn"` keeps the old behaviour (skip malformed lines and emit
# a warning for each one).
raw = pd.read_csv(
    rawcsv,
    encoding="ISO-8859-1",
    usecols=["server_name", "protocol"],
    on_bad_lines="warn",
    low_memory=False,
)
# Write the table without the index column.
top.to_csv("top_stat.csv", index=False)
根据一定条件为dataframe添加一列
def classify_scsv(v):
    """Classify one connection-error value into a result label.

    Args:
        v: A single connerror cell; may be a string or a null value.

    Returns:
        "Success" when the text form of *v* contains "inappropriate fallback",
        "Failed" when *v* is null (None / NaN), and "Other Error" otherwise.
    """
    # NOTE(review): presumably a null error means the downgraded handshake was
    # accepted, hence "Failed" -- confirm against the scan tool's semantics.
    if "inappropriate fallback" in str(v):
        return "Success"
    if pd.isnull(v):
        return "Failed"
    return "Other Error"
def classify_scsv_wrapper(a):
    """Return a copy of *a* with an added ``errors_classified`` column.

    The new column holds ``classify_scsv`` applied to every value of
    ``a.connerror``. ``DataFrame.assign`` returns a new frame, so *a* itself
    is left unmodified.
    """
    # Pass the function directly; the original lambda only forwarded its
    # argument and shadowed the outer name `x`.
    return a.assign(errors_classified=a.connerror.apply(classify_scsv))


scsvclassified = classify_scsv_wrapper(scsv_fhst)
def connclass(x):
    """Return True when the text form of *x* contains "fallback".

    *x* is converted with ``str()`` first, so null values (None / NaN) are
    handled without raising and yield False.
    """
    return "fallback" in str(x)
# Add a boolean column: True where the connerror text contains "fallback".
# connclass is passed directly; a forwarding lambda adds nothing.
test['apply'] = test.connerror.apply(connclass)
# have_sct is True when at least one of tlshave / certhave / ocsphave equals
# True. The column-wise comparison replaces a row-by-row apply with a
# redundant `True if ... else False`; null cells compare as False either way.
top_table["have_sct"] = (
    (top_table.tlshave == True)
    | (top_table.certhave == True)
    | (top_table.ocsphave == True)
)
垃圾回收
import gc

# Drop the name binding first, then ask the collector to run a full collection.
del ct_raw
gc.collect()
dataframe行数
# Total number of rows, nulls included.
test.shape[0]
# Number of non-null values in the ip column; this can be smaller than the
# row count when ip has missing values.
test['ip'].count()
根据某列的值是否为空来选取行
# Rows whose connerror is null.
ct_conntrue = ct_raw[ct_raw['connerror'].isnull()]
# Opposite selection: rows whose connerror is NOT null. The original had a
# doubled dot (`. .notnull()`), which is a syntax error.
# NOTE: both lines bind the same name, so the second overwrites the first;
# they are two alternatives, use whichever one is needed.
ct_conntrue = ct_raw[ct_raw['connerror'].notnull()]
将某列按是否为空映射为字符串:空值为"False",非空为"True"
# Replace the column with string flags: "True" where the original value is
# present, "False" where it is null. Note the results are strings, not bools.
top_new["http-header--sts"] = (
    top_new["http-header--sts"].notna().map({True: "True", False: "False"})
)
applymap对dataframe每一个数据进行操作
# Element-wise transform: every cell becomes 1 if it is greater than 0, else 0.
# The result is a new frame (displayed as cell output); df1 is not modified.
# NOTE(review): pandas 2.1+ deprecates DataFrame.applymap in favour of
# DataFrame.map -- switch once the minimum pandas version allows it.
df1.applymap(lambda cell: int(cell > 0))
遍历series
# Iterate over (index label, value) pairs.
for item, value in test.items():
    pass  # placeholder: the original snippet showed only the loop header
遍历dataframe
# Walk the frame row by row and report rows whose ciphersuite is not in
# scsvCiphers. iterrows() yields (index, row) pairs; only the row is used.
# NOTE(review): the original indentation was lost; the flag reset and the
# counter increment are assumed to belong inside the `if` -- confirm.
for _, row in error_null.iterrows():
    if row['ciphersuite'] not in scsvCiphers:
        print(row['domain'], row['ciphersuite'])
        flag = 0
        count_othercipher += 1
根据某列匹配dataframe信息,并去重
# Keep the rows of raw_data whose server_name appears among the distinct
# domain values of topdomain_raw.
top_raw = raw_data.loc[
    raw_data["server_name"].isin(topdomain_raw["domain"].unique())
]
# One row per server_name; the first occurrence wins.
top = top_raw.drop_duplicates(subset='server_name', keep='first')
# Renumber the index 0..n-1 after the filtering above.
top.index = range(len(top))
top.head(n=3)
去掉某列
# Remove a single column in place.
top_new.drop(columns='connerror', inplace=True)
# Remove several columns in place with one call.
top_new.drop(columns=['tlshave', 'certhave', 'ocsphave'], inplace=True)
重命名
# rename() returns a new frame, so rebind the name; server_name becomes domain.
top_new = top_new.rename({'server_name': 'domain'}, axis='columns')