正则表达式
匹配多位数字
re.findall(r'\d{n}', str)
# 返回 n 位数字
匹配小数
re.findall(r'\d+\.\d+', str)
# 返回小数
list
判断列表是否为空
list == []
len(list) == 0 # ==> []
len(list) > 0 # ==> 非空
$$axis = \begin{cases} 0, & \text{列} \\ 1, & \text{行} \end{cases}$$
从后向前取n个值
arr[-n:]
获取 value 最大元素的 index
aa = [...]
aa.index(max(aa))
迭代函数
enumerate(可迭代对象)
# 生成 index 和 value
pandas
合并
pd.concat([data, pd.DataFrame(columns = blank)], axis = 0) # 增加空白columns
删除指定column为空的行
data.dropna(subset = ['price'])
删除某一列的值重复的行
df.drop_duplicates(subset = 'label')
判断元素是否为空
pd.isna()
pd.isnull()
pd.notnull()
将某一列为空的值取出来
df[pd.isnull(df['a'])] # 'a' 为要检查的列名，返回该列为空的所有行
replace 替换多个
df.replace({'A':100, 'B':50})
排序
df.sort_values(by = ['a'])
删除空白列
df.dropna(axis = 1, how = 'all')
# axis = 0 按行(default)
# axis = 1 按列
# any : 只要有 1 个
# all : 全部都为空才 drop
取最大/最小值的 index
df.idxmax() # 最大值的索引
df.idxmin() # 最小值的索引
使用plotly画 dataframe
import plotly.express as px
px.scatter(data, x = '', y = '') # x 和 y 是 data 中的 columns
数据类型转换
判断数据类型
isinstance(a, str) # 判断 a 是否为 str型
字符串 ==> 数值
int(str)
ndarray 转 list
aa = np.array([])
bb = aa.tolist()
列标转 Excel 列标字母(调用 xlsxwriter)
xlsxwriter.utility.xl_col_to_name(index)
- xlsxwriter 源码方法:
def xl_col_to_name(col, col_abs=False):
    """
    Convert a zero indexed column cell reference to a string.

    Excel column names are a bijective base-26 numeral: 0 -> 'A',
    25 -> 'Z', 26 -> 'AA', 701 -> 'ZZ', 702 -> 'AAA', and so on.

    Args:
        col:     The cell column. Int.
        col_abs: Optional flag to make the column absolute. Bool.

    Returns:
        Column style string, prefixed with '$' when col_abs is true.
        None (after emitting a warning) when col is negative.

    """
    if col < 0:
        # Imported locally so the snippet is self-contained.
        from warnings import warn
        warn("Col number %d must be >= 0" % col)
        return None

    col_num = col + 1  # Change to 1-index.
    letters = []

    while col_num:
        # Subtracting 1 before dividing maps the 1..26 digit range onto
        # 0..25, so no special case is needed for a remainder of zero.
        # divmod keeps the arithmetic in exact integers; the float form
        # int((col_num - 1) / 26) loses precision for very large indices.
        col_num, offset = divmod(col_num - 1, 26)
        letters.append(chr(ord('A') + offset))

    # Letters were produced least-significant first, so reverse them.
    prefix = '$' if col_abs else ''
    return prefix + ''.join(reversed(letters))
- stackoverflow 方法
# Convert the integer column number `column_int` (expected to be defined
# before this snippet runs) into its Excel column letters, stored in `letter`.
start_index = 1  # it can start either at 0 or at 1
letter = ''
# Normalise to a 1-based number so both start_index conventions share one loop.
remaining = int(column_int) - start_index + 1
while remaining > 0:
    # Bijective base-26: subtract 1 so the digit range 1..26 maps to 0..25.
    # This works for any number of letters; the earlier two-step form broke
    # beyond 'ZZ' (e.g. 1-based 703 produced '[' instead of 'AAA').
    remaining, offset = divmod(remaining - 1, 26)
    # Digits come out least-significant first, so prepend each one.
    letter = chr(65 + offset) + letter