# 自己总结的一点小知识,相互提高吧。
# -*- coding: utf-8 -*-
"""
Created on Thu Dec 19 14:43:52 2019
Description: 关于数据抽取
@author: EricRay
"""
import numpy as np
import pandas as pd
from pandas import read_excel
from pandas import DataFrame
# Load worksheet "Sheet4" of the sample workbook into a DataFrame, then
# print the leading rows (DataFrame.head) followed by a newline string.
df = pd.read_excel(
    io=r'E:\python\hello\files\i_nuc.xls',
    sheet_name='Sheet4',
)
print(df.head(), '\n')
# i_nuc.xls会在资源中给出
"""
记录抽取:
根据一定的条件,对数据进行抽取
df[condition]
condition表示过滤条件
返回值:DataFrame
condition常用类型:
比较运算:==, < , > , <= , >= , !=
范围运算:between(left,right)
空值运算:pandas.isnull(column),如df[df.title.isnull()]
字符串匹配:str.contains(pattern,na = False),如df[df.title.str.contains('column',na=False)]
逻辑运算:&,| ,~(取反;对布尔Series不能使用not)
"""
# Record extraction demos: each statement builds a boolean mask over df,
# selects the matching rows with df[mask], and prints them under a label.

# Equality comparison: rows whose phone number is exactly 13322252452.
# The label shows the same number that the filter compares against.
print("电话为13322252452的学生:\n", df[df.电话 == 13322252452], '\n')

# Ordering comparison: rows whose phone number is greater than 13500000000.
print("电话>13500000000的学生:\n", df[df.电话 > 13500000000], '\n')

# Range test: Series.between includes both endpoints by default.
# The label states the upper bound that the filter actually uses.
print("电话号码在13400000000和13999999999之间:\n",
      df[df.电话.between(13400000000, 13999999999)], '\n')

# Null test: rows whose IP value is missing.
print("IP为空的:\n", df[df.IP.isnull()], '\n')

# Substring match: rows whose IP contains '222'. na=False makes missing
# values count as non-matches so the mask stays purely boolean.
print("IP包含222:\n", df[df.IP.str.contains('222', na=False)], '\n')