from pandas import read_csv

# Load the sample CSV using read_csv's default options.
csv_path = 'D://PA//4.1//1.csv'
df = read_csv(csv_path)
df
# Load the same file again, this time naming the text encoding explicitly.
df = read_csv(csv_path, encoding='UTF-8')
# Note: if the file contains Chinese text, make sure it is saved as UTF-8 and read with encoding='UTF-8'.
from pandas import read_table

# Read the text file using read_table's default options.
txt_path = 'D://PA//4.1//2.txt'
df = read_table(txt_path)
df
# Re-read it as comma-separated data, supplying the column names explicitly.
df = read_table(txt_path, sep=',', names=['age', 'name'])
df
from pandas import read_excel

# Read the worksheet named 'data' from the workbook.
# The keyword is `sheet_name`: the older `sheetname` spelling was deprecated
# and then removed from pandas, so it fails on current releases.
df = read_excel('D://PA//4.1//3.xlsx', sheet_name='data')
from pandas import DataFrame

# Build a small two-column frame from per-column lists.
columns = {
    'age': [21, 22, 23],
    'name': ['KEN', 'John', 'JIMI'],
}
df = DataFrame(columns)
df
# First export: to_csv with its default options.
df.to_csv("D:\\4.1\\df.csv")
# Second export: same frame, written without the row index.
df.to_csv("D:\\PA\\4.2\\df.csv", index=False)
# drop_duplicates / dropna: removing duplicate rows and rows with missing values
from pandas import read_csv

# Load the data, then produce a copy with duplicated rows removed.
csv_path = 'D://PA//4.3//data.csv'
df = read_csv(csv_path)
newDF = df.drop_duplicates()
from pandas import read_csv

# Load the data, then produce a copy without the rows that hold missing values.
csv_path = 'D://PA//4.4//data.csv'
df = read_csv(csv_path)
newDF = df.dropna()
from pandas import read_csv

df = read_csv('D://PA//4.5//data.csv')
# Strip each name and write the cleaned column back into the frame;
# newName stays bound to the stripped Series as well.
df['name'] = newName = df['name'].str.strip()
from pandas import read_csv

df = read_csv("D://PA//4.6//data.csv")
# The numbers must be converted to strings before they can be sliced.
df['tel'] = df['tel'].astype(str)
# Carrier prefix: characters 0-2.
bands = df['tel'].str[0:3]
# Area code: characters 3-6.
areas = df['tel'].str[3:7]
# Number segment: characters 7-10.
nums = df['tel'].str[7:11]
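# n is the maximum number of splits: n=1 splits once, giving two columns; n=0, -1 or None means no limit (split at every separator).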
from pandas import Series
from pandas import DataFrame
from pandas import read_csv

df = read_csv("D:\\Python\\3.2\\2.csv")
# Split each name at the first space only (n=1) and expand the pieces into
# separate columns. n and expand are passed by keyword because pandas 2.0
# made every argument after `pat` keyword-only; the positional form
# str.split(' ', 1, True) raises TypeError there.
newDF = df['name'].str.split(' ', n=1, expand=True)
newDF.columns = ['band', 'name']
import pandas
from pandas import read_csv

df = read_csv("D://4.1//data5.csv", sep="|")
# Rows with more than 10000 comments.
df[df['comments'] > 10000]
# Sample output:
# id ... title
#0 1197453 华为(HUAWEI)荣耀平板 Wifi版 8英寸平板电脑(高通骁龙四核 1280×800 1...
#6 996957 Apple iPad Air MD785CH 9.7英寸平板电脑 (16G WiFi版)深空灰色
# Range filter via between(), which includes both 1000 and 10000:
#df[df.comments.between(1000, 10000)]
# Sample output:
# id ... title
#1 1192330 ... 小米(MI)7.9英寸平板 WIFI 64GB(NVIDIA Tegra K1 2.2GHz...
#2 1225995 ... 小米(MI)7.9英寸平板 WIFI 16GB(NVIDIA Tegra K1 2.2GHz...
#3 1308557 ... 华为(HUAWEI)荣耀平板优享版 Wifi 8英寸平板电脑(高通骁龙 64位四核
#5 1197789 ... NaN
#7 1150612 ... 台电(Teclast) P98 3G八核 9.7英寸平板电脑(MTK8392 Air视网膜屏...
#8 1285329 ... 台电(Teclast)X98 Air 3G 双系统版 9.7英寸平板电脑(正版Win8+安卓...
# Rows whose title is missing.
df[df['title'].isnull()]
# Rows whose title contains '台电'; na=False makes missing titles count as no match.
df[df['title'].str.contains('台电', na=False)]
# Rows with 1000 <= comments <= 10000, written as two explicit comparisons.
df[(df['comments'] >= 1000) & (df['comments'] <= 10000)]
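# In str.contains, "na" refers to missing (null) values; na=False makes rows with a missing value evaluate to False.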
import numpy
from pandas import read_csv

df = read_csv("D://4.1//data6.csv")
# Draw three random row positions (repeats are possible). The upper bound is
# len(df) rather than a hard-coded 10, so every row can be drawn and no
# position falls outside a file with fewer than 10 rows.
r = numpy.random.randint(0, len(df), 3)
# Select by position: .loc would treat the numbers as index labels and raise
# KeyError for any label that is not present in the index.
df.iloc[r, :]
import pandas
from pandas import read_csv

# Load the three '|'-separated files in order, then concatenate them into one frame.
df1, df2, df3 = (
    read_csv(path, sep="|")
    for path in (
        "D://4.1//data1.1.csv",
        "D://4.1//data2.2.csv",
        "D://4.1//data3.3.csv",
    )
)
df = pandas.concat([df1, df2, df3])
from pandas import read_csv

# Read the space-separated file, naming its three columns.
df = read_csv(
    "D://4.1//data7.csv",
    sep=" ",
    names=['band', 'area', 'num'],
)
# Cast every column to str so that + joins text instead of adding numbers.
df = df.astype(str)
prefix = df['band'] + df['area']
tel = prefix + df['num']
import pandas
from pandas import read_csv

# Both files are '|'-separated and get their column names supplied here.
items = read_csv("D://4.1//data8.1.csv", sep='|', names=['id', 'comments', 'title'])
prices = read_csv(
    "D://4.1//data8.2.csv",
    sep='|',
    names=['id', 'oldPrice', 'nowPrice'],
)
# Join each item with its price record through the 'id' column of both frames.
itemPrices = items.merge(prices, left_on='id', right_on='id')
from pandas import read_csv

df = read_csv("D:\\4.1\\data9.csv", sep="|")
# Element-wise product of the price and num columns.
result = df['price'] * df['num']