python统计字符出现次数注释_利用python进行数据分析-pandas中所出现的函数及其注释...

最新推荐文章于 2022-06-13 15:14:08 发布

weixin_39807541

最新推荐文章于 2022-06-13 15:14:08 发布

阅读量262

点赞数

文章标签： python统计字符出现次数注释

利用python进行数据分析-pandas中所出现的函数及其注释

import pandas as pd

import numpy as np

from pandas import Series, DataFrame

# ---------------------------------------------------------------------------
# Series basics
# ---------------------------------------------------------------------------

# Without an explicit index, the values are labelled 0..n-1.
obj = pd.Series([4, 7, -5, 3])
print(obj)
# 0    4
# 1    7
# 2   -5
# 3    3
print(obj.values)  # the underlying values: [ 4  7 -5  3]
print(obj.index)  # RangeIndex(start=0, stop=4, step=1)

# Use 'a'..'d' as the labels instead of the default integers.
obj = pd.Series([-1, 2, 3, 4], index=list('abcd'))
print(obj)
obj['a'] = 10  # a label can be used to look up and to overwrite a value
print(obj['a'])
print(obj[['a', 'c']])  # a list of labels selects several entries at once
print(obj[obj > 0])  # boolean mask: keep only the values greater than 0
print('a' in obj)  # True
print('e' in obj)  # False

# A dict can be turned into a Series directly.
sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
obj3 = pd.Series(sdata)

# With an explicit index only the labels listed in `states` are kept.
# 'California' is not a key of sdata, so its value is NaN (not a number).
states = ['California', 'Ohio', 'Oregon', 'Texas']
obj4 = pd.Series(sdata, index=states)
print(obj4)
# California        NaN
# Ohio          35000.0
# Oregon        16000.0
# Texas         71000.0
# dtype: float64

print(pd.isnull(obj4))  # True where the value is NaN, False elsewhere
print(obj4.isnull())  # same check, as a method
print(pd.notnull(obj4))  # False where the value is NaN, True elsewhere

# Addition aligns on labels, much like a database join: labels present in
# both operands are summed, labels present in only one of them give NaN.
print(obj3 + obj4)
# California         NaN
# Ohio           70000.0
# Oregon         32000.0
# Texas         142000.0
# Utah               NaN
# dtype: float64

# Both the Series itself and its index can carry a name.
obj4.name = 'population'
obj4.index.name = 'state'

# ---------------------------------------------------------------------------
# DataFrame
#
# A DataFrame represents a rectangular table of data and contains an ordered
# collection of columns, each of which can be a different value type
# (numeric, string, boolean, etc.).
# The DataFrame has both a row and a column index.
# ---------------------------------------------------------------------------

data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002, 2003],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
}
frame = pd.DataFrame(data)  # build a DataFrame from a dict of lists
print(frame)
#     state  year  pop
# 0    Ohio  2000  1.5
# 1    Ohio  2001  1.7
# 2    Ohio  2002  3.6
# 3  Nevada  2001  2.4
# 4  Nevada  2002  2.9
# 5  Nevada  2003  3.2
print(frame.head())  # head() returns only the first five rows

# `columns` fixes the order of the columns (the result is discarded here).
# If you pass a column that isn't contained in the dict, it will appear with
# missing values in the result.
pd.DataFrame(data, columns=['year', 'state', 'pop'])

# Attribute access and key access both return the column as a Series that
# keeps the row index.
print(frame.year)
print(frame['year'])
print(frame.loc[0])  # the row labelled 0

# 'debt' is not a key of `data`, so that column starts out filled with NaN.
frame2 = pd.DataFrame(
    data,
    columns=['year', 'state', 'pop', 'debt'],
    index=['one', 'two', 'three', 'four', 'five', 'six'],
)
frame2['debt'] = 6  # a scalar replaces every value in the column
frame2['debt'] = np.arange(6.)  # the column now holds 0.0 .. 5.0
print(frame2)

# Assigning a Series aligns on the index: matching labels receive the value,
# every other row becomes NaN.
val = pd.Series([-1.2, -1.5, -1.7], index=['two', 'four', 'five'])
frame2['debt'] = val
del frame2['debt']  # remove the 'debt' column again

# Nested dict of dicts.
# If the nested dict is passed to the DataFrame, pandas will interpret the
# outer dict keys as the columns and the inner keys as the row indices.
pop = {
    'Nevada': {2001: 2.4, 2002: 2.9},
    'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6},
}
frame3 = pd.DataFrame(pop)
print(frame3)
#       Nevada  Ohio
# 2001     2.4   1.7
# 2002     2.9   3.6
# 2000     NaN   1.5
print(frame3.T)  # transpose
print(pd.DataFrame(pop, index=[2001, 2002, 2003]))
#       Nevada  Ohio
# 2001     2.4   1.7
# 2002     2.9   3.6
# 2003     NaN   NaN

# A dict of Series works the same way as a dict of dicts.
pdata = {'Ohio': frame3['Ohio'][:-1], 'Nevada': frame3['Nevada'][:2]}
print(pd.DataFrame(pdata))
#       Ohio  Nevada
# 2001   1.7     2.4
# 2002   3.6     2.9

# The names of the row index and of the column index are shown in the
# printed table, which makes clear what each axis stands for.
frame3.index.name = 'year'
frame3.columns.name = 'state'
print(frame3)
# state  Nevada  Ohio
# year
# 2001      2.4   1.7
# 2002      2.9   3.6
# 2000      NaN   1.5
print(frame3.values)  # the data as a 2-D array of shape (3, 2)

# Some Index methods and properties
#   append        Concatenate with additional Index objects, producing a new Index
#   difference    Compute set difference as an Index
#   intersection  Compute set intersection
#   union         Compute set union
#   isin          Compute boolean array indicating whether each value is
#                 contained in the passed collection
#   delete        Compute new Index with element at index i deleted
#   drop          Compute new Index by deleting passed values
#   insert        Compute new Index by inserting element at index i
#   is_monotonic_increasing
#                 Returns True if each element is greater than or equal to the
#                 previous element (called is_monotonic in older pandas)
#   is_unique     Returns True if the Index has no duplicate values
#   unique        Compute the array of unique values in the Index

# ---------------------------------------------------------------------------
# Reindexing
# ---------------------------------------------------------------------------

obj = pd.Series([4.5, 7.2, -5.3, 3.6], index=['d', 'b', 'a', 'c'])
# reindex() rearranges the data to the new labels; 'e' does not exist in
# obj, so it is filled with NaN.
obj2 = obj.reindex(list('abcde'))

obj3 = pd.Series(['blue', 'purple', 'yellow'], index=[0, 2, 4])
# 'ffill' forward-fills: the new labels 1, 3 and 5 take the value of the
# entry before them (the result is discarded here).
obj3.reindex(range(6), method='ffill')

frame = pd.DataFrame(
    np.arange(9).reshape(3, 3),
    index=['a', 'c', 'd'],
    columns=['Ohio', 'Texas', 'California'],
)
frame2 = frame.reindex(['a', 'b', 'c', 'd'])  # by default the rows are reindexed

states = ['Texas', 'Utah', 'California']
frame.reindex(columns=states)  # reindex the columns instead

# reindex() arguments
#   index       New sequence to use as index. Can be Index instance or any
#               other sequence-like Python data structure. An Index will be
#               used exactly as is without any copying.
#   method      Interpolation (fill) method; 'ffill' fills forward, while
#               'bfill' fills backward.
#   fill_value  Substitute value to use when introducing missing data by
#               reindexing.
#   limit       When forward- or backfilling, maximum size gap (in number of
#               elements) to fill.
#   tolerance   When forward- or backfilling, maximum size gap (in absolute
#               numeric distance) to fill for inexact matches.
#   level       Match simple Index on level of MultiIndex; otherwise select
#               subset of.
#   copy        If True, always copy underlying data even if new index is
#               equivalent to old index; if False, do not copy the data when
#               the indexes are equivalent.

# ---------------------------------------------------------------------------
# Dropping entries from an axis
# ---------------------------------------------------------------------------

obj = pd.Series(np.arange(5.), index=list('abcde'))
new_obj = obj.drop('c')  # returns a new Series without 'c'
obj.drop(['d', 'c'])  # several labels at once (result discarded)

data = pd.DataFrame(
    np.arange(16).reshape(4, 4),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)
# Drops these two rows. `data` itself is unchanged; the reduced frame is
# the return value.
data.drop(['Colorado', 'Ohio'])
# Drop a column instead; axis='columns' may be passed in place of axis=1.
data.drop('two', axis=1)
data.drop(['two', 'four'], axis='columns')

obj.drop('c', inplace=True)  # inplace=True modifies obj itself

# ---------------------------------------------------------------------------
# Indexing, selection and filtering
# ---------------------------------------------------------------------------

obj = pd.Series(np.arange(4.), index=['a', 'b', 'c', 'd'])
print(obj['b'])  # by label
# Positional access goes through .iloc: using integer keys positionally with
# plain [] on a label-indexed Series is deprecated in pandas.
print(obj.iloc[1])
print(obj.iloc[2:4])  # positional slice, end point excluded
print(obj[['b', 'a', 'd']])  # list of labels
print(obj.iloc[[1, 3]])  # list of positions
print(obj[obj < 2])  # boolean mask
print(obj['b':'c'])  # unlike a positional slice, a label slice includes 'c'
obj['b':'c'] = 5  # assignment through a label slice also includes 'c'

data = pd.DataFrame(
    np.arange(16).reshape((4, 4)),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)
print(data['two'])  # a single column
print(data[['three', 'one']])  # several columns, in the requested order
print(data[:2])  # a plain slice selects rows: the first two
# Column 'three' holds 2, 6, 10, 14, so the mask is True for every row
# except the first.
print(data[data['three'] > 5])
#           one  two  three  four
# Colorado    4    5      6     7
# Utah        8    9     10    11
# New York   12   13     14    15
print(data < 5)  # element-wise comparison gives a boolean frame
data[data < 5] = 0  # overwrite every element smaller than 5

# ---------------------------------------------------------------------------
# Selection with loc and iloc
#
# They enable you to select a subset of the rows and columns from a
# DataFrame with NumPy-like notation using either axis labels (loc) or
# integers (iloc).
# ---------------------------------------------------------------------------

# `data` at this point:
#           one  two  three  four
# Ohio        0    0      0     0
# Colorado    0    5      6     7
# Utah        8    9     10    11
# New York   12   13     14    15

print(data.loc['Colorado', ['two', 'three']])  # row label first, then columns
print(data.iloc[2, [3, 0, 1]])  # third row, columns 3/0/1 -> 11, 8, 9
print(data.iloc[2])  # the whole third row
print(data.iloc[[1, 2], [3, 0, 1]])
print(data)
print(data.loc[:'Utah', 'two'])  # label slice: rows up to and including 'Utah'
# Take the first three columns, then keep the rows where 'three' > 5.
print(data.iloc[:, :3][data.three > 5])

# ---------------------------------------------------------------------------
# Arithmetic and Data Alignment
#
# When you are adding together objects, if any index pairs are not the same,
# the respective index in the result will be the union of the index pairs.
# ---------------------------------------------------------------------------

s1 = pd.Series([7.3, -2.5, 3.4, 1.5], index=['a', 'c', 'd', 'e'])
s2 = pd.Series([-2.1, 3.6, -1.5, 4, 3.1], index=['a', 'c', 'e', 'f', 'g'])
# Values with the same label are added; a label found in only one operand
# still appears in the result, with the value NaN.
print(s1 + s2)

df1 = pd.DataFrame(
    np.arange(9.).reshape(3, 3),
    columns=list('bcd'),
    index=['Ohio', 'Texas', 'Colorado'],
)
df2 = pd.DataFrame(
    np.arange(12.).reshape(4, 3),
    columns=list('bde'),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
)
# For DataFrames both the rows and the columns are aligned; every cell that
# does not exist in both frames becomes NaN.
print(df1 + df2)

# ---------------------------------------------------------------------------
# Arithmetic methods with fill values
#
# In arithmetic operations between differently indexed objects, you might
# want to fill with a special value, like 0, when an axis label is found in
# one object but not the other.
#
#   add, radd            Methods for addition (+)
#   sub, rsub            Methods for subtraction (-)
#   div, rdiv            Methods for division (/)
#   floordiv, rfloordiv  Methods for floor division (//)
#   mul, rmul            Methods for multiplication (*)
#   pow, rpow            Methods for exponentiation (**)
# ---------------------------------------------------------------------------

df1 = pd.DataFrame(np.arange(12.).reshape(3, 4), columns=list('abcd'))  # columns a b c d
df2 = pd.DataFrame(np.arange(20.).reshape(4, 5), columns=list('abcde'))
df2.loc[1, 'b'] = np.nan

# df2[1, 'b'] is NaN, and NaN plus a number is still NaN, so that cell is
# NaN in the sum as well. Column 'e' and row 3 exist only in df2.
print(df1 + df2)
#       a     b     c     d   e
# 0   0.0   2.0   4.0   6.0 NaN
# 1   9.0   NaN  13.0  15.0 NaN
# 2  18.0  20.0  22.0  24.0 NaN
# 3   NaN   NaN   NaN   NaN NaN

# With fill_value=0 a value missing on one side is treated as 0 before the
# addition. Comparing the two tables makes the effect clear.
print(df1.add(df2, fill_value=0))
#       a     b     c     d     e
# 0   0.0   2.0   4.0   6.0   4.0
# 1   9.0   5.0  13.0  15.0   9.0
# 2  18.0  20.0  22.0  24.0  14.0
# 3  15.0  16.0  17.0  18.0  19.0

# These two are equivalent; the leading "r" means the arguments are flipped.
print(1 / df1)
print(df1.rdiv(1))

# ---------------------------------------------------------------------------
# Operations between DataFrame and Series
# ---------------------------------------------------------------------------

arr = np.arange(12.).reshape(3, 4)
print(arr[0])  # row 0
print(arr - arr[0])  # row 0 is subtracted from every row

frame = pd.DataFrame(
    np.arange(12.).reshape(4, 3),
    columns=list('bde'),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
)
series = frame.iloc[0]
print(frame - series)  # same idea as the array subtraction above

series2 = pd.Series(range(3), index=['b', 'e', 'f'])
# Labels that are not shared between frame's columns and series2's index
# still show up in the result, filled with NaN.
print(frame + series2)

series3 = frame['d']
# Subtract series3 from every column, matching on the row labels;
# axis='index' is the same as axis=0. (The result is discarded here.)
frame.sub(series3, axis='index')

# ---------------------------------------------------------------------------
# Function Application and Mapping
#
# The function passed to apply need not return a scalar value; it can also
# return a Series with multiple values.
# ---------------------------------------------------------------------------

frame = pd.DataFrame(
    np.random.randn(4, 3),  # random values
    columns=list('bde'),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
)
np.abs(frame)  # NumPy ufuncs work element-wise on a DataFrame: absolute values


def value_range(x):
    """Return the spread (maximum minus minimum) of the values in *x*."""
    return x.max() - x.min()


frame.apply(value_range)  # once per column: one number per column
frame.apply(value_range, axis='columns')  # once per row instead


def f(x):
    """Return the minimum and maximum of *x* as a Series labelled 'min'/'max'."""
    return pd.Series([x.min(), x.max()], index=['min', 'max'])


print(frame.apply(f))
# Example output (the values are random):
#             b         d         e
# min -0.969312 -0.262939 -3.066322
# max  2.521345  1.272400  1.924451


def format_two_decimals(x):
    """Format the number *x* as a string with two decimal places."""
    return '%.2f' % x


# Element-wise formatting turns every float in the frame into a string.
# Mapping each column separately avoids the deprecated DataFrame.applymap and
# works the same on old and new pandas versions.
frame.apply(lambda column: column.map(format_two_decimals))
frame['e'].map(format_two_decimals)  # Series.map is the element-wise method for one column

# ---------------------------------------------------------------------------
# Sorting and Ranking
#
# Tie-breaking methods with rank
#   'average'  Default: assign the average rank to each entry in the equal group
#   'min'      Use the minimum rank for the whole group
#   'max'      Use the maximum rank for the whole group
#   'first'    Assign ranks in the order the values appear in the data
#   'dense'    Like method='min', but ranks always increase by 1 in between
#              groups rather than the number of equal elements in a group
# ---------------------------------------------------------------------------

obj = pd.Series(range(4), index=['d', 'a', 'b', 'c'])
print(obj)
# d    0
# a    1
# b    2
# c    3
# dtype: int64

# sort_index() orders by the labels, not by the values: the index d, a, b, c
# becomes a, b, c, d and each value moves along with its label.
print(obj.sort_index())
# a    1
# b    2
# c    3
# d    0
# dtype: int64

# A DataFrame can be sorted along either axis; ascending is the default.
frame = pd.DataFrame(
    np.arange(8).reshape(2, 4),
    index=['three', 'one'],
    columns=['d', 'a', 'b', 'c'],
)
print(frame)
#        d  a  b  c
# three  0  1  2  3
# one    4  5  6  7

# Sort the rows by label: 'one' sorts before 'three', so the rows swap.
print(frame.sort_index())
#        d  a  b  c
# one    4  5  6  7
# three  0  1  2  3

# Sort the columns by label instead: d, a, b, c becomes a, b, c, d.
print(frame.sort_index(axis=1))
#        a  b  c  d
# three  1  2  3  0
# one    5  6  7  4

print(frame.sort_index(axis=1, ascending=False))  # descending order
#        d  c  b  a
# three  0  3  2  1
# one    4  7  6  5

obj = pd.Series([4, 7, -3, 2])
print(obj)
# 0    4
# 1    7
# 2   -3
# 3    2
# dtype: int64
print(obj.sort_values())  # order by value
# 2   -3
# 3    2
# 0    4
# 1    7
# dtype: int64

obj = pd.Series([4, np.nan, 7, np.nan, -3, 2])
print(obj.sort_values())  # missing values are placed at the end by default
# 4   -3.0
# 5    2.0
# 0    4.0
# 2    7.0
# 1    NaN
# 3    NaN
# dtype: float64

frame = pd.DataFrame({'b': [4, 7, -3, 2], 'a': [0, 1, 0, 1]})
print(frame)
#    b  a
# 0  4  0
# 1  7  1
# 2 -3  0
# 3  2  1
print(frame.sort_values(by='b'))  # order the rows by column 'b'
#    b  a
# 2 -3  0
# 3  2  1
# 0  4  0
# 1  7  1

# Several sort keys: order by 'a' first, then by 'b' within equal 'a'.
print(frame.sort_values(by=['a', 'b']))
#    b  a
# 2 -3  0
# 0  4  0
# 3  2  1
# 1  7  1

obj = pd.Series([7, -5, 7, 4, 2, 0, 4])
# How rank() arrives at its result:
#   values in sorted order:   -5  0  2  4  4  7  7
#   rank positions from 1:     1  2  3  4  5  6  7
# Equal values share the average of their positions, so each 7 gets
# (6 + 7) / 2 = 6.5 and each 4 gets (4 + 5) / 2 = 4.5; -5 gets rank 1.
print(obj.rank())
# 0    6.5
# 1    1.0
# 2    6.5
# 3    4.5
# 4    3.0
# 5    2.0
# 6    4.5
# dtype: float64

# Ranks can also be assigned according to the order in which they're
# observed in the data. With method='first' the positions from the sorted
# order above are used as they are, without averaging: the first 7 gets
# rank 6, the second 7 gets rank 7, -5 gets rank 1, and so on.
print(obj.rank(method='first'))
# 0    6.0
# 1    1.0
# 2    7.0
# 3    4.0
# 4    3.0
# 5    2.0
# 6    5.0
# dtype: float64

print(obj)
# 0    7
# 1   -5
# 2    7
# 3    4
# 4    2
# 5    0
# 6    4
# dtype: int64

# Descending ranks with method='max':
#   values in descending order:  7  7  4  4  2  0 -5
#   rank positions from 1:       1  2  3  4  5  6  7
# Each group of ties takes its largest position: the two 7s occupy
# positions 1 and 2 and so both get rank 2, -5 gets rank 7, and so on.
print(obj.rank(ascending=False, method='max'))
# 0    2.0
# 1    7.0
# 2    2.0
# 3    4.0
# 4    5.0
# 5    6.0
# 6    4.0
# dtype: float64

frame = pd.DataFrame({
    'b': [4.3, 7, -3, 2],
    'a': [0, 1, 0, 1],
    'c': [-2, 5, 8, -2.5],
})
print(frame)
#      b  a    c
# 0  4.3  0 -2.0
# 1  7.0  1  5.0
# 2 -3.0  0  8.0
# 3  2.0  1 -2.5

# A DataFrame can be ranked along either axis. With axis='columns' the
# values are ranked within each row (across the columns b, a, c), which is
# why every row of the result is a permutation of 1, 2, 3. The tie rules
# are the same as for a Series.
print(frame.rank(axis='columns'))
#      b    a    c
# 0  3.0  2.0  1.0
# 1  3.0  1.0  2.0
# 2  1.0  2.0  3.0
# 3  3.0  2.0  1.0

# ---------------------------------------------------------------------------
# Axis Indexes with Duplicate Labels
# ---------------------------------------------------------------------------

obj = pd.Series(range(5), index=list('aabbc'))
# is_unique is True only when no label occurs more than once; here it is False.
print(obj.index.is_unique)
print(obj['a'])  # a duplicated label returns every matching entry

df = pd.DataFrame(np.random.randn(4, 3), index=list('aabb'))
print(df.loc['b'])  # likewise: all rows labelled 'b'

# ---------------------------------------------------------------------------
# Summarizing and Computing Descriptive Statistics
#
# Options for reduction methods
#   axis    Axis to reduce over; 0 for DataFrame's rows and 1 for columns
#   skipna  Exclude missing values; True by default
#   level   Reduce grouped by level if the axis is hierarchically indexed
#           (MultiIndex)
#
# Descriptive and summary statistics
#   count           Number of non-NA values
#   describe        Compute set of summary statistics for Series or each
#                   DataFrame column
#   min, max        Compute minimum and maximum values
#   argmin, argmax  Compute index locations (integers) at which minimum or
#                   maximum value obtained, respectively
#   idxmin, idxmax  Compute index labels at which minimum or maximum value
#                   obtained, respectively
#   quantile        Compute sample quantile ranging from 0 to 1
#   sum             Sum of values
#   mean            Mean of values
#   median          Arithmetic median (50% quantile) of values
#   mad             Mean absolute deviation from mean value
#                   (no longer available in pandas 2.x)
#   prod            Product of all values
#   var             Sample variance of values
#   std             Sample standard deviation of values
#   skew            Sample skewness (third moment) of values
#   kurt            Sample kurtosis (fourth moment) of values
#   cumsum          Cumulative sum of values
#   cummin, cummax  Cumulative minimum or maximum of values, respectively
#   cumprod         Cumulative product of values
#   diff            Compute first arithmetic difference (useful for time series)
#   pct_change      Compute percent changes
# ---------------------------------------------------------------------------

df = pd.DataFrame(
    [[1.4, np.nan], [7.1, -4.5], [np.nan, np.nan], [0.75, -1.3]],
    index=list('abcd'),
    columns=['one', 'two'],
)
print(df)
#     one  two
# a  1.40  NaN
# b  7.10 -4.5
# c   NaN  NaN
# d  0.75 -1.3

print(df.sum())  # reduces down the rows: one total per column
# one    9.25
# two   -5.80
# dtype: float64

print(df.sum(axis='columns'))  # axis=1 works too: one total per row a..d
# a    1.40
# b    2.60
# c    0.00
# d   -0.55
# dtype: float64

# NaN is skipped by default; skipna=False turns that off, so any row that
# contains a NaN yields NaN.
print(df.mean(axis='columns', skipna=False))
# a      NaN
# b    1.300
# c      NaN
# d   -0.275
# dtype: float64

print(df.idxmax())  # the row label at which each column reaches its maximum
# one    b
# two    d
# dtype: object

# Running total down each column. A NaN cell stays NaN in the output but is
# skipped by the accumulation, which carries on with the next row.
print(df.cumsum())
#     one  two
# a  1.40  NaN
# b  8.50 -4.5
# c   NaN  NaN
# d  9.25 -5.8

print(df.describe())  # several summary statistics in one call
#             one       two
# count  3.000000  2.000000
# mean   3.083333 -2.900000
# std    3.493685  2.262742
# min    0.750000 -4.500000
# 25%    1.075000 -3.700000
# 50%    1.400000 -2.900000
# 75%    4.250000 -2.100000
# max    7.100000 -1.300000

obj = pd.Series(['a', 'a', 'b', 'c'] * 4)
print(obj)  # the pattern a, a, b, c repeated four times, labelled 0..15
print(obj.describe())  # non-numeric data gets a different kind of summary
# count     16
# unique     3
# top        a
# freq       8
# dtype: object

# ---------------------------------------------------------------------------
# Unique Values, Value Counts, and Membership
#
#   isin          Compute boolean array indicating whether each Series value
#                 is contained in the passed sequence of values
#   match         Compute integer indices for each value in an array into
#                 another array of distinct values; helpful for data alignment
#                 and join-type operations (done with Index.get_indexer below)
#   unique        Compute array of unique values in a Series, returned in the
#                 order observed
#   value_counts  Return a Series containing unique values as its index and
#                 frequencies as its values, ordered count in descending order
# ---------------------------------------------------------------------------

obj = pd.Series(['c', 'a', 'd', 'a', 'a', 'b', 'b', 'c', 'c'])
uniques = obj.unique()  # the distinct values
print(uniques)
# ['c' 'a' 'd' 'b']

# How often each value occurs, sorted by count in descending order.
print(obj.value_counts())
# The top-level pd.value_counts function is deprecated; wrapping the raw
# array in a Series and calling the method gives the same counts.
print(pd.Series(obj.values).value_counts(sort=False))

# isin() gives a boolean Series: True where the value is 'b' or 'c'.
mask = obj.isin(['b', 'c'])
print(obj[mask])
# 0    c
# 5    b
# 6    b
# 7    c
# 8    c
# dtype: object

to_match = pd.Series(['c', 'a', 'b', 'b', 'c', 'a'])
unique_vals = pd.Series(['c', 'b', 'a'])
# For every element of to_match, its position inside unique_vals.
print(pd.Index(unique_vals).get_indexer(to_match))
# [0 2 1 1 0 2]

data = pd.DataFrame({
    'Qu1': [1, 3, 4, 3, 4],
    'Qu2': [2, 3, 1, 2, 3],
    'Qu3': [1, 5, 2, 4, 4],
})
print(data)
#    Qu1  Qu2  Qu3
# 0    1    2    1
# 1    3    3    5
# 2    4    1    2
# 3    3    2    4
# 4    4    3    4

# The row labels of the result are the distinct values found across all
# columns; each cell is how often that value occurs in that column (0 where
# it does not occur at all).
result = data.apply(pd.Series.value_counts).fillna(0)
print(result)
#    Qu1  Qu2  Qu3
# 1  1.0  1.0  1.0
# 2  0.0  2.0  1.0
# 3  2.0  2.0  0.0
# 4  2.0  0.0  2.0
# 5  0.0  0.0  1.0

本文为我原创

本文禁止转载或摘编

分享到：

投诉或建议

weixin_39807541

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
python统计字符出现次数注释_利用python进行数据分析-pandas中所出现的函数及其注释...

利用python进行数据分析-pandas中所出现的函数及其注释import pandas as pdimport numpy as npfrom pandas import Series, DataFrame"""Series的应用"""obj = pd.Series([4, 7, -5, 3]) # 带有索引的值，从0开始索引print(obj)# 0 4# 1 7# 2 -...
复制链接

扫一扫