import pandas as pd
# df = pd.read_csv('train.csv',encoding= 'utf-8')
# print(type(df))
# df.columns = ['a','b','c','d','e','f']
# data = df[df['e'] == 'Andhra Pradesh']
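# data = df.loc[1:4,['a','c']] # the first selector picks rows, the second picks columns
# data = df.loc[2:13,["Month","Zuerich"]] # the output shows an extra leading column; that is the DataFrame's row index
# data = (df['Month'] < '1750-01') & (df['Month'] > '1749-01') # this form does not work for filtering dates; it is meant for numeric filters such as "mass greater than 0"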
'''
# Convert the date column to datetime type
df['Month'] = pd.to_datetime(df['Month'])
# Filter condition: date earlier than 1749-12
criteria = df['Month'] < pd.Timestamp(1749,12,1)
df[criteria].head()
# print(df[criteria].head())
criteria.to_csv('newtrain.csv')
'''
def extract_month_range(src='train.csv', dst='newtrain.csv',
                        start='1749-01', end='1749-12'):
    """Copy the rows of a CSV whose 'Month' label lies in a given range.

    The file at *src* is loaded, its 'Month' column becomes the row index,
    and the rows from label *start* through label *end* (both inclusive,
    as with any pandas label slice) are selected.  The first five selected
    rows are printed and the full selection is written to *dst*.

    Label slicing has a drawback: pandas needs the index to be sorted
    (or both boundary labels to be present) to resolve the slice, so the
    months in the input are expected to be in order.

    Args:
        src: Path of the CSV file to read; it must contain a 'Month' column.
        dst: Path of the CSV file to write the selected rows to.
        start: First 'Month' label to keep.
        end: Last 'Month' label to keep.

    Returns:
        The selected rows as a DataFrame indexed by 'Month'.
    """
    # Read the file and index it by month so rows can be sliced by label.
    df = pd.read_csv(src)
    df = df.set_index('Month')

    # Method 1 for picking a date range: slice the row index by label.
    # `.loc` makes it explicit that rows (not columns) are being selected.
    selected = df.loc[start:end]

    # Only the first 5 rows are printed, to keep the preview short.
    print(selected.head())
    selected.to_csv(dst)
    return selected


if __name__ == '__main__':
    extract_month_range()

# Reference article: https://www.jb51.net/article/206016.htm