pandas4:读写多种格式文件pickle,csv,excel,json,html,sql...

最新推荐文章于 2024-05-28 09:45:22 发布

科学边界

最新推荐文章于 2024-05-28 09:45:22 发布

阅读量436

点赞数

分类专栏： # pandas

本博客所有文章逐渐搬到个人站www.daodaodao123.com，后续不确定能否同步更新

本文链接：https://blog.csdn.net/luteresa/article/details/107364852

版权

pandas 专栏收录该内容

7 篇文章 0 订阅

订阅专栏

import pandas as pd
# Load the sample table; `names` keeps the 'Name' column as a Series so the
# Series-level read/write examples have something to work on.
scientists = pd.read_csv('./data/scientists.csv')
names = scientists['Name']
print(scientists)
print(names)

                   Name        Born        Died  Age          Occupation
0     Rosaline Franklin  1920-07-25  1958-04-16   37             Chemist
1        William Gosset  1876-06-13  1937-10-16   61        Statistician
2  Florence Nightingale  1820-05-12  1910-08-13   90               Nurse
3           Marie Curie  1867-11-07  1934-07-04   66             Chemist
4         Rachel Carson  1907-05-27  1964-04-14   56           Biologist
5             John Snow  1813-03-15  1858-06-16   45           Physician
6           Alan Turing  1912-06-23  1954-06-07   41  Computer Scientist
7          Johann Gauss  1777-04-30  1855-02-23   77       Mathematician
0       Rosaline Franklin
1          William Gosset
2    Florence Nightingale
3             Marie Curie
4           Rachel Carson
5               John Snow
6             Alan Turing
7            Johann Gauss
Name: Name, dtype: object

1.读写pickle格式

import os

# to_pickle() does not create missing parent directories, so make sure the
# output directory exists before the first write.
os.makedirs('./output', exist_ok=True)

# Each pickle is written only once: an existing file is left untouched, so it
# has to be deleted manually to pick up changes in the source data.
path_name = './output/scientist_name_Series.pickle'
if not os.path.exists(path_name):
    names.to_pickle(path_name)

path_all = './output/scientist_DataFrame.pickle'
if not os.path.exists(path_all):
    scientists.to_pickle(path_all)

# Read the pickled Series back from disk.
# NOTE: unpickling can execute arbitrary code, so only call read_pickle() on
# files that come from a trusted source.
scientists_names_frme_pickle = pd.read_pickle(path_name)

print(scientists_names_frme_pickle)

0       Rosaline Franklin
1          William Gosset
2    Florence Nightingale
3             Marie Curie
4           Rachel Carson
5               John Snow
6             Alan Turing
7            Johann Gauss
Name: Name, dtype: object

# Write the DataFrame pickle if it is not there yet, then load it back; the
# printed frame matches the original table, index included.
path_all = './output/scientist_DataFrame.pickle'
if not os.path.exists(path_all):
    scientists.to_pickle(path_all)
scientists_frme_pickle = pd.read_pickle(path_all)
print(scientists_frme_pickle)

                   Name        Born        Died  Age          Occupation
0     Rosaline Franklin  1920-07-25  1958-04-16   37             Chemist
1        William Gosset  1876-06-13  1937-10-16   61        Statistician
2  Florence Nightingale  1820-05-12  1910-08-13   90               Nurse
3           Marie Curie  1867-11-07  1934-07-04   66             Chemist
4         Rachel Carson  1907-05-27  1964-04-14   56           Biologist
5             John Snow  1813-03-15  1858-06-16   45           Physician
6           Alan Turing  1912-06-23  1954-06-07   41  Computer Scientist
7          Johann Gauss  1777-04-30  1855-02-23   77       Mathematician

2.读写CSV格式

2.1 读写csv格式的Series

import pandas as pd
# Reload the source table so this section can be run on its own.
scientists = pd.read_csv('./data/scientists.csv')
names = scientists['Name']
print(scientists)
print(names)

                   Name        Born        Died  Age          Occupation
0     Rosaline Franklin  1920-07-25  1958-04-16   37             Chemist
1        William Gosset  1876-06-13  1937-10-16   61        Statistician
2  Florence Nightingale  1820-05-12  1910-08-13   90               Nurse
3           Marie Curie  1867-11-07  1934-07-04   66             Chemist
4         Rachel Carson  1907-05-27  1964-04-14   56           Biologist
5             John Snow  1813-03-15  1858-06-16   45           Physician
6           Alan Turing  1912-06-23  1954-06-07   41  Computer Scientist
7          Johann Gauss  1777-04-30  1855-02-23   77       Mathematician
0       Rosaline Franklin
1          William Gosset
2    Florence Nightingale
3             Marie Curie
4           Rachel Carson
5               John Snow
6             Alan Turing
7            Johann Gauss
Name: Name, dtype: object

import os
# With default options to_csv() also writes the index as an unlabeled first
# column, so reading the file back produces an extra 'Unnamed: 0' column.
path_name = './output/scientist_name_Series.CSV'
if not os.path.exists(path_name):
    names.to_csv(path_name)
scientists_name_frme_CSV = pd.read_csv(path_name)
print(scientists_name_frme_CSV)

   Unnamed: 0                  Name
0           0     Rosaline Franklin
1           1        William Gosset
2           2  Florence Nightingale
3           3           Marie Curie
4           4         Rachel Carson
5           5             John Snow
6           6           Alan Turing
7           7          Johann Gauss

# The file is written with '*' as the field separator, but read_csv() below is
# called with its default ',' separator, so every line is parsed as a single
# column (e.g. "0*Rosaline Franklin"). Pass sep='*' to read_csv() to get the
# index and name back as separate columns.
path_name = './output/scientist_name_Series1.CSV'
if not os.path.exists(path_name):
    names.to_csv(path_name,sep='*')
scientists_name_frme_CSV = pd.read_csv(path_name)
print(scientists_name_frme_CSV)

                    *Name
0     0*Rosaline Franklin
1        1*William Gosset
2  2*Florence Nightingale
3           3*Marie Curie
4         4*Rachel Carson
5             5*John Snow
6           6*Alan Turing
7          7*Johann Gauss

# index=False leaves the index out of the file, so the round trip yields only
# the 'Name' column.
path_name = './output/scientist_name_Series2.CSV'
if not os.path.exists(path_name):
    names.to_csv(path_name,index=False)
scientists_name_frme_CSV = pd.read_csv(path_name)
print(scientists_name_frme_CSV)

                   Name
0     Rosaline Franklin
1        William Gosset
2  Florence Nightingale
3           Marie Curie
4         Rachel Carson
5             John Snow
6           Alan Turing
7          Johann Gauss

2.2 读写csv格式的DataFrame

# Default to_csv(): the index is stored as an unlabeled first column and comes
# back as 'Unnamed: 0' when the file is read again.
path_name = './output/scientist_DataFrame.CSV'
if not os.path.exists(path_name):
    scientists.to_csv(path_name)
scientists_frme_CSV = pd.read_csv(path_name)
print(scientists_frme_CSV)

   Unnamed: 0                  Name        Born        Died  Age  \
0           0     Rosaline Franklin  1920-07-25  1958-04-16   37   
1           1        William Gosset  1876-06-13  1937-10-16   61   
2           2  Florence Nightingale  1820-05-12  1910-08-13   90   
3           3           Marie Curie  1867-11-07  1934-07-04   66   
4           4         Rachel Carson  1907-05-27  1964-04-14   56   
5           5             John Snow  1813-03-15  1858-06-16   45   
6           6           Alan Turing  1912-06-23  1954-06-07   41   
7           7          Johann Gauss  1777-04-30  1855-02-23   77   

           Occupation  
0             Chemist  
1        Statistician  
2               Nurse  
3             Chemist  
4           Biologist  
5           Physician  
6  Computer Scientist  
7       Mathematician

# Written with sep='*' but read back with the default ',' separator, so each
# row collapses into one string column. Use pd.read_csv(path_name, sep='*')
# to recover the individual columns.
path_name = './output/scientist_DataFrame2.CSV'
if not os.path.exists(path_name):
    scientists.to_csv(path_name,sep='*')
scientists_frme_CSV = pd.read_csv(path_name)
print(scientists_frme_CSV)

                      *Name*Born*Died*Age*Occupation
0  0*Rosaline Franklin*1920-07-25*1958-04-16*37*C...
1  1*William Gosset*1876-06-13*1937-10-16*61*Stat...
2  2*Florence Nightingale*1820-05-12*1910-08-13*9...
3     3*Marie Curie*1867-11-07*1934-07-04*66*Chemist
4  4*Rachel Carson*1907-05-27*1964-04-14*56*Biolo...
5     5*John Snow*1813-03-15*1858-06-16*45*Physician
6  6*Alan Turing*1912-06-23*1954-06-07*41*Compute...
7  7*Johann Gauss*1777-04-30*1855-02-23*77*Mathem...

# index=False drops the index column, so the frame read back has the same
# columns as the original table.
path_name = './output/scientist_DataFrame3.CSV'
if not os.path.exists(path_name):
    scientists.to_csv(path_name,index=False)
scientists_frme_CSV = pd.read_csv(path_name)
print(scientists_frme_CSV)

                   Name        Born        Died  Age          Occupation
0     Rosaline Franklin  1920-07-25  1958-04-16   37             Chemist
1        William Gosset  1876-06-13  1937-10-16   61        Statistician
2  Florence Nightingale  1820-05-12  1910-08-13   90               Nurse
3           Marie Curie  1867-11-07  1934-07-04   66             Chemist
4         Rachel Carson  1907-05-27  1964-04-14   56           Biologist
5             John Snow  1813-03-15  1858-06-16   45           Physician
6           Alan Turing  1912-06-23  1954-06-07   41  Computer Scientist
7          Johann Gauss  1777-04-30  1855-02-23   77       Mathematician

3.读写excel文件

3.1读写excel文件的Series

import pandas as pd
# Reload the source table so the Excel section can be run on its own.
scientists = pd.read_csv('./data/scientists.csv')
names = scientists['Name']
print(scientists)
# Printing the Series is disabled in this cell:
#print(names)

                   Name        Born        Died  Age          Occupation
0     Rosaline Franklin  1920-07-25  1958-04-16   37             Chemist
1        William Gosset  1876-06-13  1937-10-16   61        Statistician
2  Florence Nightingale  1820-05-12  1910-08-13   90               Nurse
3           Marie Curie  1867-11-07  1934-07-04   66             Chemist
4         Rachel Carson  1907-05-27  1964-04-14   56           Biologist
5             John Snow  1813-03-15  1858-06-16   45           Physician
6           Alan Turing  1912-06-23  1954-06-07   41  Computer Scientist
7          Johann Gauss  1777-04-30  1855-02-23   77       Mathematician

# Export the Series, the same data as a one-column DataFrame, and the full
# table (twice: default sheet name, then an explicit sheet name 'scientist').
# NOTE(review): writing the legacy .xls format relies on the xlwt engine, which
# newer pandas releases (2.0+) reportedly no longer ship support for — confirm
# the pandas version in use, or switch to .xlsx.
names.to_excel('./output/scientist_name_series.xls')
names_df = names.to_frame()
names_df.to_excel('./output/scientist_name_df.xls')
scientists.to_excel('./output/scientist_df1.xls',index=False)
scientists.to_excel('./output/scientist_df2.xls',sheet_name='scientist',index=False)

# The Series was saved together with its index, which comes back as an
# 'Unnamed: 0' column.
scientists_name_frme_excel= pd.read_excel('./output/scientist_name_series.xls')
print(scientists_name_frme_excel)

   Unnamed: 0                  Name
0           0     Rosaline Franklin
1           1        William Gosset
2           2  Florence Nightingale
3           3           Marie Curie
4           4         Rachel Carson
5           5             John Snow
6           6           Alan Turing
7           7          Johann Gauss

# Same result as for the Series file: the one-column DataFrame was also saved
# with its index, so 'Unnamed: 0' appears here too.
scientists_name_df_frme_excel= pd.read_excel('./output/scientist_name_df.xls')
print(scientists_name_df_frme_excel)

   Unnamed: 0                  Name
0           0     Rosaline Franklin
1           1        William Gosset
2           2  Florence Nightingale
3           3           Marie Curie
4           4         Rachel Carson
5           5             John Snow
6           6           Alan Turing
7           7          Johann Gauss

# This workbook was written with index=False, so only the five data columns
# are read back.
scientists_frme_excel= pd.read_excel('./output/scientist_df1.xls')
print(scientists_frme_excel)

                   Name        Born        Died  Age          Occupation
0     Rosaline Franklin  1920-07-25  1958-04-16   37             Chemist
1        William Gosset  1876-06-13  1937-10-16   61        Statistician
2  Florence Nightingale  1820-05-12  1910-08-13   90               Nurse
3           Marie Curie  1867-11-07  1934-07-04   66             Chemist
4         Rachel Carson  1907-05-27  1964-04-14   56           Biologist
5             John Snow  1813-03-15  1858-06-16   45           Physician
6           Alan Turing  1912-06-23  1954-06-07   41  Computer Scientist
7          Johann Gauss  1777-04-30  1855-02-23   77       Mathematician

# No sheet_name is given, so read_excel() loads the first sheet, which in this
# workbook is the custom-named sheet 'scientist'.
scientists_frme_excel= pd.read_excel('./output/scientist_df2.xls')
print(scientists_frme_excel)

                   Name        Born        Died  Age          Occupation
0     Rosaline Franklin  1920-07-25  1958-04-16   37             Chemist
1        William Gosset  1876-06-13  1937-10-16   61        Statistician
2  Florence Nightingale  1820-05-12  1910-08-13   90               Nurse
3           Marie Curie  1867-11-07  1934-07-04   66             Chemist
4         Rachel Carson  1907-05-27  1964-04-14   56           Biologist
5             John Snow  1813-03-15  1858-06-16   45           Physician
6           Alan Turing  1912-06-23  1954-06-07   41  Computer Scientist
7          Johann Gauss  1777-04-30  1855-02-23   77       Mathematician

3.2用xlrd读取由DataFrame导出的excel文件

import xlrd

# Open the workbook and pick a sheet, either by name or by position.

data = xlrd.open_workbook('./output/scientist_df2.xls')
sheet = data.sheet_by_name('scientist')
#sheet = data.sheet_by_index(0)

# Rows and columns are zero-based. Row 0 holds the header, so row 1 is the
# first data record; column 1 is the 'Born' column, header cell included.
# Numeric cells come back as floats (Age 37 is returned as 37.0).
print(sheet.row_values(1))
print(sheet.col_values(1))

['Rosaline Franklin', '1920-07-25', '1958-04-16', 37.0, 'Chemist']
['Born', '1920-07-25', '1876-06-13', '1820-05-12', '1867-11-07', '1907-05-27', '1813-03-15', '1912-06-23', '1777-04-30']

# Sheet dimensions: the row count includes the header row (8 records + 1).
print('行数=',sheet.nrows)
print('列数=',sheet.ncols)
# Printing the sheet itself only shows the object's default repr.
print(sheet)

行数= 9
列数= 5
<xlrd.sheet.Sheet object at 0x7f339c292650>

# cell(row, col) is zero-based: (0, 0) is the header cell 'Name', and (2, 3)
# is the Age of the second record, returned as a float.
print(sheet.cell(0,0).value)
print(sheet.cell(2,3).value)

Name
61.0

# List the names of all sheets in the workbook.
print(data.sheet_names())

['scientist']

# Sheet name, followed by the first data row and the 'Born' column again.
print(sheet.name)
print(sheet.row_values(1))
print(sheet.col_values(1))

scientist
['Rosaline Franklin', '1920-07-25', '1958-04-16', 37.0, 'Chemist']
['Born', '1920-07-25', '1876-06-13', '1820-05-12', '1867-11-07', '1907-05-27', '1813-03-15', '1912-06-23', '1777-04-30']

4.其他格式 json,html,sql

import pandas as pd
scientists = pd.read_csv('./data/scientists.csv')

# Other exporters, left disabled here:
#scientists.to_clipboard()

#print(scientists.to_dict())
# When a target path is passed, to_html() and to_json() write the file and
# return None, which is why both print() calls output "None".
print(scientists.to_html('./t.html'))
print(scientists.to_json('./t.json'))

None
None

import sqlite3
import sqlalchemy

# SQLAlchemy engine for the SQLite database file my_db.sqlite (relative path).
engine = sqlalchemy.create_engine('sqlite:///my_db.sqlite')

# if_exists='replace' keeps this cell re-runnable: with the default ('fail'),
# to_sql() raises ValueError as soon as the 'scientists' table already exists.
scientists.to_sql('scientists', engine, if_exists='replace')

科学边界

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
pandas4:读写多种格式文件pickle,csv,excel,json,html,sql...

import pandas as pdscientists = pd.read_csv('./data/scientists.csv')names = scientists['Name']print(scientists)print(names) Name Born Died Age Occupation0 Rosaline Franklin 1920-07-25 1958-04-16 37
复制链接

扫一扫

专栏目录