import pandas as pd
# Load the scientists table from CSV and pull out the 'Name' column,
# which pandas hands back as a Series.
scientists = pd.read_csv('./data/scientists.csv')
names = scientists['Name']

# Show the whole DataFrame first, then the single-column Series.
print(scientists)
print(names)
Name Born Died Age Occupation
0 Rosaline Franklin 1920-07-25 1958-04-16 37 Chemist
1 William Gosset 1876-06-13 1937-10-16 61 Statistician
2 Florence Nightingale 1820-05-12 1910-08-13 90 Nurse
3 Marie Curie 1867-11-07 1934-07-04 66 Chemist
4 Rachel Carson 1907-05-27 1964-04-14 56 Biologist
5 John Snow 1813-03-15 1858-06-16 45 Physician
6 Alan Turing 1912-06-23 1954-06-07 41 Computer Scientist
7 Johann Gauss 1777-04-30 1855-02-23 77 Mathematician
0 Rosaline Franklin
1 William Gosset
2 Florence Nightingale
3 Marie Curie
4 Rachel Carson
5 John Snow
6 Alan Turing
7 Johann Gauss
Name: Name, dtype: object
1.读写pickle格式
import os
# Persist the 'Name' Series as a pickle. The existence check skips the write
# when the file is already on disk from an earlier run.
path_name = './output/scientist_name_Series.pickle'
if not os.path.exists(path_name):
    names.to_pickle(path_name)

# Same guarded write for the full DataFrame.
path_all = './output/scientist_DataFrame.pickle'
if not os.path.exists(path_all):
    scientists.to_pickle(path_all)

# Load the Series back from its pickle and display it.
# NOTE: unpickling can execute arbitrary code; only read pickle files that
# were produced by a trusted source (here: this script itself).
scientists_names_frme_pickle = pd.read_pickle(path_name)
print(scientists_names_frme_pickle)
0 Rosaline Franklin
1 William Gosset
2 Florence Nightingale
3 Marie Curie
4 Rachel Carson
5 John Snow
6 Alan Turing
7 Johann Gauss
Name: Name, dtype: object
# Write the full DataFrame to a pickle unless the file already exists.
path_all = './output/scientist_DataFrame.pickle'
if not os.path.exists(path_all):
    scientists.to_pickle(path_all)

# Read the DataFrame back from the pickle and display it.
scientists_frme_pickle = pd.read_pickle(path_all)
print(scientists_frme_pickle)
Name Born Died Age Occupation
0 Rosaline Franklin 1920-07-25 1958-04-16 37 Chemist
1 William Gosset 1876-06-13 1937-10-16 61 Statistician
2 Florence Nightingale 1820-05-12 1910-08-13 90 Nurse
3 Marie Curie 1867-11-07 1934-07-04 66 Chemist
4 Rachel Carson 1907-05-27 1964-04-14 56 Biologist
5 John Snow 1813-03-15 1858-06-16 45 Physician
6 Alan Turing 1912-06-23 1954-06-07 41 Computer Scientist
7 Johann Gauss 1777-04-30 1855-02-23 77 Mathematician
2.读写CSV格式
2.1 读写csv格式的Series
import pandas as pd
# Re-read the source CSV so this section starts from a fresh DataFrame,
# then take the 'Name' column as a Series.
scientists = pd.read_csv('./data/scientists.csv')
names = scientists['Name']

# Display both objects for reference.
print(scientists)
print(names)
Name Born Died Age Occupation
0 Rosaline Franklin 1920-07-25 1958-04-16 37 Chemist
1 William Gosset 1876-06-13 1937-10-16 61 Statistician
2 Florence Nightingale 1820-05-12 1910-08-13 90 Nurse
3 Marie Curie 1867-11-07 1934-07-04 66 Chemist
4 Rachel Carson 1907-05-27 1964-04-14 56 Biologist
5 John Snow 1813-03-15 1858-06-16 45 Physician
6 Alan Turing 1912-06-23 1954-06-07 41 Computer Scientist
7 Johann Gauss 1777-04-30 1855-02-23 77 Mathematician
0 Rosaline Franklin
1 William Gosset
2 Florence Nightingale
3 Marie Curie
4 Rachel Carson
5 John Snow
6 Alan Turing
7 Johann Gauss
Name: Name, dtype: object
import os
# Write the 'Name' Series to CSV with default options, unless the file is
# already present.
path_name = './output/scientist_name_Series.CSV'
if not os.path.exists(path_name):
    names.to_csv(path_name)

# Read it back. The index was written as a header-less first column, so it
# comes back as an extra 'Unnamed: 0' column next to 'Name'.
scientists_name_frme_CSV = pd.read_csv(path_name)
print(scientists_name_frme_CSV)
Unnamed: 0 Name
0 0 Rosaline Franklin
1 1 William Gosset
2 2 Florence Nightingale
3 3 Marie Curie
4 4 Rachel Carson
5 5 John Snow
6 6 Alan Turing
7 7 Johann Gauss
# Write the Series using '*' as the field separator instead of ','.
path_name = './output/scientist_name_Series1.CSV'
if not os.path.exists(path_name):
    names.to_csv(path_name, sep='*')

# NOTE: the file is read back with read_csv's default ',' separator, so every
# line is treated as one field and the result is a single '*Name' column
# holding values such as '0*Rosaline Franklin'. Pass sep='*' to read_csv to
# split the index and name into separate columns.
scientists_name_frme_CSV = pd.read_csv(path_name)
print(scientists_name_frme_CSV)
*Name
0 0*Rosaline Franklin
1 1*William Gosset
2 2*Florence Nightingale
3 3*Marie Curie
4 4*Rachel Carson
5 5*John Snow
6 6*Alan Turing
7 7*Johann Gauss
# Write the Series without its index so the file contains only the names.
path_name = './output/scientist_name_Series2.CSV'
if not os.path.exists(path_name):
    names.to_csv(path_name, index=False)

# Reading it back yields a single 'Name' column and no 'Unnamed: 0' column.
scientists_name_frme_CSV = pd.read_csv(path_name)
print(scientists_name_frme_CSV)
Name
0 Rosaline Franklin
1 William Gosset
2 Florence Nightingale
3 Marie Curie
4 Rachel Carson
5 John Snow
6 Alan Turing
7 Johann Gauss
2.2 读写csv格式的DataFrame
# Write the full DataFrame to CSV with default options, unless the file is
# already present.
path_name = './output/scientist_DataFrame.CSV'
if not os.path.exists(path_name):
    scientists.to_csv(path_name)

# Read it back. The saved index shows up as an extra 'Unnamed: 0' column
# ahead of the original five columns.
scientists_frme_CSV = pd.read_csv(path_name)
print(scientists_frme_CSV)
Unnamed: 0 Name Born Died Age \
0 0 Rosaline Franklin 1920-07-25 1958-04-16 37
1 1 William Gosset 1876-06-13 1937-10-16 61
2 2 Florence Nightingale 1820-05-12 1910-08-13 90
3 3 Marie Curie 1867-11-07 1934-07-04 66
4 4 Rachel Carson 1907-05-27 1964-04-14 56
5 5 John Snow 1813-03-15 1858-06-16 45
6 6 Alan Turing 1912-06-23 1954-06-07 41
7 7 Johann Gauss 1777-04-30 1855-02-23 77
Occupation
0 Chemist
1 Statistician
2 Nurse
3 Chemist
4 Biologist
5 Physician
6 Computer Scientist
7 Mathematician
# Write the DataFrame using '*' as the field separator instead of ','.
path_name = './output/scientist_DataFrame2.CSV'
if not os.path.exists(path_name):
    scientists.to_csv(path_name, sep='*')

# NOTE: the file is read back with read_csv's default ',' separator, so each
# row is loaded as one long '*'-joined string in a single column. Pass
# sep='*' to read_csv to recover the individual columns.
scientists_frme_CSV = pd.read_csv(path_name)
print(scientists_frme_CSV)
*Name*Born*Died*Age*Occupation
0 0*Rosaline Franklin*1920-07-25*1958-04-16*37*C...
1 1*William Gosset*1876-06-13*1937-10-16*61*Stat...
2 2*Florence Nightingale*1820-05-12*1910-08-13*9...
3 3*Marie Curie*1867-11-07*1934-07-04*66*Chemist
4 4*Rachel Carson*1907-05-27*1964-04-14*56*Biolo...
5 5*John Snow*1813-03-15*1858-06-16*45*Physician
6 6*Alan Turing*1912-06-23*1954-06-07*41*Compute...
7 7*Johann Gauss*1777-04-30*1855-02-23*77*Mathem...
# Write the DataFrame without its index so only the data columns are saved.
path_name = './output/scientist_DataFrame3.CSV'
if not os.path.exists(path_name):
    scientists.to_csv(path_name, index=False)

# Reading it back reproduces the original five columns with no extra
# 'Unnamed: 0' column.
scientists_frme_CSV = pd.read_csv(path_name)
print(scientists_frme_CSV)
Name Born Died Age Occupation
0 Rosaline Franklin 1920-07-25 1958-04-16 37 Chemist
1 William Gosset 1876-06-13 1937-10-16 61 Statistician
2 Florence Nightingale 1820-05-12 1910-08-13 90 Nurse
3 Marie Curie 1867-11-07 1934-07-04 66 Chemist
4 Rachel Carson 1907-05-27 1964-04-14 56 Biologist
5 John Snow 1813-03-15 1858-06-16 45 Physician
6 Alan Turing 1912-06-23 1954-06-07 41 Computer Scientist
7 Johann Gauss 1777-04-30 1855-02-23 77 Mathematician
3读写excel文件
3.1读写excel文件的Series
import pandas as pd
# Re-read the source CSV for the Excel section and take the 'Name' column.
scientists = pd.read_csv('./data/scientists.csv')
names = scientists['Name']

# Display the DataFrame that the Excel examples below will write out.
print(scientists)
Name Born Died Age Occupation
0 Rosaline Franklin 1920-07-25 1958-04-16 37 Chemist
1 William Gosset 1876-06-13 1937-10-16 61 Statistician
2 Florence Nightingale 1820-05-12 1910-08-13 90 Nurse
3 Marie Curie 1867-11-07 1934-07-04 66 Chemist
4 Rachel Carson 1907-05-27 1964-04-14 56 Biologist
5 John Snow 1813-03-15 1858-06-16 45 Physician
6 Alan Turing 1912-06-23 1954-06-07 41 Computer Scientist
7 Johann Gauss 1777-04-30 1855-02-23 77 Mathematician
# NOTE(review): all targets use the legacy '.xls' extension; whether pandas
# can write it depends on the installed pandas version and Excel engine, so
# confirm against the environment.

# Save the Series directly to an Excel workbook.
names.to_excel('./output/scientist_name_series.xls')

# Convert the Series to a one-column DataFrame and save that as well.
names_df = names.to_frame()
names_df.to_excel('./output/scientist_name_df.xls')

# Save the full DataFrame without its index: once with the default sheet
# name, once on a sheet explicitly named 'scientist'.
scientists.to_excel('./output/scientist_df1.xls', index=False)
scientists.to_excel(
    './output/scientist_df2.xls',
    sheet_name='scientist',
    index=False,
)

# Read the Series workbook back. The saved index comes back as an
# 'Unnamed: 0' column alongside 'Name'.
scientists_name_frme_excel = pd.read_excel('./output/scientist_name_series.xls')
print(scientists_name_frme_excel)
Unnamed: 0 Name
0 0 Rosaline Franklin
1 1 William Gosset
2 2 Florence Nightingale
3 3 Marie Curie
4 4 Rachel Carson
5 5 John Snow
6 6 Alan Turing
7 7 Johann Gauss
# Read back the workbook written from the one-column DataFrame. It was saved
# with its index, so it also loads with an 'Unnamed: 0' column.
scientists_name_df_frme_excel = pd.read_excel('./output/scientist_name_df.xls')
print(scientists_name_df_frme_excel)
Unnamed: 0 Name
0 0 Rosaline Franklin
1 1 William Gosset
2 2 Florence Nightingale
3 3 Marie Curie
4 4 Rachel Carson
5 5 John Snow
6 6 Alan Turing
7 7 Johann Gauss
# Read back the full DataFrame that was saved without an index; only the
# five data columns are loaded.
scientists_frme_excel = pd.read_excel('./output/scientist_df1.xls')
print(scientists_frme_excel)
Name Born Died Age Occupation
0 Rosaline Franklin 1920-07-25 1958-04-16 37 Chemist
1 William Gosset 1876-06-13 1937-10-16 61 Statistician
2 Florence Nightingale 1820-05-12 1910-08-13 90 Nurse
3 Marie Curie 1867-11-07 1934-07-04 66 Chemist
4 Rachel Carson 1907-05-27 1964-04-14 56 Biologist
5 John Snow 1813-03-15 1858-06-16 45 Physician
6 Alan Turing 1912-06-23 1954-06-07 41 Computer Scientist
7 Johann Gauss 1777-04-30 1855-02-23 77 Mathematician
# Read back the workbook whose sheet was named 'scientist'. No sheet name is
# passed to read_excel here, and the same table is loaded.
scientists_frme_excel = pd.read_excel('./output/scientist_df2.xls')
print(scientists_frme_excel)
Name Born Died Age Occupation
0 Rosaline Franklin 1920-07-25 1958-04-16 37 Chemist
1 William Gosset 1876-06-13 1937-10-16 61 Statistician
2 Florence Nightingale 1820-05-12 1910-08-13 90 Nurse
3 Marie Curie 1867-11-07 1934-07-04 66 Chemist
4 Rachel Carson 1907-05-27 1964-04-14 56 Biologist
5 John Snow 1813-03-15 1858-06-16 45 Physician
6 Alan Turing 1912-06-23 1954-06-07 41 Computer Scientist
7 Johann Gauss 1777-04-30 1855-02-23 77 Mathematician
3.2读写excel文件的DataFrame
import xlrd
# Open the workbook with xlrd and select the sheet by its name.
data = xlrd.open_workbook('./output/scientist_df2.xls')
sheet = data.sheet_by_name('scientist')

# Row 1 is the first data row (row 0 holds the header).
print(sheet.row_values(1))
# Column 1 is the 'Born' column, header cell included.
print(sheet.col_values(1))
['Rosaline Franklin', '1920-07-25', '1958-04-16', 37.0, 'Chemist']
['Born', '1920-07-25', '1876-06-13', '1820-05-12', '1867-11-07', '1907-05-27', '1813-03-15', '1912-06-23', '1777-04-30']
# Report the sheet's row count (header row included) and column count.
print('行数=', sheet.nrows)
print('列数=', sheet.ncols)
# Printing the sheet itself only shows the object's default repr.
print(sheet)
行数= 9
列数= 5
<xlrd.sheet.Sheet object at 0x7f339c292650>
# Access individual cells by (row, column), both zero-based.
# (0, 0) is the top-left header cell.
print(sheet.cell(0, 0).value)
# (2, 3) is the 'Age' value of the second data row; it comes back as a float.
print(sheet.cell(2, 3).value)
Name
61.0
# List the names of every sheet in the workbook.
print(data.sheet_names())
['scientist']
# Show the selected sheet's name, then its first data row and its 'Born'
# column (header included).
print(sheet.name)
print(sheet.row_values(1))
print(sheet.col_values(1))
scientist
['Rosaline Franklin', '1920-07-25', '1958-04-16', 37.0, 'Chemist']
['Born', '1920-07-25', '1876-06-13', '1820-05-12', '1867-11-07', '1907-05-27', '1813-03-15', '1912-06-23', '1777-04-30']
4.其他格式 json,html,sql
import pandas as pd
# Re-read the source CSV for the remaining export formats.
scientists = pd.read_csv('./data/scientists.csv')

# Export to HTML and JSON files. Given a target path, both calls write the
# file and return None, which is what gets printed.
print(scientists.to_html('./t.html'))
print(scientists.to_json('./t.json'))
None
None
import sqlite3
import sqlalchemy
# Create a SQLAlchemy engine for the SQLite file 'my_db.sqlite', addressed by
# a relative path.
engine = sqlalchemy.create_engine('sqlite:///my_db.sqlite')

# Store the DataFrame in a table named 'scientists'.
# NOTE(review): no if_exists argument is passed, so a second run against an
# existing table presumably fails with pandas' default policy; confirm, and
# pass if_exists= explicitly if re-runs are expected.
scientists.to_sql('scientists', engine)