Pandas读取.db数据库信息,并处理数据

import sqlite3
con = sqlite3.connect("chinook.db")
tracks = pd.read_sql_query("SELECT * FROM tracks", con)
tracks.head()
 TrackIdNameAlbumIdMediaTypeIdGenreIdComposerMillisecondsBytesUnitPrice
01For Those About To Rock (We Salute You)111Angus Young, Malcolm Young, Brian Johnson343719111703340.99
12Balls to the Wall221None34256255104240.99
23Fast As a Shark321F. Baltes, S. Kaufman, U. Dirkscneider & W. Ho...23061939909940.99
34Restless and Wild321F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. D...25205143317790.99
45Princess of the Dawn321Deaffy & R.A. Smith-Diesel37541862905210.99
# read_sql_query函数可以读取一张表,第一个参数是表名,第二个参数是引擎
genres = pd.read_sql_query("SELECT * FROM genres", con)
genres.head()
 GenreIdName
01Rock
12Jazz
23Metal
34Alternative & Punk
45Rock And Roll
genre_track = genres.merge(tracks[['GenreId', 'Milliseconds']],
                           on='GenreId', how='left').drop('GenreId', axis='columns')
genre_track.head()
 GenreIdName
01Rock
12Jazz
23Metal
34Alternative & Punk
45Rock And Roll
# 找到每种类型歌曲的平均时长
genre_time = genre_track.groupby('Name')['Milliseconds'].mean()
pd.to_timedelta(genre_time, unit='ms').dt.floor('s').sort_values()

Name
Rock And Roll        00:02:14
Opera                00:02:54
Hip Hop/Rap          00:02:58
Easy Listening       00:03:09
Bossa Nova           00:03:39
R&B/Soul             00:03:40
World                00:03:44
Pop                  00:03:49
Latin                00:03:52
Alternative & Punk   00:03:54
Soundtrack           00:04:04
Reggae               00:04:07
Alternative          00:04:24
Blues                00:04:30
Rock                 00:04:43
Jazz                 00:04:51
Classical            00:04:53
Heavy Metal          00:04:57
Electronica/Dance    00:05:02
Metal                00:05:09
Comedy               00:26:25
TV Shows             00:35:45
Drama                00:42:55
Science Fiction      00:43:45
Sci Fi & Fantasy     00:48:31
Name: Milliseconds, dtype: timedelta64[ns]
#找到每名顾客花费的总时长
cust = pd.read_sql_query("SELECT * FROM customers", con)                    
cust=cust[['CustomerId', 'FirstName', 'LastName']]

invoice = pd.read_sql_query("SELECT * FROM invoices", con)                    
invoice=invoice[['InvoiceId','CustomerId']]

ii = pd.read_sql_query("SELECT * FROM invoice_items", con)                    
ii=ii[['InvoiceId', 'UnitPrice', 'Quantity']]
cust_inv = cust.merge(invoice, on='CustomerId') \
                         .merge(ii, on='InvoiceId')
cust_inv.head()
 CustomerIdFirstNameLastNameInvoiceIdUnitPriceQuantity
01LuísGonçalves981.991
11LuísGonçalves981.991
21LuísGonçalves1210.991
31LuísGonçalves1210.991
41LuísGonçalves1210.991
# 现在可以用总量乘以单位价格,找到每名顾客的总消费
total = cust_inv['Quantity'] * cust_inv['UnitPrice']
cols = ['CustomerId', 'FirstName', 'LastName']
cust_inv.assign(Total = total).groupby(cols)['Total'].sum().sort_values(ascending=False).head()

CustomerId  FirstName  LastName  
6           Helena     Holý          49.62
26          Richard    Cunningham    47.62
57          Luis       Rojas         46.62
46          Hugh       O'Reilly      45.62
45          Ladislav   Kovács        45.62
Name: Total, dtype: float64
# sql语句查询方法read_sql_query
pd.read_sql_query('select * from tracks limit 5', con)
 TrackIdNameAlbumIdMediaTypeIdGenreIdComposerMillisecondsBytesUnitPrice
01For Those About To Rock (We Salute You)111Angus Young, Malcolm Young, Brian Johnson343719111703340.99
12Balls to the Wall221None34256255104240.99
23Fast As a Shark321F. Baltes, S. Kaufman, U. Dirkscneider & W. Ho...23061939909940.99
34Restless and Wild321F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. D...25205143317790.99
45Princess of the Dawn321Deaffy & R.A. Smith-Diesel37541862905210.99
# 可以将长字符串传给read_sql_query
sql_string1 = '''
          select 
              Name, 
              time(avg(Milliseconds) / 1000, 'unixepoch') as avg_time
          from (
                  select 
                      g.Name, 
                      t.Milliseconds
                  from 
                      genres as g 
                  join
                      tracks as t
                      on 
                          g.genreid == t.genreid
              )
          group by 
              Name
          order by 
              avg_time
          '''
pd.read_sql_query(sql_string1, con).head()
 
 Nameavg_time
0Rock And Roll00:02:14
1Opera00:02:54
2Hip Hop/Rap00:02:58
3Easy Listening00:03:09
4Bossa Nova00:03:39
sql_string2 = '''
select 
                c.customerid, 
                c.FirstName, 
                c.LastName, 
                sum(ii.quantity *  ii.unitprice) as Total
          from
                customers as c
          join
                invoices as i
                     on c.customerid = i.customerid
          join
                invoice_items as ii
                     on i.invoiceid = ii.invoiceid
          group by
                c.customerid, c.FirstName, c.LastName
          order by
                Total desc
          '''
pd.read_sql_query(sql_string2, con).head()
 CustomerIdFirstNameLastNameTotal
06HelenaHolý49.62
126RichardCunningham47.62
257LuisRojas46.62
345LadislavKovács45.62
446HughO'Reilly45.62
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

缘 源 园

你的鼓励将是我创造的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值