利用python进行数据分析 笔记_利用python进行数据分析笔记.pdf

Python

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

import statsmodels.api as sm

from pandas import DataFrame,Series

CH02

JSON

records = [json.loads(line) for line in open(path)]

time_zone = [rec['tz'] for rec in records if 'tz' in rec]

0

from collections import defaultdict

counts = defaultdict(int)

top10

from collections import Counter

toptz = Counter(time_zone)

toptz.most_common(10)

pandas

from pandas import DataFrame,Series

frame = DataFrame(records)

tz_counts = frame['tz'].value_counts() //

tz_counts[:10] //top10

#####fillnaNA

clean_tz = frame['tz'].fillna('Missing')

clean_tz[clean_tz == ''] = 'Unknown' // clean_tz==''

#####Seriesplot

tz_counts[:10].plot(kind='barh',rot=0) //

type(tz_counts) //pandas.core.series.Series

#####DataFramenull na

frame.a.notnull() //True False

clean_frame = frame[frame.a.notnull()]

frame.a.dropna()

MovieLens

users = pd.read_table('xx/movielens/users.dat',sep='::',header=None,names=['userid','gender','age','occupation','zip']) //::

ratings = pd.read_table('./Downloads/python/pydata-book-master/ch02/movielens/ratings.dat',sep='::',header=None,names=['userid','movieid','rating','timestamp'])

movies = pd.read_table('./Downloads/python/pydata-book-master/ch02/movielens/movies.dat',sep='::',header=None,names=['movieid','title','genres'])

#####pandasmergeusersratings

data = pd.merge(pd.merge(ratings,users),movies) //cool!

data.ix[0] //

#####pandaspivot_table

mean_ratings = data.pivot_table('rating',index='title',columns='gender',aggfunc='mean') //

#####250

rating_by_title = data.groupby('title').size() //

title

$1,000,000 Duck (1971) 37

'Night Mother (1986) 70

'Til There Was You (1997) 52

active_titles = rating_by_title.index[ra

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值