import pandas as pd
import numpy as np
import os
%matplotlib inline
# Switch the working directory to the data folder so that the relative
# file name below resolves against it.
os.chdir(r"C:\Users\Hans\Desktop\data_analysis\test_data\movie")

# ratings.dat has no header row and uses "::" between fields, so the column
# labels are supplied explicitly. A multi-character separator is handled by
# the python parsing engine, which is why engine="python" is requested.
# The labels could equally be written as
#   "UserID::MoiveID::Rating::Timestamp".split("::")
# NOTE(review): 'MoiveID' looks like a misspelling of 'MovieID'; it is kept
# unchanged here because it is the column label other code would refer to.
df = pd.read_csv(
    "ratings.dat",
    sep="::",
    header=None,
    names=['UserID', 'MoiveID', 'Rating', 'Timestamp'],
    engine="python",
)

# Preview the first rows of the loaded table.
df.head()
| | UserID | MoiveID | Rating | Timestamp |
---|---|---|---|---|
0 | 1 | 1193 | 5 | 978300760 |
1 | 1 | 661 | 3 | 978302109 |
2 | 1 | 914 | 3 | 978301968 |
3 | 1 | 3408 | 4 | 978300275 |
4 | 1 | 2355 | 5 | 978824291 |
# The "Timestamp" column holds times expressed in seconds, hence unit='s'.
# pd.to_datetime turns those second counts into datetime values, which are
# stored in a new "pdate" column.
df["pdate"] = pd.to_datetime(
    df["Timestamp"],
    unit='s',
)

# Display the table with the added column.
df