import pandas as pd
import plotly.express as px
# Load the raw CSV. header=0 together with names= replaces the file's own
# header row with column_names.
column_names = ['timestamp', 'direction', 'other_columns']  # Adjust 'other_columns' as necessary
data = pd.read_csv(
    r"C:\Users\dell\Desktop\tyx\fujian2.csv",
    encoding='gbk',
    names=column_names,
    header=0,
)
print("数据加载成功。")

# Parse the timestamp strings; errors='coerce' turns anything that does not
# match the format into NaT instead of raising.
print("开始转换时间戳...")
data['timestamp'] = pd.to_datetime(
    data['timestamp'],
    format='%Y-%m-%dT%H:%M:%S.%f',
    errors='coerce',
)
print("时间戳转换完成。")

# Discard rows whose timestamp could not be parsed (NaT).
data = data.dropna(subset=['timestamp'])
print(f"去除 NaT 的行后数据行数: {len(data)}")

# Discard rows that have no value in the 'direction' column.
data = data.dropna(subset=['direction'])
print(f"去除 NaN 的行后数据行数: {len(data)}")

# Discard rows that are exact duplicates across all columns.
data = data.drop_duplicates()
print(f"去除重复行后数据行数: {len(data)}")

# Derive the hour of day (0-23) from each timestamp; the time-period
# buckets are built from this column.
print("提取小时信息...")
data['hour'] = data['timestamp'].dt.hour
def get_time_period(hour):
    """Return the six-hour time-period label that contains *hour*.

    Args:
        hour: Hour of day as a number, normally in the range 0-23.

    Returns:
        One of '0:00 - 6:00', '6:00 - 12:00', '12:00 - 18:00' or
        '18:00 - 24:00'. Any value that does not fall in the first three
        half-open intervals is mapped to '18:00 - 24:00'.
    """
    if 0 <= hour < 6:
        return '0:00 - 6:00'
    if 6 <= hour < 12:
        return '6:00 - 12:00'
    if 12 <= hour < 18:
        return '12:00 - 18:00'
    # Fallback bucket: 18-23, plus anything outside the ranges above.
    return '18:00 - 24:00'
# Chronological order of the labels returned by get_time_period(). Sorted as
# plain strings they come out as 0:00, 12:00, 18:00, 6:00, which would make
# the plotted line jump back and forth along the x-axis.
TIME_PERIOD_ORDER = ['0:00 - 6:00', '6:00 - 12:00', '12:00 - 18:00', '18:00 - 24:00']
# px.line draws one trace per distinct value of the colour column, so the
# number of plotted directions is capped.
# NOTE(review): the recorded run stalls right after "开始绘图...", which is
# consistent with 'direction' holding a very large number of distinct values;
# confirm with data['direction'].nunique().
MAX_DIRECTION_TRACES = 20

# Label every row with its time period, stored as an ordered categorical so
# that grouping sorts the periods chronologically.
data['time_period'] = data['hour'].apply(get_time_period).astype(
    pd.CategoricalDtype(categories=TIME_PERIOD_ORDER, ordered=True)
)
print("时间段定义完成。")

# Count traffic flow by time period and direction. observed=True keeps only
# the (period, direction) combinations that actually occur in the data.
print("开始计数流量...")
traffic_counts = (
    data.groupby(['time_period', 'direction'], observed=True)
    .size()
    .reset_index(name='count')
)

# Keep only the busiest directions when there are too many to draw.
direction_totals = traffic_counts.groupby('direction')['count'].sum()
if len(direction_totals) > MAX_DIRECTION_TRACES:
    busiest = direction_totals.nlargest(MAX_DIRECTION_TRACES).index
    plot_counts = traffic_counts[traffic_counts['direction'].isin(busiest)]
    print(f"方向取值共 {len(direction_totals)} 种,仅绘制流量最大的 {MAX_DIRECTION_TRACES} 种。")
else:
    plot_counts = traffic_counts
# Hand plotly plain string labels; row order (already chronological) is kept.
plot_counts = plot_counts.astype({'time_period': str})

# Plot the aggregated counts, one line per direction.
try:
    print("开始绘图...")
    fig = px.line(
        plot_counts,
        x='time_period',
        y='count',
        color='direction',
        category_orders={'time_period': TIME_PERIOD_ORDER},
        labels={'count': 'Traffic Count', 'time_period': 'Time Period'},
        title='Traffic Count by Time Period and Direction',
    )
    print("生成图形...")
    fig.show()
    print("图形生成结束。")
except Exception as e:
    # Top-level boundary of the script: report the failure instead of
    # letting the traceback end the run.
    print(f"绘图时发生错误: {e}")
C:\Users\dell\PycharmProjects\pythonProject\.venv\Scripts\python.exe C:\Users\dell\PycharmProjects\pythonProject3\main.py
数据加载成功。
开始转换时间戳...
时间戳转换完成。
去除 NaT 的行后数据行数: 8844996
去除 NaN 的行后数据行数: 8844996
去除重复行后数据行数: 8844918
提取小时信息...
时间段定义完成。
开始计数流量...
开始绘图...(卡在绘图这一步)