完成以下功能:
- 使用 pandas 从 CSV 文件读取数据
- 任选两列进行分析
- 若两列都是数值型,则生成散点图并计算相关系数
- 若一列是字符串、另一列是数值型,则生成箱线图
- 若两列都是字符串,则生成汇总表,并进行卡方检验
import tkinter as tk
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import pandas as pd
import numpy as np
import matplotlib as mpl
from scipy import stats
# Configure Matplotlib fonts so that Chinese text can be displayed.
mpl.rcParams.update({
    'font.family': 'Microsoft JhengHei',
    'font.sans-serif': ['Microsoft JhengHei'],  # font list used for sans-serif text
    'font.style': 'italic',
    'font.size': 8,  # base font size
})
# Select the 'agg' backend for pyplot; the figure is attached to the Tk window
# explicitly through FigureCanvasTkAgg rather than through a pyplot window.
plt.switch_backend('agg')
特别注意导入数据
# Load the CSV file and keep only its last five columns for the analysis.
df = pd.read_csv(
    "\\".join([r"Q:\Python Testing", "容量预测.csv"]),
    encoding="utf-8",
).iloc[:, -5:]
主程序
# Both dropdown menus offer every column name of the loaded frame;
# `dimensions` is an independent copy of the `factors` list.
factors = df.columns.to_list()
dimensions = list(factors)
def _is_numeric(series):
    """Return True when *series* has a numeric dtype.

    The dtype of the whole column is inspected instead of the Python type of
    its first element, so empty columns and string columns that start with a
    missing value are classified correctly.
    """
    return pd.api.types.is_numeric_dtype(series)


def _draw_scatter(ax, x_name, y_name):
    """Scatter-plot two numeric columns of ``df`` and annotate their correlation."""
    x = df[x_name]
    y = df[y_name]
    ax.scatter(x, y)
    ax.set_title(x_name + " vs. " + y_name)
    ax.set_xlabel(x_name)
    ax.set_ylabel(y_name)
    # Series.corr (Pearson by default) ignores rows where either value is
    # missing, so a single NaN does not turn the whole coefficient into NaN.
    corr_coef = x.corr(y)
    ax.text(0.9, 0.1, 'Correlation: {:.2f}'.format(corr_coef),
            horizontalalignment='right', verticalalignment='center',
            transform=ax.transAxes)


def _draw_boxplot(ax, group_name, value_name):
    """Draw a box plot of numeric column *value_name* grouped by *group_name*."""
    df.boxplot(column=value_name, by=group_name, ax=ax)
    ax.set_xlabel(group_name)
    ax.set_ylabel(value_name)


def _draw_contingency(ax, row_name, col_name):
    """Render the count table of two non-numeric columns plus a chi-square test."""
    # One cross-tabulation serves both the displayed table and the test.
    observed = pd.crosstab(df[row_name], df[col_name])
    ax.axis('off')
    ax.set_title(row_name + " vs. " + col_name + " Summary Table")
    if observed.size == 0:
        # Nothing to tabulate or test (e.g. no rows without missing values).
        ax.text(0.5, 0.5, 'No data', horizontalalignment='center',
                verticalalignment='center', transform=ax.transAxes)
        return
    ax.table(cellText=observed.values, colLabels=observed.columns,
             rowLabels=observed.index, loc='center')
    try:
        chi2, p_val, _dof, _expected = stats.chi2_contingency(observed)
        chi2_result = "Chi-square: {:.2f} p-value: {:.4f}".format(chi2, p_val)
    except ValueError as exc:
        # Report the problem on the plot instead of failing the Tk callback.
        chi2_result = "Chi-square test unavailable: {}".format(exc)
    ax.text(0.5, 0.1, chi2_result, horizontalalignment='center',
            verticalalignment='center', transform=ax.transAxes,
            fontsize=8, color='red')


def plot_data():
    """Plot the two columns currently selected in the dropdown menus.

    Reads the selections from ``selected_factor`` and ``selected_dimension``,
    removes whatever is currently shown in ``container`` and embeds a new
    figure there:

    * both columns numeric      -> scatter plot with correlation coefficient
    * exactly one column numeric -> box plot of it grouped by the other column
    * neither column numeric    -> count table with a chi-square test
    * unknown column name       -> an "Invalid columns" message
    """
    factor = selected_factor.get()
    dimension = selected_dimension.get()

    # Clear the container frame
    for widget in container.winfo_children():
        widget.destroy()

    # Create a figure and axes
    fig, ax = plt.subplots(figsize=(6, 4))

    # Validate the names before indexing df; indexing first would raise
    # KeyError and the message below could never be shown.
    if factor not in df.columns or dimension not in df.columns:
        ax.text(0.5, 0.5, 'Invalid columns', horizontalalignment='center',
                verticalalignment='center')
    else:
        factor_numeric = _is_numeric(df[factor])
        dimension_numeric = _is_numeric(df[dimension])
        if factor_numeric and dimension_numeric:
            _draw_scatter(ax, factor, dimension)
        elif dimension_numeric:
            _draw_boxplot(ax, group_name=factor, value_name=dimension)
        elif factor_numeric:
            _draw_boxplot(ax, group_name=dimension, value_name=factor)
        else:
            _draw_contingency(ax, factor, dimension)

    # Create a canvas and show the plot in Tkinter window
    canvas = FigureCanvasTkAgg(fig, master=container)
    canvas.draw()
    canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=1)
    # Unregister the figure from pyplot so figures do not pile up with every
    # click; the Tk canvas keeps its own reference to the figure.
    plt.close(fig)
# Build the main application window.
root = tk.Tk()
root.wm_title("Data Visualization")

# Frame into which plot_data() embeds the Matplotlib canvas.
container = tk.Frame(master=root)
container.pack()

# First dropdown: starts on the first entry of `factors`.
selected_factor = tk.StringVar(master=root, value=factors[0])
dropdown_factor = tk.OptionMenu(root, selected_factor, *factors)
dropdown_factor.pack()

# Second dropdown: starts on the first entry of `dimensions`.
selected_dimension = tk.StringVar(master=root, value=dimensions[0])
dropdown_dimension = tk.OptionMenu(root, selected_dimension, *dimensions)
dropdown_dimension.pack()

# Button that triggers plot_data() when clicked.
plot_button = tk.Button(master=root, text="Plot", command=plot_data)
plot_button.pack()

# Hand control to the Tk event loop.
root.mainloop()