import numpy as np
import pandas as pd
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import statsmodels.api as sm
# --- Plot font and warning configuration ---------------------------------
# The original export had lost its line breaks here: `import warnings` was
# swallowed by a trailing comment and two statements were fused on one line,
# so this section could not run. The statements are restored one per line.
import warnings

import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties

# Use SimHei for sans-serif text so the Chinese plot titles used in this
# script can be rendered.
plt.rcParams['font.sans-serif'] = ['SimHei']
# Needed so that minus signs display correctly with the font above.
plt.rcParams['axes.unicode_minus'] = False

# Silence every UserWarning for the rest of the script.
warnings.filterwarnings("ignore", category=UserWarning)

# Explicit font handle; adjust the path to match your system / font location.
font = FontProperties(fname=r'C:\WINDOWS\Fonts\simhei.ttf')
# --- Load the workbook -----------------------------------------------------
# Machine-specific absolute path to the source workbook.
file_path = r'G:\data\Zephyr中国海外并购数据库\workfile.xlsx'
data = pd.read_excel(file_path)
# Optional preview of the first rows: print(data.head())

# Print every column name together with its dtype.
# (In the original export this loop had been swallowed by the comment above
# it and never executed.)
for column in data.columns:
    print(f"变量名称: {column}, 类型: {data[column].dtype}")
# Recorded notebook output of the per-column dtype listing for `data`
# (kept as comments so the file remains valid Python):
#   column: Unnamed: 0, dtype: float64
#   column: Deal Number, dtype: int64
#   column: Acquiror name, dtype: object
#   column: Acquiror country code, dtype: object
#   column: Target name, dtype: object
#   column: Target country code, dtype: object
#   column: Deal type, dtype: object
#   column: Deal status, dtype: object
#   column: Deal value th EUR, dtype: object
#   column: Target business description(s), dtype: object
#   column: Acquiror business description(s), dtype: object
#   column: Deal type.1, dtype: object
#   column: Deal sub-type, dtype: object
#   column: Deal financing, dtype: object
#   column: Deal method of payment, dtype: object
#   column: Deal method of payment value th EUR, dtype: object
#   column: Deal status.1, dtype: object
#   column: Rumour date, dtype: datetime64[ns]
#   column: Announced date, dtype: datetime64[ns]
#   column: Expected completion date, dtype: datetime64[ns]
#   column: Assumed completion date, dtype: datetime64[ns]
#   column: Completed date, dtype: datetime64[ns]
#   column: Postponed date, dtype: datetime64[ns]
#   column: Withdrawn date, dtype: datetime64[ns]
#   column: Last deal status date, dtype: datetime64[ns]
#   column: Last deal value, offer price, bid premium update date, dtype: datetime64[ns]
#   column: Last deal status update date, dtype: datetime64[ns]
#   column: Last % of stake update date, dtype: datetime64[ns]
#   column: Last acquiror, target, vendor update date, dtype: datetime64[ns]
#   column: Last advisor update date, dtype: datetime64[ns]
#   column: Last deal comment, rationale update date, dtype: datetime64[ns]
#   column: Last update, dtype: datetime64[ns]
#   column: Deal value th EUR.1, dtype: object
#   column: Deal value (Native currency) th LCU, dtype: object
#   column: Deal equity value th EUR, dtype: object
#   column: Deal equity value (Native currency) th LCU, dtype: object
#   column: Deal enterprise value th EUR, dtype: object
#   column: Deal enterprise value (Native currency) th LCU, dtype: object
#   column: Deal modelled enterprise value th EUR, dtype: object
#   column: Deal modelled enterprise value (Native currency) th LCU, dtype: object
#   column: Deal total target value th EUR, dtype: object
#   column: Deal total target value (Native currency) th LCU, dtype: object
#   column: Modelled Fee Income th EUR, dtype: object
#   column: As Reported Fee Income th EUR, dtype: object
#   column: Initial stake (%), dtype: object
#   column: Acquired stake (%), dtype: object
#   column: Final stake (%), dtype: object
#   column: IRR (%), dtype: float64
#   column: Native currency, dtype: object
# --- Frequency of each acquiror country code -------------------------------
# Take the "Acquiror country code" column and count how many times each
# distinct code occurs.
acquiror_country_code_counts = data['Acquiror country code'].value_counts()

# Print the result, one line per code.
# (In the original export the print and the loop had been swallowed by a
# trailing comment and never executed.)
print("Acquiror country code 变量下有几类字符,分别出现了多少次:")
for code, count in acquiror_country_code_counts.items():
    print(f"字符: {code}, 出现次数: {count}")
# --- Pie chart of acquiror country codes, rare codes merged ---------------
# Count how many times each acquiror country code occurs.
acquiror_country_code_counts = data['Acquiror country code'].value_counts()

# Codes whose share of all counted occurrences is below 2.6% are folded
# into a single '其他' category, rewriting the column in `data` in place.
threshold = 0.026
other_codes = acquiror_country_code_counts.loc[
    acquiror_country_code_counts / acquiror_country_code_counts.sum() < threshold
].index
data['Acquiror country code'] = data['Acquiror country code'].replace(
    other_codes, '其他'
)

# Recount now that the rare codes share one label.
acquiror_country_code_counts = data['Acquiror country code'].value_counts()

# Draw the pie chart: one wedge per remaining category, labelled with the
# category name and its percentage to one decimal place.
plt.figure(figsize=(8, 8))
plt.pie(
    acquiror_country_code_counts,
    labels=acquiror_country_code_counts.index,
    autopct='%1.1f%%',
    startangle=140,
)
plt.title('Acquiror country code 分布饼状图')
plt.show()
# --- Inspect and convert the "Deal value th EUR" column --------------------
# The original export fused each assignment with the following print call
# (a syntax error) and left raw cell output between statements; statements
# are restored one per line and the recorded output is kept as comments.
deal_value_column = data['Deal value th EUR']
print(f"Deal value th EUR 变量的类型是:{deal_value_column.dtype}")
# Recorded output: the dtype was reported as object.

# Convert the column to numeric. With errors='coerce', entries that cannot
# be parsed as numbers become NaN instead of raising.
data['Deal value th EUR'] = pd.to_numeric(data['Deal value th EUR'], errors='coerce')

# Print the dtype again, reading the column back from `data` because
# `deal_value_column` still refers to the pre-conversion Series.
print(f"Deal value th EUR 变量的类型是:{data['Deal value th EUR'].dtype}")
# Recorded output: the dtype was reported as float64.

# NOTE: `deal_value_column` is reused here to hold the "Rumour date" column.
deal_value_column = data['Rumour date']
print(f"Rumour date 变量的类型是:{deal_value_column.dtype}")