后来越来越觉得,当时建模做了个shi么…方法搞不清楚怎么用就套,代码,害,老师肯定觉得 熊孩子怎么回事 一个程序用这么多次不知道写个函数都…
程序一.附件一数据处理
1. #导包
2. import pandas as pd
3. import numpy as np
4.
5. #文件
6. file_path='123家有信贷记录企业的相关数据及产业分类.xlsx'
7. #读取sheet的名字
8. sheetName1='企业信息'
9. sheetName2='进项发票信息'
10. sheetName3='销项发票信息'
11. data_1=pd.read_excel(file_path,sheet_name=sheetName1)
12. data_2=pd.read_excel(file_path,sheet_name=sheetName2)
13. data_3=pd.read_excel(file_path,sheet_name=sheetName3)
14.
15. # 拷贝一下,老写错代码,导数据真慢
16. data_1a=data_1.copy(deep=True)
17. data_2a=data_2.copy(deep=True)
18. data_3a=data_3.copy(deep=True)
19.
20. #时间转化为整型以方便比较,分类
21. def time_trans(time_ch):
22. year = time_ch.split('-')[0]
23. month = time_ch.split('-')[1]
24. day = time_ch.split('-')[2].split(' ')[0]
25. new_date = (year + month + day )
26. return int(new_date)
27.
28. a = list(data_2a['开票日期'])
29. for i in range(len(data_2a['开票日期'])):
30. a[i] = str(a[i])
31. a[i] = time_trans(a[i])
32. data_2a['开票日期'] = a
33.
34. # 企业代号转化为整型
35. def num_trans(num_ch):
36. num = num_ch.split('E')[1]
37. return int(num)
38. a2 = list(data_2a['企业代号'])
39. for i in range(len(a2)):
40. a2[i] = num_trans(a2[i])
41. data_2a['企业代号'] = a2
42.
43. data_21 = data_2a.loc[data_2a['开票日期']<20180101] #18年之前
44. data_22 = data_2a.loc[(data_2a['开票日期']>=20180101) & (data_2a['开票日期']<20190101)] #18年
45. data_23 = data_2a.loc[data_2a['开票日期']>=20190101] #19年及之后
46. # 18年之前
47. data_211 = data_21.loc[(data_21['发票状态']=='有效发票') & (data_21['价税合计']>0)]
48. data_212 = data_21.loc[data_21['发票状态']=='作废发票']
49. data_213 = data_21.loc[(data_21['发票状态']=='有效发票') & (data_21['价税合计']<0)]
50. # 18年
51. data_221 = data_22.loc[(data_22['发票状态']=='有效发票') & (data_22['价税合计']>0)]
52. data_222 = data_22.loc[data_22['发票状态']=='作废发票']
53. data_223 = data_22.loc[(data_22['发票状态']=='有效发票') & (data_22['价税合计']<0)]
54. # 19年及之后
55. data_231 = data_23.loc[(data_23['发票状态']=='有效发票') & (data_23['价税合计']>0)]
56. data_232 = data_23.loc[data_23['发票状态']=='作废发票']
57. data_233 = data_23.loc[(data_23['发票状态']=='有效发票') & (data_23['价税合计']<0)]
58.
59. jin211 = [0 for index in range(123)] #存放18年之前每家企业的进项有效发票金额
60. a211 = list(data_211['企业代号']);b211 = list(data_211['价税合计'])
61. for i in range(len(a211)):
62. for j in range(123):
63. if a211[i] == j+1:
64. jin211[j] += b211[i]
65.
66. jin212 = [0 for index in range(123)] #存放18年之前每家企业的进项作废发票数量
67. a212 = list(data_212['企业代号']);b212 = list(data_212['价税合计'])
68. for i in range(len(a212)):
69. for j in range(123):
70. if a212[i] == j+1:
71. jin212[j] += 1
72.
73. jin213 = [0 for index in range(123)] #存放18年之前每家企业的进项负数发票数量
74. a213 = list(data_213['企业代号']);b213 = list(data_213['价税合计'])
75. for i in range(len(a213)):
76. for j in range(123):
77. if a213[i] == j+1:
78. jin213[j] += 1
79.
80. jin221 = [0 for index in range(123)] #存放18年每家企业的进项有效发票金额
81. a221 = list(data_221['企业代号']);b221 = list(data_221['价税合计'])
82. for i in range(len(a221)):
83. for j in range(123):
84. if a221[i] == j+1:
85. jin221[j] += b221[i]
86.
87. jin222 = [0 for index in range(123)] #存放18年每家企业的进项作废发票数量
88. a222 = list(data_222['企业代号']);b222 = list(data_222['价税合计'])
89. for i in range(len(a222)):
90. for j in range(123):
91. if a222[i] == j+1:
92. jin222[j] += 1
93.
94. jin223 = [0 for index in range(123)] #存放18年每家企业的进项负数发票数量
95. a223 = list(data_223['企业代号']);b223 = list(data_223['价税合计'])
96. for i in range(len(a223)):
97. for j in range(123):
98. if a223[i] == j+1:
99. jin223[j] += 1
100.
101. jin231 = [0 for index in range(123)] #存放19年及之后每家企业的进项有效发票金额
102. a231 = list(data_231['企业代号']);b231 = list(data_231['价税合计'])
103. for i in range(len(a231)):
104. for j in range(123):
105. if a231[i] == j+1:
106. jin231[j] += b231[i]
107.
108. jin232 = [0 for index in range(123)] #存放19年及之后每家企业的进项作废发票数量
109. a232 = list(data_232['企业代号']);b232 = list(data_232['价税合计'])
110. for i in range(len(a232)):
111. for j in range(123):
112. if a232[i] == j+1:
113. jin232[j] += 1
114.
115. jin233 = [0 for index in range(123)] #存放19年及之后每家企业的进项负数发票数量
116. a233 = list(data_213['企业代号']);b233 = list(data_233['价税合计'])
117. for i in range(len(a233)):
118. for j in range(123):
119. if a233[i] == j+1:
120. jin233[j] += 1
121.
122. a2 = list(data_3a['企业代号'])
123. for i in range(len(a2)):
124. a2[i] = num_trans(a2[i])
125. data_3a['企业代号'] = a2
126.
127. a = list(data_3a['开票日期'])
128. for i in range(len(data_3a['开票日期'])):
129. a[i] = str(a[i])
130. for i in range(len(a)):
131. a[i] = time_trans(a[i])
132. data_3a['开票日期'] = a
133.
134. data_31 = data_3a.loc[data_3a['开票日期']<20180101] #18年之前
135. data_32 = data_3a.loc[(data_3a['开票日期']>=20180101) & (data_3a['开票日期']<20190101)] #18年
136. data_33 = data_3a.loc[data_3a['开票日期']>=20190101] #19年及之后
137. # 18年之前
138. data_311 = data_31.loc[(data_31['发票状态']=='有效发票') & (data_31['价税合计']>0)]
139. data_312 = data_31.loc[data_31['发票状态']=='作废发票']
140. data_313 = data_31.loc[(data_31['发票状态']=='有效发票') & (data_31['价税合计']<0)]
141. # 18年
142. data_321 = data_32.loc[(data_32['发票状态']=='有效发票') & (data_32['价税合计']>0)]
143. data_322 = data_32.loc[data_32['发票状态']=='作废发票']
144. data_323 = data_32.loc[(data_32['发票状态']=='有效发票') & (data_32['价税合计']<0)]
145. # 19年及之后
146. data_331 = data_33.loc[(data_33['发票状态']=='有效发票') & (data_33['价税合计']>0)]
147. data_332 = data_33.loc[data_33['发票状态']=='作废发票']
148. data_333 = data_33.loc[(data_33['发票状态']=='有效发票') & (data_33['价税合计']<0)]
149.
150. jin311 = [0 for index in range(123)] #存放18年之前每家企业的销项有效发票金额
151. a311 = list(data_311['企业代号']);b311 = list(data_311['金额'])
152. for i in range(len(a311)):
153. for j in range(123):
154. if a311[i] == j+1:
155. jin311[j] += b311[i]
156.
157. jin312 = [0 for index in range(123)] #存放18年之前每家企业的销项作废发票数量
158. a312 = list(data_312['企业代号']);b312 = list(data_312['金额'])
159. for i in range(len(a312)):
160. for j in range(123):
161. if a312[i] == j+1:
162. jin312[j] += 1
163.
164. jin313 = [0 for index in range(123)] #存放18年之前每家企业的销项负数发票数量
165. a313 = list(data_313['企业代号']);b313 = list(data_313['金额'])
166. for i in range(len(a313)):
167. for j in range(123):
168. if a313[i] == j+1:
169. jin313[j] += 1
170.
171. jin321 = [0 for index in range(123)] #存放18年每家企业的销项有效发票金额
172. a321 = list(data_321['企业代号']);b321 = list(data_321['金额'])
173. for i in range(len(a321)):
174. for j in range(123):
175. if a321[i] == j+1:
176. jin321[j] += b321[i]
177.
178. jin322 = [0 for index in range(123)] #存放18年每家企业的销项作废发票数量
179. a322 = list(data_322['企业代号']);b322 = list(data_322['金额'])
180. for i in range(len(a322)):
181. for j in range(123):
182. if a322[i] == j+1:
183. jin322[j] += 1
184.
185. jin323 = [0 for index in range(123)] #存放18年每家企业的销项负数发票数量
186. a323 = list(data_323['企业代号']);b323 = list(data_323['金额'])
187. for i in range(len(a323)):
188. for j in range(123):
189. if a323[i] == j+1:
190. jin323[j] += 1
191.
192. jin331 = [0 for index in range(123)] #存放19年及之后每家企业的销项有效发票金额
193. a331 = list(data_331['企业代号']);b331 = list(data_331['金额'])
194. for i in range(len(a331)):
195. for j in range(123):
196. if a331[i] == j+1:
197. jin331[j] += b331[i]
198.
199. jin332 = [0 for index in range(123)] #存放19年及之后每家企业的销项作废发票数量
200. a332 = list(data_332['企业代号']);b332 = list(data_332['金额'])
201. for i in range(len(a332)):
202. for j in range(123):
203. if a332[i] == j+1:
204. jin332[j] += 1
205.
206. jin333 = [0 for index in range(123)] #存放19年及之后每家企业的销项负数发票数量
207. a333 = list(data_333['企业代号']);b333 = list(data_333['金额'])
208. for i in range(len(a333)):
209. for j in range(123):
210. if a333[i] == j+1:
211. jin333[j] += 1
212.
213. a2 = list(data_1a['企业代号'])
214. for i in range(len(a2)):
215. a2[i] = num_trans(a2[i])
216. data_1a['企业代号'] = a2
217.
218. # 根据企业代号排序
219. data_1a = data_1a.sort_values(['企业代号'], ascending = True)
220.
221. #精度问题,四舍五入保留两位小数
222. from decimal import Decimal, ROUND_HALF_UP
223. def round_up(number, num_digits):
224. """
225. 按指定位数对数值进行四舍五入。
226. :param number:要四舍五入的数字。
227. :param num_digits:要进行四舍五入运算的位数。
228. """
229. if num_digits > 0:
230. res = round_up(number * 10, num_digits - 1) / 10
231. else:
232. res = Decimal(number * 10 ** num_digits).quantize(Decimal('1'), rounding=ROUND_HALF_UP) * 10 ** -num_digits
233. return res
234.
235. jin21 = [0 for index in range(123)];jin22 = [0 for index in range(123)];jin23 = [0 for index in range(123)]
236. jin31 = [0 for index in range(123)];jin32 = [0 for index in range(123)];jin33 = [0 for index in range(123)]
237. for i in range(123):
238. jin21[i] = jin211[i]*0.2 +jin221[i]*0.3 + jin231[i]*0.5 #进项发票金额
239. jin22[i] = jin212[i]*0.2 +jin222[i]*0.3 + jin232[i]*0.5 + jin312[i]*0.2 +jin322[i]*0.3 + jin332[i]*0.5 #作废发票数量
240. jin23[i] = jin213[i]*0.2 +jin223[i]*0.3 + jin233[i]*0.5 + jin313[i]*0.2 +jin323[i]*0.3 + jin333[i]*0.5 #负数发票数量
241. jin31[i] = jin311[i]*0.2 +jin321[i]*0.3 + jin331[i]*0.5 #销项发票金额
242. for i in range(123):
243. jin21[i] = round_up(jin21[i],2)
244. jin31[i] = round_up(jin31[i],2)
245.
246. data_1a['进项发票金额'] = jin21
247. data_1a['作废发票数量'] = jin22
248. data_1a['负数发票数量'] = jin23
249. data_1a['销项发票金额'] = jin31
250.
251. # 进销平衡
252. ph1 = [0 for index in range(123)];ph2 = [0 for index in range(123)];ph3 = [0 for index in range(123)];ph = [0 for index in range(123)]
253. for i in range(len(ph1)):
254. ph1[i] = jin311[i]-jin211[i]
255. ph2[i] = jin321[i]-jin221[i]
256. ph3[i] = jin331[i]-jin231[i]
257. ph[i] = ph1[i]*0.2+ph2[i]*0.3+ph3[i]*0.5
258. for i in range(123):
259. ph1[i] = round_up(ph1[i],2)
260. ph2[i] = round_up(ph2[i],2)
261. ph3[i] = round_up(ph3[i],2)
262. ph[i] = round_up(ph[i],2)
263. data_1a['盈余'] = ph
264.
265. # 信誉评级量化
266. level = list(data_1a['信誉评级'])
267. for i in range(len(level)):
268. if level[i] == 'A':
269. level[i] = 0.95
270. if level[i] == 'B':
271. level[i] = 0.7
272. if level[i] == 'C':
273. level[i] = 0.5
274. if level[i] == 'D':
275. level[i] = 0
276. data_1a['信誉评级']=level
277.
278. # 行业划分量化影响力
279. a = list(data_1a['企业类别'])
280. for i in range(len(a)):
281. if a[i] == 1:
282. a[i] = 0.15
283. if a[i] == 2:
284. a[i] = 0.3
285. if a[i] == 3:
286. a[i] = 0.55
287. data_1a['企业类别'] = a
288.
289. data_1a.rename(columns={'企业类别':'影响力'}, inplace = True)
290.
291. # 极小型指标转化为极大型指标
292. max(data_1a['作废发票数量'])#660.0
293. max(data_1a['负数发票数量'])#1450.7
294. data_1a['作废发票数量'] = 660.0-data_1a['作废发票数量']
295. data_1a['负数发票数量'] = 1450.70-data_1a['负数发票数量']
296.
297. #保存文件
298. data_1a.to_excel('附件一处理后的数据.xlsx')
程序二.问题一模型建立
1. import numpy as np
2. import pandas as pd
3. df_=pd.read_excel('附件一标准化后的数据.xlsx')
4.
5. df1 = df_.loc[:,['Z作废发票数量','Z负数发票数量','Z影响力','Z盈余','Z信誉评级']]
6. df1.columns = [0,1,2,3,4]
7.
8. # 信誉得分权重
9. from scipy.sparse.linalg import eigs
10. from numpy import array, hstack
11. a=array([[1,1/2,1/4,1/7,1/6],[2,1,1/3,1/6,1/5],[4,3,1,1/5,1/4],
12. [7,6,5,1,3],[6,5,4,1/3,1]])
13. L,V=eigs(a,1);
14. CR=(L-5)/4/1.12 #计算矩阵A的一致性比率
15. W=V/sum(V); print("最大特征值为:",L)
16. print("最大特征值对应的特征向量W=\n",W)
17. print("CR=",CR)
18.
19. # 信誉得分权重
20. a = np.array([[1,1/2,1/4,1/7,1/6],[2,1,1/3,1/6,1/5],[4,3,1,1/5,1/4],
21. [7,6,5,1,3],[6,5,4,1/3,1]])
22. a1 = np.zeros((5,5))
23. for i in range(len(a)):
24. for j in range(len(a[i])):
25. a1[i][j] = a[i][j]/(a[0][j]+a[1][j]+a[2][j]+a[3][j]+a[4][j])
26. print('a1:{}'.format(a1))
27.
28. v = np.zeros(5)
29. for i in range(len(a1)):
30. v[i] = sum(a1[i])
31. print('v:{}'.format(v))
32.
33. w = np.zeros(5)
34. for i in range(len(a1)):
35. w[i] = v[i]/sum(v)
36. w = w.reshape(5,1)
37. print('w:{}'.format(w))
38.
39. aw = np.dot(a,w)
40. print('aw:{}'.format(aw))
41.
42. l = 0
43. for i in range(len(aw)):
44. l+=aw[i]/(len(aw)*w[i])
45. print('l:{}'.format(l))
46.
47. ci = (l-len(aw))/(len(aw)-1)
48. print('ci:{}'.format(ci))
49.
50. ri = 1.12
51. cr = ci/ri
52. print('cr:{}'.format(cr))
53.
54. w = [0.0439,0.0660,0.1301,0.4804,0.2796]
55. score = [0 for index in range(123)]
56. for j in range(len(w)):
57. for i in range(123):
58. score[i] += df1[j][i]*w[j]
59.
60. sum(w) #检验权重之和为1
61.
62. df_['信贷安全得分'] = score
63.
64. # 删除信誉评级为D的客户并查看还有多少客户量
65. df = df_.drop(df_[(df_['信誉评级']==0)].index)
66. df.shape #99
67.
68. # 信誉得分
69. score1 = [0 for i in range(99)]
70. score2 = list(df['信誉评级'])
71. for i in range(len(score1)):
72. if score2[i] == 0.95:
73. score1[i] = 0.0465
74. if score2[i] == 0.7 or score2[i] == 0.5:
75. score1[i] = 0.0585
76. # if score2[i] == 0:
77. # score1[i] = nan
78. df['信誉年利率'] = score1
79.
80. # 信贷安全年利率
81. df2 = df.sort_values(by='信贷安全得分',ascending=False)
82.
83. # 利率函数计算
84. from sympy import *
85. x = symbols('x')
86. y = (x+0.304746)*((0.04-0.15)/(0.909247+0.304746))+0.15
87.
88. # 根据利率函数插值
89. x1 = list(df2['信贷安全得分'])
90. y1 = [0 for index in range(len(x1))]
91. for i in range(len(y1)):
92. y1[i] = y.subs(x,x1[i])
93. df2['信贷安全年利率'] = y1
94.
95. # 利率 保留4位小数(看附件三是四位小数)
96. rate = [0 for index in range(99)]
97. rate1 = list(df2['信誉年利率']);rate2 = list(df2['信贷安全年利率'])
98. for i in range(99):
99. rate[i] = rate1[i]*0.25+rate2[i]*0.75
100. rate[i] = round(rate[i],4)
101.
102. #E1,E4与实际差距较大,故而单独分析
103. rate[0] = 0.09
104. df2['年利率'] = rate
105. df2['年利率'] = df2['年利率']*100
106. df2.rename(columns={'年利率':'年利率(%)'}, inplace = True)
107.
108. df3 = df.loc[:,['Z信誉评级','Z盈余','Z进项发票金额']]
109. df3.columns = [0,1,2]
110.
111. # 信誉得分权重
112. a = np.array([[1,3,1/2],[1/3,1,1/5],[2,5,1]])
113. a1 = np.zeros((3,3))
114. for i in range(len(a)):
115. for j in range(len(a[i])):
116. a1[i][j] = a[i][j]/(a[0][j]+a[1][j]+a[2][j])
117. print('a1:{}'.format(a1))
118.
119. v = np.zeros(3)
120. for i in range(len(a1)):
121. v[i] = sum(a1[i])
122. print('v:{}'.format(v))
123.
124. w = np.zeros(3)
125. for i in range(len(a1)):
126. w[i] = v[i]/sum(v)
127. w = w.reshape(3,1)
128. print('w:{}'.format(w))
129.
130. aw = np.dot(a,w)
131. print('aw:{}'.format(aw))
132.
133. l = 0
134. for i in range(len(aw)):
135. l+=aw[i]/(len(aw)*w[i])
136. print('l:{}'.format(l))
137.
138. ci = (l-len(aw))/(len(aw)-1)
139. print('ci:{}'.format(ci))
140.
141. ri = 1.12
142. cr = ci/ri
143. print('cr:{}'.format(cr))
144.
145. # 额度得分
146. w = [0.3092,0.1096,0.5812]
147. score = [0 for index in range(99)]
148. for j in range(len(w)):
149. for i in range(99):
150. score[i] += df1[j][i]*w[j]
151. for i in range(len(score)):
152. score[i]+=1.4981055297610921
153.
154. df2['额度得分'] = score
155.
156. sum(w) #检验权重相加=1
157.
158. # 按额度得分排序
159. df3 = df2.sort_values(by='额度得分',ascending=False)
160.
161. # 企业
162. x = []
163. for i in range(1,100):
164. x.append(i)
165. # 额度得分
166. y = list(df3['额度得分'])
167.
168. #绘图 #我的编译器这块代码需要运行两次才能出图
169. import matplotlib.pyplot as plt
170. import matplotlib
171. from matplotlib import font_manager
172. my_font=font_manager.FontProperties(fname='C:\Windows\Fonts\msyh.ttc',size=10) #fname字体路径,本电脑
173. plt.plot(x,y)
174. #绘制网格
175. plt.grid(alpha=0.4)
176. #标题
177. plt.xlabel('企业代号',fontproperties=my_font)
178. plt.ylabel('额度得分',fontproperties=my_font)
179. plt.title('各企业额度得分',fontproperties=my_font,color='black',size=15)
180. #显示
181. plt.show()
182.
183. # 额度
184. M = symbols('M')
185. y_sum = sum(y)
186. y1 = [0 for index in range(99)]
187. for i in range(len(y)):
188. y1[i] = round(y[i]/y_sum,4)*M
189.
190. df3['额度(万)'] = y1
191.
192. df4 = df3.loc[:,['企业代号','企业名称','年利率(%)','额度(万)']]
193. df4 = df3.loc[:,['企业代号','信誉年利率','信贷安全年利率','额度得分','年利率(%)','额度(万)']]
194. df4 = pd.merge(df_,df4,how = 'left',on='企业代号')
195. df4.fillna(0)
196.
197. df4.to_excel('123家企业各项指标及得分.xlsx')
198.
199. # 结果
200. df5 = df4.loc[:,['企业代号','企业名称','年利率(%)','额度(万)']]
201. df5.to_excel('针对123家企业的贷款策略.xlsx')
程序三.附件二数据处理
1. #导包
2. import pandas as pd
3. import numpy as np
4.
5. #文件
6. file_path='302家无信贷记录企业的相关数据及产业分类.xlsx'
7. #读取sheet的名字
8. sheetName1='企业信息'
9. sheetName2='进项发票信息'
10. sheetName3='销项发票信息'
11. data_1=pd.read_excel(file_path,sheet_name=sheetName1)
12. data_2=pd.read_excel(file_path,sheet_name=sheetName2)
13. data_3=pd.read_excel(file_path,sheet_name=sheetName3)
14.
15. # 拷贝一下,老写错代码,导数据真慢
16. data_1a=data_1.copy(deep=True)
17. data_2a=data_2.copy(deep=True)
18. data_3a=data_3.copy(deep=True)
19.
20. #时间转化为整型以方便比较,分类
21. def time_trans(time_ch):
22. year = time_ch.split('-')[0]
23. month = time_ch.split('-')[1]
24. day = time_ch.split('-')[2].split(' ')[0]
25. new_date = (year + month + day )
26. return int(new_date)
27.
28. a = list(data_2a['开票日期'])
29. for i in range(len(data_2a['开票日期'])):
30. a[i] = str(a[i])
31. a[i] = time_trans(a[i])
32. data_2a['开票日期'] = a
33.
34. # 企业代号转化为整型
35. def num_trans(num_ch):
36. num = num_ch.split('E')[1]
37. return int(num)
38. a2 = list(data_2a['企业代号'])
39. for i in range(len(a2)):
40. a2[i] = num_trans(a2[i])
41.
42. data_2a['企业代号'] = a2
43.
44. data_21 = data_2a.loc[data_2a['开票日期']<20180101] #18年之前
45. data_22 = data_2a.loc[(data_2a['开票日期']>=20180101) & (data_2a['开票日期']<20190101)] #18年
46. data_23 = data_2a.loc[data_2a['开票日期']>=20190101] #19年及之后
47. # 18年之前
48. data_211 = data_21.loc[(data_21['发票状态']=='有效发票') & (data_21['价税合计']>0)]
49. data_212 = data_21.loc[data_21['发票状态']=='作废发票']
50. data_213 = data_21.loc[(data_21['发票状态']=='有效发票') & (data_21['价税合计']<0)]
51. # 18年
52. data_221 = data_22.loc[(data_22['发票状态']=='有效发票') & (data_22['价税合计']>0)]
53. data_222 = data_22.loc[data_22['发票状态']=='作废发票']
54. data_223 = data_22.loc[(data_22['发票状态']=='有效发票') & (data_22['价税合计']<0)]
55. # 19年及之后
56. data_231 = data_23.loc[(data_23['发票状态']=='有效发票') & (data_23['价税合计']>0)]
57. data_232 = data_23.loc[data_23['发票状态']=='作废发票']
58. data_233 = data_23.loc[(data_23['发票状态']=='有效发票') & (data_23['价税合计']<0)]
59.
60. jin211 = [0 for index in range(302)] #存放18年之前每家企业的进项有效发票金额
61. a211 = list(data_211['企业代号']);b211 = list(data_211['价税合计'])
62. for i in range(len(a211)):
63. for j in range(302):
64. if a211[i] == j+124:
65. jin211[j] += b211[i]
66.
67. jin212 = [0 for index in range(302)] #存放18年之前每家企业的进项作废发票数量
68. a212 = list(data_212['企业代号']);b212 = list(data_212['价税合计'])
69. for i in range(len(a212)):
70. for j in range(302):
71. if a212[i] == j+124:
72. jin212[j] += 1
73.
74. jin213 = [0 for index in range(302)] #存放18年之前每家企业的进项负数发票数量
75. a213 = list(data_213['企业代号']);b213 = list(data_213['价税合计'])
76. for i in range(len(a213)):
77. for j in range(302):
78. if a213[i] == j+124:
79. jin213[j] += 1
80.
81. jin221 = [0 for index in range(302)] #存放18年每家企业的进项有效发票金额
82. a221 = list(data_221['企业代号']);b221 = list(data_221['价税合计'])
83. for i in range(len(a221)):
84. for j in range(302):
85. if a221[i] == j+124:
86. jin221[j] += b221[i]
87.
88. jin222 = [0 for index in range(302)] #存放18年每家企业的进项作废发票数量
89. a222 = list(data_222['企业代号']);b222 = list(data_222['价税合计'])
90. for i in range(len(a222)):
91. for j in range(302):
92. if a222[i] == j+124:
93. jin222[j] += 1
94.
95. jin223 = [0 for index in range(302)] #存放18年每家企业的进项负数发票数量
96. a223 = list(data_223['企业代号']);b223 = list(data_223['价税合计'])
97. for i in range(len(a223)):
98. for j in range(302):
99. if a223[i] == j+124:
100. jin223[j] += 1
101.
102. jin231 = [0 for index in range(302)] #存放19年及之后每家企业的进项有效发票金额
103. a231 = list(data_231['企业代号']);b231 = list(data_231['价税合计'])
104. for i in range(len(a231)):
105. for j in range(302):
106. if a231[i] == j+124:
107. jin231[j] += b231[i]
108.
109. jin232 = [0 for index in range(302)] #存放19年及之后每家企业的进项作废发票数量
110. a232 = list(data_232['企业代号']);b232 = list(data_232['价税合计'])
111. for i in range(len(a232)):
112. for j in range(302):
113. if a232[i] == j+124:
114. jin232[j] += 1
115.
116. jin233 = [0 for index in range(302)] #存放19年及之后每家企业的进项负数发票数量
117. a233 = list(data_213['企业代号']);b233 = list(data_233['价税合计'])
118. for i in range(len(a233)):
119. for j in range(302):
120. if a233[i] == j+124:
121. jin233[j] += 1
122.
123. a2 = list(data_3a['企业代号'])
124. for i in range(len(a2)):
125. a2[i] = num_trans(a2[i])
126.
127. data_3a['企业代号'] = a2
128.
129. a = list(data_3a['开票日期'])
130. for i in range(len(data_3a['开票日期'])):
131. a[i] = str(a[i])
132.
133. for i in range(len(a)):
134. a[i] = time_trans(a[i])
135. data_3a['开票日期'] = a
136.
137. data_31 = data_3a.loc[data_3a['开票日期']<20180101] #18年之前
138. data_32 = data_3a.loc[(data_3a['开票日期']>=20180101) & (data_3a['开票日期']<20190101)] #18年
139. data_33 = data_3a.loc[data_3a['开票日期']>=20190101] #19年及之后
140. # 18年之前
141. data_311 = data_31.loc[(data_31['发票状态']=='有效发票') & (data_31['价税合计']>0)]
142. data_312 = data_31.loc[data_31['发票状态']=='作废发票']
143. data_313 = data_31.loc[(data_31['发票状态']=='有效发票') & (data_31['价税合计']<0)]
144. # 18年
145. data_321 = data_32.loc[(data_32['发票状态']=='有效发票') & (data_32['价税合计']>0)]
146. data_322 = data_32.loc[data_32['发票状态']=='作废发票']
147. data_323 = data_32.loc[(data_32['发票状态']=='有效发票') & (data_32['价税合计']<0)]
148. # 19年及之后
149. data_331 = data_33.loc[(data_33['发票状态']=='有效发票') & (data_33['价税合计']>0)]
150. data_332 = data_33.loc[data_33['发票状态']=='作废发票']
151. data_333 = data_33.loc[(data_33['发票状态']=='有效发票') & (data_33['价税合计']<0)]
152.
153. jin311 = [0 for index in range(302)] #存放18年之前每家企业的销项有效发票金额
154. a311 = list(data_311['企业代号']);b311 = list(data_311['金额'])
155. for i in range(len(a311)):
156. for j in range(302):
157. if a311[i] == j+124:
158. jin311[j] += b311[i]
159.
160. jin312 = [0 for index in range(302)] #存放18年之前每家企业的销项作废发票数量
161. a312 = list(data_312['企业代号']);b312 = list(data_312['金额'])
162. for i in range(len(a312)):
163. for j in range(302):
164. if a312[i] == j+124:
165. jin312[j] += 1
166.
167. jin313 = [0 for index in range(302)] #存放18年之前每家企业的销项负数发票数量
168. a313 = list(data_313['企业代号']);b313 = list(data_313['金额'])
169. for i in range(len(a313)):
170. for j in range(302):
171. if a313[i] == j+124:
172. jin313[j] += 1
173.
174. jin321 = [0 for index in range(302)] #存放18年每家企业的销项有效发票金额
175. a321 = list(data_321['企业代号']);b321 = list(data_321['金额'])
176. for i in range(len(a321)):
177. for j in range(302):
178. if a321[i] == j+124:
179. jin321[j] += b321[i]
180.
181. jin322 = [0 for index in range(302)] #存放18年每家企业的销项作废发票数量
182. a322 = list(data_322['企业代号']);b322 = list(data_322['金额'])
183. for i in range(len(a322)):
184. for j in range(302):
185. if a322[i] == j+124:
186. jin322[j] += 1
187.
188. jin323 = [0 for index in range(302)] #存放18年每家企业的销项负数发票数量
189. a323 = list(data_323['企业代号']);b323 = list(data_323['金额'])
190. for i in range(len(a323)):
191. for j in range(302):
192. if a323[i] == j+124:
193. jin323[j] += 1
194.
195. jin331 = [0 for index in range(302)] #存放19年及之后每家企业的销项有效发票金额
196. a331 = list(data_331['企业代号']);b331 = list(data_331['金额'])
197. for i in range(len(a331)):
198. for j in range(302):
199. if a331[i] == j+124:
200. jin331[j] += b331[i]
201.
202. jin332 = [0 for index in range(302)] #存放19年及之后每家企业的销项作废发票数量
203. a332 = list(data_332['企业代号']);b332 = list(data_332['金额'])
204. for i in range(len(a332)):
205. for j in range(302):
206. if a332[i] == j+124:
207. jin332[j] += 1
208.
209. jin333 = [0 for index in range(302)] #存放19年及之后每家企业的销项负数发票数量
210. a333 = list(data_333['企业代号']);b333 = list(data_333['金额'])
211. for i in range(len(a333)):
212. for j in range(302):
213. if a333[i] == j+124:
214. jin333[j] += 1
215.
216. a2 = list(data_1a['企业代号'])
217. for i in range(len(a2)):
218. a2[i] = num_trans(a2[i])
219.
220. data_1a['企业代号'] = a2
221.
222. # 根据企业代号排序
223. data_1a = data_1a.sort_values(['企业代号'], ascending = True)
224.
225. #精度问题,四舍五入保留两位小数
226. from decimal import Decimal, ROUND_HALF_UP
227. def round_up(number, num_digits):
228. """
229. 按指定位数对数值进行四舍五入。
230. :param number:要四舍五入的数字。
231. :param num_digits:要进行四舍五入运算的位数。
232. """
233. if num_digits > 0:
234. res = round_up(number * 10, num_digits - 1) / 10
235. else:
236. res = Decimal(number * 10 ** num_digits).quantize(Decimal('1'), rounding=ROUND_HALF_UP) * 10 ** -num_digits
237. return res
238.
239. jin21 = [0 for index in range(302)];jin22 = [0 for index in range(302)];jin23 = [0 for index in range(302)]
240. jin31 = [0 for index in range(302)];jin32 = [0 for index in range(302)];jin33 = [0 for index in range(302)]
241. for i in range(302):
242. jin21[i] = jin211[i]*0.2 +jin221[i]*0.3 + jin231[i]*0.5 #进项发票金额
243. jin22[i] = jin212[i]*0.2 +jin222[i]*0.3 + jin232[i]*0.5 + jin312[i]*0.2 +jin322[i]*0.3 + jin332[i]*0.5 #作废发票数量
244. jin23[i] = jin213[i]*0.2 +jin223[i]*0.3 + jin233[i]*0.5 + jin313[i]*0.2 +jin323[i]*0.3 + jin333[i]*0.5 #负数发票数量
245. jin31[i] = jin311[i]*0.2 +jin321[i]*0.3 + jin331[i]*0.5 #销项发票金额
246. for i in range(302):
247. jin21[i] = round_up(jin21[i],2)
248. jin31[i] = round_up(jin31[i],2)
249.
250. data_1a['进项发票金额'] = jin21
251. data_1a['作废发票数量'] = jin22
252. data_1a['负数发票数量'] = jin23
253. data_1a['销项发票金额'] = jin31
254.
255. # 进销平衡
256. ph1 = [0 for index in range(302)];ph2 = [0 for index in range(302)];ph3 = [0 for index in range(302)];ph = [0 for index in range(302)]
257. for i in range(len(ph1)):
258. ph1[i] = jin311[i]-jin211[i]
259. ph2[i] = jin321[i]-jin221[i]
260. ph3[i] = jin331[i]-jin231[i]
261. ph[i] = ph1[i]*0.2+ph2[i]*0.3+ph3[i]*0.5
262. for i in range(302):
263. ph1[i] = round_up(ph1[i],2)
264. ph2[i] = round_up(ph2[i],2)
265. ph3[i] = round_up(ph3[i],2)
266. ph[i] = round_up(ph[i],2)
267. data_1a['盈余'] = ph
268.
269. # 行业划分量化影响力
270. a = list(data_1a['企业类别'])
271. for i in range(len(a)):
272. if a[i] == 1:
273. a[i] = 0.2
274. if a[i] == 2:
275. a[i] = 0.8
276. if a[i] == 3:
277. a[i] = 0.2
278. if a[i] == 4:
279. a[i] = 0.45
280. if a[i] == 5:
281. a[i] = 0.65
282. if a[i] == 6:
283. a[i] = 0.55
284. if a[i] == 7:
285. a[i] = 0.4
286. data_1a['企业类别'] = a
287. data_1a.head()
288.
289. data_1a.rename(columns={'企业类别':'影响力'}, inplace = True)
290.
291. # 极小型指标转化为极大型指标
292. max(data_1a['作废发票数量'])#1077.9
293. max(data_1a['负数发票数量'])#179.1
294. data_1a['作废发票数量'] = 1077.9-data_1a['作废发票数量']
295. data_1a['负数发票数量'] = 179.1-data_1a['负数发票数量']
296.
297. data_1a.to_excel('附件二处理后的数据.xlsx')
程序四.问题二模型建立
1. #导包,文件
2. import numpy as np
3. import pandas as pd
4. df=pd.read_excel('附件二标准化后的数据.xlsx')
5.
6. df1 = df.loc[:,['Z作废发票数量','Z负数发票数量','Z盈余','Z影响力']]
7. df1.columns = [0,1,2,3]
8.
9. # 信誉得分权重
10. a = np.array([[1,1/2,1/7,1/4],[2,1,1/6,1/3],[7,6,1,4],[4,3,1/4,1]])
11. a1 = np.zeros((4,4))
12. for i in range(len(a)):
13. for j in range(len(a[i])):
14. a1[i][j] = a[i][j]/(a[0][j]+a[1][j]+a[2][j]+a[3][j])
15. print('a1:{}'.format(a1))
16.
17. v = np.zeros(4)
18. for i in range(len(a1)):
19. v[i] = sum(a1[i])
20. print('v:{}'.format(v))
21.
22. w = np.zeros(4)
23. for i in range(len(a1)):
24. w[i] = v[i]/sum(v)
25. w = w.reshape(4,1)
26. print('w:{}'.format(w))
27.
28. aw = np.dot(a,w)
29. print('aw:{}'.format(aw))
30.
31. l = 0
32. for i in range(len(aw)):
33. l+=aw[i]/(len(aw)*w[i])
34. print('l:{}'.format(l))
35.
36. ci = (l-len(aw))/(len(aw)-1)
37. print('ci:{}'.format(ci))
38.
39. ri = 1.12
40. cr = ci/ri
41. print('cr:{}'.format(cr))
42.
43. w = [0.0639,0.1012,0.6072,0.2277]
44. score = [0 for index in range(len(df1[1]))]
45. for j in range(len(w)):
46. for i in range(len(df1[1])):
47. score[i] += df1[j][i]*w[j]
48.
49. sum(w) #检验权重之和为1
50.
51. df['信贷安全得分'] = score
52.
53. # 信誉得分排序
54. df2 = df.sort_values(by='信贷安全得分',ascending=False)
55.
56. # 利率函数计算
57. from sympy import *
58. x = symbols('x')
59. y = (x+1.331047)*((0.04-0.15)/(1.504037+1.331047))+0.15
60.
61. # 根据利率函数插值
62. x1 = list(df2['信贷安全得分'])
63. y1 = [0 for index in range(len(x1))]
64. for i in range(len(y1)):
65. y1[i] = y.subs(x,x1[i])
66. y1[i] = round(y1[i],4)
67. for i in range(len(y1)):
68. if y1[i]<0.04:
69. y1[i] = 0.04
70. if y1[i]>0.15:
71. y1[i] = 0.15
72.
73. df2['年利率(%)'] = y1
74. df2['年利率(%)'] = df2['年利率(%)']*100
75.
76. df3 = df2.loc[:,['Z盈余','Z进项发票金额']]
77. df3.columns = [0,1]
78.
79. # 额度得分
80. w = [0.65,0.35]
81. score = [0 for index in range(len(df3))]
82. for j in range(len(w)):
83. for i in range(len(df3)):
84. score[i] += df3.iloc[i,j]*w[j]
85.
86. df2['额度得分'] = score
87.
88. sum(w) #检验权重相加=1
89.
90. # 按额度得分排序
91. df3 = df2.sort_values(by='额度得分',ascending=False)
92.
93. score = list(df3['额度得分'])
94. # print(score)
95. min(score) #-0.9139382826107219
96. for i in range(len(score)):
97. score[i] += 0.9139382826107219
98. # score
99. limit = [ 0 for i in range(len(score))]
100. for i in range(len(score)):
101. if score[i]>3:
102. limit[i] = 100
103. # print(score)
104. s = sum(score[5:])
105. for i in range(5,len(score)):
106. limit[i] = (score[i]/s)*9500
107.
108. for i in range(len(limit)):
109. if limit[i]>100:
110. limit[i] = 100
111. if limit[i]<10:
112. limit[i] = 0
113. limit[-5] = 10
114.
115. sum(limit) #检验总额度
116.
117. # 企业
118. x = []
119. for i in range(124,426):
120. x.append(i)
121. # 额度得分
122. y = list(df3['额度得分'])
123. #绘图
124. import matplotlib.pyplot as plt
125. import matplotlib
126. from matplotlib import font_manager
127. my_font=font_manager.FontProperties(fname='C:\Windows\Fonts\msyh.ttc',size=10) #fname字体路径,本电脑
128. plt.plot(x,y)
129. #绘制网格
130. plt.grid(alpha=0.4)
131. #标题
132. plt.xlabel('企业代号',fontproperties=my_font)
133. plt.ylabel('额度得分',fontproperties=my_font)
134. plt.title('各企业额度得分',fontproperties=my_font,color='black',size=15)
135. #显示
136. plt.show()
137.
138. df3['额度(万)'] = limit
139.
140. df3.to_excel('302家企业各项指标及得分.xlsx')
141.
142. # 结果
143. df4 = df3.loc[:,['企业代号','企业名称','年利率(%)','额度(万)']]
144. df4.to_excel('针对302家企业的贷款策略.xlsx')
程序五.疫情影响下的信贷策略
1. import pandas as pd
2. import numpy as np
3. data = pd.read_excel('302家企业各项指标及得分及细分产业.xlsx')
4.
5. classify = list(data['产业细分'])
6. influence = [0 for index in range(len(classify))]
7. add = [0 for index in range(len(classify))]
8. for i in range(len(classify)):
9. if classify[i] == 1:
10. influence[i] = -0.028 ; add[i] = 0.034
11. if classify[i] == 2:
12. influence[i] = -0.085 ; add[i] = 0.041
13. if classify[i] == 3:
14. influence[i] = -0.102 ; add[i] = 0.044
15. if classify[i] == 4:
16. influence[i] = -0.175 ; add[i] = 0.078
17. if classify[i] == 5:
18. influence[i] = -0.178 ; add[i] = 0.012
19. if classify[i] == 6:
20. influence[i] = -0.140 ; add[i] = 0.017
21. if classify[i] == 7:
22. influence[i] = -0.353 ; add[i] = -0.18
23. if classify[i] == 8:
24. influence[i] = -0.243 ; add[i] = 0.048
25. if classify[i] == 9:
26. influence[i] = -0.061 ; add[i] = 0.041
27. if classify[i] == 10:
28. influence[i] = 0.132 ; add[i] = 0.057
29. if classify[i] == 11:
30. influence[i] = -0.018 ; add[i] = 0.090
31. if classify[i] == 12:
32. influence[i] = -0.114 ; add[i] = -0.053
33. if classify[i] == 13:
34. influence[i] = -0.094 ; add[i] = -0.080
35. if classify[i] == 14:
36. influence[i] = -0.316 ; add[i] = -0.102
37. if classify[i] == 15:
38. influence[i] = -0.785 ; add[i] = -0.693
39. if classify[i] == 16:
40. influence[i] = -0.028 ; add[i] = 0.034
41. if classify[i] == 17:
42. influence[i] = -2.131 ; add[i] = -1.685
43.
44. data['波动'] = influence
45. data['同期增长'] = add
46.
47. data['信贷安全得分'] = data['信贷安全得分']*((data['波动']+1)*0.6+(data['同期增长']+1)*0.4)
48. data1 = data.sort_values(by='信贷安全得分',ascending=False)
49.
50. # 利率函数计算
51. from sympy import *
52. x = symbols('x')
53. y = (x+1.211785)*((0.04-0.15)/(1.393039+1.211785))+0.15
54.
55. # 根据利率函数插值
56. x1 = list(data1['信贷安全得分'])
57. y1 = [0 for index in range(len(x1))]
58. for i in range(len(y1)):
59. y1[i] = y.subs(x,x1[i])
60. y1[i] = round(y1[i],4)
61.
62. for i in range(len(y1)):
63. if y1[i]<0.0400: y1[i]=0.0400
64. if y1[i]>0.1500: y1[i]=0.1500
65.
66. data1['年利率'] = y1
67. data1['年利率'] = data1['年利率']*100
68. data1.rename(columns={'年利率':'年利率(%)'}, inplace = True)
69.
70. data1['额度得分'] = data1['额度得分']*((data1['波动']+1)*0.6+(data['同期增长']+1)*0.4)
71. data2 = data1.sort_values(by='额度得分',ascending=False)
72.
73. # 企业
74. x = []
75. for i in range(1,len(y1)+1): x.append(i)
76. # 额度得分
77. y = list(data2['额度得分'])
78. #绘图 #我的编译器这块代码需要运行两次才能出图
79. import matplotlib.pyplot as plt
80. import matplotlib
81. from matplotlib import font_manager
82. my_font=font_manager.FontProperties(fname='C:\Windows\Fonts\msyh.ttc',size=10) #fname字体路径,本电脑
83. plt.plot(x,y)
84. #绘制网格
85. plt.grid(alpha=0.4)
86. #标题
87. plt.xlabel('企业代号',fontproperties=my_font)
88. plt.ylabel('额度得分',fontproperties=my_font)
89. plt.title('各企业额度得分',fontproperties=my_font,color='black',size=15)
90. #显示
91. plt.show()
92.
93. score = list(data2['额度得分'])
94. # print(score)
95. # min(score) #-0.9685167812983371
96. for i in range(len(score)):
97. score[i] += 0.9685167812983371
98. # score
99. limit = [ 0 for i in range(len(score))]
100. for i in range(len(score)):
101. if score[i]>4:
102. limit[i] = 100
103. s = sum(score[2:])
104. for i in range(2,len(score)):
105. limit[i] = (score[i]/s)*9800
106.
107. for i in range(len(limit)):
108. if limit[i]>100: limit[i]=100
109. if limit[i]<10: limit[i]=10
110.
111. sum(limit) #检验总额度是否超过100万
112.
113. data2['额度']=limit
114. data2.rename(columns={'额度':'额度(万)'}, inplace = True)
115.
116. data3 = data2.loc[:,['企业代号','企业名称','产业细分','波动','同期增长','年利率(%)','额度(万)']]
117. data3['波动'] = data3['波动']*100
118. data3.rename(columns={'波动':'波动(%)'}, inplace = True)
119. data3['同期增长'] = data3['同期增长']*100
120. data3.rename(columns={'同期增长':'同期增长(%)'}, inplace = True)
121.
122. data3.to_excel('疫情影响下针对302家企业的贷款策略.xlsx')
思路有时间再写