废话不多说,直接上代码!
```python
from pyspark.ml import Pipeline
from pyspark.ml.classification import RandomForestClassifier
from pyspark.ml.linalg import Vectors
from pyspark.sql import SparkSession
import pandas as pd
from pyspark.ml.feature import Normalizer, VectorAssembler, StandardScaler, StringIndexer
from pyspark.sql import SparkSession, types
# Train a small random-forest classifier on three temperature signals read
# from a CSV file and compute the importance of each feature.

# Single source of truth for the feature columns. The importance vector is
# indexed in VectorAssembler input order, so the very same list has to drive
# the column selection, the assembler and the name -> importance mapping.
colList = ['tempcabinetnacelle_1sec', 'blade3tempbattbox_1sec', 'blade1tempbattbox_1sec']

spark = SparkSession.builder.appName("appName111").enableHiveSupport().getOrCreate()

# Only header=True is passed (no schema, no inferSchema), hence the explicit
# casts below.
df = spark.read.csv('/root/a.csv', header=True)

# Type conversion: keep only the needed columns and cast them in a single
# projection (features -> float, label -> int). Column order is the feature
# columns followed by 'label'.
# NOTE(review): values that fail the cast presumably become null, and
# VectorAssembler rejects nulls by default -- confirm the input is clean.
df = df.select(
    [df[name].cast(types.FloatType()).alias(name) for name in colList]
    + [df['label'].cast(types.IntegerType()).alias('label')]
)

# Assemble the feature columns into one vector column named "features",
# which is the column RandomForestClassifier reads by default.
df_assembler = VectorAssembler(inputCols=colList, outputCol="features")
df = df_assembler.transform(df)
df.show()

# Training. The seed is fixed so repeated runs build the same forest.
rf = RandomForestClassifier(numTrees=2, maxDepth=4, labelCol='label', seed=11)
model = rf.fit(df)

# featureImportances is a vector with one entry per assembled feature, in
# colList order; convert it to plain Python floats and label each entry.
ff = model.featureImportances
print(len(ff))
importancesList = ff.toArray().tolist()
result = dict(zip(colList, importancesList))
print(result)
```