去重 首先构建学习数据 # 导包 from pyspark import SparkContext from pyspark.sql.session import SparkSession # 创建连接 sc = SparkContext.getOrCreate() spark = SparkSession(sc) # 生成数据 df = spark.createDataFrame([ (1, 144.5, 5.9, 33, 'M'), (2, 167.2, 5.4, 45,