from pyspark.sql import SparkSession
from pyspark.sql.functions import regexp_replace
# Path to the source CSV file (placeholder — point this at your data).
source_path = r"你的文件.csv"

# Spark entry point; getOrCreate() reuses an already-running session if one exists.
spark = SparkSession.builder.appName("multilinestring").getOrCreate()

# Read the CSV with multiLine enabled so that quoted fields containing embedded
# newlines are parsed as a single record instead of being split across rows.
# quote == escape == '"' follows the RFC 4180 convention ("" escapes a quote
# inside a quoted field).
lz_df = (
    spark.read.option("header", "true")
    .option("multiLine", "true")
    .option("quote", "\"")
    .option("escape", "\"")
    .option("delimiter", ",")
    .format("csv")
    .load(source_path)
)

# Strip residual line breaks inside every column value. The original pattern
# removed only "\n", which leaves stray "\r" characters behind when the file
# uses CRLF (Windows) line endings inside quoted fields; "[\r\n]" removes both.
for column_name in lz_df.columns:
    lz_df = lz_df.withColumn(
        column_name, regexp_replace(lz_df[column_name], "[\r\n]", "")
    )
# Handling records that span multiple lines when reading CSV files with PySpark.
# (Source article last updated 2024-07-12.)