发送方:
发送方模拟一个实时数据流:读取 CSV 文件,并将其中的记录逐条发送到 Pulsar。
import json

import pandas as pd
import pulsar
# Simulated data stream: load the CSV and publish its first rows to Pulsar,
# one JSON-encoded message per row.
df = pd.read_csv('/path/df_obj.csv')
# One dict per CSV row, keyed by column name.
data_df = df.to_dict(orient='records')

client = pulsar.Client('pulsar://10.xx.xx.xxx:7777')
try:
    producer = client.create_producer('public/spark/topic1')
    # Slice rather than index with range(10): a CSV with fewer than 10 rows
    # then sends what it has instead of raising IndexError.
    for df_i in data_df[:10]:
        json_df_i = json.dumps(df_i)
        # The producer sends bytes, so encode the JSON text as UTF-8.
        out_df = json_df_i.encode('utf-8')
        producer.send(out_df)
finally:
    # Close the client even when creating the producer or a send fails,
    # so the connection is not leaked.
    client.close()
接收方:
接收方需要先下载连接 Pulsar 和 Spark 的 connector jar 包,并在创建 SparkSession 时通过 spark.jars 配置加载。
from pyspark.sql import SparkSession
# Location of the Pulsar-Spark connector jar handed to Spark via `spark.jars`.
jar_path = "/home/xxx/pulsar-spark-connector_2.12-3.2.2.2.jar"

# Build (or reuse) the Spark session with the connector jar attached.
spark = (
    SparkSession.builder
    .master("local[3]")
    .config("spark.jars", jar_path)
    .appName('test_pulsar')
    .getOrCreate()
)

# Streaming DataFrame backed by the Pulsar topic the sender publishes to.
ds = (
    spark.readStream
    .format("pulsar")
    .option("service.url", "pulsar://10.xx.xx.xxx:7777")
    .option("topic", "public/spark/topic1")
    .load()
)