spark-streaming统计各岗位招聘信息行数

任务:

搭建集群

 

重新开一个端口

还是在spark-master里面

运行

spark-submit 文件名.py    # 只要 py 文件中导入了 pyspark 包,一般都用这种方式运行

 程序会一直不停地统计数据

 job.py代码:

# import findspark
# findspark.init()
from pyspark import SparkContext
from pyspark.streaming import StreamingContext

def updateFunc(values, state):
    """Fold one batch of counts for a key into its running total.

    Intended as the update function passed to ``updateStateByKey``.

    Args:
        values: Iterable of numeric counts that arrived for the key in the
            current batch (may be empty).
        state: Running total from earlier batches, or ``None`` when the key
            has no previous state.

    Returns:
        The new running total for the key.
    """
    batch_total = sum(values)
    # Identity check: None is a singleton, so ``is`` is the correct test.
    if state is None:
        return batch_total
    return batch_total + state

if __name__ == '__main__':
    # Entry point: read comma-separated text records from a local socket and
    # print a running count per value of the second field every batch.
    sc = SparkContext()
    sc.setLogLevel("ERROR")
    ssc = StreamingContext(sc, 5)  # second argument is the batch interval
    # A checkpoint directory is needed for the stateful updateStateByKey step.
    ssc.checkpoint("cp_for_job")
    lines = ssc.socketTextStream("localhost", 9001)

    # Records without a second comma-separated field used to raise IndexError
    # inside the map and abort the job; drop them instead.
    fields = lines.map(lambda line: line.split(","))
    pairs = fields.filter(lambda parts: len(parts) > 1) \
                  .map(lambda parts: (parts[1], 1))
    pairs.updateStateByKey(updateFunc).pprint()

    ssc.start()
    ssc.awaitTermination()

 my_socket_server.py

import socket
import time
from threading import Thread

# (host, port) pair the listening socket is bound to in init().
ADDRESS = ("localhost", 9001)
# Listening socket shared by the module; stays None until init() creates it.
g_socket_server = None
# Client connections keyed by client type; consulted by remove_client().
g_conn_pool = dict()

def init():
    """Create the module-wide listening TCP socket and bind it to ADDRESS.

    The socket is stored in the global ``g_socket_server`` and put into
    listening mode with a backlog of 5.
    """
    global g_socket_server

    g_socket_server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    # Without SO_REUSEADDR, restarting the server shortly after stopping it
    # can fail in bind() with "Address already in use".
    g_socket_server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    g_socket_server.bind(ADDRESS)
    g_socket_server.listen(5)

    print("server start, wait for client connecting... ")

def accept_client():
    """Accept connections forever, serving each one on its own thread.

    Every accepted client is handed to ``message_handle`` together with its
    peer address. This function never returns.
    """
    while True:
        client, info = g_socket_server.accept()
        # Daemon thread: it must not keep the process alive on exit.
        # daemon=True replaces the deprecated Thread.setDaemon().
        worker = Thread(target=message_handle, args=(client, info), daemon=True)
        worker.start()
def message_handle(client, info, path="/root/spark/part-00000"):
    """Stream the non-blank lines of a text file to a client, over and over.

    Sends a ``success`` greeting, then re-reads ``path`` in an endless loop,
    sending every non-blank line. The loop ends when any error occurs
    (for example the client disconnects or the file cannot be read); the
    error is printed and the client socket is closed.

    Args:
        client: Connected socket-like object providing ``sendall`` and
            ``close``.
        info: Peer address as returned by ``accept()``; not used here.
        path: UTF-8 text file to replay. Defaults to the original
            hard-coded location.
    """
    try:
        # NOTE: the greeting carries no trailing newline, so a line-oriented
        # reader sees it fused with the first record. Kept byte-for-byte to
        # stay compatible with existing consumers.
        client.sendall("success".encode(encoding='utf8'))
        while True:
            with open(path, 'r', encoding="utf-8") as f:
                for line in f:
                    if not line.strip():
                        continue
                    # A final line without a newline would otherwise be
                    # glued to the first record of the next pass.
                    if not line.endswith("\n"):
                        line += "\n"
                    client.sendall(line.encode(encoding='utf8'))
                    # time.sleep(0.2)
    except Exception as e:
        # Best-effort worker: report the problem and stop serving this client.
        print(e)
    finally:
        # Always release the connection; previously it was leaked.
        client.close()

def remove_client(client_type):
    """Close and unregister the connection stored under ``client_type``.

    Args:
        client_type: Key into the global ``g_conn_pool``.

    Raises:
        KeyError: If ``client_type`` is not present in ``g_conn_pool``.
    """
    client = g_conn_pool[client_type]
    # Identity check against the None singleton; an entry holding None is
    # left in the pool untouched, as before.
    if client is not None:
        client.close()
        g_conn_pool.pop(client_type)
        print(" client offin:" + client_type)
if __name__ == '__main__':
    init()
    # Accept new connections on a background thread. daemon=True replaces
    # the deprecated Thread.setDaemon() and lets the process exit without
    # waiting for it.
    thread = Thread(target=accept_client, daemon=True)
    thread.start()
    # Keep the main thread alive; the daemon threads do all the work.
    while True:
        time.sleep(0.1)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值