#coding=UTF-8
import requests
import json
from lxml import etree
from pykafka import KafkaClient
import sys
import schedule
import time
class Aiqiyi():
    """Small Kafka publisher used by the iQiyi scraper in this file.

    An instance connects to a Kafka cluster through pykafka, keeps a handle
    on one topic in ``self.topic`` and publishes JSON-encoded messages to it.
    """

    def __init__(self, hosts="127.0.0.1:9092", topic="test"):
        """Connect to Kafka and look up the target topic.

        Args:
            hosts: Broker address string passed to ``KafkaClient``. Defaults
                to the local broker this script always used.
            topic: Topic name, as ``str`` or ``bytes``. Defaults to ``"test"``.
        """
        client = KafkaClient(hosts=hosts)
        # The topic mapping is indexed with a bytes key here, as before;
        # accept str for convenience and encode it.
        topic_key = topic.encode() if isinstance(topic, str) else topic
        self.topic = client.topics[topic_key]
        print(client.topics)

    def sendMessage(self, mydict):
        """Publish ``mydict`` to the topic as UTF-8 encoded JSON.

        Args:
            mydict: Any object ``json.dumps`` can serialise.

        Raises:
            TypeError: If ``mydict`` is not JSON serialisable.
        """
        # Serialise first so an unserialisable payload fails before a
        # producer is opened.
        payload = json.dumps(mydict).encode('utf-8')
        with self.topic.get_sync_producer(delivery_reports=True) as producer:
            producer.produce(payload)
def getProperti():
    """Scrape the iQiyi listing page and publish each programme to Kafka.

    Downloads the listing page, extracts programme links, names, image URLs
    and release-time text, combines them position by position into dicts
    with the keys ``name``, ``href``, ``img`` and ``time``, and sends each
    dict through ``Aiqiyi.sendMessage``.

    Takes no arguments and returns ``None``. Exceptions raised by
    ``requests`` or by the Kafka client are not caught here.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'}
    url = 'http://list.iqiyi.com/www/25/-------------4-1-2-iqiyi-1-.html'
    # Without a timeout a stalled connection would block this job, and the
    # scheduler loop that runs it, indefinitely.
    sourceHtml = requests.get(url, headers=headers, timeout=10).content.decode('utf-8')
    selector = etree.HTML(sourceHtml)
    links = selector.xpath('/html/body/div[3]/div/div/div[3]/div/ul[@class="site-piclist site-piclist-180101 site-piclist-auto"]/li[1]')
    if not links:
        # Nothing matched; do not open a Kafka connection for no data.
        return

    # sendMessage is a method of Aiqiyi, not a module-level function, so an
    # instance is needed to publish.
    sender = Aiqiyi()
    for link in links:
        # NOTE(review): the expressions below start with `//`, so they are
        # evaluated against the whole document rather than relative to
        # `link`, and `div['site-piclist_info']` is a non-empty string
        # predicate (always true), not a class test. They are left
        # unchanged because correcting them would change which nodes match;
        # confirm against the live page.
        # Programme link
        hrefs = link.xpath("//div['site-piclist_info']/div[1]/p/a/@href")
        # Programme name
        names = link.xpath("//div['site-piclist_info']/div[1]/p/a/text()")
        # Programme image
        imgs = link.xpath("//div[@class='site-piclist_pic']/a/img/@src")
        # Release time (named so it does not shadow the `time` module)
        release_times = link.xpath("//div['site-piclist_info']/div[@class='role_info']/text()")
        for name, href, img, release_time in zip(names, hrefs, imgs, release_times):
            jso = {
                "name": str(name.strip()),
                "href": str(href.strip()),
                "img": str(img.strip()),
                "time": str(release_time.replace("\r\n", "").strip()),
            }
            # Publish the scraped record itself; previously a fixed dummy
            # list was sent and this dict was discarded.
            sender.sendMessage(jso)
def job():
    """Print a fixed status line to standard output and return ``None``."""
    message = "I'm working..."
    print(message)
if __name__ == "__main__":
    # Seconds to pause between two checks for due jobs.
    poll_seconds = 1

    # Register the scraper so that it becomes due once every minute.
    schedule.every(1).minutes.do(getProperti)

    # Jobs only execute from inside run_pending(), so keep polling forever.
    while True:
        schedule.run_pending()
        time.sleep(poll_seconds)
#coding=UTF-8
from pykafka import KafkaClient
import json
import sys
class pythonSendkafka:
    """Small Kafka publisher: call ``clien()`` once, then ``sendMessage()``.

    ``self.topic`` is only set by ``clien``; calling ``sendMessage`` first
    raises ``AttributeError``.
    """

    def clien(self, hosts="127.0.0.1:9092", topic="test"):
        """Connect to Kafka and store the target topic on the instance.

        Args:
            hosts: Broker address string passed to ``KafkaClient``. Defaults
                to the local broker this script always used.
            topic: Topic name, as ``str`` or ``bytes``. Defaults to ``"test"``.
        """
        client = KafkaClient(hosts=hosts)
        # The topic mapping is indexed with a bytes key here, as before;
        # accept str for convenience and encode it.
        topic_key = topic.encode() if isinstance(topic, str) else topic
        self.topic = client.topics[topic_key]
        print(client.topics)

    def sendMessage(self, mydict):
        """Publish ``mydict`` to the topic as UTF-8 encoded JSON.

        Args:
            mydict: Any object ``json.dumps`` can serialise.

        Raises:
            TypeError: If ``mydict`` is not JSON serialisable.
        """
        # Serialise first so an unserialisable payload fails before a
        # producer is opened.
        payload = json.dumps(mydict).encode('utf-8')
        with self.topic.get_sync_producer(delivery_reports=True) as producer:
            producer.produce(payload)
# Fixed sample payload: three 3-tuples of placeholder strings.
df = [
    ('a111', 'a2', 'a3'),
    ('b111', 'b2', 'b3'),
    ('c111', 'c2', 'c3'),
]

# These statements run at module level: create the publisher, connect it
# to the broker, then send the sample payload once.
py = pythonSendkafka()
py.clien()
py.sendMessage(df)