大众点评字体_大众点评字体解析

import requests

import re

from lxml import etree

import lxml

# Module-level list; in the visible source it is only referenced from
# commented-out code at the bottom of the file.
shanghu = list()

# HTTP headers sent with every request made by this module.
headers = dict()
headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'

# The bare string on the next line is an expression statement with no runtime
# effect. Translation: "Dianping _token generation process; jv has the form
# http://www.dianping.com?shopId=6585548".
'''大众点评_token生成过程,jv为http://www.dianping.com?shopId=6585548样式'''

# Obfuscated JavaScript kept as a plain string; nothing in the visible source
# reads `js` or executes it. The snippet defines iP.reload(jv), which builds
# jx from jv, sets iP.sign = iJ(jx) and iP.cts to the current time, and
# returns iI(iP).
js='''

iP.reload = function(jv) {

var jw;

var jx = {};

if (typeof jv === _$_543c[91]) {

jx = iO.parse(jv.split(_$_543c[146])[1])

} else {

if (typeof jv === _$_543c[2]) {

jx = jv

}

}

;iP.sign = iJ(jx);

iP.cts = new Date().getTime();

jw = iI(iP);

if (Rohr_Opt.LogVal && typeof (window) !== _$_543c[0]) {

window[Rohr_Opt.LogVal] = encodeURIComponent(jw)

}

;return jw

}

'''

def deal_text(href, code, glyph_width=12):
    """Decode one obfuscated character from its CSS class name.

    The stylesheet at *href* gives, for class *code*, a negative background
    offset (x, y) into an SVG whose ``<text y="...">`` rows hold the real
    characters. The row is the first one whose ``y`` is not smaller than the
    offset's y; the column is ``x // glyph_width``.

    Args:
        href: URL of the stylesheet containing the ``background`` rules.
        code: CSS class name of the obfuscated element. Its first two
            characters select the ``svgmtsi[class^="..."]`` rule that names
            the SVG file.
        glyph_width: Horizontal pixels per character in the SVG rows.

    Returns:
        The single decoded character.

    Raises:
        IndexError: If the class, the SVG URL or any text row cannot be
            found, or if the computed column lies outside the chosen row.
    """
    css = requests.get(url=href, headers=headers, timeout=10).text

    # Background offset of this class, e.g. "background:-168.0px -41.0px;".
    offsets = re.findall('%s{background:-(.*?).0px -(.*?).0px;}' % code, css)
    if not offsets:
        raise IndexError('no background offset found for class %r' % (code,))
    x = int(offsets[0][0])
    y = int(offsets[0][1])

    # SVG file shared by every class that starts with the same two characters.
    svg_urls = re.findall(
        r'svgmtsi\[class\^="%s"].*?background-image: url\((.*?)\)' % code[0:2],
        css,
    )
    if not svg_urls:
        raise IndexError('no SVG url found for class prefix %r' % (code[0:2],))
    svg = requests.get(url='https:' + svg_urls[0], headers=headers, timeout=10)
    tree = etree.HTML(svg.content)

    # Collect every (row y, row characters) pair instead of assuming exactly
    # five rows; y values that match no <text> element are skipped.
    rows = []
    for row_y in re.findall(r'y="(\d+)"', svg.text):
        found = tree.xpath('//text[@y="%s"]/text()' % row_y)
        if found:
            rows.append((int(row_y), found[0]))
    if not rows:
        raise IndexError('no <text> rows found in %s' % svg_urls[0])

    # The comparison mirrors deal_num: the first row whose y is at or below
    # the offset wins, and the last row is the fallback.
    for row_y, glyphs in rows:
        if y <= row_y:
            return glyphs[x // glyph_width]
    return rows[-1][1][x // glyph_width]

def deal_num(href, code, glyph_width=12):
    """Decode one obfuscated digit from its CSS class name.

    The stylesheet at *href* gives, for class *code*, a negative background
    offset (x, y) into an SVG whose ``<text y="...">`` rows hold the real
    digits. The row is the first one whose ``y`` is not smaller than the
    offset's y; the column is ``x // glyph_width``.

    Args:
        href: URL of the stylesheet containing the ``background`` rules.
        code: CSS class name of the obfuscated element. Its first two
            characters select the ``svgmtsi[class^="..."]`` rule that names
            the SVG file.
        glyph_width: Horizontal pixels per character in the SVG rows.

    Returns:
        The single decoded character.

    Raises:
        IndexError: If the class, the SVG URL or any text row cannot be
            found, or if the computed column lies outside the chosen row.
    """
    css = requests.get(url=href, headers=headers, timeout=10).text

    # Background offset of this class, e.g. "background:-168.0px -41.0px;".
    offsets = re.findall('%s{background:-(.*?).0px -(.*?).0px;}' % code, css)
    if not offsets:
        raise IndexError('no background offset found for class %r' % (code,))
    x = int(offsets[0][0])
    # Offsets must be compared numerically: as strings, '7' <= '41' is False
    # and the wrong row would be selected.
    y = int(offsets[0][1])

    # SVG file shared by every class that starts with the same two characters.
    svg_urls = re.findall(
        r'svgmtsi\[class\^="%s"].*?background-image: url\((.*?)\)' % code[0:2],
        css,
    )
    if not svg_urls:
        raise IndexError('no SVG url found for class prefix %r' % (code[0:2],))
    svg = requests.get(url='https:' + svg_urls[0], headers=headers, timeout=10)
    tree = etree.HTML(svg.content)

    # Collect every (row y, row characters) pair instead of assuming exactly
    # three rows; y values that match no <text> element are skipped.
    rows = []
    for row_y in re.findall(r'y="(\d+)"', svg.text):
        found = tree.xpath('//text[@y="%s"]/text()' % row_y)
        if found:
            rows.append((int(row_y), found[0]))
    if not rows:
        raise IndexError('no <text> rows found in %s' % svg_urls[0])

    # First row whose y is at or below the offset wins; last row is fallback.
    for row_y, glyphs in rows:
        if y <= row_y:
            return glyphs[x // glyph_width]
    return rows[-1][1][x // glyph_width]

def _decode_digits(container, href):
    """Decode each child element of *container* with deal_num and join them."""
    # NOTE(review): only child elements are decoded; text placed directly
    # inside the container is ignored, exactly as before. Confirm the page
    # never mixes plain digits with obfuscated ones.
    return ''.join(deal_num(href, child.attrib['class']) for child in container)


def get_shopcode(max_retries=5, retry_delay=1.0):
    """Fetch the shop listing page and print one summary per shop.

    For every ``<li>`` under ``div#shop-all-list`` this prints the address
    (``data-address`` attribute), a line with star title, shop name, decoded
    review count and decoded average price, and a separator line.

    A failed attempt is logged and retried after *retry_delay* seconds, at
    most *max_retries* times in total. Output is buffered per attempt, so a
    retry never prints the same shops twice.

    Args:
        max_retries: Maximum number of attempts before giving up.
        retry_delay: Seconds to wait between attempts.

    Returns:
        None. If every attempt fails, an error is logged and nothing is
        printed.
    """
    import logging
    import time

    logger = logging.getLogger(__name__)
    list_url = 'http://www.dianping.com/huizhou/ch10/g103'

    for attempt in range(1, max_retries + 1):
        try:
            page = requests.get(url=list_url, headers=headers, timeout=10).text
            tree = etree.HTML(page)
            # Stylesheet URL that deal_num uses to look up background offsets.
            href = 'http://s3plus' + re.findall('//s3plus(.*?)">', page)[0]

            report = []
            for shop in tree.xpath('.//div[@id="shop-all-list"]/ul/li'):
                shop_name = shop.xpath('.//div[@class="tit"]/a/h4/text()')[0]
                star = shop.xpath('.//div[@class="comment"]/span')[0].attrib['title']
                review_box = shop.xpath('.//div[@class="comment"]/a[contains(@class,"review-num")]/b')[0]
                price_box = shop.xpath('.//div[@class="comment"]/a[contains(@class,"mean-price")]/b')[0]
                address_link = shop.xpath('.//div[@class="operate J_operate Hide"]/a[2]')[0]

                price = _decode_digits(price_box, href)
                reviews = _decode_digits(review_box, href)

                report.append(address_link.attrib['data-address'])
                report.append(star + ' ' + shop_name + ' ' + reviews + '条点评' + ' ' + '人均' + price)
                report.append('-------------------------')
        except Exception:
            # Best-effort scraper: record the reason and retry instead of
            # looping silently forever.
            logger.exception('get_shopcode attempt %d/%d failed', attempt, max_retries)
            if attempt < max_retries:
                time.sleep(retry_delay)
            continue

        for line in report:
            print(line)
        return

    logger.error('get_shopcode gave up after %d attempts', max_retries)

# Module-level entry point: the scrape runs as soon as this file is executed
# or imported.
get_shopcode()

# Leftover commented-out code; `comment` is not defined anywhere in the
# visible source.
# for i in range(0,len(comment)):

#

# shanghu.append(comment[i])

# print(comment)

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值