import requests
from lxml import etree
import re
from bs4 import BeautifulSoup
import random
import time
import os
import threading
from retry import retry
class niuke(object):
def __init__(self):
self.url='https://www.nowcoder.com/discuss/tag/{}?type=2&page={}'
self.headers={"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36"}
self.prefix='https://www.nowcoder.com'
self.origin_url='https://www.nowcoder.com/discuss/tags?type=2'
def get_enterprise(self):
r=requests.get(self.origin_url,headers=self.headers)
tree=etree.HTML(r.text)
enterprise=tree.xpath('//div[@data-nav="企业"]/ul[@class="discuss-tags-mod"]/li/a/@data-href')
enterprise_name=tree.xpath('//div[@d