import requests
import time
import pandas as pd
import urllib.parse as urp
import re
from matplotlib import pyplot as plt
import jieba
from wordcloud import WordCloud
import PIL.Image as image
import numpy as np
from pylab import *
# Make SimHei the only entry in matplotlib's sans-serif font list.
# `mpl` is not imported explicitly; it is expected to be provided by the
# `from pylab import *` star import above.
# NOTE(review): presumably this is so Chinese text in the plots renders
# instead of showing placeholder boxes -- confirm SimHei is installed.
mpl.rcParams['font.sans-serif'] = ['SimHei']
def get_data(i,keyword):
url = 'https://tousu.sina.com.cn/api/index/s?&keywords='+keyword+'&page_size=10&page='+i
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.18363'}
data = requests.get(url,headers=headers).text
qingqiupat = ',"appeal":"(.*?)"'
neironpat = '"summary":"(.*?)"'
locationpat = 'location":"(.*?)"}},'
titlepat = '"title":"(.*?)","uid":'
qingqiulist = re.compile(qingqiupat).findall(data)
neironlist = re.compile(neironpat).findall(data)
locationlist = re.compile(locationpat).findall(data)
titlelist = re.compile(titlepat
Python 黑猫投诉网爬虫：无需修改参数即可直接生成投诉内容词云、投诉请求与地址柱形图（源码）
最新推荐文章于 2024-04-24 22:31:16 发布
本文介绍如何使用Python编写爬虫,从黑猫投诉网站获取投诉内容,进而制作投诉词云图和投诉请求与地址的柱形图。通过源码分享,无需额外参数调整即可运行。
摘要由CSDN通过智能技术生成