我在使用RedditAPI进行数据爬取的代码:
import praw
# 使用你的Reddit API信息进行认证
reddit = praw.Reddit(
client_id='my_id',
client_secret='my_secret',
user_agent='test/1.0 by username',
)
# 获取'popular' subreddit
subreddit = reddit.subreddit('popular')
# 遍历热门帖子
for submission in subreddit.hot(limit=10): # 这里限制了帖子数量为10,你可以根据需要进行调整
print(f'Title: {submission.title}')
print(f'URL: {submission.url}')
print('---')
运行时遇到的问题:
Traceback (most recent call last):
File "E:\selenium_scraper\src\example.py", line 46, in <module>
for submission in subreddit.hot(limit=10): # 这里限制了帖子数量为10,你可以根据需要进行调整
File "E:\Anaconda2\envs\my_env2\lib\site-packages\praw\models\listing\generator.py", line 63, in __next__
self._next_batch()
File "E:\Anaconda2\envs\my_env2\lib\site-packages\praw\models\listing\generator.py", line 89, in _next_batch
self._listing = self._reddit.get(self.url, params=self.params)
File "E:\Anaconda2\envs\my_env2\lib\site-packages\praw\util\deprecate_args.py", line 43, in wrapped
return func(**dict(zip(_old_args, args)), **kwargs)
File "E:\Anaconda2\envs\my_env2\lib\site-packages\praw\reddit.py", line 712, in get
return self._objectify_request(method="GET", params=params, path=path)
File "E:\Anaconda2\envs\my_env2\lib\site-packages\praw\reddit.py", line 517, in _objectify_request
self.request(
File "E:\Anaconda2\envs\my_env2\lib\site-packages\praw\util\deprecate_args.py", line 43, in wrapped
return func(**dict(zip(_old_args, args)), **kwargs)
File "E:\Anaconda2\envs\my_env2\lib\site-packages\praw\reddit.py", line 941, in request
return self._core.request(
File "E:\Anaconda2\envs\my_env2\lib\site-packages\prawcore\sessions.py", line 328, in request
return self._request_with_retries(
File "E:\Anaconda2\envs\my_env2\lib\site-packages\prawcore\sessions.py", line 254, in _request_with_retries
return self._do_retry(
File "E:\Anaconda2\envs\my_env2\lib\site-packages\prawcore\sessions.py", line 162, in _do_retry
return self._request_with_retries(
File "E:\Anaconda2\envs\my_env2\lib\site-packages\prawcore\sessions.py", line 254, in _request_with_retries
return self._do_retry(
File "E:\Anaconda2\envs\my_env2\lib\site-packages\prawcore\sessions.py", line 162, in _do_retry
return self._request_with_retries(
File "E:\Anaconda2\envs\my_env2\lib\site-packages\prawcore\sessions.py", line 234, in _request_with_retries
response, saved_exception = self._make_request(
File "E:\Anaconda2\envs\my_env2\lib\site-packages\prawcore\sessions.py", line 186, in _make_request
response = self._rate_limiter.call(
File "E:\Anaconda2\envs\my_env2\lib\site-packages\prawcore\rate_limit.py", line 46, in call
kwargs["headers"] = set_header_callback()
File "E:\Anaconda2\envs\my_env2\lib\site-packages\prawcore\sessions.py", line 282, in _set_header_callback
self._authorizer.refresh()
File "E:\Anaconda2\envs\my_env2\lib\site-packages\prawcore\auth.py", line 378, in refresh
self._request_token(grant_type="client_credentials", **additional_kwargs)
File "E:\Anaconda2\envs\my_env2\lib\site-packages\prawcore\auth.py", line 155, in _request_token
response = self._authenticator._post(url=url, **data)
File "E:\Anaconda2\envs\my_env2\lib\site-packages\prawcore\auth.py", line 51, in _post
response = self._requestor.request(
File "E:\Anaconda2\envs\my_env2\lib\site-packages\prawcore\requestor.py", line 70, in request
raise RequestException(exc, args, kwargs) from None
prawcore.exceptions.RequestException: error with request HTTPSConnectionPool(host='www.reddit.com', port=443): Max retries exceeded with url: /api/v1/access_token (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:1122)')))
同样的代码在别人的电脑上可以正常爬取数据;
Reddit网站使用浏览器可以正常访问
在打开了本机的代理服务器且使用了VPN的情况下代码在自己的电脑上运行会出现以下错误:
我已经尝试了添加代理服务器、关闭代理服务器、降低请求频率的方法,但是依旧出现相同的错误,实在是没有其他的办法了,急需大家的帮助。