python面向对象编程实例爬虫_python微博爬虫(scrapy)示例源码

class WeiboSpider(Spider):
    """Scrapy spider that crawls users, relations and posts from m.weibo.cn.

    Starting from the user ids in ``start_users`` the spider requests each
    user's profile, then that user's follow list, fan list and weibo list.
    Every user found in a follow/fan list is requested in turn through
    ``parse_user``.

    NOTE: this class relies on ``json``, ``Spider``, ``Request`` and the item
    classes ``UserItem``, ``UserRelationItem`` and ``WeiboItem`` being
    imported by the enclosing module.
    """

    name = 'weibocn'
    allowed_domains = ['m.weibo.cn']

    # URL templates of the JSON endpoints; filled in with ``str.format``.
    user_url = (
        'https://m.weibo.cn/api/container/getIndex'
        '?uid={uid}&type=uid&value={uid}&containerid=100505{uid}'
    )
    follow_url = (
        'https://m.weibo.cn/api/container/getIndex'
        '?containerid=231051_-_followers_-_{uid}&page={page}'
    )
    fan_url = (
        'https://m.weibo.cn/api/container/getIndex'
        '?containerid=231051_-_fans_-_{uid}&page={page}'
    )
    weibo_url = (
        'https://m.weibo.cn/api/container/getIndex'
        '?uid={uid}&type=uid&page={page}&containerid=107603{uid}'
    )

    # User ids the crawl is seeded with.
    start_users = [
        '3217179555',
        '1742566624',
        '2282991915',
        '1288739185',
        '3952070245',
        '5878659096',
    ]

    # Item field name -> key in the ``userInfo`` object of the response.
    _USER_FIELD_MAP = {
        'id': 'id',
        'name': 'screen_name',
        'avatar': 'profile_image_url',
        'cover': 'cover_image_phone',
        'gender': 'gender',
        'description': 'description',
        'fans_count': 'followers_count',
        'follows_count': 'follow_count',
        'weibos_count': 'statuses_count',
        'verified': 'verified',
        'verified_reason': 'verified_reason',
        'verified_type': 'verified_type',
    }

    # Item field name -> key in the ``mblog`` object of a card.
    _WEIBO_FIELD_MAP = {
        'id': 'id',
        'attitudes_count': 'attitudes_count',
        'comments_count': 'comments_count',
        'reposts_count': 'reposts_count',
        'picture': 'original_pic',
        'pictures': 'pics',
        'created_at': 'created_at',
        'source': 'source',
        'text': 'text',
        'raw_text': 'raw_text',
        'thumbnail': 'thumbnail_pic',
    }

    def start_requests(self):
        """Yield one profile request for every id in ``start_users``."""
        for uid in self.start_users:
            yield Request(self.user_url.format(uid=uid),
                          callback=self.parse_user)

    def parse_user(self, response):
        """Parse a user profile response.

        Yields a ``UserItem`` followed by the first-page requests for the
        user's follow list, fan list and weibo list. Yields nothing when the
        payload carries no ``userInfo``.

        :param response: Response object
        """
        self.logger.debug(response)
        result = json.loads(response.text)
        # ``data`` may be absent or null in the payload; treat that the same
        # as a missing ``userInfo`` instead of raising AttributeError.
        user_info = (result.get('data') or {}).get('userInfo')
        if not user_info:
            return

        user_item = UserItem()
        for field, attr in self._USER_FIELD_MAP.items():
            user_item[field] = user_info.get(attr)
        yield user_item

        uid = user_info.get('id')
        # Follow list
        yield Request(self.follow_url.format(uid=uid, page=1),
                      callback=self.parse_follows,
                      meta={'page': 1, 'uid': uid})
        # Fan list
        yield Request(self.fan_url.format(uid=uid, page=1),
                      callback=self.parse_fans,
                      meta={'page': 1, 'uid': uid})
        # Weibo list
        yield Request(self.weibo_url.format(uid=uid, page=1),
                      callback=self.parse_weibos,
                      meta={'page': 1, 'uid': uid})

    def parse_follows(self, response):
        """Parse one page of the accounts a user follows.

        :param response: Response object
        :return: iterable of requests and one ``UserRelationItem``
        """
        return self._parse_relations(response, 'follows', self.follow_url,
                                     self.parse_follows)

    def parse_fans(self, response):
        """Parse one page of a user's fans.

        :param response: Response object
        :return: iterable of requests and one ``UserRelationItem``
        """
        return self._parse_relations(response, 'fans', self.fan_url,
                                     self.parse_fans)

    def parse_weibos(self, response):
        """Parse one page of a user's weibo list.

        Yields a ``WeiboItem`` for every card that carries an ``mblog`` and
        then the request for the next page. Yields nothing when the payload
        is not ``ok`` or has no cards, which ends the pagination.

        :param response: Response object
        """
        result = json.loads(response.text)
        cards = (result.get('data') or {}).get('cards')
        if not (result.get('ok') and cards):
            return

        uid = response.meta.get('uid')
        for card in cards:
            mblog = card.get('mblog')
            if not mblog:
                continue
            weibo_item = WeiboItem()
            for field, attr in self._WEIBO_FIELD_MAP.items():
                weibo_item[field] = mblog.get(attr)
            weibo_item['user'] = uid
            yield weibo_item

        # Next page of weibos
        page = response.meta.get('page', 1) + 1
        yield Request(self.weibo_url.format(uid=uid, page=page),
                      callback=self.parse_weibos,
                      meta={'uid': uid, 'page': page})

    @staticmethod
    def _last_card_group(result):
        """Return the ``card_group`` of the last card, or ``[]`` if missing."""
        if not result.get('ok'):
            return []
        cards = (result.get('data') or {}).get('cards')
        if not cards:
            return []
        return cards[-1].get('card_group') or []

    def _parse_relations(self, response, relation_key, url_template, callback):
        """Shared implementation of ``parse_follows`` and ``parse_fans``.

        :param response: Response object
        :param relation_key: ``'follows'`` or ``'fans'``; the item field that
            receives the parsed users (the other one is set to ``[]``)
        :param url_template: URL template used for the next-page request
        :param callback: callback used for the next-page request
        """
        result = json.loads(response.text)
        card_group = self._last_card_group(result)
        if not card_group:
            return

        # Entries without a ``user`` object are skipped everywhere, so the
        # relation list below cannot fail on them.
        users = [entry.get('user') for entry in card_group
                 if entry.get('user')]

        # Crawl the profile of every listed user.
        for user in users:
            yield Request(self.user_url.format(uid=user.get('id')),
                          callback=self.parse_user)

        uid = response.meta.get('uid')
        user_relation_item = UserRelationItem()
        user_relation_item['id'] = uid
        user_relation_item['follows'] = []
        user_relation_item['fans'] = []
        user_relation_item[relation_key] = [
            {'id': user.get('id'), 'name': user.get('screen_name')}
            for user in users
        ]
        yield user_relation_item

        # Next page of the same list
        page = response.meta.get('page', 1) + 1
        yield Request(url_template.format(uid=uid, page=page),
                      callback=callback,
                      meta={'page': page, 'uid': uid})

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值