python爬虫：传递URL参数学习笔记

最新推荐文章于 2022-05-21 07:00:00 发布

山谷來客

最新推荐文章于 2022-05-21 07:00:00 发布

阅读量6.3k

点赞数

分类专栏： python

本文链接：https://blog.csdn.net/u010035907/article/details/52894199

版权

python 专栏收录该内容

28 篇文章 1 订阅

订阅专栏

# -*- coding: utf-8 -*-
"""
Created on Sat Oct 22 17:57:13 2016

@author: hhxsym

买粮网搜索列表获取
"""

import os
import requests
from bs4 import BeautifulSoup

# Run the script from the course folder so that relative paths such as
# data.txt resolve there. The path contains Chinese characters; on Python 2 a
# byte-string path could not be opened, so it is kept as a unicode literal
# (valid on both Python 2 and Python 3, unlike the unicode() builtin).
inpath = u"C:\\Users\\hhxsym\\Desktop\\课程群Python爬虫"
os.chdir(inpath)

def get_search_list(keyword=None, page=1, timeout=10):
    """Fetch one page of mailiangwang.com search results and save it to data.txt.

    The search term and page number are sent as URL query parameters
    (``keyword`` and ``pageid``) through the ``params`` argument of
    ``requests.get``. Every merchant row found on the page is written to
    ``data.txt`` in the current working directory as one ``|``-separated
    line, preceded by a header line. The file is overwritten on each call.

    Args:
        keyword: Search term. ``requests`` leaves parameters whose value is
            ``None`` out of the query string.
        page: Result page number, sent as ``pageid``.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout,
            or an HTTP error status.
    """
    # Local import keeps this block self-contained; io.open accepts an
    # explicit encoding on both Python 2 and Python 3.
    import io

    url = 'http://www.mailiangwang.com/biz/list'

    payload = {'keyword': keyword, 'pageid': page}
    # requests.get(url, params=...) appends the dict to the URL as a query string.
    response = requests.get(url, params=payload, timeout=timeout)
    print(response.url)          # final URL, including the encoded parameters
    print(response.status_code)  # HTTP status code
    # Stop here on 4xx/5xx instead of parsing an error page into an empty file.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')

    # CSS selector shared by all columns: one div.p_dataItem per merchant row.
    # ('.' introduces a CSS class name; '>' means direct child.)
    # NOTE(review): the author's alternative 'div.merchantList > div > ...'
    # also worked, while 'div:nth-child(2)' did not -- presumably because it
    # addresses a single row only; confirm against the page markup.
    row = 'body > div.wrap > div.merchantList > div.p_dataList > div.p_dataItem'
    names = soup.select(row + ' > span.n1 > a')
    capitals = soup.select(row + ' > span.n3')
    addresses = soup.select(row + ' > span.n5')
    categories = soup.select(row + ' > span.n6')

    # Write text, not bytes: the file object encodes to UTF-8 itself, which
    # avoids the UnicodeEncodeError the author hit with a plain open().
    with io.open('data.txt', 'w', encoding='utf-8') as f:
        f.write(u'公司名称|注册资本|公司地址|主营品类\n')  # header line
        for name, capital, address, category in zip(
                names, capitals, addresses, categories):
            # Prefer the link's title attribute; fall back to its visible
            # text when the attribute is missing, then trim whitespace.
            title = (name.get('title') or name.get_text()).strip()
            fields = [title, capital.text, address.text, category.text]
            f.write(u'|'.join(fields) + u'\n')  # one data line per merchant

    print('写入成功！')



if __name__ == '__main__':
    # Example run: first result page for the search term "玉米" (corn).
    get_search_list(keyword=u'玉米', page=1)

#将文本数据导入 Excel 的方法：复制 -> 粘贴到 Excel -> 菜单栏“数据” -> 分列 -> “分隔符号” -> ……

山谷來客

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
python爬虫：传递URL参数学习笔记

# -*- coding: utf-8 -*-"""Created on Sat Oct 22 17:57:13 2016@author: hhxsym买粮网搜索列表获"""import osimport requestsfrom bs4 import BeautifulSoupinpath="C:\\Users\\hhx
复制链接

扫一扫