"""Scrape the Red Bull China branch page into a pandas DataFrame.

Downloads the page at ``url``, extracts the company name, address, e-mail
and telephone entries from the HTML, and tabulates them one column per
field.
"""
import re

import bs4
import pandas as pd
import requests

url = r'http://www.redbull.com.cn/about/branch'

# Keep the response under its own name: rebinding ``requests`` would shadow
# the module and break any later ``requests.get`` call.
# The timeout stops the script from hanging forever on an unresponsive host.
response = requests.get(url, timeout=10)
# Fail fast on an HTTP error status instead of silently parsing an error page.
response.raise_for_status()
html = response.text  # response body decoded to a string

# Name the parser explicitly so the parse does not depend on which optional
# parsers happen to be installed (and to avoid bs4's "no parser" warning).
soup = bs4.BeautifulSoup(html, 'html.parser')

# Company names are matched on the raw HTML: the contents of each <h2>...</h2>.
company = re.findall(r'<h2>(.*?)</h2>', html)
# The other fields are the text of the <p> elements carrying a given class.
address = [p.text for p in soup.find_all('p', class_='mapIco')]
email = [p.text for p in soup.find_all('p', class_='mailIco')]
tel = [p.text for p in soup.find_all('p', class_='telIco')]

# One row per branch. The DataFrame constructor raises ValueError if the four
# lists differ in length, which would indicate the page layout has changed.
branches = pd.DataFrame({
    'company': company,
    'address': address,
    'email': email,
    'tel': tel,
})
print(branches)