import urllib.request
from bs4 import BeautifulSoup
import re
# Page whose markup is searched by the examples further down.
url = r'http://www.suning.com/'
# Open the response as a context manager so the HTTP connection is closed
# as soon as the body has been read, rather than being left open.
with urllib.request.urlopen(url) as res:
    htmlfile = res.read()
# Parse the raw bytes with the standard-library-backed 'html.parser' builder.
mysoup = BeautifulSoup(htmlfile, 'html.parser')
# print(mysoup)
# find_all(name, attrs, recursive, string, limit, **kwargs) returns a list of every match.
# name: filter on the tag name; accepts a string, regular expression, list, True or a function
# attrs: dict of attribute filters
# recursive: pass recursive=False to look only at direct children; otherwise all descendants are searched
# string: filter on the text inside the document; accepts a string, regular expression, list or True
#         (this keyword was called `text` in older Beautiful Soup releases)
# **kwargs: any other keyword is treated as a filter on the attribute of that name
# limit: stop searching once this many matches have been found
print(mysoup.find_all('title'))
# True matches every tag.
print(mysoup.head.find_all(True))
# The next three calls express the same attribute filter in different ways.
print(mysoup.head.find_all(content="app-id=537508092"))
print(mysoup.head.find_all('meta',attrs = {"content":"app-id=537508092"}))
print(mysoup.head.find_all('meta',content="app-id=537508092"))
# The result is a list, so it can be indexed; this raises IndexError if the
# page has fewer than four <meta> tags in its head.
print(mysoup.head.find_all('meta')[3])
print(len(mysoup.head.find_all('meta')),mysoup.head.find_all('meta'))
# Same search restricted to direct children of <head>.
print(len(mysoup.head.find_all('meta',recursive=False)),mysoup.head.find_all('meta',recursive=False))
# `string=` replaces the legacy `text=` keyword for matching text content.
print(mysoup.head.find_all(string=re.compile(".*价格.*")))
print(mysoup.head.find_all(string=True))
print(mysoup.head.find_all('meta',content="app-id=537508092"))
print(mysoup.head.find_all(content="app-id=537508092"))
print(mysoup.head.find_all('meta',limit=2))
# Searches beneath the second <meta> tag; <meta> elements normally have no
# children, so this is presumably expected to print an empty list.
print(mysoup.head.find_all('meta',limit=2)[1].find_all('meta',limit=3))
# find(name=None, attrs={}, recursive=True, string=None, **kwargs)
# find() takes the same filters as find_all() but returns only the first
# matching element (not a list), searching from the start of the subtree.
print(mysoup.find('title'))
print(mysoup.head.find(content="app-id=537508092"))
print(mysoup.head.find('meta',attrs = {"content":"app-id=537508092"}))
# `string=` replaces the legacy `text=` keyword for matching text content.
print(mysoup.head.find(string=re.compile(".*价格.*")))
# Every example in this section starts from the document's <title> tag.
title_tag = mysoup.title

# Upwards: find_parent() gives the direct parent, find_parents() a list of
# all ancestors.
title_parent = title_tag.find_parent()
print(title_parent)
print(title_parent.find_all('link')[1])
print(title_tag.find_parents())

# Forwards on the same level: find_next_sibling() gives the first sibling
# after the tag, find_next_siblings() a list of all siblings after it.
print(title_tag.find_next_sibling())
print(title_tag.find_next_siblings())

# Backwards on the same level: find_previous_sibling() gives the first
# sibling before the tag, find_previous_siblings() a list of all before it.
print(title_tag.find_previous_sibling())
print(title_tag.find_previous_siblings())

# Forwards through the document: find_next() gives the first matching node
# after the tag, find_all_next() a list of every matching node after it.
print(title_tag.find_next('link'))
print(title_tag.find_all_next('link'))

# Backwards through the document: find_previous() gives the first matching
# node before the tag, find_all_previous() every matching node before it.
print(title_tag.find_previous('link'))
print(title_tag.find_all_previous('link'))
# CSS selectors: select() returns a list of the matching tags.
# Syntax: a bare name selects by tag name, '.name' by class, '#name' by id,
# and '[attr="value"]' by attribute. Two parts separated by a space select
# the second part anywhere inside the first.
page_head = mysoup.head
page_body = mysoup.body
css_queries = (
    (mysoup, 'title'),
    (page_body, '.logo'),
    (page_body, 'img[class="logo"]'),
    (page_head, 'meta[name="pageid"]'),
    (page_body, 'a[target="_blank"]'),
    (page_body, '#_TOP_BANNER_'),
    (mysoup, 'em'),
    # <p> tags with class="title" inside an <li>
    (mysoup, 'li p[class="title"]'),
    # tags with class "login" inside a <div>
    (mysoup, 'div .login'),
    # the tag with id "waitPayCounts" inside an <a>
    (mysoup, 'a #waitPayCounts'),
    # the tag with id "showTotalQty" inside the tag with id "J_total_num_box"
    (mysoup, '#J_total_num_box #showTotalQty'),
    # tags with class "ng-iconfont" inside a tag with class "ng-vip-union"
    (mysoup, '.ng-vip-union .ng-iconfont'),
    # the tag with id "showTotalQty" inside a tag with class "total-num-box"
    (mysoup, '.total-num-box #showTotalQty'),
)
# Run the queries in the order listed, printing one result list per query.
for scope, css in css_queries:
    print(scope.select(css))