Python爬取动态网页

Selenium+Chrome抓取动态网页
抓取https://m.maigoo.com/brand/search/?brandlevel=2723页面中的商标名字

 

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from pyquery import PyQuery as pq
from selenium.webdriver.chrome.options import Options
import time

# Configure Chrome to run headless (no visible browser window).
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

# Pass the options through ``options=``: the older ``chrome_options=`` keyword
# is deprecated and is rejected by newer Selenium releases.
browser = webdriver.Chrome(options=chrome_options)

# Shared explicit wait: every wait.until(...) call gives up after 10 seconds.
wait = WebDriverWait(browser, 10)


# https://m.maigoo.com/brand/search/?&catid=7&brandlevel=2729&initial=A&areaid=4639
# Keep clicking the "load more" button.
def search(url='https://m.maigoo.com/brand/search/?brandlevel=2723'):
    """Open *url*, click "load more" repeatedly, then save the listed brands.

    The "load more" button is clicked until it stops becoming clickable
    within the wait timeout, or until more than 100 clicks have been made.
    Whatever has been loaded by then is handed to ``get_product``, even if
    the clicking loop ended because of an error.

    Args:
        url: Listing page to open. The text after its last ``=`` is passed
            to ``get_product`` as a label for the printed output.
    """
    i = url.split('=')[-1]
    count = 0
    try:
        browser.get(url)
        while True:
            if count > 100:
                break
            submit = wait.until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, '#result > div.morebox2 > a.morebtn.jiamore'))
            )
            # Short pause before each click (presumably lets freshly loaded
            # content settle -- confirm before removing).
            time.sleep(0.5)
            count += 1
            submit.click()
    except TimeoutException:
        # Expected way out of the loop: the button did not become clickable
        # within the timeout, which is treated as "nothing more to load".
        pass
    except Exception as exc:
        # Stay best-effort (still save what was loaded) but report the
        # failure instead of hiding it. KeyboardInterrupt and SystemExit are
        # deliberately not caught here.
        print('search stopped early after %d clicks: %r' % (count, exc))
    finally:
        get_product(i, count)


def get_product(i, count):
    """Extract brand names from the current page and append them to a file.

    Reads the page source of the module-level ``browser``, selects every
    ``#result .load_block`` element and takes the text of its
    ``.ttl .scont`` descendants. Each non-empty space-separated piece of
    that text is appended as its own line to ``result/content.txt``.

    Args:
        i: Label printed in front of every brand line (the caller passes the
            trailing value of the listing URL).
        count: Number of "load more" clicks performed; printed for progress.
    """
    # Local import keeps this block self-contained.
    import os

    # Grab the rendered HTML of the page.
    html = browser.page_source
    doc = pq(html)
    # Select every brand entry.
    items = doc('#result .load_block').items()

    # Opening in append mode does not create missing parent directories.
    os.makedirs('result', exist_ok=True)

    # Write the brand names.
    with open('result/content.txt', 'a', encoding='utf-8') as f:
        for item in items:
            product = item.find('.ttl .scont').text()
            print('%s %d %s' % (i, count, product))
            # Use a distinct loop name: reusing ``i`` here would overwrite
            # the label printed for every following item.
            for name in product.split(' '):
                if name == '':
                    continue
                f.write(name + '\n')


if __name__ == '__main__':
    try:
        search()
    finally:
        # Always shut the browser down so the headless Chrome process does
        # not stay alive after the script ends or fails.
        browser.quit()

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值