爬虫
lgc_
这个作者很懒，什么都没留下…
展开
-
Python爬取百度贴吧图片
# -*- coding: utf-8 -*- import requests from lxml import etree # &pn=50 class Ximage: def __init__(self): self.baseurl = "http://tieba.baidu.com/f?kw=" # 定义主页url头部信息 ...原创 2018-09-13 17:37:55 · 448 阅读 · 0 评论 -
Python爬取糗事百科
import requests from lxml import etree import pymongo class QiushiSpider: def __init__(self): self.url = "https://www.qiushibaike.com/text/page/8/" # 定义爬取的url self.headers ...原创 2018-09-14 12:03:02 · 181 阅读 · 0 评论 -
Python爬取AJAX动态加载内容
import requests import json import csv url= "https://movie.douban.com/j/chart/top_list?" params={ "type":17, "interval_id" :"100:90", "action":"", "start":0, "limit":100 ...原创 2018-09-14 12:05:13 · 1046 阅读 · 0 评论 -
Selenium + BeautifulSoup 爬虫
from selenium import webdriver from bs4 import BeautifulSoup as bs import time driver = webdriver.PhantomJS() driver.get("https://www.douyu.com/directory/all") #while True: i = 1 while True: #htm...原创 2018-09-14 17:37:31 · 309 阅读 · 0 评论