一个简单的爬虫(一)

这段代码用于抓取点评网上海所有美发门店的信息,大概有16000多家门店,代码很简单,直接上代码。

#-*-coding:utf-8 -*-

import requests
import socket
import MySQLdb
import datetime
import time
from lxml import etree
import random
from UserAgent import user_agent_list

class DpShangHai:
    def __init__(self):
        self.Accept = '*/*'
        self.AcceptEncoding = 'gzip, deflate, sdch'
        self.AcceptLanguage = 'zh-CN,zh;q=0.8'
        self.CacheControl = 'max-age=0'
        self.Host = 'www.dpfile.com'
        self.pageIndex = None
        self.UserAgent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
        self.Connection = 'keep-alive'
        self.Accept_Language = 'zh-CN,zh;q=0.8'
        #初始化headers
        self.headers = ''
        self.proxy = ''
        # 存放程序是否继续运行的变量
        self.enable = False
        # connect MySQLdb
        self.db = MySQLdb.connect("IP", "username", "password", "database")
        #定义SQL对象
        self.sql = ''
        #cursor()方法获取操作游标
        self.cursor = self.db.cursor()
        self.of = open('proxy.txt', 'w')
        self.dates = str(datetime.date.today())
        self.LIST  = [
            ('http://www.dianping.com/search/keyword/1/0_%E7%BE%8E%E5%8F%91/r5','浦东新区'),
            ('http://www.dianping.com/search/keyword/1/0_%E7%BE%8E%E5%8F%91/r2','徐汇区'),
            ('http://www.dianping.com/search/keyword/1/0_%E7%BE%8E%E5%8F%91/r6','黄浦区'),
            ('http://www.dianping.com/search/keyword/1/0_%E7%BE%8E%E5%8F%91/r1','卢湾区'),
            ('http://www.dianping.com/search/keyword/1/0_%E7%BE%8E%E5%8F%91/r3','静安区'),
            ('http://www.dianping.com/search/keyword/1/0_%E7%BE%8E%E5%8F%91/r4','长宁区'),
            ('http://www.dianping.com/search/keyword/1/0_%E7%BE%8E%E5%8F%91/r12','闵行区'),
            ('http://www.dianping.com/search/keyword/1/0_%E7%BE%8E%E5%8F%91/r10','杨浦区'),
            ('http://www.dianping.com/search/keyword/1/0_%E7%BE%8E%E5%8F%91/r7','普陀区'),
            ('http://www.dianping.com/search/keyword/1/0_%E7%BE%8E%E5%8F%91/r9','虹口区'),
            ('http://www.dianping.com/search/keyword/1/0_%E7%BE%8E%E5%8F%91/r13','宝山区'),
            ('http://www.dianping.com/search/keyword/1/0_%E7%BE%8E%E5%8F%91/r8','闸北区'),
            ('http://www.dianping.com/search/keyword/1/0_%E7%BE%8E%E5%8F%91/r5937','松江区'),
            ('http://www.dianping.com/search/keyword/1/0_%E7%BE%8E%E5%8F%91/r5938','嘉定区'),
            ('http://www.dianping.com/search/keyword/1/0_%E7%
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值