文件路径:pyspider>database>__init__.py
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# vim: set et sw=4 ts=4 sts=4 ff=unix fenc=utf8:
# Author: Binux<i@binux.me>
#         http://binux.me
# Created on 2014-10-08 15:04:08

# Python 2/3 compatible URL helpers: urlparse splits a URL into its
# components, parse_qs decodes a query string into a dict of lists.
try:
    from six.moves.urllib.parse import urlparse, parse_qs
except ImportError:
    # six is not importable: use the Python 3 standard-library location.
    from urllib.parse import urlparse, parse_qs


def connect_database(url):
    """
    create database object by url

    mysql:
        mysql+type://user:passwd@host:port/database
    sqlite:
        # relative path
        sqlite+type:///path/to/database.db
        # absolute path
        sqlite+type:////path/to/database.db
        # memory database
        sqlite+type://
    mongodb:
        mongodb+type://[username:password@]host1[:port1][,host2[:port2],...[,hostN[:portN]]][/[database][?options]]
        more: http://docs.mongodb.org/manual/reference/connection-string/
    sqlalchemy:
        sqlalchemy+postgresql+type://user:passwd@host:port/database
        sqlalchemy+mysql+mysqlconnector+type://user:passwd@host:port/database
        more: http://docs.sqlalchemy.org/en/rel_0_9/core/engines.html
    redis:
        redis+taskdb://host:port/db
    elasticsearch:
        elasticsearch+type://host:port/?index=pyspider
    local:
        local+projectdb://filepath,filepath

    type:
        taskdb
        projectdb
        resultdb

    The returned object gets a ``copy`` attribute: a zero-argument callable
    that builds another database object from the same url.
    """
    db = _connect_database(url)
    # Let callers obtain a fresh object for the same url on demand.
    db.copy = lambda: _connect_database(url)
    return db


def _connect_database(url):  # NOQA
    """
    Build the database object described by ``url``.

    The url scheme has the form ``engine[+other...]+dbtype``; the first
    element selects the backend, the last one selects which database class
    (taskdb / projectdb / resultdb) is instantiated. Backend modules are
    imported lazily so only the selected backend has to be installed.

    Raises:
        Exception: the scheme has no ``+dbtype`` part, the sqlalchemy scheme
            has no dialect, the sqlite path is malformed, or the engine is
            unknown.
        LookupError: the dbtype is unknown or not supported by the engine.
    """
    parsed = urlparse(url)

    # "engine+...+dbtype" -> first element is the engine, last is the dbtype,
    # anything in between is passed through (used by sqlalchemy).
    scheme = parsed.scheme.split('+')
    if len(scheme) == 1:
        raise Exception('wrong scheme format: %s' % parsed.scheme)
    engine, dbtype = scheme[0], scheme[-1]
    other_scheme = "+".join(scheme[1:-1])

    if dbtype not in ('taskdb', 'projectdb', 'resultdb'):
        # Format the message explicitly; passing dbtype as a second
        # exception argument would leave the "%s" unexpanded.
        raise LookupError(('unknown database type: %s, '
                           'type should be one of ["taskdb", "projectdb", "resultdb"]')
                          % dbtype)

    if engine == 'mysql':
        # Only forward the connection settings that are present in the url.
        params = {}
        if parsed.username:
            params['user'] = parsed.username
        if parsed.password:
            params['passwd'] = parsed.password
        if parsed.hostname:
            params['host'] = parsed.hostname
        if parsed.port:
            params['port'] = parsed.port
        if parsed.path.strip('/'):
            params['database'] = parsed.path.strip('/')

        if dbtype == 'taskdb':
            from .mysql.taskdb import TaskDB
            return TaskDB(**params)
        elif dbtype == 'projectdb':
            from .mysql.projectdb import ProjectDB
            return ProjectDB(**params)
        elif dbtype == 'resultdb':
            from .mysql.resultdb import ResultDB
            return ResultDB(**params)
        else:
            # Defensive: dbtype was validated above.
            raise LookupError('not supported dbtype: %s' % dbtype)
    elif engine == 'sqlite':
        if parsed.path.startswith('//'):
            # sqlite+type:////abs/path -> absolute path
            path = '/' + parsed.path.strip('/')
        elif parsed.path.startswith('/'):
            # sqlite+type:///rel/path -> path relative to the working dir
            path = './' + parsed.path.strip('/')
        elif not parsed.path:
            # sqlite+type:// -> in-memory database
            path = ':memory:'
        else:
            raise Exception('error path: %s' % parsed.path)

        if dbtype == 'taskdb':
            from .sqlite.taskdb import TaskDB
            return TaskDB(path)
        elif dbtype == 'projectdb':
            from .sqlite.projectdb import ProjectDB
            return ProjectDB(path)
        elif dbtype == 'resultdb':
            from .sqlite.resultdb import ResultDB
            return ResultDB(path)
        else:
            # Defensive: dbtype was validated above.
            raise LookupError('not supported dbtype: %s' % dbtype)
    elif engine == 'mongodb':
        # Rewrite "mongodb+type" to plain "mongodb". Replace only the first
        # occurrence so the same text inside credentials/path is untouched.
        url = url.replace(parsed.scheme, 'mongodb', 1)
        params = {}
        if parsed.path.strip('/'):
            params['database'] = parsed.path.strip('/')

        if dbtype == 'taskdb':
            from .mongodb.taskdb import TaskDB
            return TaskDB(url, **params)
        elif dbtype == 'projectdb':
            from .mongodb.projectdb import ProjectDB
            return ProjectDB(url, **params)
        elif dbtype == 'resultdb':
            from .mongodb.resultdb import ResultDB
            return ResultDB(url, **params)
        else:
            # Defensive: dbtype was validated above.
            raise LookupError('not supported dbtype: %s' % dbtype)
    elif engine == 'sqlalchemy':
        # A dialect (e.g. "postgresql" or "mysql+mysqlconnector") is required
        # between "sqlalchemy" and the dbtype.
        if not other_scheme:
            raise Exception('wrong scheme format: %s' % parsed.scheme)
        # Hand sqlalchemy a url whose scheme is just the dialect part; only
        # the leading scheme occurrence is rewritten.
        url = url.replace(parsed.scheme, other_scheme, 1)

        if dbtype == 'taskdb':
            from .sqlalchemy.taskdb import TaskDB
            return TaskDB(url)
        elif dbtype == 'projectdb':
            from .sqlalchemy.projectdb import ProjectDB
            return ProjectDB(url)
        elif dbtype == 'resultdb':
            from .sqlalchemy.resultdb import ResultDB
            return ResultDB(url)
        else:
            # Defensive: dbtype was validated above.
            raise LookupError('not supported dbtype: %s' % dbtype)
    elif engine == 'redis':
        # Only the task database has a redis backend.
        if dbtype == 'taskdb':
            from .redis.taskdb import TaskDB
            # The url path carries the redis db number; default to 0.
            return TaskDB(parsed.hostname, parsed.port,
                          int(parsed.path.strip('/') or 0))
        else:
            raise LookupError('not supported dbtype: %s' % dbtype)
    elif engine == 'local':
        # Only the project database has a local (script file) backend.
        if dbtype == 'projectdb':
            from .local.projectdb import ProjectDB
            # Everything after "//" is a comma separated list of file paths.
            scripts = url.split('//', 1)[1].split(',')
            return ProjectDB(scripts)
        else:
            raise LookupError('not supported dbtype: %s' % dbtype)
    elif engine == 'elasticsearch' or engine == 'es':
        # in python 2.6 url like "http://host/?query", query will not been splitted
        if parsed.path.startswith('/?'):
            query = parse_qs(parsed.path[2:])
        else:
            query = parse_qs(parsed.query)
        # Use the first "index" value from the query, or the default index.
        if 'index' in query and query['index']:
            index = query['index'][0]
        else:
            index = 'pyspider'

        if dbtype == 'projectdb':
            from .elasticsearch.projectdb import ProjectDB
            return ProjectDB([parsed.netloc], index=index)
        elif dbtype == 'resultdb':
            from .elasticsearch.resultdb import ResultDB
            return ResultDB([parsed.netloc], index=index)
        elif dbtype == 'taskdb':
            from .elasticsearch.taskdb import TaskDB
            return TaskDB([parsed.netloc], index=index)
        else:
            # Defensive: dbtype was validated above.
            raise LookupError('not supported dbtype: %s' % dbtype)
    else:
        raise Exception('unknown engine: %s' % engine)