将geoip,geoip_city数据导入Postgresql数据库

MaxMind公司提供的geoip数据格式有两种,一种是二进制文件,一种是csv文件,可能还有其他格式的,比如dotted octet format。本次总结只以csv文件举例。

1.从MaxMind得到csv文件,这个需要付费的。
2.然后将得到的文件解压缩到服务器特定目录。
3.安装PostgresSQL数据库,并且安装ip4r
(可以从网上下载postgresql-distrib-ip4r包安装)
4.安装python以及Psycopg2
1)yum list "*psycopg*"
2)或者直接用wget ftp://ftp.muug.mb.ca/mirror/fedora/epel/5/i386/python-psycopg2-2.0.7-1.el5.i386.rpm得到
3)rpm -Uvh psycopg2-2.0.7-1.el5.i386.rpm
5. 准备python导入脚本文件

'''
Script for loading GeoIP CSV data into a postgresql database
'''

import logging, psycopg2, psycopg2.extensions, sys

from optparse import OptionGroup, OptionParser
from StringIO import StringIO

class GeoIPDataLoader(object):

def __init__(self, dsn, blocks='GeoLiteCity-Blocks.csv', locations='GeoLiteCity-Location.csv', schema='public'):
self.con = psycopg2.connect(dsn)
# We don't need transactions... right?
self.con.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
# The data is in ISO8859_15 encoding
self.con.set_client_encoding('iso8859_15')
self.cur = self.con.cursor()

self.blocks_csv = blocks
self.location_csv = locations
self.schema = schema

def close(self):
self.con.close()

def create_tables(self):
print 'Creating structure...',
self.db_execute(
'''
CREATE TABLE locations
(
id bigint NOT NULL,
country character(2) NOT NULL,
region character(2),
city character varying(75),
postal_code character varying(15),
latitude numeric(6,4) NOT NULL,
longitude numeric(7,4),
metro_code integer,
area_code integer,
CONSTRAINT locations_pkey PRIMARY KEY (id)
);

CREATE TABLE blocks
(
start_ip bigint NOT NULL,
end_ip bigint NOT NULL,
location_id bigint NOT NULL
);

'''
)
print '\033[1;32mDone\033[1;m'

def create_indexes(self, ip4=False):
print 'Adding Indexes...',
sys.stdout.flush()
if not ip4:
self.db_execute('''
CREATE INDEX ix_start_end_ip ON blocks
USING btree (start_ip, end_ip) WITH (FILLFACTOR=100);
CREATE INDEX ix_end_start_ip ON blocks
USING btree (end_ip, start_ip) WITH (FILLFACTOR=100);
''')
else:
self.db_execute('''
CREATE INDEX ix_ip_range ON blocks
USING gist (ip_range) WITH (FILLFACTOR=100);
''')
print '\033[1;32mDone\033[1;m'

def create_functions(self, ip4=False):
print 'Adding utility functions...',
sys.stdout.flush()
if ip4:
self.db_execute('''
CREATE OR REPLACE FUNCTION get_location(inet) RETURNS bigint AS $$
SELECT location_id FROM %s.blocks
WHERE ip_range >>= ip4($1)
$$ LANGUAGE SQL;
''' % self.schema)
else:
self.db_execute('''
CREATE OR REPLACE FUNCTION inet_to_bigint(inet) RETURNS bigint AS $$
SELECT $1 - inet '0.0.0.0'
$$ LANGUAGE SQL;
''' % self.schema)
print '\033[1;32mDone\033[1;m'

def create_schema(self):
try:
self.db_execute('''CREATE SCHEMA %s;''' % self.schema)
except psycopg2.ProgrammingError:
pass

self.db_execute('SET search_path TO %s,public;' % self.schema)

def db_execute(self, ddl):
self.cur.execute(ddl)
# self.con.commit()

def load_data(self):
# Load Locations
self.load_table(self.location_csv, 'locations')
# Load Blocks
self.load_table(self.blocks_csv, 'blocks')

def load_table(self, file_name, table_name):
print 'Loading table \033[1;34m%s\033[1;m from file \033[1;34m%s\033[1;m...' % (table_name, file_name),
sys.stdout.flush()
geo_file = open(file_name)
# Skip the copyright header
geo_file.readline()
geo_file.readline()
#Remove quotes... psycopg2's `copy` errors on them
string_data = geo_file.read().replace('"', '')
self.cur.copy_from(StringIO(string_data), table_name, sep=',', null='')
print '\033[1;32mDone\033[1;m'

def migrate_to_ip4(self):
print 'Adding ip_range column'
self.db_execute('''
ALTER TABLE blocks ADD COLUMN ip_range ip4r;
ALTER TABLE blocks ALTER COLUMN ip_range SET STORAGE PLAIN;
''')

print 'Migrating data to ip4...',
sys.stdout.flush()
self.db_execute('''UPDATE blocks SET ip_range = ip4r(start_ip::ip4, end_ip::ip4)''')
print '\033[1;32mDone\033[1;m'

print 'Dropping unneeded columns'
self.db_execute('''
ALTER TABLE blocks DROP COLUMN start_ip;
ALTER TABLE blocks DROP COLUMN end_ip;
''')
def vacuum(self):
print 'Vaccuming database...',
sys.stdout.flush()
self.db_execute('VACUUM FULL ANALYZE')
print '\033[1;32mDone\033[1;m'

def main():
DSN = "dbname='%s' user='%s' host='%s'"

parser = OptionParser()
# Operational options
parser.add_option('-c', '--load-ddl', dest='load_ddl', default=False,
action='store_true', help='Create database structure')

parser.add_option('-g', '--load-data', dest='load', default=False,
action='store_true', help='Load the GeoIP data')

parser.add_option('-b', '--blocks-file', dest='blocks_csv', default='GeoLiteCity-Blocks.csv',
action='store', help='GeoIP Blocks CSV file [default: %default]', metavar='BLOCKS_FILE')
parser.add_option('-l', '--locations-file', dest='locations_csv', default='GeoLiteCity-Location.csv',
action='store', help='GoeIP Locations CSV file [default: %default]', metavar='LOCATIONS_FILE')

db_group = OptionGroup(parser, 'Database Options')
# Database options
db_group.add_option('-H', '--host', dest='db_host', default='localhost',
action='store', help='Database host [default: %default]', metavar='DB_HOST')
db_group.add_option('-d', '--database', dest='db_name', default='geoip_db',
action='store', help='Database name [default: %default]', metavar='DATABASE_NAME')
db_group.add_option('-U', '--user', dest='db_user', default='geoip',
action='store', help='User [default: %default]', metavar='USER_NAME')
db_group.add_option('-W','--prompt',dest='password',default='',
action='store',help='Password',metavar='PASSWORD')

db_group.add_option('-s', '--schema', dest='schema', default='public',
action='store', help='Database Schema [default: %default]', metavar='SCHEMA')

db_group.add_option('--ip4r', dest='ip4', default=False,
action='store_true', help='Use IP4r module [default: %default]')

parser.add_option_group(db_group)

(options, args) = parser.parse_args()

data_loader = GeoIPDataLoader("dbname='%s' user='%s' password='%s' host='%s'" % (options.db_name, options.db_user,options.password, options.db_host),
blocks=options.blocks_csv, locations=options.locations_csv, schema=options.schema)

if not options.load_ddl and not options.load:
parser.print_help()
return

if options.load_ddl:
if options.schema != 'public':
data_loader.create_schema()
data_loader.create_tables()

if options.load:
data_loader.load_data()

if options.ip4:
data_loader.migrate_to_ip4()

if options.load:
data_loader.create_indexes(options.ip4 is True)

if options.load_ddl:
data_loader.create_functions(options.ip4 is True)

data_loader.vacuum()

if __name__ == "__main__":
main()



注意:
1.该文件中创建索引使用的WITH选项是PostgresSQL 8.2以上版本才支持的。
2.我添加了数据库密码验证,要不数据库密码验证通不过。

6: 创建表


--DROP TABLE locations;
--DROP TABLE blocks;

-- Table DDL
CREATE TABLE locations
(
id bigint NOT NULL,
country character(2) NOT NULL,
region character(2),
city character varying(75),
postal_code character varying(15),
latitude numeric(6,4) NOT NULL,
longitude numeric(7,4),
metro_code integer,
area_code integer,
CONSTRAINT locations_pkey PRIMARY KEY (id)
);

CREATE TABLE blocks
(
start_ip bigint NOT NULL,
end_ip bigint NOT NULL,
location_id bigint NOT NULL,
CONSTRAINT blocks_pkey PRIMARY KEY (start_ip, end_ip)
);


7: 使用脚本命令导入

chmod +x /tmp/load_geoip.py
python /tmp/load_geoip.py --ip4r -l /tmp/GeoIPCity-134-Location.csv -b /tmp/GeoIPCity-134-Blocks.csv --host 127.0.0.1 -U slony -W 12345 -d BeboAndFacebook -g


8. 如果是更新的话,要考虑slony集群环境,先用slonik在主数据库服务器上把表清空,然后再导入新的数据。

参考文章:[url]http://www.siafoo.net/article/53[/url]
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值