起因:
前两天突然想找一份省、市、县三级的行政区划代码库,却发现网上的资源要么不是最新的,要么需要账号、积分,甚至要付费,让人很烦,于是写了这个脚本,直接从国家统计局网站抓取数据并写入数据库。
数据库结构:
-- County/district-level divisions scraped by the import script.
-- `citycode` stores the `code` value of the owning row in `city`.
CREATE TABLE IF NOT EXISTS `area` (
  `id` int(11) NOT NULL auto_increment,
  `code` varchar(6) NOT NULL,      -- 6-digit division code
  `name` varchar(20) NOT NULL,
  `citycode` varchar(6) NOT NULL,  -- parent city code
  PRIMARY KEY (`id`),
  -- Rows are looked up by their own code and by their parent's code.
  KEY `idx_area_code` (`code`),
  KEY `idx_area_citycode` (`citycode`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

-- Prefecture/city-level divisions scraped by the import script.
-- `provincecode` stores the `code` value of the owning row in `province`.
CREATE TABLE IF NOT EXISTS `city` (
  `id` int(11) NOT NULL auto_increment,
  `code` varchar(6) NOT NULL,          -- 6-digit division code
  `name` varchar(20) NOT NULL,
  `provincecode` varchar(6) NOT NULL,  -- parent province code
  PRIMARY KEY (`id`),
  -- Rows are looked up by their own code and by their parent's code.
  KEY `idx_city_code` (`code`),
  KEY `idx_city_provincecode` (`provincecode`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;


-- Province-level divisions scraped by the import script.
CREATE TABLE IF NOT EXISTS `province` (
  `id` int(11) NOT NULL auto_increment,
  `code` varchar(6) NOT NULL,  -- 6-digit division code
  `name` varchar(20) NOT NULL,
  PRIMARY KEY (`id`),
  -- Rows are looked up by their code.
  KEY `idx_province_code` (`code`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
脚本文件:
<?php
// Remove PHP's execution time limit: the script downloads a page and then
// issues one INSERT per scraped row.
set_time_limit(0);
/**
 * Scrapes the administrative-division code listing at $url and loads it into
 * the `province`, `city` and `area` MySQL tables.
 *
 * Intended call order: getInstance() -> start() -> insert().
 */
class get_city_code {
    /** Raw HTML of the listing page, filled by connect_tongji_html(). */
    private $html = '';
    /** code_result instance created by start(); its code_data feeds insert(). */
    public $code_rt;
    /** The single shared instance handed out by getInstance(). */
    private static $instance = '';
    /** mysqli connection opened by get_db(). */
    private $db = '';
    /** Page that lists the division codes. */
    private $url = 'http://www.stats.gov.cn/tjsj/tjbz/xzqhdm/201504/t20150415_712722.html';

    /** Private so that instances are only created through getInstance(). */
    private function __construct() {
    }

    /**
     * Returns the shared instance, creating it on first use.
     *
     * @return get_city_code
     */
    public static function getInstance() {
        if ( ! self::$instance instanceof get_city_code ) {
            self::$instance = new self();
        }
        return self::$instance;
    }

    /**
     * Downloads the listing page and parses it into $code_rt->code_data.
     *
     * @throws RuntimeException when the page cannot be downloaded
     */
    public function start() {
        $this->connect_tongji_html();
        $this->code_rt = new code_result();
        $this->code_rt->html = $this->html;
        $this->code_rt->filter_all_data();
    }

    /**
     * Fetches $url with cURL and stores the response body in $html.
     *
     * @throws RuntimeException when cURL cannot be initialised or the transfer fails
     */
    private function connect_tongji_html() {
        $ch = curl_init();
        if ( $ch === false ) {
            throw new RuntimeException('Unable to initialise cURL');
        }

        curl_setopt($ch, CURLOPT_URL, $this->getUrl());
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_HEADER, 0);

        // Execute the request and read the HTML document.
        $html = curl_exec($ch);
        // Read the error text before the handle is released.
        $error = curl_error($ch);

        // Release the cURL handle.
        curl_close($ch);

        // curl_exec() returns false on failure; do not hand that to the parser.
        if ( $html === false ) {
            throw new RuntimeException('Download of ' . $this->getUrl() . ' failed: ' . $error);
        }
        $this->html = $html;
    }

    /**
     * Returns the URL of the listing page.
     *
     * @return string
     */
    private function getUrl() {
        return $this->url;
    }

    /**
     * Writes the parsed divisions to the `province`, `city` and `area` tables.
     *
     * All rows are sent through prepared statements inside one transaction.
     * A row that fails to insert raises an E_USER_WARNING and is skipped; the
     * remaining rows are still written.
     *
     * @throws LogicException   when called before start()
     * @throws RuntimeException when the connection, a prepare or the commit fails
     */
    public function insert() {
        if ( ! is_object($this->code_rt) ) {
            throw new LogicException('start() must be called before insert()');
        }
        $rows = $this->build_rows($this->code_rt->code_data);

        $this->get_db();
        $statements = array(
            'province' => $this->prepare_insert('insert into province (code, name) values (?, ?)'),
            'city'     => $this->prepare_insert('insert into city (code, name, provincecode) values (?, ?, ?)'),
            'area'     => $this->prepare_insert('insert into area (code, name, citycode) values (?, ?, ?)'),
        );

        // bind_param() binds by reference, so assigning to these variables in
        // the loop below changes what the next execute() sends.
        $code = '';
        $name = '';
        $parent = '';
        $statements['province']->bind_param('ss', $code, $name);
        $statements['city']->bind_param('sss', $code, $name, $parent);
        $statements['area']->bind_param('sss', $code, $name, $parent);

        // One transaction for the whole import instead of one commit per row.
        $this->db->autocommit(false);
        foreach ( $rows as $row ) {
            $code = $row['code'];
            $name = $row['name'];
            $parent = $row['parent'];

            $statement = $statements[$row['table']];
            if ( ! $statement->execute() ) {
                trigger_error(
                    'Insert into ' . $row['table'] . ' failed for code ' . $code . ': ' . $statement->error,
                    E_USER_WARNING
                );
            }
        }
        if ( ! $this->db->commit() ) {
            throw new RuntimeException('Commit failed: ' . $this->db->error);
        }

        foreach ( $statements as $statement ) {
            $statement->close();
        }
        $this->db->close();
    }

    /**
     * Turns the parsed "code => name" map into the ordered list of rows to insert.
     *
     * A code ending in "0000" is a province, any other code ending in "00" is
     * a city, and everything else is an area. Two rules apply to city-level rows:
     *  - a city named '市辖区' is stored under its province's name;
     *  - a city named '县' is not stored, and neither are the areas below it.
     *
     * NOTE(review): dropping the areas below a '县' group loses real county
     * rows; confirm whether that is intended.
     *
     * @param array $code_data map of 6-digit code => name, in page order
     * @return array list of array('table', 'code', 'name', 'parent') rows;
     *               'parent' is null for provinces
     */
    private function build_rows($code_data) {
        $rows = array();
        $province_names = array();
        $skipped_cities = array();

        foreach ( $code_data as $code => $name ) {
            // PHP turns numeric-string array keys into integers; restore the string.
            $code = (string) $code;
            if ( ! preg_match('/^[0-9]{6}\z/', $code) ) {
                continue;
            }

            $province_code = substr($code, 0, 2) . '0000';
            $city_code = substr($code, 0, 4) . '00';

            if ( $code === $province_code ) {
                $province_names[$province_code] = $name;
                $rows[] = array('table' => 'province', 'code' => $code, 'name' => $name, 'parent' => null);
                continue;
            }

            if ( $code === $city_code ) {
                if ( $name === '市辖区' && isset($province_names[$province_code]) ) {
                    $name = $province_names[$province_code];
                }
                if ( $name === '县' ) {
                    $skipped_cities[$city_code] = true;
                    continue;
                }
                $rows[] = array('table' => 'city', 'code' => $code, 'name' => $name, 'parent' => $province_code);
                continue;
            }

            if ( isset($skipped_cities[$city_code]) ) {
                continue;
            }
            $rows[] = array('table' => 'area', 'code' => $code, 'name' => $name, 'parent' => $city_code);
        }

        return $rows;
    }

    /**
     * Prepares one INSERT statement on the open connection.
     *
     * @param string $sql statement text with ? placeholders
     * @return mysqli_stmt
     * @throws RuntimeException when the statement cannot be prepared
     */
    private function prepare_insert($sql) {
        $statement = $this->db->prepare($sql);
        if ( $statement === false ) {
            throw new RuntimeException('Unable to prepare "' . $sql . '": ' . $this->db->error);
        }
        return $statement;
    }

    /**
     * Opens the MySQL connection and stores it in $db.
     *
     * NOTE(review): host, user, password and database are hard-coded here;
     * consider moving them to configuration.
     *
     * @throws RuntimeException when the connection cannot be established
     */
    private function get_db () {
        $db = new mysqli('localhost','root','sunl','blog');
        if ( $db->connect_error ) {
            throw new RuntimeException('Database connection failed: ' . $db->connect_error);
        }
        $db->set_charset('utf8');
        $this->db = $db;
    }
}

/**
 * Extracts "division code => name" pairs from the HTML of the listing page.
 *
 * Set $html, call filter_all_data(), then read $code_data / getCodeData().
 */
class code_result {
    /** Input HTML; replaced by the tag- and whitespace-free text after parsing. */
    public $html = '';
    /** Map of 6-digit code => name, in page order. */
    public $code_data = array();

    public function __construct () {
    }

    /**
     * Parses $html into $code_data.
     *
     * Leaves $code_data empty when $html is not a string, contains no matching
     * <p> run, or is not valid UTF-8.
     */
    public function filter_all_data() {
        $this->code_data = array();

        // Grab the run of <p> elements that holds the listing.
        $patten = "/<p.*><\/p>/";
        if ( ! is_string($this->html) || ! preg_match($patten, $this->html, $p) ) {
            $this->html = '';
            return;
        }

        // Drop the markup, turn entities such as &nbsp; into real characters,
        // then remove every kind of whitespace. \p{Z} covers the no-break and
        // ideographic spaces that a plain \s without /u does not match.
        $text = html_entity_decode(strip_tags($p[0]), ENT_QUOTES, 'UTF-8');
        $text = preg_replace('/[\s\p{Z}]+/u', '', $text);
        if ( $text === null ) {
            // preg_replace() returns null on error, e.g. invalid UTF-8 input.
            $this->html = '';
            return;
        }
        $this->html = $text;

        // Match each 6-digit code together with the name that directly follows
        // it, so a stray run of text or digits cannot shift codes against names.
        $patten2 = '/([0-9]{6})([\x{4e00}-\x{9fa5}]+)/u';
        if ( ! preg_match_all($patten2, $text, $pairs, PREG_SET_ORDER) ) {
            return;
        }
        foreach ( $pairs as $pair ) {
            $this->code_data[$pair[1]] = $pair[2];
        }
    }

    /**
     * Returns the parsed map of code => name.
     *
     * @return array
     */
    public function getCodeData() {
        return $this->code_data;
    }
}

// Run the import: download and parse the listing, then write it to MySQL.
$code = get_city_code::getInstance();
$code->start();
$code->insert();
?>