# coding:utf-8 import pandas as pd import pymongo from odo import odo Client = pymongo.MongoClient(host='localhost',port=27017) df = pd.read_csv('User.csv',error_bad_lines=False) #error_bad_lines: if False then any lines causing an error will be skipped bad lines db = Client.weibo.repacleartuser df.columns = df.columns.str.upper() # columns 都转换成字符串,变成大写 df.rename(columns={ '邮箱是否激活':'邮箱激活', '手机是否激活': '手机激活'}, inplace=True) # columns 名称替换 # df.rename(columns = {'$b':'B'}, inplace = True) # http://www.cnblogs.com/hhh5460/p/5816774.html # for index, row in df.iterrows(): # print(row['ID']) # Dataframe 进行循环 # if isinstance(row['ID'],int): # print(row) # isinstance 判断 row['ID']是不是int类型 newdf= df[['ID','用户名','邮箱','手机号','上次登录','登录次数','注册来源','注册IP','邮箱激活','手机激活','账户状态','系统时间']] from time import time b = time() db.insert_many(newdf.to_dict('records')) # 插入数据 # 200万数据用时96.56582021713257秒 e = time() print(e-b)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
|
# coding:utf-8
# Read user records from User.csv with pandas, normalise the column names,
# keep a fixed set of columns, and bulk-insert the rows into the MongoDB
# collection ``weibo.repacleartuser``. Prints the seconds spent converting
# and inserting the rows.
from time import perf_counter

import pandas as pd
import pymongo
from odo import odo  # not used by this script; import kept as in the original

try:
    # Malformed rows are dropped with a warning instead of aborting the load.
    # ``on_bad_lines`` exists from pandas 1.3 on; 'warn' matches the old
    # ``error_bad_lines=False`` behaviour (skip the row, emit a warning).
    df = pd.read_csv('User.csv', on_bad_lines='warn')
except TypeError:
    # Older pandas does not know ``on_bad_lines``; fall back to the legacy
    # flag, which pandas 2.0 removed.
    df = pd.read_csv('User.csv', error_bad_lines=False)

# Upper-case all column names.
df.columns = df.columns.str.upper()

# Replace selected column names with shorter ones.
df.rename(
    columns={
        '邮箱是否激活': '邮箱激活',
        '手机是否激活': '手机激活',
    },
    inplace=True,
)
# Another rename example:
#   df.rename(columns = {'$b':'B'}, inplace = True)
# Reference: http://www.cnblogs.com/hhh5460/p/5816774.html
#
# Looping over a DataFrame row by row, using isinstance to check whether
# row['ID'] is an int:
#   for index, row in df.iterrows():
#       print(row['ID'])
#       if isinstance(row['ID'], int):
#           print(row)

# Keep only the columns that are written to MongoDB, in this order.
newdf = df[
    [
        'ID',
        '用户名',
        '邮箱',
        '手机号',
        '上次登录',
        '登录次数',
        '注册来源',
        '注册IP',
        '邮箱激活',
        '手机激活',
        '账户状态',
        '系统时间',
    ]
]

Client = pymongo.MongoClient(host='localhost', port=27017)
try:
    # NOTE: despite its name, ``db`` is the target collection
    # (database ``weibo``, collection ``repacleartuser``).
    db = Client.weibo.repacleartuser

    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments.
    b = perf_counter()
    records = newdf.to_dict('records')
    # insert_many rejects an empty document list, so skip the call when the
    # CSV yielded no rows.
    if records:
        db.insert_many(records)  # insert the data
    # Original author's measurement: 2 million rows took
    # 96.56582021713257 seconds.
    e = perf_counter()
    print(e - b)
finally:
    # Release the client's sockets and background threads even if the
    # insert fails.
    Client.close()
|