pandas DataFrame 中某一列的每个单元格是一个列表,把列表拆分成多行(每个元素占一行)
df0 = asin_name page_bad_review_id
0 B08L5X9SZX ["RE6WNBG0BB94C"]
1 B08VNCJ3D6 ["R3K64G2XVC9CZN", "R2FMNEUEP7IYFG", "RWE7E817...
2 B08VHW531Y ["R2I32O1ADN9IID", "R3RCR9VCB2NV2E", "R18AJHV5...
3 B08L5VMX7Q ["RJN0J8J5QBWEI"]
4 B08VHWJZKP ["R2I32O1ADN9IID", "R3RCR9VCB2NV2E", "R18AJHV5...
5 B08PBRNFY6 []
df1 = asin_name review_id
0 B08L5X9SZX RE6WNBG0BB94C
1 B08VNCJ3D6 R3K64G2XVC9CZN,R2FMNEUEP7IYFG,RWE7E817GGYS7,RO...
2 B08VHW531Y R2I32O1ADN9IID,R3RCR9VCB2NV2E,R18AJHV57DWZ1A,R...
3 B08L5VMX7Q RJN0J8J5QBWEI
4 B08VHWJZKP R2I32O1ADN9IID,R3RCR9VCB2NV2E,R18AJHV57DWZ1A,R...
5 B08PBRNFY6
df2 = asin_name review_id
0 B08L5X9SZX RE6WNBG0BB94C
1 B08VNCJ3D6 R3K64G2XVC9CZN
1 B08VNCJ3D6 R2FMNEUEP7IYFG
1 B08VNCJ3D6 RWE7E817GGYS7
1 B08VNCJ3D6 RO0N62VDEAQOZ
1 B08VNCJ3D6 R27XSTM4EBHSXN
1 B08VNCJ3D6 RBZ3U6LE7KU4W
def json_dump(self, df):
    """Flatten one row's ``page_bad_review_id`` cell into a comma-separated string.

    Args:
        df: A single row (despite the name), i.e. anything indexable by
            ``'page_bad_review_id'``, such as the Series handed over by
            ``DataFrame.apply(..., axis=1)``.

    Returns:
        The ids joined by ``,``. For a string cell, every ``[``, ``]``,
        ``"`` and space is removed. For a ``list`` cell, the items are
        joined directly. ``''`` is returned when the cell's string form
        contains no ``[`` (for example ``None``) or the list is empty.
    """
    cell = df['page_bad_review_id']
    if isinstance(cell, list):
        # str() of a real list renders single quotes, which the string
        # clean-up below would leave in place; join the items directly.
        return ','.join(str(item) for item in cell)
    text = str(cell)
    if '[' not in text:
        return ''
    # One pass that deletes brackets, double quotes and spaces.
    return text.translate(str.maketrans('', '', '[]" '))
# Collapse each row's id list into a single comma-separated string.
df['review_id'] = df.apply(self.json_dump, axis=1)
print('df1 = ',df[['asin_name','review_id']])
# Turn the comma-separated string into one row per id. explode() repeats the
# other columns and the original index label for every id, and, unlike the
# stack()/join() idiom, does not multiply rows when index labels are duplicated.
df = df.assign(review_id=df['review_id'].str.split(',')).explode('review_id')
pandas 连接 MySQL
import pandas as pd
import pymysql

# Connect to the database; connection parameters come from the `setting` object.
conn = pymysql.connect(
    host=setting.Host,
    port=setting.Port,
    user=setting.User,
    passwd=setting.Passwd,
    db=setting.DB,
    charset='utf8',
)
sql = """select * from test"""
# Run the query and load the whole result set into a DataFrame.
# NOTE(review): nothing here closes `conn`; call conn.close() once all
# queries that use it are done.
df = pd.read_sql(sql, con=conn)