import pandas as pd
# Location of the ratings CSV. The file is read without a header row, so its
# columns are addressed by position.
path = 'C:/Users/18578/Desktop/testCF.csv'
orgin_data = pd.read_csv(path, header=None)

# Column 0 feeds `user` and column 1 feeds `items`; each is reduced to its
# distinct values and sorted ascending (the original row labels are kept).
user_column = orgin_data.iloc[:, 0]
item_column = orgin_data.iloc[:, 1]
items = item_column.drop_duplicates().sort_values()
user = user_column.drop_duplicates().sort_values()

print(items)
print(user)
0 101
1 102
2 103
6 104
9 105
14 106
10 107
Name: 1, dtype: int64
0 A1
3 A2
7 B3
11 C4
15 D5
Name: 0, dtype: object
import numpy as np
# Materialise the distinct item and user labels as plain Python lists.
# list() iterates each Series in its existing (sorted) order, so the element
# order matches the Series printed above.
item_list = list(items)
user_list = list(user)

print(item_list)
print(user_list)
[101, 102, 103, 104, 105, 106, 107]
['A1', 'A2', 'B3', 'C4', 'D5']
import numpy as np
# Ratings matrix: one row per user label, one column per item label, with
# every cell starting at integer 0.
# Filling from a scalar lets pandas size the frame from the label lists
# themselves, so this works for any number of users and items rather than
# only when their product happens to equal a fixed element count.
new_data = pd.DataFrame(0, index=user_list, columns=item_list)
print(new_data)
101 102 103 104 105 106 107
A1 0 0 0 0 0 0 0
A2 0 0 0 0 0 0 0
B3 0 0 0 0 0 0 0
C4 0 0 0 0 0 0 0
D5 0 0 0 0 0 0 0
# Read the CSV a second time, now as a NumPy array in which every
# comma-separated field is kept as a string.
with open(path, encoding='utf-8') as f:
    list_data = np.loadtxt(f, dtype=str, delimiter=",")

print(list_data)
[['A1' '101' '5']
['A1' '102' '3']
['A1' '103' '2.5']
['A2' '101' '2']
['A2' '102' '2.5']
['A2' '103' '5']
['A2' '104' '2']
['B3' '101' '2.5']
['B3' '104' '4']
['B3' '105' '4.5']
['B3' '107' '5']
['C4' '101' '5']
['C4' '103' '3']
['C4' '104' '4.5']
['C4' '106' '4']
['D5' '101' '4']
['D5' '102' '3']
['D5' '103' '2']
['D5' '104' '4']
['D5' '105' '3.5']
['D5' '106' '4']]
# Copy every CSV record (user, item, rating) into its cell of the matrix.

# The ratings are stored exactly as read (strings) next to the integer 0
# placeholders. Casting to object first makes that mixed content explicit
# instead of relying on pandas silently upcasting integer columns when a
# string is assigned, which recent pandas versions deprecate.
new_data = new_data.astype(object)

# Every CSV field is text, while the frame's labels may not be (the item
# columns are integers). Build text -> real-label lookups once so each record
# is placed with two dictionary lookups instead of scanning every
# (column, index) combination.
column_by_text = {str(label): label for label in new_data.columns}
index_by_text = {str(label): label for label in new_data.index}

for i in list_data:
    # Records whose user or item is not present in the matrix are skipped.
    if i[1] not in column_by_text or i[0] not in index_by_text:
        continue
    j = column_by_text[i[1]]
    k = index_by_text[i[0]]
    print(i, j, k)
    # Address the row by its real label; using the stringified label would
    # append a brand-new row whenever the index is not made of strings.
    new_data.loc[k, j] = i[2]
['A1' '101' '5'] 101 A1
['A1' '102' '3'] 102 A1
['A1' '103' '2.5'] 103 A1
['A2' '101' '2'] 101 A2
['A2' '102' '2.5'] 102 A2
['A2' '103' '5'] 103 A2
['A2' '104' '2'] 104 A2
['B3' '101' '2.5'] 101 B3
['B3' '104' '4'] 104 B3
['B3' '105' '4.5'] 105 B3
['B3' '107' '5'] 107 B3
['C4' '101' '5'] 101 C4
['C4' '103' '3'] 103 C4
['C4' '104' '4.5'] 104 C4
['C4' '106' '4'] 106 C4
['D5' '101' '4'] 101 D5
['D5' '102' '3'] 102 D5
['D5' '103' '2'] 103 D5
['D5' '104' '4'] 104 D5
['D5' '105' '3.5'] 105 D5
['D5' '106' '4'] 106 D5
# Show the user-by-item matrix after the ratings have been written into it.
print ( new_data)
101 102 103 104 105 106 107
A1 5 3 2.5 0 0 0 0
A2 2 2.5 5 2 0 0 0
B3 2.5 0 0 4 4.5 0 5
C4 5 0 3 4.5 0 4 0
D5 4 3 2 4 3.5 4 0