import pandas as pd
# Load the tab-separated annotations file into a DataFrame.
newdata = pd.read_csv(
    'annotations_final.csv',
    sep="\t",
)

# Peek at the first five rows.
newdata.head(n=5)

# Print a summary of the frame (written to stdout by pandas).
newdata.info()

# Access the column labels.
newdata.columns

# Select two columns side by side (this is a selection, not a concatenation).
newdata.loc[:, ["clip_id", "no voice"]]
# Some of the tags in the dataset are really close to each other.
# Let's merge them together. Each inner list is one group of synonymous
# tags; the first entry is the name the merged column is stored under.
synonyms = [
    ['beat', 'beats'],
    ['chant', 'chanting'],
    ['choir', 'choral'],
    ['classical', 'clasical', 'classic'],
    ['drum', 'drums'],
    ['electro', 'electronic', 'electronica', 'electric'],
    ['fast', 'fast beat', 'quick'],
    ['female', 'female singer', 'female singing', 'female vocals',
     'female vocal', 'female voice', 'woman', 'woman singing', 'women'],
    ['flute', 'flutes'],
    ['guitar', 'guitars'],
    ['hard', 'hard rock'],
    ['harpsichord', 'harpsicord'],
    ['heavy', 'heavy metal', 'metal'],
    ['horn', 'horns'],
    ['india', 'indian'],
    ['jazz', 'jazzy'],
    ['male', 'male singer', 'male vocal', 'male vocals', 'male voice',
     'man', 'man singing', 'men'],
    ['no beat', 'no drums'],
    ['no singer', 'no singing', 'no vocal', 'no vocals', 'no voice',
     'no voices', 'instrumental'],
    ['opera', 'operatic'],
    ['orchestra', 'orchestral'],
    ['quiet', 'silence'],
    ['singer', 'singing'],
    ['space', 'spacey'],
    ['string', 'strings'],
    ['synth', 'synthesizer'],
    ['violin', 'violins'],
    ['vocal', 'vocals', 'voice', 'voices'],
    ['strange', 'weird'],
]

# Merge the synonyms and drop all other columns than the first one.
#
# Example:
#   Merge 'beat', 'beats' and save it to 'beat'.
#   Merge 'classical', 'clasical', 'classic' and save it to 'classical'.
#
# The merged value is the row-wise maximum over the group's columns
# (presumably the tag columns are 0/1 indicators, which would make this a
# logical OR -- confirm against the data).
# NOTE: every tag listed above must exist as a column in `newdata`;
# a missing column raises KeyError.
for synonym_list in synonyms:
    newdata[synonym_list[0]] = newdata[synonym_list].max(axis=1)
    # Only the canonical (first) column of the group is kept.
    newdata.drop(columns=synonym_list[1:], inplace=True)

# Let's view it.
newdata.head()