# Applying Vowpal Wabbit to malicious-website data. This is a useful script that converts an
# ARFF (Weka) file to the input format Vowpal Wabbit requires. A Perl script posted online for this does not actually work.
#!/usr/bin/python
import sys
# Default output path; kept from the original script so existing usage is unchanged.
OUTPUT_PATH = "total1.vw"


def arff_line_to_vw(line):
    """Convert one ARFF line to a Vowpal Wabbit example line.

    The last comma-separated field is used as the label; every other field
    becomes a feature named by its zero-based column index in namespace ``f``.

    Args:
        line: A raw line read from the ARFF file (may include the newline).

    Returns:
        The VW-formatted line without a trailing newline, or ``None`` when
        the line carries no data (too short, an ``@`` header declaration, or
        a ``%`` comment).
    """
    # Lines shorter than 4 characters (newline included) are treated as blank;
    # '@' starts ARFF header declarations and '%' starts ARFF comments.
    if len(line) < 4 or line.startswith(("@", "%")):
        return None
    # A real list (not map()) so indexing and slicing also work on Python 3.
    fields = [field.strip() for field in line.split(",")]
    # Each feature keeps a trailing space, matching the original output bytes.
    features = "".join(
        "{}:{} ".format(index, value)
        for index, value in enumerate(fields[:-1])
    )
    return fields[-1] + " |f " + features


def convert(arff_path, vw_path=OUTPUT_PATH):
    """Read the ARFF file at *arff_path* and write VW examples to *vw_path*.

    Args:
        arff_path: Path of the input ARFF file.
        vw_path: Path of the output file; overwritten if it exists.
    """
    # Context managers close both files even if a line fails to convert.
    with open(arff_path, "r") as arff_file, open(vw_path, "w") as vw_file:
        for line in arff_file:
            vw_line = arff_line_to_vw(line)
            if vw_line is not None:
                vw_file.write(vw_line + "\n")


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s <input.arff>\n" % sys.argv[0])
        sys.exit(1)
    convert(sys.argv[1])