from torch.utils.data import Dataset
data_path = "C:/Users/luoweu/Desktop/pytorch学习/DataSet/smsspamcollection/SMSSpamCollection"


class Mydataset(Dataset):
    """Map-style dataset over a text file holding one labelled message per line.

    Every line is split into a ``(label, content)`` pair of strings.
    NOTE(review): lines are presumed to look like ``<label><TAB><message>``
    (labels such as ``ham`` / ``spam``) -- confirm against the data file.
    """

    def __init__(self, path=None, encoding='mac_roman'):
        """Read all lines of the data file into memory.

        Args:
            path: File to read. ``None`` (the default) means the module-level
                ``data_path``, looked up at call time.
            encoding: Text encoding used to decode the file.

        Raises:
            OSError: If the file cannot be opened.
        """
        if path is None:
            path = data_path
        # The context manager guarantees the handle is closed once the lines
        # are loaded, instead of relying on garbage collection.
        with open(path, encoding=encoding) as handle:
            self.lines = handle.readlines()

    def __getitem__(self, index):
        """Return the ``(label, content)`` string pair for line ``index``."""
        # strip() with no argument returns a copy without leading/trailing
        # whitespace; given a chars argument it removes those characters
        # from both ends instead.
        line = self.lines[index].strip()
        label, tab, content = line.partition("\t")
        if not tab:
            # No delimiter on this line: keep the historical fixed-width
            # split (first four characters are the label).
            label, content = line[:4], line[4:]
        return label.strip(), content.strip()

    def __len__(self):
        """Return the number of lines (samples) that were loaded."""
        return len(self.lines)
# Build the dataset, then display the sample at index 5 next to the dataset size.
data = Mydataset()
sixth_sample = data[5]
sample_count = len(data)
print(sixth_sample, sample_count)
('spam', "FreeMsg Hey there darling it's been 3 week's now and no word back! I'd like some fun you up for it still? Tb ok! XxX std chgs to send, £1.50 to rcv") 5574
2.数据加载器类(分批、打乱、多线程加载数据)
from torch.utils.data import DataLoader
# Wrap the dataset in a loader that yields shuffled batches of 10 samples,
# loading in the main process (num_workers=0).
dataloader = DataLoader(
    dataset=data,
    batch_size=10,
    shuffle=True,
    num_workers=0,
)

# The loader cannot be accessed by index, so iterate and stop at batch number 5.
for batch_idx, batch in enumerate(dataloader):
    if batch_idx != 5:
        continue
    print(batch)
    break
[('ham', 'ham', 'spam', 'ham', 'ham', 'ham', 'spam', 'ham', 'ham', 'ham'), ("*deep sigh* ... I miss you :-( ... I am really surprised you haven't gone to the net cafe yet to get to me ... Don't you miss me?", "I'm glad. You are following your dreams.", 'Monthly password for wap. mobsi.com is 391784. Use your wap phone not PC.', 'I get out of class in bsn in like <#> minutes, you know where advising is?', 'WHORE YOU ARE UNBELIEVABLE.', 'Send this to ur friends and receive something about ur voice..... How is my speaking expression? 1.childish 2.naughty 3.Sentiment 4.rowdy 5.ful of attitude 6.romantic 7.shy 8.Attractive 9.funny <#> .irritating <#> .lovable. reply me..', 'Latest Nokia Mobile or iPOD MP3 Player +£400 proze GUARANTEED! Reply with: WIN to 83355 now! Norcorp Ltd.£1,50/Mtmsgrcvd18+', 'Ok anyway no need to change with what you said', 'Except theres a chick with huge boobs.', "I take it we didn't have the phone callon Friday. Can we assume we won't have it this year now?")]