我正在尝试用 Python 构建一个自然语言处理器:先使用我在网上找到的一些代码,再根据自己的需要进行调整。但现在它无法正常运行,一直报出下面的错误:
ValueError: Found array with 0 sample(s) (shape=(0, 262)) while a minimum of 1 is required.
这是我的代码。如果代码比较凌乱,我很抱歉——我只是直接从网上复制过来的:
from collections import Counter
import pandas
from nltk.corpus import stopwords
import pandas as pd
import numpy
# Load the training headlines (already lower-cased in the CSV column name's
# terms) into a plain Python list.
apps = pd.read_csv('DataUse.csv')
headlines = list(apps['title_lower'])
testdata = pd.read_csv('testdata.csv')
# Vocabulary: every distinct space-separated token across all headlines.
unique_words = list(set(" ".join(headlines).split(" ")))
def make_matrix(headlines, vocab):
    """Build a bag-of-words count matrix for ``headlines``.

    Args:
        headlines: Iterable of headlines. Each item is either a string,
            which is split on single spaces, or an already-tokenized
            iterable of words.
        vocab: Iterable of words. The result has one column per entry,
            in the given order.

    Returns:
        A pandas.DataFrame with one row per headline and one column per
        vocab word; each cell is the number of times that word occurs in
        that headline. With no headlines the frame is empty but still has
        the vocab columns.
    """
    # Materialize once: vocab is iterated for every row and then reused as
    # the column labels.
    vocab = list(vocab)
    matrix = []
    for headline in headlines:
        # Counter over a raw string would count characters, so tokenize
        # first. Splitting on " " mirrors the tokenization the script uses
        # when it builds its vocabulary.
        words = headline.split(" ") if isinstance(headline, str) else headline
        counter = Counter(words)
        # Turn the word counts into a matrix row ordered by the vocab.
        matrix.append([counter.get(w, 0) for w in vocab])
    # Label columns from the vocab argument (not a module-level global), and
    # pass them to the constructor so an empty matrix still gets its columns.
    return pandas.DataFrame(matrix, columns=vocab)
print(make_matrix(headlines, unique_words))
import re
# Matches any character that is not a word character, whitespace, or digit.
# Compiled once so the same pattern object is reused for every headline.
_NON_WORD_CHARS = re.compile(r'[^\w\s\d]')
# Lowercase each headline, then drop every character matched above.
new_headlines = [_NON_WORD_CHARS.sub('', text.lower()) for text in headlines]
#