1、fasttext主要参数
lr: 学习率
dim: 隐藏层词向量维数
ws: 背景词窗口大小
minCount: 最小词频
wordNgrams: n-gram最大长度
loss: 损失函数（可选 ns、hs、softmax、ova）
2、fasttext实验主要代码
import fasttext
import pandas as pd
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
# Load the tab-separated training set; the code below reads its 'text' and
# 'label' columns.
train_df = pd.read_csv('./train_set.csv', sep='\t')

# fastText's supervised mode recognises the target class by this prefix.
LABEL_PREFIX = '__label__'
train_df['label_ft'] = LABEL_PREFIX + train_df['label'].astype(str)

# Hold out 20% of the rows for validation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_valid, y_train, y_valid = train_test_split(
    train_df[['text', 'label_ft']],
    train_df['label'],
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

# fastText trains from a plain text file, so dump the training split with
# neither the index nor a header row.
X_train.to_csv('train.csv', index=False, header=False, sep='\t')

model = fasttext.train_supervised(
    'train.csv',
    lr=0.05,
    dim=3000,
    wordNgrams=3,
    verbose=2,
    minCount=1,
    epoch=50,
    loss="hs",
)

# predict() returns (labels, probabilities); take the top label and strip only
# the leading prefix so that labels which themselves contain '__' survive.
val_pred = [
    model.predict(text)[0][0][len(LABEL_PREFIX):]
    for text in X_valid['text']
]

# Compare as strings because the predicted labels are strings.
print(f1_score(y_valid.astype(str), val_pred, average='macro'))