nlp
smallTutou
这个作者很懒,什么都没留下…
展开
-
word2vec模型训练
import numpy as np from collections import defaultdict class word2vec(): def __init__(self): self.n = settings['n'] self.lr = settings['learning_rate'] self.epochs = settings['epochs'] self.window = settings['window_s… 原创 2022-04-15 16:00:33 · 851 阅读 · 0 评论 -
利用tf-idf提取关键词
##读取.txt并保存为.txt from jieba import analyse f = open("result.txt", "w", encoding="utf-8") for line in open("text.txt",encoding="utf-8"): tfidf = analyse.extract_tags text = line keywords = tfidf(text, topK=4, withWeight=False, allowPOS=()) … 原创 2022-04-08 15:27:13 · 1693 阅读 · 0 评论 -
单词拼写纠错
from nltk.corpus import reuters from docx import Document from nltk import sent_tokenize, word_tokenize import re import numpy as np #词典库 vocab = [] for line in open("data/spell-testset1.txt"): items = line.split(":") item = items[0].strip() v… 原创 2022-04-08 15:20:18 · 100 阅读 · 0 评论 -
词性标注pos_tagging
import numpy as np #1.构造词典、词性库 tag2id,id2tag = {},{} word2id ,id2word = {},{} for line in open("traindata.txt"): items = line.split("/") tag,word = items[1].rstrip(),items[0] if tag not in tag2id: tag2id[tag] = len(tag2id) i… 原创 2022-04-08 15:14:36 · 286 阅读 · 0 评论