NLP实验

实验一

# 文件路径:grammar_checker.py

import language_tool_python

# Initialize the LanguageTool checker for American English ('en-US').
# It is created once at module level and reused by check_grammar().
tool = language_tool_python.LanguageTool('en-US')

def check_grammar(sentence: str) -> bool:
    """
    Report whether the checker finds no problems in a sentence.

    Args:
        sentence (str): The sentence to check.

    Returns:
        bool: True when the checker reports no matches for the sentence,
        False when it reports at least one.
    """
    issues = tool.check(sentence)
    return not issues

def main():
    """Prompt for one sentence and print whether it passes the grammar check."""
    # Read the sentence to check from the user.
    user_text = input("请输入一句话: ")

    # Choose the verdict message from the grammar check result, then print it.
    if check_grammar(user_text):
        verdict = "这句话符合英语语法规范。"
    else:
        verdict = "这句话不符合英语语法规范。"
    print(verdict)

# Run the interactive check only when this file is executed as a script.
if __name__ == "__main__":
    main()
 

实验3

import nltk
from nltk import CFG
from nltk.parse import RecursiveDescentParser
import pydot
from nltk.tree import Tree

# Define an extended context-free grammar for the example sentence.
# NOTE: 'pajamas' appears both as a bare NP and as an N, and none of the
# rules below is left-recursive (PP attachment is written as `V NP PP` and
# `Det N PP`).
grammar = CFG.fromstring("""
    S -> NP VP
    PP -> P NP
    NP -> Det N | Det N PP | 'I' | 'pajamas'
    VP -> V NP | V NP PP | V
    Det -> 'an' | 'my'
    N -> 'elephant' | 'pajamas'
    V -> 'shot'
    P -> 'in'
""")

# Create the recursive-descent parser for this grammar.
parser = RecursiveDescentParser(grammar)

def get_best_parse_tree(sentence):
    """
    Return the first parse tree the parser yields for a sentence.

    The sentence is split on whitespace into tokens before parsing. No
    ranking is applied: the first tree produced is returned, or None when
    the parser produces no tree at all.
    """
    tokens = sentence.split()
    return next(iter(parser.parse(tokens)), None)

def tree_to_dot(tree):
    """
    Serialize a parse tree to Graphviz DOT source.

    Nodes are numbered ``node0``, ``node1``, ... in pre-order. Any object
    exposing a callable ``label`` attribute is treated as an internal node:
    its label is emitted and its children are obtained by iterating over it.
    Everything else (plain strings, but also numbers or tuples) is treated as
    a leaf and labelled with its ``str()`` form.

    Args:
        tree: Root of the tree (or a single leaf value).

    Returns:
        str: DOT source for a ``digraph G`` describing the tree.
    """
    lines = ["digraph G {"]
    next_id = 0

    def escape(text):
        # Backslash and double quote are special inside a DOT quoted label;
        # escape the backslash first so the quote's escape is not doubled.
        return str(text).replace("\\", "\\\\").replace('"', '\\"')

    def emit(node):
        """Append the DOT lines for node's subtree and return node's DOT name."""
        nonlocal next_id
        name = f"node{next_id}"
        next_id += 1

        label_of = getattr(node, "label", None)
        if callable(label_of):
            lines.append(f'    {name} [label="{escape(label_of())}"];')
            for child in node:
                # The child's own lines are emitted before the edge to it.
                child_name = emit(child)
                lines.append(f"    {name} -> {child_name};")
        else:
            lines.append(f'    {name} [label="{escape(node)}"];')
        return name

    emit(tree)
    lines.append("}")
    return "\n".join(lines)

def draw_tree(tree, filename):
    """
    Render a parse tree to a PNG image.

    The tree is converted to DOT source with tree_to_dot(), parsed by pydot,
    and the first resulting graph is written to *filename* as a PNG.
    """
    graphs = pydot.graph_from_dot_data(tree_to_dot(tree))
    graphs[0].write_png(filename)

# Sentence to analyse
input_sentence = "I shot an elephant in my pajamas"

# Take the first parse tree the parser produces for it
best_tree = get_best_parse_tree(input_sentence)

# Report a missing parse first; otherwise print the tree and save it as an image
if not best_tree:
    print("No valid parse tree found.")
else:
    print(best_tree)
    # Save the parse tree as an image file
    draw_tree(best_tree, "parse_tree.png")
    print("Parse tree saved as parse_tree.png")
 

实验4

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Weather lookup table: maps a day label (the class predicted by the model)
# to the weather description returned to the user.
weather_data = {
    "今天": "晴朗",
    "明天": "小雨",
    "后天": "多云"
}

# Dataset preparation
class WeatherDataset(Dataset):
    """Dataset pairing each feature row in X with the label at the same index in y."""

    def __init__(self, X, y):
        """Store the feature collection X and the label collection y as given."""
        self.X, self.y = X, y

    def __len__(self):
        """Return the number of samples, taken from the length of X."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the (features, label) pair stored at position idx."""
        features = self.X[idx]
        label = self.y[idx]
        return features, label

# LSTM model
class LSTMModel(nn.Module):
    """
    Single-layer LSTM followed by a linear layer on the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        output_size: Number of values produced per sequence.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """
        Run the LSTM over x and project its final time step.

        Args:
            x: Tensor of shape (batch, seq_len, input_size); the batch
                dimension comes first because the LSTM uses batch_first=True.

        Returns:
            Tensor of shape (batch, output_size).
        """
        # nn.LSTM starts from all-zero hidden and cell states when none are
        # passed, and builds them with the input's dtype and device. Relying on
        # that default keeps the model usable after .double()/.half(), where
        # hand-made float32 zero states would no longer match the input.
        out, _ = self.lstm(x)
        # Keep only the output at the last time step of each sequence.
        return self.fc(out[:, -1, :])

# Data preprocessing: the training questions and the day each one asks about
sentences = ["今天天气怎么样", "明天天气怎么样", "后天天气怎么样"]
labels = ["今天", "明天", "后天"]

# Encode sentences character by character as integer ids
tokenizer = LabelEncoder().fit(list("今天天气怎么样明天后天"))
encoded_sentences = [tokenizer.transform(list(text)) for text in sentences]

# Encode the day labels as integer class ids
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Right-pad every encoded sentence to the longest length and stack into a matrix.
# NOTE(review): the pad value is 0, which is also the id of a real character.
max_length = max(map(len, encoded_sentences))
X = np.array([
    np.pad(ids, (0, max_length - len(ids)), 'constant')
    for ids in encoded_sentences
])
y = np.array(encoded_labels)

# The data set holds exactly one example per class. A hold-out split
# (test_size=0.2 of 3 samples rounds up to 1 held-out sample) would remove an
# entire class from training, so the model could never learn to predict it even
# though the dialogue system is queried about all three days. Train on every
# example instead.
X_train, y_train = X, y
# No samples are held out; the names are kept so they stay defined.
X_test, y_test = X[:0], y[:0]

train_dataset = WeatherDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)

# Hyperparameters
input_size = len(tokenizer.classes_)  # one-hot width: one slot per distinct character the tokenizer was fitted on
hidden_size = 16
output_size = len(label_encoder.classes_)  # one output per day label
num_epochs = 100
learning_rate = 0.01

# Instantiate the model, the loss function and the optimizer
model = LSTMModel(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
for epoch in range(num_epochs):
    for X_batch, y_batch in train_loader:
        # One-hot encode the character ids; class targets must be int64.
        X_batch = nn.functional.one_hot(X_batch.long(), num_classes=input_size).float()
        y_batch = y_batch.long()

        # Clear old gradients, then forward pass, backward pass and update.
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

    # Every 10th epoch, report the loss of the last batch processed in it.
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

print("训练完成")

# Dialogue system function
def dialogue_system(input_sentence):
    """
    Answer a weather question by predicting which day it asks about.

    The sentence is encoded character by character, one-hot encoded, and
    classified by the trained model; the predicted day label is then looked
    up in weather_data.

    Args:
        input_sentence: Question text. Every character must be one the
            tokenizer was fitted on.

    Returns:
        The weather_data entry for the predicted day.

    Raises:
        ValueError: If the sentence contains a character the tokenizer has
            not seen (raised by the tokenizer's transform).
    """
    encoded_sentence = tokenizer.transform(list(input_sentence))
    # Right-pad short inputs to the training length. Longer inputs are passed
    # through unpadded: np.pad rejects a negative width, and the LSTM accepts
    # any sequence length.
    pad_width = max(0, max_length - len(encoded_sentence))
    padded_sentence = np.pad(encoded_sentence, (0, pad_width), 'constant')
    # one_hot requires int64 indices; convert the array directly and add the
    # batch dimension instead of building a tensor from a list of ndarrays.
    token_ids = torch.as_tensor(padded_sentence, dtype=torch.long).unsqueeze(0)
    input_tensor = nn.functional.one_hot(token_ids, num_classes=input_size).float()
    # Inference only: no gradients are needed.
    with torch.no_grad():
        output = model(input_tensor)
    predicted_label = torch.argmax(output, dim=1).item()
    predicted_day = label_encoder.inverse_transform([predicted_label])[0]
    return weather_data[predicted_day]

# Exercise the dialogue system with the same three questions used for training
test_sentences = ["今天天气怎么样", "明天天气怎么样", "后天天气怎么样"]
for sentence in test_sentences:
    print(f"问:{sentence}")
    print(f"答:{dialogue_system(sentence)}")
 

  • 3
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值