UVALive 6195 The Dueling Philosophers Problem Solution Report

Problem: You are given a set of papers, some of which cite others. A paper that is cited must appear before every paper that cites it. Output 2 if more than one valid ordering exists, 1 if exactly one exists, and 0 if no valid ordering exists.

Solution: a plain topological sort (Kahn's algorithm). If, at any step, the queue of zero-in-degree vertices holds more than one vertex, two papers could be swapped and the ordering is not unique (answer 2); if not every vertex gets processed, there is a citation cycle and no ordering exists (answer 0); otherwise the ordering is unique (answer 1). One trap: the bound on m given in the problem statement is missing a zero, so the edge array has to be sized accordingly (hence edge[maxn*10]); that cost me an entire afternoon. A real disaster.

//time 162 MS
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <iostream>
#include <queue>
using namespace std;
const int maxn = 50015;
struct Edge{
	int v,next;
}edge[maxn*10];
int head[maxn],ind[maxn],en;
void addedge(int u,int v)
{
	edge[en].v=v;
	edge[en].next=head[u];
	head[u]=en++;
}
void init()
{
	memset(head,-1,sizeof(head));
	memset(ind,0,sizeof(ind));
	en = 0;
}
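// Kahn's algorithm: returns 0 if no valid ordering exists (the graph contains a cycle),
// 1 if the ordering is unique, and 2 if more than one ordering exists.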
int toposort(int n)
{
	int u,cnt=0;
	bool flag=0;
	queue<int> q;
	for(int i=1;i<=n;i++)
		if(ind[i]==0)	q.push(i);	// start from every vertex with no incoming edge
	while(!q.empty())
	{
		cnt++;
		if(q.size()>1) flag = 1;	// more than one vertex available at once: the order is not unique
		u=q.front(),q.pop();
		for(int i=head[u];i!=-1;i=edge[i].next)
		{
			int v=edge[i].v;
			ind[v]--;
			if(ind[v]==0)q.push(v);
		}
	}
	if(cnt!=n) return 0;	// some vertex was never processed: the graph has a cycle
	if(flag==1) return 2;
	return 1;
}
int main()
{
	//freopen("in.txt","r",stdin);
	int n,m;
	int u,v;
	while(scanf("%d%d",&n,&m)&&(n+m))
	{
		init();
		for(int i = 0; i < m; i++)
		{
			scanf("%d%d",&u,&v);
			addedge(u,v);	// u must come before v in any valid order
			++ind[v];
		}
		printf("%d\n",toposort(n));
	}
	return 0;
}
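
A quick sanity check, using the input format read by main above (each case gives n and m, then m pairs u v, terminated by 0 0): feeding the program

2 1
1 2
3 0
2 2
1 2
2 1
0 0

should print 1, 2 and 0 in turn: the first case has the single valid order 1 2, the second has three papers with no citations at all (so several orders), and the third contains a citation cycle, so no valid order exists.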

