UVALive 6195 —— The Dueling Philosophers Problem

Problem link: https://icpcarchive.ecs.baylor.edu/index.php?option=com_onlinejudge&Itemid=8&page=show_problem&problem=4206


Problem: there are n items and m ordering constraints, each of the form "a must come before b". Arrange the n items into a sequence that respects every constraint: if exactly one such ordering exists, print 1; if there are several, print 2; if none exists (the constraints contradict each other), print 0. For example, the constraints 1→2 and 2→1 contradict each other (answer 0), while 1→2 together with 1→3 leaves 2 and 3 unordered relative to each other (answer 2).

Approach: if the graph contains a strongly connected component with more than one vertex, print 0. Any two vertices a and b inside such a component can reach each other, so a would have to come before b and b before a at the same time; that is a contradiction, so no valid ordering exists.

Otherwise run a topological sort. Whenever more than one vertex with in-degree 0 is available at the same time, print 2: there is no ordering constraint between those vertices, so either one can be placed first, which already gives more than one valid ordering. If the queue never holds more than one candidate, the ordering is forced at every step and the answer is 1.

Note: the bound on m stated in the problem is missing a zero, so the edge array below is sized generously (maxm = 500005).
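
As a side note, Tarjan is not strictly necessary here: Kahn's algorithm already detects a cycle (some vertices never reach in-degree 0), and the order is unique exactly when the queue never holds more than one vertex. Below is a minimal standalone sketch of that single-pass idea; classify is a made-up helper and the sketch is only an illustration, not the accepted code, which follows further down.

#include<bits/stdc++.h>
using namespace std;

// Returns 0 (no valid order), 1 (exactly one order) or 2 (several orders)
// for vertices 1..n and constraint edges a -> b.
int classify(int n, const vector<pair<int,int>>& edges)
{
	vector<vector<int>> adj(n + 1);
	vector<int> indeg(n + 1, 0);
	for (auto &e : edges) {
		adj[e.first].push_back(e.second);
		indeg[e.second]++;
	}
	queue<int> q;
	for (int v = 1; v <= n; v++)
		if (indeg[v] == 0) q.push(v);
	bool unique_order = true;
	int processed = 0;
	while (!q.empty()) {
		if (q.size() > 1) unique_order = false;	// two candidates for the next position
		int u = q.front(); q.pop();
		processed++;
		for (int v : adj[u])
			if (--indeg[v] == 0) q.push(v);
	}
	if (processed < n) return 0;	// vertices left over can only lie on a cycle
	return unique_order ? 1 : 2;
}

int main()
{
	vector<pair<int,int>> edges = {{1, 2}, {1, 3}};
	printf("%d\n", classify(3, edges));	// prints 2: nothing orders 2 and 3
	return 0;
}

The accepted solution below keeps the two checks separate instead: Tarjan for the cycle test, then a modified Kahn pass for the uniqueness test.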


#include<bits/stdc++.h>
using namespace std;
const int maxn = 1100;
const int maxm = 500005;
int n, m;
int head[maxn], edgenum;
int Time, taj, top;
int DFN[maxn], Low[maxn], Stack[maxn];
bool Instack[maxn];
int in[maxn];	// in-degree of each vertex, for the topological sort

struct Edge
{
	int from, to, next;
}edge[maxm];
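// Static adjacency list: add(u, v) appends the directed edge u -> v,
// and head[u] points to the most recently added edge out of u.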

void add(int u, int v)
{
	edge[edgenum].from = u;
	edge[edgenum].to = v;
	edge[edgenum].next = head[u];
	head[u] = edgenum++;
}

vector<int>bcc[maxn];
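// Tarjan's SCC algorithm: after it has been run from every unvisited vertex,
// taj is the number of strongly connected components and bcc[1..taj] holds them.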
void Tarjan(int u)
{
	DFN[u] = Low[u] = ++Time;
	Stack[top++] = u;
	Instack[u] = true;
	for(int i = head[u];i != -1;i = edge[i].next)
	{
		int v = edge[i].to;
		if(DFN[v] == -1)
		{
			Tarjan(v);
			Low[u] = min(Low[u], Low[v]);
		}
		else if(Instack[v] && Low[u] > DFN[v])
			Low[u] = DFN[v];
	}
	if(Low[u] == DFN[u])	// u is the root of an SCC: pop the whole component off the stack
	{
		taj++;
		bcc[taj].clear();
		while(1)
		{
			int now = Stack[--top];
			Instack[now] = false;
			bcc[taj].push_back(now);
			if(now == u)	break;
		}
	}
}

void init()
{
	memset(head, -1, sizeof head);
	edgenum = 0;
	memset(DFN, -1, sizeof DFN);
	memset(Instack, false, sizeof Instack);
	Time = taj = top = 0;
}

queue<int>Q;
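// Kahn's topological sort: returns true iff the topological order is unique,
// i.e. there is never more than one vertex with in-degree 0 to choose from.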
bool toposort()
{
	while(!Q.empty())	Q.pop();
	int cnt = 0;
	for(int i = 1;i<=n;i++)
	{
		if(in[i] == 0)
		{
			cnt++;
			in[i]--;	// mark as enqueued (drops to -1)
			Q.push(i);
		}
	}
	if(cnt > 1)	return false;	// more than one way to choose the first vertex
	while(!Q.empty())
	{
		int u = Q.front();
		Q.pop();
		cnt = 0;
		for(int i = head[u];i != -1;i = edge[i].next)
		{
			int v = edge[i].to;
			in[v]--;
			if(in[v] == 0)
			{
				in[v]--;
				Q.push(v);
				cnt++;
			}
		}
		if(cnt > 1)	return false;	// popping u freed more than one vertex at once
	}
	return true;
}

int main()
{
	while(~scanf("%d%d", &n, &m))
	{
		if(n == 0 && m == 0)	break;
		init();
		memset(in, 0, sizeof in);
		while(m--)
		{
			int u, v;
			scanf("%d%d", &u, &v);
			add(u, v);
			in[v]++;
		}
		for(int i = 1;i<=n;i++)
		{
			if(DFN[i] == -1)
			Tarjan(i);
		}
		if(taj != n)	// fewer SCCs than vertices: some component has size > 1, i.e. a cycle
		{
			printf("0\n");
			continue;
		}
		if(toposort())	printf("1\n");
		else	printf("2\n");
		
	}
	return 0;
}
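
For a quick sanity check: the program reads each case as n and m followed by m pairs a b, and stops at "0 0", so a hand-made input like the following (not the official sample) should print 1, 2 and 0 in that order:

3 2
1 2
2 3
3 2
1 2
1 3
2 2
1 2
2 1
0 0

The first case is the chain 1 -> 2 -> 3 (unique order), the second leaves 2 and 3 unordered, and the third contains the cycle 1 -> 2 -> 1.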

