program qlearning
use iso_fortran_env
use omp_lib
implicit none
character(4) :: n_string
character(4) :: mu_string
character(4) :: advstate_string
character(4) :: rndshow_string
character(4) :: taupr_string
character(4) :: ah_string
character(4) :: showtwo_string
character(10) :: rmax_string
character(10) :: gridstep_string
character(10) :: seq_string
character(10) :: grid_string
character(50) :: filename
character(50) :: filename_grid
character :: bsp = char(8)
! initialize the parameters
integer, parameter :: m = 15 ! number of equally-spaced price points
integer, parameter :: maxt = 1e9 ! maximum number of periods
integer, parameter :: norm = 100000 ! number of periods for “forward” simulation after convergence
real(8), parameter :: amin = 1.0 ! minimum value price grid
real(8), parameter :: amax = 2.1 ! maximum value price grid
real(8), parameter :: a0 = 0 ! aggregate demand
real(8), parameter :: c = 1 ! marginal cost (same for all firms)
real(8), parameter :: sigma = 0.01 ! scale parameter random draws
real(8) :: a ! vertical differentiation parameter (same for all firms)
! initialize all other variables
real(8) :: alpha ! learning rate
real(8) :: beta ! experimentation rate
real(8) :: gamma ! proportion of consumers who is shown k products
real(8) :: delta ! discount factor
real(8) :: adv ! adv
real(8) :: mu ! product differentiation parameter
real(8) :: taupr ! probability of losing advantage if other DPDP conditions are satisfied
real(8) :: ah ! demand shock (set to non-zero in case of stochastic demand)
integer :: n ! number of firms
integer :: advstate ! indicator for whether dynamic “advantage” is state variable as well
integer :: rndshow ! indicator for whether buy-box is random
integer :: showtwo ! show two firms in buy-box (in case of three firms)
integer :: rmax ! repetitions
integer :: gridstep, seq, s, gridmax, stat, t, i, j, l, r, temp, cnt, ct, g, mxt, CycleLength,&
hh, tied(m), xx, state, statePrime, iPeriod, idum, iv(32), iy, idum2, numCores
real(8) :: newq, oldq, rvn, uniform_random_value, uniform_random_value1,&
uniform_random_value2, qnb, qnb2, qb, qb2, p1, p2, p3, phi, uniform_random_value3, epst,&
start_grid, finish_grid, start, finish, tmr, mmax
real(8), dimension(3,1) :: p, plast, r1
real(8), dimension(3,100) :: temp2
real(8), dimension(100,1) :: VisitedStates
real(8), dimension(1,norm) :: firmadvantage
real(8), dimension(m,1) :: pi0, pi00, pi0a
real(8), dimension(m) :: qi0, qi0a, aa
real(8), dimension(3) :: aset
integer, dimension(1) :: tp, firmadv, LastObservedAdvantage, LastObservedTP
! initialize allocatable variables
integer, dimension(😃, allocatable :: mxtr, clr, clr0, clr1, clr2
integer(kind=int64), dimension(😃, allocatable :: tr ! make sure the largest possible value is big enough!
integer, dimension(:😅, allocatable :: aPrime, LastObservedPrices, strategy, strategyPrime, strat0, lp, mantle
real(8), dimension(😃, allocatable :: psr, rvnr, csr, pisr, qsr, symmr, symmr1, pisr1, pisr0, pir1, pir0
real(8), dimension(:😅, allocatable :: PriceCycle, qt, pit, qbt, qnbt, maxValQ, p0, maxVQ0, price, quantity, profit, &
revenue, grid, pr, pir, revenue_tot, quantity_tot, profit_tot, price_sw, consumersurplus, oldPrice, newPrice
real(8), dimension(:,:😅, allocatable :: q
do i = 1, m
aa(i) = amin+(i-1)*(amax-amin)/(m-1)
end do
!2 0.25 0 0 0 0 0 1000 1 71 grid1
n=2
mu=0.25
advstate=0
taupr=0
rndshow=0
ah=0
showtwo=0
rmax=10!00
gridstep=1
seq=71
grid_string=“grid2”
s = (advstate*(n-1)+1)*m**n ! cardinality of state space
a = 2 ! baseline value of a
aset = (/ a-ah, a, a+ah /) ! possible values of a
allocate(q(s,m,n),aPrime(n,1),LastObservedPrices(n,1),qt(n,1),pit(n,1),qbt(n,1),qnbt(n,1),maxValQ(s,n),&
strategy(s,n),strategyPrime(s,n),strat0(n,1),p0(n,1),maxVQ0(n,1),lp(n,1),mantle(n,1),&
price(n,norm), quantity(n,norm), profit(n,norm), revenue(n,norm),revenue_tot(1,norm),pr(3,rmax),&
pir(3,rmax),psr(rmax),rvnr(rmax),csr(rmax),pisr(rmax),qsr(rmax),tr(rmax),mxtr(rmax),clr(rmax),&
clr0(rmax),clr1(rmax),clr2(rmax),symmr(rmax),pisr1(rmax),pisr0(rmax),pir1(rmax),pir0(rmax),symmr1(rmax),&
quantity_tot(1,norm),profit_tot(1,norm),price_sw(1,norm),consumersurplus(1,norm),oldPrice(n,1),&
newPrice(n,1))
filename_grid = trim(grid_string) // ‘.txt’
open(12, file=filename_grid)
! first determine length of file
gridmax=-1
stat=0
do while (stat==0)
gridmax=gridmax+1
read(12,*,iostat=stat)
end do
allocate(grid(5,gridmax))
close(12)
! then read the file
open(12, file=filename_grid)
read(12,*) grid
if (gridstep==0) then ! use entire grid
gridstep = gridmax
seq = 1
seq_string = ‘1’
end if
if (taupr0) then
taupr_string = ‘0’
else
taupr_string = taupr_string(3:4)
end if
if (ah0) then
ah_string = ‘0’
else
ah_string = ah_string(3:4)
end if
filename = ‘output_’ // ‘.csv’
open (10, file=filename, status=‘unknown’)
write(10,*)“grid, r, periods, time, notconv, n, advstate, rndshow, showtwo, taupr, ah, alpha, beta, gamma, mu, adv, &
delta, price1,price2, price3, pricesw, revenue, sales, cs, profits, cyclelength, prc1c, prc2c, prcsymm1c, &
indprof1c, highfirm1c, cyclelengthproper, indprof0c, highfirm0c”
! grid loop
do g = (seq-1)gridstep+1,seqgridstep;
write(*, fmt="(1x,a,i0)", advance="yes")"Grid = ", g
alpha = grid(1,g)
beta = grid(2,g)
gamma = grid(3,g)
adv = grid(4,g)
delta = grid(5,g)
! determine initial Q-function
if (n==2) then
do j = 1, m
do i = 1, m
p1 = aa(j)
p2 = aa(i)
if (p1==p2) then
phi=0.5
else if (p1<p2) then
phi=1.0
else
phi=0.0
end if
qb = exp((a-p1)/mu)/(exp((a-p1)/mu)+exp(a0/mu))
qnb = exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p2)/mu)+exp(a0/mu))
pi0(i,1) = (p1-c)*(phi*gamma*qb+(1-gamma)*qnb)
end do
qi0(j) = (sum(pi0)/m)/(1-delta)
end do
else if ((n==3) .and. (showtwo==1)) then ! n==3, two are shown in buy-box
do j = 1, m
do i = 1, m
do l = 1, m
p1 = aa(j)
p2 = aa(i)
p3 = aa(l)
! note that phi includes qb
if ((p1==p2) .and. (p1==p3)) then ! 0.67 probability of getting the buy-box (with firm 2 or 3)
phi = (1./3.)*exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p2)/mu)+exp(a0/mu))+ &
(1./3.)*exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p3)/mu)+exp(a0/mu))
else if ((p1==p2) .and. (p1>p3)) then ! 0.5 probability of getting the buy-box (with firm 3)
phi = (1./2.)*exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p3)/mu)+exp(a0/mu))
else if ((p1==p2) .and. (p1<p3)) then ! 1.0 probability of getting the buy-box (with firm 2)
phi = exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p2)/mu)+exp(a0/mu))
else if ((p1==p3) .and. (p1>p2)) then ! 0.5 probability of getting the buy-box (with firm 2)
phi = (1./2.)*exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p2)/mu)+exp(a0/mu))
else if ((p1==p3) .and. (p1<p2)) then ! 1.0 probability of getting the buy-box (with firm 3)
phi = exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p3)/mu)+exp(a0/mu))
else if ((p1<p2) .and. (p2<p3)) then ! 1.0 probability of getting the buy-box (with firm 2)
phi = exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p2)/mu)+exp(a0/mu))
else if ((p1<p3) .and. (p3<p2)) then ! 1.0 probability of getting the buy-box (with firm 3)
phi = exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p3)/mu)+exp(a0/mu))
else if ((p1<p2) .and. (p2==p3)) then ! 1.0 probability of getting the buy-box (with firm 2 or 3)
phi =(1./2.)*exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p2)/mu)+exp(a0/mu))+ &
(1./2.)*exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p3)/mu)+exp(a0/mu))
else if ((p1<p2) .and. (p3<p1)) then ! 1.0 probability of getting the buy-box (with firm 3)
phi = exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p3)/mu)+exp(a0/mu))
else if ((p1<p3) .and. (p2<p1)) then ! 1.0 probability of getting the buy-box (with firm 2)
phi = exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p2)/mu)+exp(a0/mu))
else if ((p1>p2) .and. (p1>p3)) then ! 0.0 probability of getting the buy-box
phi = 0.0
end if
qnb = exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p2)/mu)+exp((a-p3)/mu)+exp(a0/mu))
pi0(l,1) = (p1-c)*(phi*gamma+(1-gamma)*qnb)
end do
pi00(i,1)= sum(pi0)/m
end do
qi0(j) = (sum(pi00)/m)/(1-delta)
end do
else ! n==3
do j = 1, m
do i = 1, m
do l = 1, m
p1 = aa(j)
p2 = aa(i)
p3 = aa(l)
if (p1==p2 .and. p1==p3) then
phi = 1./3.
else if (p1==p2 .and. p1<p3) then
phi = 0.5
else if (p1==p3 .and. p1<p2) then
phi = 0.5
else if (p1<p2 .and. p1<p3) then
phi = 1.0
else
phi = 0.0
end if
qb = exp((a-p1)/mu)/(exp((a-p1)/mu)+exp(a0/mu))
qnb = exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p2)/mu)+exp((a-p3)/mu)+exp(a0/mu))
pi0(l,1) = (p1-c)*(phi*gamma*qb+(1-gamma)*qnb)
end do
pi00(i,1)= sum(pi0)/m
end do
qi0(j) = (sum(pi00)/m)/(1-delta)
end do
end if
do j = 1, n
maxVQ0(j,1) = maxval(qi0)
strat0(j,1) = maxloc(qi0,dim=1)
end do
call cpu_time(start)
do r = 1, rmax
! reseeds the random number generator using ten times negative of grid number
idum2 = 123456789
iv=0
iy=0
idum=-10*r
do j = 1, n
q(1:s,1:m,j)=spread(qi0, 1, s)
maxValQ(1:s,j) = spread(maxVQ0(j,1),1,s)
strategyPrime(1:s,j) = spread(strat0(j,1),1,s)
end do
strategy = strategyPrime
t = 1
cnt = 0
qt=0
pit=0
p=0
plast=0
lp=0
mxt=0
mantle=0 ! do this before main loop
firmadv=0 ! do this before main loop
! determine initial state
uniform_random_value = ran2(idum, iv, iy, idum2)
state = int(s * uniform_random_value + 1)
epst=1 ! initialize epsilon
do while (t <= maxt .and. cnt < norm)
if (t==maxt) then
print*, "maximum number of periods reached"
mxt=1
end if
if (ah>0) then ! determine stochastic demand realization
uniform_random_value = ran2(idum, iv, iy, idum2)
a = aset(int(3 * uniform_random_value + 1))
else
a = aset(2)
end if
! determine actions firms
do j = 1, n
uniform_random_value = ran2(idum, iv, iy, idum2)
if (uniform_random_value <epst) then ! exploration mode
uniform_random_value = ran2(idum, iv, iy, idum2)
aPrime(j,1) = int(m * uniform_random_value + 1)
else ! exploitation model
aPrime(j,1) = strategyPrime(state,j) ! take best action ("greedy")
end if
end do
epst = epst*exp(-beta)
! get prices using action space
do j = 1, n
p(j,1) = aa(aPrime(j,1))
end do
! random draw to determine if advantage is exogenously taken away or not in case of DPDP (used in case of "richer form" of DPDP)
uniform_random_value = ran2(idum, iv, iy, idum2)
! dynamic price-directed prominence
if (adv > 0) then ! if PDP is in effect, skip all this
mantle=0
firmadv=0
if (t>1) then
if ((n==3) .and. (showtwo==1)) then
mantle=1 ! default is now that you get the mantle, note that taupr option not enabled for showtwo==1
! tp is now firm who doesn't get the buy-box!
do j = 1,n
if (p(j,1)>plast(j,1)) then ! if charging more than in last period, you do not get the mantle
mantle(j,1)=0
end if
end do
mantle(tp(1),1)=0 ! firm that wasn't displayed previously firm never gets the mantle
else
if ((p(tp(1),1) <= plast(tp(1),1) .and. uniform_random_value <= 1-taupr)) then
mantle(tp(1),1)=1
end if
end if
end if
end if
! determine who got the buy box
uniform_random_value1 = ran2(idum, iv, iy, idum2)
uniform_random_value2 = ran2(idum, iv, iy, idum2)
r1(1,1) = sigma*uniform_random_value1*(amax-amin)/(m-1)
r1(2,1) = sigma*uniform_random_value2*(amax-amin)/(m-1)
if (n==3) then
uniform_random_value3 = ran2(idum, iv, iy, idum2)
r1(3,1) = sigma*uniform_random_value3*(amax-amin)/(m-1)
end if
if (t==1) then ! no firm can have the advantage in the first period so mantle will be 0
if ((n==3) .and. (showtwo==1)) then
tp = minloc(-p(1:n,1)+r1(1:n,1))
else
tp = minloc(p(1:n,1)-r1(1:n,1)) ! rv-adjusted low price firm gets displayed this period
end if
else if ((n==3) .and. (showtwo==1)) then
tp = minloc(-p(1:n,1)+r1(1:n,1)+adv*mantle(1:n,1))
else if ((mantle(tp(1),1)==1) .and. (p(tp(1),1) > minval(p(1:n,1)+adv))) then
tp = minloc(p(1:n,1)-r1(1:n,1)) ! firm that has the mantle got strictly ADV-undercut and so choose the rv-adjusted low price firm to be displayed
else if (mantle(tp(1),1)==0) then ! mantle==0 always if adv=0, from code above
tp = minloc(p(1:n,1)-r1(1:n,1)) ! the displayed firm from previous period lost the mantle, so just show the rv-adjusted low-price firm
! if we reach this and didn't encounter the relevant case, then the displayed firm didn't change so tp(1) doesn't change either
end if
if(rndshow==1) then
tp=minloc(-r1(1:n,1))
end if
if ((n==3) .and. (showtwo==1)) then
lp(1:n,1)=1
lp(tp(1),1)=0
else
lp(1:n,1)=0
lp(tp(1),1)=1
end if
! determine new state s(t+1)
firmadv=advstate*tp(1)
! note: in case of n=3 and showtwo=1, this will be the firm that is *not* displayed
temp = aPrime(1,1)
do j = 2, n
temp = temp + (aPrime(j,1) - 1)*(m**(j-1))
end do
if (advstate>0) then
statePrime = (firmadv(1)-1)*(m**n)+temp !
else
statePrime = temp
end if
! determine market shares and profits
do j = 1, n
qbt(j,1) = exp((a-p(j,1))/mu)/(sum(lp(1:n,1)*exp((a-p(1:n,1))/mu))+exp(a0/mu))
qnbt(j,1) = exp((a-p(j,1))/mu)/(sum(exp((a-p(1:n,1))/mu))+exp(a0/mu))
qt(j,1) = lp(j,1)*gamma*qbt(j,1)+(1-gamma)*qnbt(j,1)
pit(j,1) = (p(j,1)-c)*qt(j,1)
end do
! update Q-function
do j = 1, n
! Calvano et al notation
oldq = q(state,aPrime(j,1),j)
newq = oldq + alpha*(pit(j,1)+delta*maxValQ(statePrime,j)-oldq)
if (t==1 .and. advstate==1) then
! -> initial state in which no one has advantage is never reached for t>1 and so is not part of q-matrix, so updating not needed
newq = oldq
end if
q(state,aPrime(j,1),j)=newq
if (newq > maxValQ(state,j)) then
maxValQ(state,j) = newq ! update maxValQ to higher value
if (strategyPrime(state,j) /= aPrime(j,1)) then ! only if this corresponds to new action, update strategyPrime
strategyPrime(state,j) = aPrime(j,1);
end if
end if
if ((newq < maxValQ(state,j)) .and. (strategyPrime(state,j)==aPrime(j,1))) then
! updating the cell with the maxValQ to lower value, so then best action might change
! deal with ties randomly
mmax = maxval(q(state,1:m,j), dim=1) ! maximum value
tied = 0
hh = 0
do i = 1, m
if (abs(q(state,i,j)-mmax) <= epsilon(q(state,1:m,j))) then
hh = hh+1
tied(hh) = i
end if
end do
if (hh > 1) then
uniform_random_value = ran2(idum, iv, iy, idum2)
xx = int(hh * uniform_random_value + 1)
strategyPrime(state,j) = tied(xx)
else ! no ties
strategyPrime(state,j) = tied(1)
end if
maxValQ(state,j) = q(state,strategyPrime(state,j),j);
end if
end do
! determine convergence
if (all(strategyPrime(state,1:n)==strategy(state,1:n))) then
cnt = cnt+1
else
cnt = 0
end if
plast(1:n,1)=p(1:n,1)
strategy(state,1:n)=strategyPrime(state,1:n)
state=statePrime
t = t + 1
end do
LastObservedPrices = aPrime
LastObservedAdvantage = firmadv
LastObservedTP = tp(1) ! not the same as LastObservedAdvantage, which equals zero outside of smarterAI
! compute cycle length
! runs *twice* (using last actions first run as starting point to eliminate possible experimentation)
do i = 1, 2
VisitedStates = 0
iPeriod = 1
temp2 = 0
do
! determine current state from actions and buybox outcomes, starting with ones from end of Q-learning loop
temp = aPrime(1,1)
do j = 2, n
temp = temp + (aPrime(j,1) - 1)*(m**(j-1))
end do
if (advstate>0) then
temp = (firmadv(1)-1)*(m**n)+temp
end if
if ((iPeriod>1) .and. (any(VisitedStates(1:iPeriod-1,1)==temp))) then
exit
end if
do j = 1, n
temp2(j,iPeriod) = aa(aPrime(j,1))
oldPrice(j,1) = aa(aPrime(j,1))
end do
VisitedStates(iPeriod,1) = temp
aPrime = reshape(strategy(temp,1:n),(/n,1/))
if (advstate > 0) then ! no need to compute buy-box winners if not in smarterAI (because identity of winner is not in state space)
do j = 1, n
newPrice(j,1) = aa(aPrime(j,1))
end do
! random draw to determine if advantage is realized in case of DPDP (random tau)
uniform_random_value = ran2(idum, iv, iy, idum2)
mantle=0
firmadv=0
if (adv > 0) then
mantle=0
firmadv=0
if ((n==3) .and. (showtwo==1)) then ! show two firms
mantle=1 ! default is now you get the mantle, not enabled for taupr option
! tp is now firm who doesn't get the buy-box!
do j = 1,n
if (newPrice(j,1)>oldPrice(j,1)) then ! if charging more than in last period, you do not get the mantle
mantle(j,1)=0
end if
end do
mantle(tp(1),1)=0 ! highest priced firm never gets the mantle
else
if ((newPrice(tp(1),1) <= oldPrice(tp(1),1) .and. uniform_random_value <= 1-taupr)) then
mantle(tp(1),1)=1
end if
end if
end if
! determine who got the buy box
uniform_random_value1 = ran2(idum, iv, iy, idum2)
uniform_random_value2 = ran2(idum, iv, iy, idum2)
r1(1,1) = sigma*uniform_random_value1*(amax-amin)/(m-1)
r1(2,1) = sigma*uniform_random_value2*(amax-amin)/(m-1)
if (n==3) then
uniform_random_value3 = ran2(idum, iv, iy, idum2)
r1(3,1) = sigma*uniform_random_value3*(amax-amin)/(m-1)
end if
if ((n==3) .and. (showtwo==1)) then
tp = minloc(-newPrice(1:n,1)+r1(1:n,1)+adv*mantle(1:n,1))
else if ((mantle(tp(1),1)==1) .and. (newPrice(tp(1),1) > minval(newPrice(1:n,1)+adv))) then
tp = minloc(newPrice(1:n,1)-r1(1:n,1)) ! firm that has the mantle got strictly ADV-undercut and so choose the rv-adjusted low price firm to be displayed
else if (mantle(tp(1),1)==0) then
tp = minloc(newPrice(1:n,1)-r1(1:n,1)) ! the displayed firm from previous period lost the mantle, so just show the rv-adjusted low-price firm
end if ! if we reach this and didn't encounter the relevant case, then the displayed firm didn't change so tp(1) doesn't change either
if(rndshow==1) then
tp = minloc(-r1(1:n,1))
end if
firmadv=advstate*tp(1)
end if
iPeriod = iPeriod+1
end do
end do
CycleLength = iPeriod-1
allocate(PriceCycle(n,CycleLength)) ! size of PriceCycle depends on cycle length
PriceCycle(1:n,1:CycleLength)=temp2(1:n,1:CycleLength)
! forward simulate 100k periods using converged prices to determine profits, revenue, cs, etc.
ct = 1
do while (ct<norm+1)
if (ct==norm+1) exit
if (ah>0) then ! determine stochastic demand realization
uniform_random_value = ran2(idum, iv, iy, idum2)
a = aset(int(3 * uniform_random_value + 1))
else
a = aset(2)
end if
! ct==1 case
if (ct==1) then
aPrime = LastObservedPrices
do j = 1, n
price(j,ct) = aa(aPrime(j,1))
end do
tp=LastObservedTP(1) ! set displayed firm to the last one displayed in Q-loop
if ((n==3) .and. (showtwo==1)) then !
lp(1:n,1)=1
lp(tp(1),1)=0
else
lp(1:n,1)=0
lp(tp(1),1)=1
end if
! determine current state firm and new actions
firmadvantage(1,ct) = advstate*tp(1)
temp = aPrime(1,1)
do j = 2, n
temp = temp + (aPrime(j,1) - 1)*(m**(j-1))
end do
if (firmadvantage(1,ct)>0) then
temp = (firmadvantage(1,ct)-1)*(m**n)+temp
end if
do j = 1, n
aPrime(j,1) = strategy(temp,j) ! pull new actions
end do
else ! ct>1 case
! get prices using action space, using actions either pulled from end of (ct==1) above or at end of current case
do j = 1, n
price(j,ct) = aa(aPrime(j,1))
end do
mantle = 0
firmadvantage(1,ct) = 0
! random draw to determine if advantage is lost in case of DPDP (random tau)
uniform_random_value = ran2(idum, iv, iy, idum2)
if (adv > 0) then
mantle=0
firmadv=0
if ((n==3) .and. (showtwo==1)) then ! show two firms
mantle=1 ! default is now you get the mantle, note that taupr option not enabled for showtwo==1
! tp is now firm who doesn't get the buy-box!
do j = 1,n
if (price(j,ct)>price(j,ct-1)) then ! if charging more than in last period, you do not get the mantle
mantle(j,1)=0
end if
end do
mantle(tp(1),1)=0 ! firm that wasn't displayed previously never gets the mantle
else
if ((price(tp(1),ct) <= price(tp(1),ct-1) .and. uniform_random_value <= 1-taupr)) then
mantle(tp(1),1)=1
end if
end if
end if
! determine who got the buy box
uniform_random_value1 = ran2(idum, iv, iy, idum2)
uniform_random_value2 = ran2(idum, iv, iy, idum2)
r1(1,1) = sigma*uniform_random_value1*(amax-amin)/(m-1)
r1(2,1) = sigma*uniform_random_value2*(amax-amin)/(m-1)
if (n==3) then
uniform_random_value3 = ran2(idum, iv, iy, idum2)
r1(3,1) = sigma*uniform_random_value3*(amax-amin)/(m-1)
end if
if ((n==3) .and. (showtwo==1)) then
tp = minloc(-price(1:n,ct)+r1(1:n,1)+adv*mantle(1:n,1))
else if ((mantle(tp(1),1)==1) .and. (price(tp(1),ct) > minval(price(1:n,ct)+adv))) then
tp = minloc(price(1:n,ct)-r1(1:n,1)) ! firm that has the mantle got strictly ADV-undercut and so choose the rv-adjusted low price firm to be displayed
else if (mantle(tp(1),1)==0) then
tp = minloc(price(1:n,ct)-r1(1:n,1)) ! the displayed firm from previous period lost the mantle, so just show the rv-adjusted low-price firm
! if we reach this and didn't encounter the relevant case, then the displayed firm didn't change so tp(1) doesn't change either
end if
if(rndshow==1) then
tp=minloc(-r1(1:n,1))
end if
if ((n==3) .and. (showtwo==1)) then !
lp(1:n,1)=1
lp(tp(1),1)=0
else
lp(1:n,1)=0
lp(tp(1),1)=1
end if
! determine current state firm and new actions
firmadvantage(1,ct) = advstate*tp(1)
temp = aPrime(1,1)
do j = 2, n
temp = temp + (aPrime(j,1) - 1)*(m**(j-1))
end do
if (firmadvantage(1,ct)>0) then
temp = (firmadvantage(1,ct)-1)*(m**n)+temp
end if
do j = 1, n
aPrime(j,1) = strategy(temp,j) ! pull new actions
end do
end if
! determine market shares and profits using previous actions (not the ones just pulled at end of above if-then)
do j = 1, n
qbt(j,1) = exp((a-price(j,ct))/mu)/(sum(lp(1:n,1)*exp((a-price(1:n,ct))/mu))+exp(a0/mu))
qnbt(j,1) = exp((a-price(j,ct))/mu)/(sum(exp((a-price(1:n,ct))/mu))+exp(a0/mu))
quantity(j,ct) = lp(j,1)*gamma*qbt(j,1)+(1-gamma)*qnbt(j,1)
revenue(j,ct) = price(j,ct)*quantity(j,ct)
profit(j,ct) = (price(j,ct)-c)*quantity(j,ct)
end do
revenue_tot(1,ct) = sum(price(1:n,ct)*quantity(1:n,ct))
quantity_tot(1,ct) = sum(quantity(1:n,ct))
profit_tot(1,ct) = sum(profit(1:n,ct))
price_sw(1,ct) = sum(revenue(1:n,ct)/quantity_tot(1,ct))
consumersurplus(1,ct) = gamma*mu*log(sum(lp(1:n,1)*exp((a-price(1:n,ct))/mu))+exp(a0/mu))&
+ (1-gamma)*mu*log(sum(exp((a-price(1:n,ct))/mu))+exp(a0/mu))
ct=ct+1 ! bump counter up
end do
pr(1,r)=sum(price(1,1:norm))/norm
pr(2,r)=sum(price(2,1:norm))/norm
if (n==3) then
pr(3,r)=sum(price(3,1:norm))/norm
end if
psr(r)=sum(price_sw(1,1:norm))/norm
rvnr(r)=sum(revenue_tot(1,1:norm))/norm
csr(r)=sum(consumersurplus(1,1:norm))/norm
pir(1,r)=sum(profit(1,1:norm))/norm
pir(2,r)=sum(profit(2,1:norm))/norm
if (n==3) then
pir(3,r)=sum(profit(3,1:norm))/norm
end if
pisr(r)=sum(profit_tot(1,1:norm))/norm
qsr(r)=sum(quantity_tot(1,1:norm))/norm
tr(r)=t
clr(r)=CycleLength
clr0(r)=1 ! other cycles
clr1(r)=0
clr2(r)=0
pisr1(r)=0 ! industry profit 1-price cycles
pisr0(r)=pisr(r) ! industry profit other cycles
pir1(r)=0
! high profit
pir0(r)=pir(1,r)/pisr(r) ! only correct for n=2
if (pir(2,r)>pir(1,r)) then
pir0(r)=pir(2,r)/pisr(r)
end if
if (CycleLength==1) then
clr0(r)=0
clr1(r)=1
pisr0(r)=0
pisr1(r)=pisr(r)
! high profit
pir1(r)=pir(1,r)/pisr(r) ! only correct for n=2
if (pir(2,r)>pir(1,r)) then
pir1(r)=pir(2,r)/pisr(r)
end if
pir0(r)=0
else if (CycleLength==2) then ! part of other cycles
clr2(r)=1
end if
symmr(r)=0; symmr1(r)=0
if (n==2) then
if (all(PriceCycle(1,1:CycleLength)==PriceCycle(2,1:CycleLength))) then
symmr(r)=1
if (CycleLength==1) then
symmr1(r)=1
end if
end if
end if
mxtr(r)=mxt
deallocate(PriceCycle)
end do
call cpu_time(finish)
tmr = finish-start
print *
write(*, fmt="(1x,a,i14)", advance="yes")"Average number of repetitions: ",sum(tr(1:rmax))/rmax
print*, "Sessions not converged: ",sum(mxtr(1:rmax))
print*, "Number of firms: ",n
print*, "Advantage part of state space: ",advstate
print*, "Randomly show products: ",rndshow
print*, "Show two firms in buy-box: ",showtwo
print '(" Tau probability: ",f4.2)',taupr
print '(" Demand shock: ",f4.2)',ah
print '(" Alpha: ",f6.4)',alpha
print '(" Beta: ",f10.8)',beta
print '(" Gamma: ",f4.2)',gamma
print '(" Mu: ",f4.2)',mu
print '(" ADV: ",f4.2)',adv
print '(" Delta: ",f5.3)',delta
write(*, fmt="(1x,a,i0)", advance="no") "It took about ", floor(tmr/60)
write(*, fmt="(1x,a,i0)", advance="no") "minutes and ", nint(tmr-floor(tmr/60)*60)
write(*,*) "seconds to finish"
print*, "------------------------------------------------"
print '(" Average price firm 1: ",f10.8)',sum(pr(1,1:rmax))/rmax
print '(" Average price firm 2: ",f10.8)',sum(pr(2,1:rmax))/rmax
if (n==3) then
print '(" Average price firm 3: ",f10.8)',sum(pr(3,1:rmax))/rmax
end if
print '(" Average time to convergence (sec):",f14.8)',tmr/rmax
print '(" Sales-weighted average price: ",f10.8)',sum(psr(1:rmax))/rmax
print '(" Total revenue: ",f10.8)',sum(rvnr(1:rmax))/rmax
print '(" Total sales: ",f10.8)',sum(qsr(1:rmax))/rmax
print '(" Consumer surplus: ",f10.8)',sum(csr(1:rmax))/rmax
print '(" Total profits: ",f10.8)',sum(pisr(1:rmax))/rmax
print*, "------------------------------------------------"
print*, "CYCLES"
print*, "------------------------------------------------"
print '(" Average cycle length: ",f9.5)',sum(real(clr(1:rmax)))/rmax
print '(" Percentage cycles of length 1: ",f9.5)',100*sum(real(clr1(1:rmax)))/rmax
print '(" Percentage cycles of length 2: ",f9.5)',100*sum(real(clr2(1:rmax)))/rmax
print*, "------------------------------------------------"
print*, "1-price cycles"
print*, "------------------------------------------------"
print '(" Percentage symmetric: ",f9.5)',100*sum(symmr1(1:rmax))/sum(real(clr1(1:rmax)))
print '(" Industry profits: ",f9.5)',sum(pisr1(1:rmax))/sum(real(clr1(1:rmax)))
print '(" High-profit firm''s share: ",f9.5)',100*sum(pir1(1:rmax))/sum(real(clr1(1:rmax)))
print*, "------------------------------------------------"
print*, "Other cycles"
print*, "------------------------------------------------"
print '(" Avg cycle length (proper cycles): ",f9.5)',sum(real(clr0(1:rmax))*real(clr(1:rmax)))/sum(real(clr0(1:rmax)))
print '(" Industry profits: ",f9.5)',sum(pisr0(1:rmax))/sum(real(clr0(1:rmax)))
print '(" High-profit firm''s share: ",f9.5)',100*sum(pir0(1:rmax))/sum(real(clr0(1:rmax)))
print*, "------------------------------------------------"
write(10, fmt="(1x,a,i0)", advance="no")"", g
write(10, fmt="(1x,a,i0)", advance="no")", ", rmax
write(10, fmt="(1x,a,i14)", advance="no")", ", sum(tr(1:rmax))/rmax
write(10, fmt="(1x,a,f14.8)", advance="no")", ", tmr/rmax
write(10, fmt="(1x,a,i0)", advance="no")", ", sum(mxtr(1:rmax))
write(10, fmt="(1x,a,i0)", advance="no")", ", n
write(10, fmt="(1x,a,i0)", advance="no")", ", advstate
write(10, fmt="(1x,a,i0)", advance="no")", ", rndshow
write(10, fmt="(1x,a,i0)", advance="no")", ", showtwo
write(10, fmt="(1x,a,f4.2)", advance="no")", ", taupr
write(10, fmt="(1x,a,f4.2)", advance="no")", ", ah
write(10, fmt="(1x,a,f6.4)", advance="no")", ", alpha
write(10, fmt="(1x,a,f10.8)", advance="no")", ", beta
write(10, fmt="(1x,a,f4.2)", advance="no")", ", gamma
write(10, fmt="(1x,a,f4.2)", advance="no")", ", mu
write(10, fmt="(1x,a,f4.2)", advance="no")", ", adv
write(10, fmt="(1x,a,f5.3)", advance="no")", ", delta
write(10, fmt="(1x,a,f10.8)", advance="no")", ", sum(pr(1,1:rmax))/rmax
write(10, fmt="(1x,a,f10.8)", advance="no")", ", sum(pr(2,1:rmax))/rmax
write(10, fmt="(1x,a,f10.8)", advance="no")", ", sum(pr(3,1:rmax))/rmax
write(10, fmt="(1x,a,f10.8)", advance="no")", ", sum(psr(1:rmax))/rmax
write(10, fmt="(1x,a,f10.8)", advance="no")", ", sum(rvnr(1:rmax))/rmax
write(10, fmt="(1x,a,f10.8)", advance="no")", ", sum(qsr(1:rmax))/rmax
write(10, fmt="(1x,a,f10.8)", advance="no")", ", sum(csr(1:rmax))/rmax
write(10, fmt="(1x,a,f10.8)", advance="no")", ", sum(pisr(1:rmax))/rmax
write(10, fmt="(1x,a,f9.5)", advance="no")", ", sum(real(clr(1:rmax)))/rmax
write(10, fmt="(1x,a,f9.5)", advance="no")", ", 100*sum(real(clr1(1:rmax)))/rmax
write(10, fmt="(1x,a,f9.5)", advance="no")", ", 100*sum(real(clr2(1:rmax)))/rmax
write(10, fmt="(1x,a,f9.5)", advance="no")", ", 100*sum(symmr1(1:rmax))/sum(real(clr1(1:rmax)))
write(10, fmt="(1x,a,f9.5)", advance="no")", ", sum(pisr1(1:rmax))/sum(real(clr1(1:rmax)))
write(10, fmt="(1x,a,f9.5)", advance="no")", ", 100*sum(pir1(1:rmax))/sum(real(clr1(1:rmax)))
write(10, fmt="(1x,a,f9.5)", advance="no")", ", sum(real(clr0(1:rmax))*real(clr(1:rmax)))/sum(real(clr0(1:rmax)))
write(10, fmt="(1x,a,f9.5)", advance="no")", ", sum(pisr0(1:rmax))/sum(real(clr0(1:rmax)))
write(10, fmt="(1x,a,f9.5)", advance="yes")", ", 100*sum(pir0(1:rmax))/sum(real(clr0(1:rmax)))
end do
close(10)
contains
function ran2 ( idum, iv, iy, idum2 )
! This function is taken from the replication code of Calvano, Calzolari, Denicolo, and
! Pastorello (AER, 2020), and itself is a modification of the RAN2 function in Press,
! Teukolsky, Vetterling, and Flannery (1992).
! Thread safe function that generates U(0,1) random deviates (to be used with OpenMP)
! See function RAN2 on p. 272 of NRF77
! Long period (> 2 × 10^18) random number generator of L’Ecuyer with Bays-Durham shuffle
! and added safeguards. Returns a uniform random deviate between 0.0 and 1.0 (exclusive
! of the endpoint values). Call with idum a negative integer to initialize; thereafter, do not
! alter idum between successive deviates in a sequence. RNMX should approximate the largest
! floating value that is less than 1.
! Always set idum2 = 123456789, iv = 0, iy = 0 upon initialization
!
implicit none
!
! Declaring dummy variables
!
integer, intent(inout) :: idum
integer, intent(inout) :: iv(32)
integer, intent(inout) :: iy
integer, intent(inout) :: idum2
!
! Declaring function's type
!
real(8) :: ran2
!
! Declaring local variables and parameters
!
integer, parameter :: IM1 = 2147483563, IM2 = 2147483399, IMM1 = IM1-1, IA1 = 40014, &
IA2 = 40692, IQ1 = 53668, IQ2 = 52774, IR1 = 12211, IR2 = 3791, NDIV = 1+IMM1/32
real(8), parameter :: AM = 1.d0/IM1, EPS = 1.2d-7, RNMX = 1.d0-EPS
integer :: j, k
!
! Beginning execution
!
! Initializing
!
if (idum <= 0) then
!
idum = max(-idum,1)
idum2 = idum
do j = 32+8,1,-1
!
k = idum/IQ1
idum = IA1*(idum-k*IQ1)-k*IR1
if (idum < 0) idum = idum+IM1
if (j <= 32) iv(j) = idum
!
end do
!
iy = iv(1)
!
end if
!
! Start here when not initializing
!
k = idum/IQ1
idum = IA1*(idum-k*IQ1)-k*IR1
if (idum < 0) idum = idum+IM1
k = idum2/IQ2
idum2 = IA2*(idum2-k*IQ2)-k*IR2
if (idum2 < 0) idum2 = idum2+IM2
j = 1+iy/NDIV
iy = iv(j)-idum2
iv(j) = idum
!
if (iy < 1) iy = iy+IMM1
ran2 = min(AM*iy,RNMX)
return
!
! Ending execution and returning control
!
end function ran2
end program
qlearning