qlearning

最新推荐文章于 2024-10-01 23:05:47 发布

??.man

最新推荐文章于 2024-10-01 23:05:47 发布

阅读量1.7k

点赞数 27

文章标签：算法

本文链接：https://blog.csdn.net/weixin_45709541/article/details/139086925

版权

program qlearning

use iso_fortran_env
use omp_lib
implicit none

character(4) :: n_string
character(4) :: mu_string
character(4) :: advstate_string
character(4) :: rndshow_string
character(4) :: taupr_string
character(4) :: ah_string
character(4) :: showtwo_string
character(10) :: rmax_string
character(10) :: gridstep_string
character(10) :: seq_string
character(10) :: grid_string
character(50) :: filename
character(50) :: filename_grid
character :: bsp = char(8)

! initialize the parameters

integer, parameter :: m = 15 ! number of equally-spaced price points
integer, parameter :: maxt = 1e9 ! maximum number of periods
integer, parameter :: norm = 100000 ! number of periods for “forward” simulation after convergence

real(8), parameter :: amin = 1.0 ! minimum value price grid
real(8), parameter :: amax = 2.1 ! maximum value price grid

real(8), parameter :: a0 = 0 ! aggregate demand
real(8), parameter :: c = 1 ! marginal cost (same for all firms)
real(8), parameter :: sigma = 0.01 ! scale parameter random draws

real(8) :: a ! vertical differentiation parameter (same for all firms)

! initialize all other variables

real(8) :: alpha ! learning rate
real(8) :: beta ! experimentation rate
real(8) :: gamma ! proportion of consumers who is shown k products
real(8) :: delta ! discount factor
real(8) :: adv ! adv

real(8) :: mu ! product differentiation parameter
real(8) :: taupr ! probability of losing advantage if other DPDP conditions are satisfied
real(8) :: ah ! demand shock (set to non-zero in case of stochastic demand)

integer :: n ! number of firms
integer :: advstate ! indicator for whether dynamic “advantage” is state variable as well
integer :: rndshow ! indicator for whether buy-box is random
integer :: showtwo ! show two firms in buy-box (in case of three firms)
integer :: rmax ! repetitions

integer :: gridstep, seq, s, gridmax, stat, t, i, j, l, r, temp, cnt, ct, g, mxt, CycleLength,&
hh, tied(m), xx, state, statePrime, iPeriod, idum, iv(32), iy, idum2, numCores
real(8) :: newq, oldq, rvn, uniform_random_value, uniform_random_value1,&
uniform_random_value2, qnb, qnb2, qb, qb2, p1, p2, p3, phi, uniform_random_value3, epst,&
start_grid, finish_grid, start, finish, tmr, mmax

real(8), dimension(3,1) :: p, plast, r1
real(8), dimension(3,100) :: temp2
real(8), dimension(100,1) :: VisitedStates
real(8), dimension(1,norm) :: firmadvantage
real(8), dimension(m,1) :: pi0, pi00, pi0a
real(8), dimension(m) :: qi0, qi0a, aa
real(8), dimension(3) :: aset
integer, dimension(1) :: tp, firmadv, LastObservedAdvantage, LastObservedTP

! initialize allocatable variables

integer, dimension(😃, allocatable :: mxtr, clr, clr0, clr1, clr2
integer(kind=int64), dimension(😃, allocatable :: tr ! make sure the largest possible value is big enough!
integer, dimension(:😅, allocatable :: aPrime, LastObservedPrices, strategy, strategyPrime, strat0, lp, mantle
real(8), dimension(😃, allocatable :: psr, rvnr, csr, pisr, qsr, symmr, symmr1, pisr1, pisr0, pir1, pir0
real(8), dimension(:😅, allocatable :: PriceCycle, qt, pit, qbt, qnbt, maxValQ, p0, maxVQ0, price, quantity, profit, &
revenue, grid, pr, pir, revenue_tot, quantity_tot, profit_tot, price_sw, consumersurplus, oldPrice, newPrice
real(8), dimension(:,:😅, allocatable :: q

do i = 1, m
aa(i) = amin+(i-1)*(amax-amin)/(m-1)
end do
!2 0.25 0 0 0 0 0 1000 1 71 grid1
n=2
mu=0.25
advstate=0
taupr=0
rndshow=0
ah=0
showtwo=0
rmax=10!00
gridstep=1
seq=71
grid_string=“grid2”

s = (advstate*(n-1)+1)*m**n ! cardinality of state space

a = 2 ! baseline value of a
aset = (/ a-ah, a, a+ah /) ! possible values of a

allocate(q(s,m,n),aPrime(n,1),LastObservedPrices(n,1),qt(n,1),pit(n,1),qbt(n,1),qnbt(n,1),maxValQ(s,n),&
strategy(s,n),strategyPrime(s,n),strat0(n,1),p0(n,1),maxVQ0(n,1),lp(n,1),mantle(n,1),&
price(n,norm), quantity(n,norm), profit(n,norm), revenue(n,norm),revenue_tot(1,norm),pr(3,rmax),&
pir(3,rmax),psr(rmax),rvnr(rmax),csr(rmax),pisr(rmax),qsr(rmax),tr(rmax),mxtr(rmax),clr(rmax),&
clr0(rmax),clr1(rmax),clr2(rmax),symmr(rmax),pisr1(rmax),pisr0(rmax),pir1(rmax),pir0(rmax),symmr1(rmax),&
quantity_tot(1,norm),profit_tot(1,norm),price_sw(1,norm),consumersurplus(1,norm),oldPrice(n,1),&
newPrice(n,1))

filename_grid = trim(grid_string) // ‘.txt’
open(12, file=filename_grid)
! first determine length of file
gridmax=-1
stat=0
do while (stat==0)
gridmax=gridmax+1
read(12,*,iostat=stat)
end do
allocate(grid(5,gridmax))
close(12)

! then read the file
open(12, file=filename_grid)
read(12,*) grid

if (gridstep==0) then ! use entire grid
gridstep = gridmax
seq = 1
seq_string = ‘1’
end if

if (taupr0) then
taupr_string = ‘0’
else
taupr_string = taupr_string(3:4)
end if
if (ah0) then
ah_string = ‘0’
else
ah_string = ah_string(3:4)
end if

filename = ‘output_’ // ‘.csv’
open (10, file=filename, status=‘unknown’)
write(10,*)“grid, r, periods, time, notconv, n, advstate, rndshow, showtwo, taupr, ah, alpha, beta, gamma, mu, adv, &
delta, price1,price2, price3, pricesw, revenue, sales, cs, profits, cyclelength, prc1c, prc2c, prcsymm1c, &
indprof1c, highfirm1c, cyclelengthproper, indprof0c, highfirm0c”

! grid loop
do g = (seq-1)gridstep+1,seqgridstep;

 write(*, fmt="(1x,a,i0)", advance="yes")"Grid = ", g

 alpha = grid(1,g)
 beta = grid(2,g)
 gamma = grid(3,g)
 adv = grid(4,g)
 delta = grid(5,g)

 ! determine initial Q-function
 if (n==2) then
    do j = 1, m
       do i = 1, m
          p1 = aa(j)
          p2 = aa(i)
          if (p1==p2) then
             phi=0.5
          else if (p1<p2) then
             phi=1.0
          else
             phi=0.0
          end if
          qb = exp((a-p1)/mu)/(exp((a-p1)/mu)+exp(a0/mu))
          qnb = exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p2)/mu)+exp(a0/mu))

          pi0(i,1) = (p1-c)*(phi*gamma*qb+(1-gamma)*qnb)
       end do
       qi0(j) = (sum(pi0)/m)/(1-delta)
    end do
 else if ((n==3) .and. (showtwo==1)) then ! n==3, two are shown in buy-box
    do j = 1, m
       do i = 1, m
          do l = 1, m
             p1 = aa(j)
             p2 = aa(i)
             p3 = aa(l)
             ! note that phi includes qb
             if ((p1==p2) .and. (p1==p3)) then ! 0.67 probability of getting the buy-box (with firm 2 or 3)
                phi = (1./3.)*exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p2)/mu)+exp(a0/mu))+ &
                      (1./3.)*exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p3)/mu)+exp(a0/mu))
             else if ((p1==p2) .and. (p1>p3)) then ! 0.5 probability of getting the buy-box (with firm 3)
                phi = (1./2.)*exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p3)/mu)+exp(a0/mu))
             else if ((p1==p2) .and. (p1<p3)) then ! 1.0 probability of getting the buy-box (with firm 2)
                phi = exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p2)/mu)+exp(a0/mu))
             else if ((p1==p3) .and. (p1>p2)) then ! 0.5 probability of getting the buy-box (with firm 2)
                phi = (1./2.)*exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p2)/mu)+exp(a0/mu))
             else if ((p1==p3) .and. (p1<p2)) then ! 1.0 probability of getting the buy-box (with firm 3)
                phi = exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p3)/mu)+exp(a0/mu))
             else if ((p1<p2) .and. (p2<p3)) then ! 1.0 probability of getting the buy-box (with firm 2)
                phi = exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p2)/mu)+exp(a0/mu))
             else if ((p1<p3) .and. (p3<p2)) then ! 1.0 probability of getting the buy-box (with firm 3)
                phi = exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p3)/mu)+exp(a0/mu))
             else if ((p1<p2) .and. (p2==p3)) then ! 1.0 probability of getting the buy-box (with firm 2 or 3)
                phi =(1./2.)*exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p2)/mu)+exp(a0/mu))+ &
                     (1./2.)*exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p3)/mu)+exp(a0/mu))
             else if ((p1<p2) .and. (p3<p1)) then ! 1.0 probability of getting the buy-box (with firm 3)
                phi = exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p3)/mu)+exp(a0/mu))
             else if ((p1<p3) .and. (p2<p1)) then ! 1.0 probability of getting the buy-box (with firm 2)
                phi = exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p2)/mu)+exp(a0/mu))
             else if ((p1>p2) .and. (p1>p3)) then ! 0.0 probability of getting the buy-box
                phi = 0.0
             end if

             qnb = exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p2)/mu)+exp((a-p3)/mu)+exp(a0/mu))

             pi0(l,1) = (p1-c)*(phi*gamma+(1-gamma)*qnb)
          end do
          pi00(i,1)= sum(pi0)/m
       end do
       qi0(j) = (sum(pi00)/m)/(1-delta)
    end do
 else ! n==3
    do j = 1, m
       do i = 1, m
          do l = 1, m
             p1 = aa(j)
             p2 = aa(i)
             p3 = aa(l)
             if (p1==p2 .and. p1==p3) then
                phi = 1./3.
             else if (p1==p2 .and. p1<p3) then
                phi = 0.5
             else if (p1==p3 .and. p1<p2) then
                phi = 0.5
             else if (p1<p2 .and. p1<p3) then
                phi = 1.0
             else
                phi = 0.0
             end if

             qb = exp((a-p1)/mu)/(exp((a-p1)/mu)+exp(a0/mu))
             qnb = exp((a-p1)/mu)/(exp((a-p1)/mu)+exp((a-p2)/mu)+exp((a-p3)/mu)+exp(a0/mu))

             pi0(l,1) = (p1-c)*(phi*gamma*qb+(1-gamma)*qnb)
          end do
          pi00(i,1)= sum(pi0)/m
       end do
       qi0(j) = (sum(pi00)/m)/(1-delta)
    end do
 end if

 do j = 1, n
    maxVQ0(j,1) = maxval(qi0)
    strat0(j,1) = maxloc(qi0,dim=1)
 end do

 call cpu_time(start)

 do r = 1, rmax
    ! reseeds the random number generator using ten times negative of grid number
    idum2 = 123456789
    iv=0
    iy=0
    idum=-10*r

    do j = 1, n
       q(1:s,1:m,j)=spread(qi0, 1, s)
       maxValQ(1:s,j) = spread(maxVQ0(j,1),1,s)
       strategyPrime(1:s,j) = spread(strat0(j,1),1,s)
    end do
    strategy = strategyPrime

    t = 1
    cnt = 0
    qt=0
    pit=0
    p=0
    plast=0
    lp=0
    mxt=0
    mantle=0 ! do this before main loop
    firmadv=0 ! do this before main loop

    ! determine initial state
    uniform_random_value = ran2(idum, iv, iy, idum2)
    state = int(s * uniform_random_value + 1)

    epst=1 ! initialize epsilon

    do while (t <= maxt .and. cnt < norm)
       if (t==maxt) then
          print*, "maximum number of periods reached"
          mxt=1
       end if

       if (ah>0) then ! determine stochastic demand realization
          uniform_random_value = ran2(idum, iv, iy, idum2)
          a = aset(int(3 * uniform_random_value + 1))
       else
          a = aset(2)
       end if

       ! determine actions firms
       do j = 1, n
          uniform_random_value = ran2(idum, iv, iy, idum2)
          if (uniform_random_value <epst) then       ! exploration mode
             uniform_random_value = ran2(idum, iv, iy, idum2)
             aPrime(j,1) = int(m * uniform_random_value + 1)
          else                                       ! exploitation model
             aPrime(j,1) = strategyPrime(state,j)    ! take best action ("greedy")
          end if
       end do
       epst = epst*exp(-beta)

       ! get prices using action space
       do j = 1, n
          p(j,1) = aa(aPrime(j,1))
       end do

       ! random draw to determine if advantage is exogenously taken away or not in case of DPDP (used in case of "richer form" of DPDP)
       uniform_random_value = ran2(idum, iv, iy, idum2)

       ! dynamic price-directed prominence
       if (adv > 0) then ! if PDP is in effect, skip all this
          mantle=0
          firmadv=0
          if (t>1) then
             if ((n==3) .and. (showtwo==1)) then
                mantle=1 ! default is now that  you get the mantle, note that taupr option not enabled for showtwo==1
                ! tp is now firm who doesn't get the buy-box!
                do j = 1,n
                   if (p(j,1)>plast(j,1)) then ! if charging more than in last period, you do not get the mantle
                      mantle(j,1)=0
                   end if
                end do
                mantle(tp(1),1)=0 ! firm that wasn't displayed previously firm never gets the mantle
             else
                if ((p(tp(1),1) <= plast(tp(1),1) .and. uniform_random_value <= 1-taupr)) then
                   mantle(tp(1),1)=1
                end if
             end if
          end if
       end if

       ! determine who got the buy box
       uniform_random_value1 = ran2(idum, iv, iy, idum2)
       uniform_random_value2 = ran2(idum, iv, iy, idum2)

       r1(1,1) = sigma*uniform_random_value1*(amax-amin)/(m-1)
       r1(2,1) = sigma*uniform_random_value2*(amax-amin)/(m-1)

       if (n==3) then
          uniform_random_value3 = ran2(idum, iv, iy, idum2)

          r1(3,1) = sigma*uniform_random_value3*(amax-amin)/(m-1)
       end if

       if (t==1) then ! no firm can have the advantage in the first period so mantle will be 0
          if ((n==3) .and. (showtwo==1)) then
             tp = minloc(-p(1:n,1)+r1(1:n,1))
          else
             tp = minloc(p(1:n,1)-r1(1:n,1)) ! rv-adjusted low price firm gets displayed this period
          end if
       else if ((n==3) .and. (showtwo==1)) then
          tp = minloc(-p(1:n,1)+r1(1:n,1)+adv*mantle(1:n,1))
       else if ((mantle(tp(1),1)==1) .and. (p(tp(1),1) > minval(p(1:n,1)+adv))) then
          tp = minloc(p(1:n,1)-r1(1:n,1)) ! firm that has the mantle got strictly ADV-undercut and so choose the rv-adjusted low price firm to be displayed
       else if (mantle(tp(1),1)==0) then ! mantle==0 always if adv=0, from code above
          tp = minloc(p(1:n,1)-r1(1:n,1)) ! the displayed firm from previous period lost the mantle, so just show the rv-adjusted low-price firm
          ! if we reach this and didn't encounter the relevant case, then the displayed firm didn't change so tp(1) doesn't change either
       end if

       if(rndshow==1) then
          tp=minloc(-r1(1:n,1))
       end if

       if ((n==3) .and. (showtwo==1)) then
          lp(1:n,1)=1
          lp(tp(1),1)=0
       else
          lp(1:n,1)=0
          lp(tp(1),1)=1
       end if

       ! determine new state s(t+1)
       firmadv=advstate*tp(1)
       ! note: in case of n=3 and showtwo=1, this will be the firm that is *not* displayed
       temp = aPrime(1,1)
       do j = 2, n
          temp = temp + (aPrime(j,1) - 1)*(m**(j-1))
       end do
       if (advstate>0) then
          statePrime = (firmadv(1)-1)*(m**n)+temp !
       else
          statePrime = temp
       end if

       ! determine market shares and profits
       do j = 1, n
          qbt(j,1) = exp((a-p(j,1))/mu)/(sum(lp(1:n,1)*exp((a-p(1:n,1))/mu))+exp(a0/mu))
          qnbt(j,1) = exp((a-p(j,1))/mu)/(sum(exp((a-p(1:n,1))/mu))+exp(a0/mu))
          qt(j,1) = lp(j,1)*gamma*qbt(j,1)+(1-gamma)*qnbt(j,1)
          pit(j,1) = (p(j,1)-c)*qt(j,1)
       end do

       ! update Q-function
       do j = 1, n
          ! Calvano et al notation
          oldq = q(state,aPrime(j,1),j)
          newq = oldq + alpha*(pit(j,1)+delta*maxValQ(statePrime,j)-oldq)
          if (t==1 .and. advstate==1) then
             ! -> initial state in which no one has advantage is never reached for t>1 and so is not part of q-matrix, so updating not needed
             newq = oldq
          end if
          q(state,aPrime(j,1),j)=newq

          if (newq > maxValQ(state,j)) then
             maxValQ(state,j) = newq ! update maxValQ to higher value
             if (strategyPrime(state,j) /= aPrime(j,1)) then ! only if this corresponds to new action, update strategyPrime
                strategyPrime(state,j) = aPrime(j,1);
             end if
          end if

          if ((newq < maxValQ(state,j)) .and. (strategyPrime(state,j)==aPrime(j,1))) then
             ! updating the cell with the maxValQ to lower value, so then best action might change
             ! deal with ties randomly
             mmax = maxval(q(state,1:m,j), dim=1) ! maximum value
             tied = 0
             hh = 0
             do i = 1, m
                if (abs(q(state,i,j)-mmax) <= epsilon(q(state,1:m,j))) then
                   hh = hh+1
                   tied(hh) = i
                end if
             end do
             if (hh > 1) then
                uniform_random_value = ran2(idum, iv, iy, idum2)
                xx = int(hh * uniform_random_value + 1)

                strategyPrime(state,j) = tied(xx)
             else ! no ties
                strategyPrime(state,j) = tied(1)
             end if

             maxValQ(state,j) = q(state,strategyPrime(state,j),j);

          end if

       end do

       ! determine convergence
       if (all(strategyPrime(state,1:n)==strategy(state,1:n))) then
          cnt = cnt+1
       else
          cnt = 0
       end if

       plast(1:n,1)=p(1:n,1)

       strategy(state,1:n)=strategyPrime(state,1:n)
       state=statePrime

       t = t + 1
    end do

    LastObservedPrices = aPrime
    LastObservedAdvantage = firmadv
    LastObservedTP = tp(1) ! not the same as LastObservedAdvantage, which equals zero outside of smarterAI

    ! compute cycle length
    ! runs *twice* (using last actions first run as starting point to eliminate possible experimentation)
    do i = 1, 2
       VisitedStates = 0

       iPeriod = 1
       temp2 = 0

       do
          ! determine current state from actions and buybox outcomes, starting with ones from end of Q-learning loop
          temp = aPrime(1,1)
          do j = 2, n
             temp = temp + (aPrime(j,1) - 1)*(m**(j-1))
          end do
          if (advstate>0) then
             temp = (firmadv(1)-1)*(m**n)+temp
          end if

          if ((iPeriod>1) .and. (any(VisitedStates(1:iPeriod-1,1)==temp))) then
             exit
          end if

          do j = 1, n
             temp2(j,iPeriod) = aa(aPrime(j,1))
             oldPrice(j,1) = aa(aPrime(j,1))
          end do

          VisitedStates(iPeriod,1) = temp

          aPrime = reshape(strategy(temp,1:n),(/n,1/))

          if (advstate > 0) then ! no need to compute buy-box winners if not in smarterAI (because identity of winner is not in state space)

             do j = 1, n
                newPrice(j,1) = aa(aPrime(j,1))
             end do

             ! random draw to determine if advantage is realized in case of DPDP (random tau)
             uniform_random_value = ran2(idum, iv, iy, idum2)

             mantle=0
             firmadv=0
             if (adv > 0) then
                mantle=0
                firmadv=0
                if ((n==3) .and. (showtwo==1)) then ! show two firms
                   mantle=1 ! default is now  you get the mantle, not enabled for taupr option
                   ! tp is now firm who doesn't get the buy-box!
                   do j = 1,n
                      if (newPrice(j,1)>oldPrice(j,1)) then ! if charging more than in last period, you do not get the mantle
                         mantle(j,1)=0
                      end if
                   end do
                   mantle(tp(1),1)=0 ! highest priced firm never gets the mantle
                else
                   if ((newPrice(tp(1),1) <= oldPrice(tp(1),1) .and. uniform_random_value <= 1-taupr)) then
                      mantle(tp(1),1)=1
                   end if
                end if
             end if

             ! determine who got the buy box
             uniform_random_value1 = ran2(idum, iv, iy, idum2)
             uniform_random_value2 = ran2(idum, iv, iy, idum2)

             r1(1,1) = sigma*uniform_random_value1*(amax-amin)/(m-1)
             r1(2,1) = sigma*uniform_random_value2*(amax-amin)/(m-1)

             if (n==3) then
                uniform_random_value3 = ran2(idum, iv, iy, idum2)

                r1(3,1) = sigma*uniform_random_value3*(amax-amin)/(m-1)
             end if


             if ((n==3) .and. (showtwo==1)) then
                tp = minloc(-newPrice(1:n,1)+r1(1:n,1)+adv*mantle(1:n,1))
             else if ((mantle(tp(1),1)==1) .and. (newPrice(tp(1),1) > minval(newPrice(1:n,1)+adv))) then
                tp = minloc(newPrice(1:n,1)-r1(1:n,1)) ! firm that has the mantle got strictly ADV-undercut and so choose the rv-adjusted low price firm to be displayed
             else if (mantle(tp(1),1)==0) then
                tp = minloc(newPrice(1:n,1)-r1(1:n,1)) ! the displayed firm from previous period lost the mantle, so just show the rv-adjusted low-price firm
             end if ! if we reach this and didn't encounter the relevant case, then the displayed firm didn't change so tp(1) doesn't change either

             if(rndshow==1) then
                tp = minloc(-r1(1:n,1))
             end if

             firmadv=advstate*tp(1)

          end if

          iPeriod = iPeriod+1

       end do
    end do

    CycleLength = iPeriod-1

    allocate(PriceCycle(n,CycleLength)) ! size of PriceCycle depends on cycle length
    PriceCycle(1:n,1:CycleLength)=temp2(1:n,1:CycleLength)

    ! forward simulate 100k periods using converged prices to determine profits, revenue, cs, etc.
    ct = 1
    do while (ct<norm+1)

       if (ct==norm+1) exit

       if (ah>0) then ! determine stochastic demand realization
          uniform_random_value = ran2(idum, iv, iy, idum2)
          a = aset(int(3 * uniform_random_value + 1))
       else
          a = aset(2)
       end if

       ! ct==1 case
       if (ct==1) then
          aPrime = LastObservedPrices
          do j = 1, n
             price(j,ct) = aa(aPrime(j,1))
          end do

          tp=LastObservedTP(1) ! set displayed firm to the last one displayed in Q-loop

          if ((n==3) .and. (showtwo==1)) then !
             lp(1:n,1)=1
             lp(tp(1),1)=0
          else
             lp(1:n,1)=0
             lp(tp(1),1)=1
          end if

          ! determine current state firm and new actions
          firmadvantage(1,ct) = advstate*tp(1)

          temp = aPrime(1,1)
          do j = 2, n
             temp = temp + (aPrime(j,1) - 1)*(m**(j-1))
          end do
          if (firmadvantage(1,ct)>0) then
             temp = (firmadvantage(1,ct)-1)*(m**n)+temp
          end if

          do j = 1, n
             aPrime(j,1) = strategy(temp,j) ! pull new actions
          end do

       else ! ct>1 case

          ! get prices using action space, using actions either pulled from end of (ct==1) above or at end of current case
          do j = 1, n
             price(j,ct) = aa(aPrime(j,1))
          end do

          mantle = 0
          firmadvantage(1,ct) = 0

          ! random draw to determine if advantage  is lost in case of DPDP (random tau)
          uniform_random_value = ran2(idum, iv, iy, idum2)

          if (adv > 0) then
             mantle=0
             firmadv=0
             if ((n==3) .and. (showtwo==1)) then ! show two firms
                mantle=1 ! default is now  you get the mantle, note that taupr option not enabled for showtwo==1
                ! tp is now firm who doesn't get the buy-box!
                do j = 1,n
                   if (price(j,ct)>price(j,ct-1)) then ! if charging more than in last period, you do not get the mantle
                      mantle(j,1)=0
                   end if
                end do
                mantle(tp(1),1)=0 ! firm that wasn't displayed previously never gets the mantle
             else
                if ((price(tp(1),ct) <= price(tp(1),ct-1) .and. uniform_random_value <= 1-taupr)) then
                   mantle(tp(1),1)=1
                end if
             end if
          end if

          ! determine who got the buy box
          uniform_random_value1 = ran2(idum, iv, iy, idum2)
          uniform_random_value2 = ran2(idum, iv, iy, idum2)

          r1(1,1) = sigma*uniform_random_value1*(amax-amin)/(m-1)
          r1(2,1) = sigma*uniform_random_value2*(amax-amin)/(m-1)

          if (n==3) then
             uniform_random_value3 = ran2(idum, iv, iy, idum2)

             r1(3,1) = sigma*uniform_random_value3*(amax-amin)/(m-1)
          end if

          if ((n==3) .and. (showtwo==1)) then
             tp = minloc(-price(1:n,ct)+r1(1:n,1)+adv*mantle(1:n,1))
          else if ((mantle(tp(1),1)==1) .and. (price(tp(1),ct) > minval(price(1:n,ct)+adv))) then
             tp = minloc(price(1:n,ct)-r1(1:n,1)) ! firm that has the mantle got strictly ADV-undercut and so choose the rv-adjusted low price firm to be displayed
          else if (mantle(tp(1),1)==0) then
             tp = minloc(price(1:n,ct)-r1(1:n,1)) ! the displayed firm from previous period lost the mantle, so just show the rv-adjusted low-price firm
	 ! if we reach this and didn't encounter the relevant case, then the displayed firm didn't change so tp(1) doesn't change either
          end if

          if(rndshow==1) then
             tp=minloc(-r1(1:n,1))
          end if

          if ((n==3) .and. (showtwo==1)) then !
             lp(1:n,1)=1
             lp(tp(1),1)=0
          else
             lp(1:n,1)=0
             lp(tp(1),1)=1
          end if

          ! determine current state firm and new actions
          firmadvantage(1,ct) = advstate*tp(1)
          temp = aPrime(1,1)
          do j = 2, n
             temp = temp + (aPrime(j,1) - 1)*(m**(j-1))
          end do
          if (firmadvantage(1,ct)>0) then
             temp = (firmadvantage(1,ct)-1)*(m**n)+temp
          end if

          do j = 1, n
             aPrime(j,1) = strategy(temp,j) ! pull new actions
          end do

       end if

       ! determine market shares and profits using previous actions (not the ones just pulled at end of above if-then)
       do j = 1, n
          qbt(j,1) = exp((a-price(j,ct))/mu)/(sum(lp(1:n,1)*exp((a-price(1:n,ct))/mu))+exp(a0/mu))
          qnbt(j,1) = exp((a-price(j,ct))/mu)/(sum(exp((a-price(1:n,ct))/mu))+exp(a0/mu))

          quantity(j,ct) = lp(j,1)*gamma*qbt(j,1)+(1-gamma)*qnbt(j,1)
          revenue(j,ct) = price(j,ct)*quantity(j,ct)
          profit(j,ct) = (price(j,ct)-c)*quantity(j,ct)
       end do
       revenue_tot(1,ct) =  sum(price(1:n,ct)*quantity(1:n,ct))
       quantity_tot(1,ct) = sum(quantity(1:n,ct))
       profit_tot(1,ct) = sum(profit(1:n,ct))
       price_sw(1,ct) = sum(revenue(1:n,ct)/quantity_tot(1,ct))
       consumersurplus(1,ct) = gamma*mu*log(sum(lp(1:n,1)*exp((a-price(1:n,ct))/mu))+exp(a0/mu))&
            + (1-gamma)*mu*log(sum(exp((a-price(1:n,ct))/mu))+exp(a0/mu))

       ct=ct+1 ! bump counter up
    end do

    pr(1,r)=sum(price(1,1:norm))/norm
    pr(2,r)=sum(price(2,1:norm))/norm
    if (n==3) then
       pr(3,r)=sum(price(3,1:norm))/norm
    end if
    psr(r)=sum(price_sw(1,1:norm))/norm
    rvnr(r)=sum(revenue_tot(1,1:norm))/norm
    csr(r)=sum(consumersurplus(1,1:norm))/norm
    pir(1,r)=sum(profit(1,1:norm))/norm
    pir(2,r)=sum(profit(2,1:norm))/norm
    if (n==3) then
       pir(3,r)=sum(profit(3,1:norm))/norm
    end if
    pisr(r)=sum(profit_tot(1,1:norm))/norm
    qsr(r)=sum(quantity_tot(1,1:norm))/norm

    tr(r)=t
    clr(r)=CycleLength
    clr0(r)=1        ! other cycles
    clr1(r)=0
    clr2(r)=0
    pisr1(r)=0       ! industry profit 1-price cycles
    pisr0(r)=pisr(r) ! industry profit other cycles
    pir1(r)=0
    ! high profit
    pir0(r)=pir(1,r)/pisr(r) ! only correct for n=2
    if (pir(2,r)>pir(1,r)) then
       pir0(r)=pir(2,r)/pisr(r)
    end if

    if (CycleLength==1) then
       clr0(r)=0
       clr1(r)=1
       pisr0(r)=0
       pisr1(r)=pisr(r)
       ! high profit
       pir1(r)=pir(1,r)/pisr(r) ! only correct for n=2
       if (pir(2,r)>pir(1,r)) then
          pir1(r)=pir(2,r)/pisr(r)
       end if
       pir0(r)=0
    else if (CycleLength==2) then ! part of other cycles
       clr2(r)=1
    end if

    symmr(r)=0; symmr1(r)=0
    if (n==2) then
       if (all(PriceCycle(1,1:CycleLength)==PriceCycle(2,1:CycleLength))) then
          symmr(r)=1
          if (CycleLength==1) then
             symmr1(r)=1
          end if
       end if
    end if

    mxtr(r)=mxt
    deallocate(PriceCycle)

 end do

 call cpu_time(finish)
 tmr = finish-start

 print *
 write(*, fmt="(1x,a,i14)", advance="yes")"Average number of repetitions:    ",sum(tr(1:rmax))/rmax
 print*, "Sessions not converged:             ",sum(mxtr(1:rmax))
 print*, "Number of firms:                    ",n
 print*, "Advantage part of state space:      ",advstate
 print*, "Randomly show products:             ",rndshow
 print*, "Show two firms in buy-box:          ",showtwo
 print '(" Tau probability:                            ",f4.2)',taupr
 print '(" Demand shock:                               ",f4.2)',ah
 print '(" Alpha:                                    ",f6.4)',alpha
 print '(" Beta:                                 ",f10.8)',beta
 print '(" Gamma:                                      ",f4.2)',gamma
 print '(" Mu:                                         ",f4.2)',mu
 print '(" ADV:                                        ",f4.2)',adv
 print '(" Delta:                                     ",f5.3)',delta
 write(*, fmt="(1x,a,i0)", advance="no") "It took about ",  floor(tmr/60)
 write(*, fmt="(1x,a,i0)", advance="no") "minutes and ",  nint(tmr-floor(tmr/60)*60)
 write(*,*) "seconds to finish"
 print*, "------------------------------------------------"
 print '(" Average price firm 1:                 ",f10.8)',sum(pr(1,1:rmax))/rmax
 print '(" Average price firm 2:                 ",f10.8)',sum(pr(2,1:rmax))/rmax
 if (n==3) then
    print '(" Average price firm 3:                 ",f10.8)',sum(pr(3,1:rmax))/rmax
 end if
 print '(" Average time to convergence (sec):",f14.8)',tmr/rmax
 print '(" Sales-weighted average price:         ",f10.8)',sum(psr(1:rmax))/rmax
 print '(" Total revenue:                        ",f10.8)',sum(rvnr(1:rmax))/rmax
 print '(" Total sales:                          ",f10.8)',sum(qsr(1:rmax))/rmax
 print '(" Consumer surplus:                     ",f10.8)',sum(csr(1:rmax))/rmax
 print '(" Total profits:                        ",f10.8)',sum(pisr(1:rmax))/rmax
 print*, "------------------------------------------------"
 print*, "CYCLES"
 print*, "------------------------------------------------"
 print '(" Average cycle length:                 ",f9.5)',sum(real(clr(1:rmax)))/rmax
 print '(" Percentage cycles of length 1:        ",f9.5)',100*sum(real(clr1(1:rmax)))/rmax
 print '(" Percentage cycles of length 2:        ",f9.5)',100*sum(real(clr2(1:rmax)))/rmax
 print*, "------------------------------------------------"
 print*, "1-price cycles"
 print*, "------------------------------------------------"
 print '(" Percentage symmetric:                 ",f9.5)',100*sum(symmr1(1:rmax))/sum(real(clr1(1:rmax)))
 print '(" Industry profits:                     ",f9.5)',sum(pisr1(1:rmax))/sum(real(clr1(1:rmax)))
 print '(" High-profit firm''s share:             ",f9.5)',100*sum(pir1(1:rmax))/sum(real(clr1(1:rmax)))
 print*, "------------------------------------------------"
 print*, "Other cycles"
 print*, "------------------------------------------------"
 print '(" Avg cycle length (proper cycles):     ",f9.5)',sum(real(clr0(1:rmax))*real(clr(1:rmax)))/sum(real(clr0(1:rmax)))
 print '(" Industry profits:                     ",f9.5)',sum(pisr0(1:rmax))/sum(real(clr0(1:rmax)))
 print '(" High-profit firm''s share:             ",f9.5)',100*sum(pir0(1:rmax))/sum(real(clr0(1:rmax)))
 print*, "------------------------------------------------"

 write(10, fmt="(1x,a,i0)", advance="no")"", g
 write(10, fmt="(1x,a,i0)", advance="no")", ", rmax
 write(10, fmt="(1x,a,i14)", advance="no")", ", sum(tr(1:rmax))/rmax
 write(10, fmt="(1x,a,f14.8)", advance="no")", ", tmr/rmax
 write(10, fmt="(1x,a,i0)", advance="no")", ", sum(mxtr(1:rmax))
 write(10, fmt="(1x,a,i0)", advance="no")", ", n
 write(10, fmt="(1x,a,i0)", advance="no")", ", advstate
 write(10, fmt="(1x,a,i0)", advance="no")", ", rndshow
 write(10, fmt="(1x,a,i0)", advance="no")", ", showtwo
 write(10, fmt="(1x,a,f4.2)", advance="no")", ", taupr
 write(10, fmt="(1x,a,f4.2)", advance="no")", ", ah
 write(10, fmt="(1x,a,f6.4)", advance="no")", ", alpha
 write(10, fmt="(1x,a,f10.8)", advance="no")", ", beta
 write(10, fmt="(1x,a,f4.2)", advance="no")", ", gamma
 write(10, fmt="(1x,a,f4.2)", advance="no")", ", mu
 write(10, fmt="(1x,a,f4.2)", advance="no")", ", adv
 write(10, fmt="(1x,a,f5.3)", advance="no")", ", delta
 write(10, fmt="(1x,a,f10.8)", advance="no")", ", sum(pr(1,1:rmax))/rmax
 write(10, fmt="(1x,a,f10.8)", advance="no")", ", sum(pr(2,1:rmax))/rmax
 write(10, fmt="(1x,a,f10.8)", advance="no")", ", sum(pr(3,1:rmax))/rmax
 write(10, fmt="(1x,a,f10.8)", advance="no")", ", sum(psr(1:rmax))/rmax
 write(10, fmt="(1x,a,f10.8)", advance="no")", ", sum(rvnr(1:rmax))/rmax
 write(10, fmt="(1x,a,f10.8)", advance="no")", ", sum(qsr(1:rmax))/rmax
 write(10, fmt="(1x,a,f10.8)", advance="no")", ", sum(csr(1:rmax))/rmax
 write(10, fmt="(1x,a,f10.8)", advance="no")", ", sum(pisr(1:rmax))/rmax
 write(10, fmt="(1x,a,f9.5)", advance="no")", ", sum(real(clr(1:rmax)))/rmax
 write(10, fmt="(1x,a,f9.5)", advance="no")", ", 100*sum(real(clr1(1:rmax)))/rmax
 write(10, fmt="(1x,a,f9.5)", advance="no")", ", 100*sum(real(clr2(1:rmax)))/rmax
 write(10, fmt="(1x,a,f9.5)", advance="no")", ", 100*sum(symmr1(1:rmax))/sum(real(clr1(1:rmax)))
 write(10, fmt="(1x,a,f9.5)", advance="no")", ", sum(pisr1(1:rmax))/sum(real(clr1(1:rmax)))
 write(10, fmt="(1x,a,f9.5)", advance="no")", ", 100*sum(pir1(1:rmax))/sum(real(clr1(1:rmax)))
 write(10, fmt="(1x,a,f9.5)", advance="no")", ", sum(real(clr0(1:rmax))*real(clr(1:rmax)))/sum(real(clr0(1:rmax)))
 write(10, fmt="(1x,a,f9.5)", advance="no")", ", sum(pisr0(1:rmax))/sum(real(clr0(1:rmax)))
 write(10, fmt="(1x,a,f9.5)", advance="yes")", ", 100*sum(pir0(1:rmax))/sum(real(clr0(1:rmax)))

end do

close(10)

contains
function ran2 ( idum, iv, iy, idum2 )
! This function is taken from the replication code of Calvano, Calzolari, Denicolo, and
! Pastorello (AER, 2020), and itself is a modification of the RAN2 function in Press,
! Teukolsky, Vetterling, and Flannery (1992).

! Thread safe function that generates U(0,1) random deviates (to be used with OpenMP)
! See function RAN2 on p. 272 of NRF77
! Long period (> 2 × 10^18) random number generator of L’Ecuyer with Bays-Durham shuffle
! and added safeguards. Returns a uniform random deviate between 0.0 and 1.0 (exclusive
! of the endpoint values). Call with idum a negative integer to initialize; thereafter, do not
! alter idum between successive deviates in a sequence. RNMX should approximate the largest
! floating value that is less than 1.
! Always set idum2 = 123456789, iv = 0, iy = 0 upon initialization
!
implicit none
!
! Declaring dummy variables
!
integer, intent(inout) :: idum
integer, intent(inout) :: iv(32)
integer, intent(inout) :: iy
integer, intent(inout) :: idum2
!
! Declaring function's type
!
real(8) :: ran2
!
! Declaring local variables and parameters
!
integer, parameter :: IM1 = 2147483563, IM2 = 2147483399, IMM1 = IM1-1, IA1 = 40014, &
    IA2 = 40692, IQ1 = 53668, IQ2 = 52774, IR1 = 12211, IR2 = 3791, NDIV = 1+IMM1/32
real(8), parameter :: AM = 1.d0/IM1, EPS = 1.2d-7, RNMX = 1.d0-EPS
integer :: j, k

!
! Beginning execution
!
! Initializing
!
if (idum <= 0) then
    !
    idum = max(-idum,1)
    idum2 = idum
    do j = 32+8,1,-1
        !
        k = idum/IQ1
        idum = IA1*(idum-k*IQ1)-k*IR1
        if (idum < 0) idum = idum+IM1
        if (j <= 32) iv(j) = idum
        !
    end do
    !
    iy = iv(1)
    !
end if
!
! Start here when not initializing
!
k = idum/IQ1
idum = IA1*(idum-k*IQ1)-k*IR1
if (idum < 0) idum = idum+IM1
k = idum2/IQ2
idum2 = IA2*(idum2-k*IQ2)-k*IR2
if (idum2 < 0) idum2 = idum2+IM2
j = 1+iy/NDIV
iy = iv(j)-idum2
iv(j) = idum
!
if (iy < 1) iy = iy+IMM1
ran2 = min(AM*iy,RNMX)
return
!
! Ending execution and returning control
!

end function ran2

end program

qlearning