Predicting Double Color Ball (双色球) lottery numbers with the PPO reinforcement learning algorithm

Note: this program is for learning and reference only; do not use it for any other purpose.
It was built with Baidu's PARL framework and runs successfully on Baidu AI Studio.
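The environment below reads historical draw results from a local res.csv file that is not included with the post. Judging only from how RouletteEnv.__init__ parses it (the first field of each line is skipped and the remaining seven fields are read as integers: six red balls followed by one blue ball), a plausible layout would look like the following hypothetical rows:

2021001,1,5,12,19,27,33,7
2021002,3,8,14,22,28,31,12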

#!/usr/bin/env python
# coding: utf-8
from parl.core.fluid import layers
import math
from gym import spaces  # gym's logger would be shadowed by parl.utils.logger imported below
from gym.utils import seeding
from paddle import fluid
from parl.algorithms.fluid import PPO  # import the PPO algorithm directly from the parl library; no need to re-implement it
import paddle
paddle.enable_static()
import gym
import numpy as np
import parl
from parl.utils import logger
from parl.utils.rl_utils import calc_gae, calc_discount_sum_rewards
import shutil
# When running the quadrotor hovering task, training would not converge without unifying the outputs of the 4 motors, so CartPole was used instead
# The quadrotor hovering task was switched to training from the terminal
'''
envs='Quadrotor'
task='hovering_control'
'''

gamma = 0.9
lam = 0.98
kl_targ = 0.3
episodes_per_batch = 1000
loss_type = 'CLIP'
train_total_steps = 1e10
test_every_steps = 1e5


class RouletteEnv(gym.Env):
    """Simple roulette environment
    The roulette wheel has 37 spots. If the bet is 0 and a 0 comes up,
    you win a reward of 35. If the parity of your bet matches the parity
    of the spin, you win 1. Otherwise you receive a reward of -1.
    The long run reward for playing 0 should be -1/37 for any state
    The last action (38) stops the rollout for a return of 0 (walking away)
    """

    def __init__(self):
        # print("初始化环境")
        self.n = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
                  29, 30, 31, 32, 33]   # 设置34种动作
        self.action_space = len(self.n)
        self.seed()
        self.num = 0
        # load historical draws from res.csv: each line has one leading field
        # (which is skipped) followed by 6 red balls and 1 blue ball
        f = open('res.csv', 'r')
        data = f.readlines()
        f.close()
        allres = []
        for i in data:
            tmpc = i.strip().split(',')
            tmp = [int(x) for x in tmpc[1:]]
            allres.append(tmp)
        self.data = allres[:]
        self.nextNumber = self.data[1]
        self.number = self.data[0]
        self.select = []

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        # cast the 7 continuous action values to integers: 6 reds + 1 blue
        for i in range(7):
            action[i] = int(action[i])
        red6 = action[:6]
        blue = action[-1]
        action = sorted(red6)

        action.append(blue)
        if len(set(action)) < 7:
            # duplicate numbers in the pick: heavy penalty, end the episode
            return np.array(self.number), -2000, True, {}
        if action[-1] > 16:
            # the blue ball must be in 1-16: heavy penalty, end the episode
            return np.array(self.number), -3000, True, {}
        # print(action)
        for i in range(7):
            self.select.append(int(action[i]))
        sz = self.rule(self.nextNumber, self.select)
        self.select = []
        reward = sz * 1000
        isOver = True
        # if reward > 0:
        #     isOver = True
        # else:
        #     isOver = False
        return np.array(self.number), reward, isOver, {}

    def reset(self):
        self.num += 1
        if (self.num + 1) >= len(self.data):
            print("结束一轮")
            self.__init__()
            self.num = 0
        self.number = self.data[self.num]
        self.nextNumber = self.data[self.num + 1]
        return np.array(self.number)

    def rule(self, z, select):
        """Score a 7-number pick `select` against the winning draw `z` (6 reds + 1 blue)."""
        allcount = -6  # default score when no prize is won
        d = z
        red = select[:6]
        blue = select[-1]
        zrnum = 0  # number of matched red balls
        zblue = False
        for r in red:
            if r in d[:6]:
                zrnum += 1
        if int(blue) == int(d[-1]):
            zblue = True
        # 1st prize: 6 reds + blue
        if zblue and zrnum == 6:
            allcount = 10

        # 2nd prize: 6 reds
        elif zrnum == 6:
            allcount = 5

        # 3rd prize: 5 reds + blue
        elif zblue and zrnum == 5:
            allcount = 4

        # 4th prize: 4 reds + blue, or 5 reds
        elif (zblue and zrnum == 4) or zrnum == 5:
            allcount = 3

        # 5th prize: 3 reds + blue, or 4 reds
        elif (zblue and zrnum == 3) or zrnum == 4:
            allcount = 2

        # 6th prize: blue matched (with 2 or fewer reds)
        elif zblue:
            allcount = 1

        return allcount
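
# A quick sanity check of the scoring above, using made-up numbers rather than
# anything from res.csv: if the winning draw is [2, 9, 15, 21, 28, 33, 7] and
# the pick is [2, 9, 15, 21, 30, 31, 7], then four reds and the blue match, so
# rule() returns 3 and step() would hand back a reward of 3 * 1000 = 3000.
# (Requires res.csv to instantiate the environment:)
# env = RouletteEnv()
# assert env.rule([2, 9, 15, 21, 28, 33, 7], [2, 9, 15, 21, 30, 31, 7]) == 3
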
def action_mapping(model_output_act, low_bound, high_bound):
    """ mapping action space [-1, 1] of model output
        to new action space [low_bound, high_bound].

    Args:
        model_output_act: np.array, which value is in [-1, 1]
        low_bound: float, low bound of env action space
        high_bound: float, high bound of env action space

    Returns:
        action: np.array, which value is in [low_bound, high_bound]
    """
    assert high_bound > low_bound
    action = low_bound + (model_output_act - (-1.0)) * (
            (high_bound - low_bound) / 2.0)
    return action
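
# A quick check of the linear mapping above:
# action_mapping(np.array([-1.0, 0.0, 1.0]), 1, 33) -> array([ 1., 17., 33.])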

def gosmall(data, max, min):
    _range = (max - min) / 2
    res = (data - min) / _range
    return res

class PPOModel(parl.core.fluid.Model):
    def __init__(self, obs_dim, act_dim, init_logvar=-1.0):
        super(PPOModel, self).__init__()
        self.policy_model = PolicyModel(obs_dim, act_dim, init_logvar)
        self.value_model = ValueModel(obs_dim, act_dim)
        self.policy_lr = self.policy_model.lr
        self.value_lr = self.value_model.lr

    def policy(self, obs):
        return self.policy_model.policy(obs)

    def policy_sample(self, obs):
        return self.policy_model.sample(obs)

    def value(self, obs):
        return self.value_model.value(obs)


class PolicyModel(parl.core.fluid.Model):
    def __init__(self, obs_dim, act_dim, init_logvar):
        super(PolicyModel, self).__init__()
        self.obs_dim = obs_dim
        self.act_dim = act_dim
        hid1_size = obs_dim * 100
        hid3_size = act_dim * 100
        hid2_size = int(np.sqrt(hid1_size * hid3_size))

        self.lr = 9e-4 / np.sqrt(hid2_size)

        self.fc1 = layers.fc(size=hid1_size, act='tanh')
        self.fc2 = layers.fc(size=hid2_size, act='tanh')
        self.fc3 = layers.fc(size=hid3_size, act='tanh')
        self.fc4 = layers.fc(size=act_dim, act='tanh')

        self.logvars = layers.create_parameter(
            shape=[act_dim],
            dtype='float32',
            default_initializer=fluid.initializer.ConstantInitializer(
                init_logvar))

    def policy(self, obs):
        hid1 = self.fc1(obs)
        hid2 = self.fc2(hid1)
        hid3 = self.fc3(hid2)
        means = self.fc4(hid3)
        logvars = self.logvars()
        return means, logvars

    def sample(self, obs):
        means, logvars = self.policy(obs)
        sampled_act = means + (
            layers.exp(logvars / 2.0) *  # stddev
            layers.gaussian_random(shape=(self.act_dim, ), dtype='float32'))
        return sampled_act


class ValueModel(parl.core.fluid.Model):
    def __init__(self, obs_dim, act_dim):
        super(ValueModel, self).__init__()
        hid1_size = obs_dim * 100
        hid3_size = 50
        hid2_size = int(np.sqrt(hid1_size * hid3_size))

        self.lr = 1e-2 / np.sqrt(hid2_size)

        self.fc1 = layers.fc(size=hid1_size, act='tanh')
        self.fc2 = layers.fc(size=hid2_size, act='tanh')
        self.fc3 = layers.fc(size=hid3_size, act='tanh')
        self.fc4 = layers.fc(size=1)

    def value(self, obs):
        hid1 = self.fc1(obs)
        hid2 = self.fc2(hid1)
        hid3 = self.fc3(hid2)
        V = self.fc4(hid3)
        V = layers.squeeze(V, axes=[])
        return V




class PPOAgent(parl.core.fluid.agent.Agent):
    def __init__(self,
                 algorithm,
                 obs_dim,
                 act_dim,
                 kl_targ,
                 loss_type,
                 beta=1.0,
                 epsilon=0.2,
                 policy_learn_times=20,
                 value_learn_times=10,
                 value_batch_size=256):
        self.alg = algorithm
        self.obs_dim = obs_dim
        self.act_dim = act_dim
        assert loss_type == 'CLIP' or loss_type == 'KLPEN'
        self.loss_type = loss_type
        super(PPOAgent, self).__init__(algorithm)

        self.policy_learn_times = policy_learn_times
        # Adaptive kl penalty coefficient
        self.beta = beta
        self.kl_targ = kl_targ

        self.value_learn_times = value_learn_times
        self.value_batch_size = value_batch_size
        self.value_learn_buffer = None

    def build_program(self):
        self.policy_predict_program = fluid.Program()
        self.policy_sample_program = fluid.Program()
        self.policy_learn_program = fluid.Program()
        self.value_predict_program = fluid.Program()
        self.value_learn_program = fluid.Program()

        with fluid.program_guard(self.policy_sample_program):
            obs = layers.data(
                name='obs', shape=[self.obs_dim], dtype='float32')
            sampled_act = self.alg.sample(obs)
            self.policy_sample_output = [sampled_act]

        with fluid.program_guard(self.policy_predict_program):
            obs = layers.data(
                name='obs', shape=[self.obs_dim], dtype='float32')
            means = self.alg.predict(obs)
            self.policy_predict_output = [means]

        with fluid.program_guard(self.policy_learn_program):
            obs = layers.data(
                name='obs', shape=[self.obs_dim], dtype='float32')
            actions = layers.data(
                name='actions', shape=[self.act_dim], dtype='float32')
            advantages = layers.data(
                name='advantages', shape=[1], dtype='float32')
            if self.loss_type == 'KLPEN':
                beta = layers.data(name='beta', shape=[], dtype='float32')
                loss, kl = self.alg.policy_learn(obs, actions, advantages,
                                                 beta)
            else:
                loss, kl = self.alg.policy_learn(obs, actions, advantages)

            self.policy_learn_output = [loss, kl]

        with fluid.program_guard(self.value_predict_program):
            obs = layers.data(
                name='obs', shape=[self.obs_dim], dtype='float32')
            value = self.alg.value_predict(obs)
            self.value_predict_output = [value]

        with fluid.program_guard(self.value_learn_program):
            obs = layers.data(
                name='obs', shape=[self.obs_dim], dtype='float32')
            val = layers.data(name='val', shape=[], dtype='float32')
            value_loss = self.alg.value_learn(obs, val)
            self.value_learn_output = [value_loss]

    def policy_sample(self, obs):
        feed = {'obs': obs}
        sampled_act = self.fluid_executor.run(
            self.policy_sample_program,
            feed=feed,
            fetch_list=self.policy_sample_output)[0]
        # print('policy_sample',sampled_act)
        return sampled_act

    def policy_predict(self, obs):
        feed = {'obs': obs}
        means = self.fluid_executor.run(
            self.policy_predict_program,
            feed=feed,
            fetch_list=self.policy_predict_output)[0]
        return means

    def value_predict(self, obs):
        feed = {'obs': obs}
        value = self.fluid_executor.run(
            self.value_predict_program,
            feed=feed,
            fetch_list=self.value_predict_output)
        return value
    # run one policy update batch with PPO
    def _batch_policy_learn(self, obs, actions, advantages):
        if self.loss_type == 'KLPEN':
            feed = {
                'obs': obs,
                'actions': actions,
                'advantages': advantages,
                'beta': self.beta
            }
        else:
            feed = {'obs': obs, 'actions': actions, 'advantages': advantages}
        [loss, kl] = self.fluid_executor.run(
            self.policy_learn_program,
            feed=feed,
            fetch_list=self.policy_learn_output)
        return loss, kl
    # run one value-function update batch
    def _batch_value_learn(self, obs, val):
        feed = {'obs': obs, 'val': val}
        value_loss = self.fluid_executor.run(
            self.value_learn_program,
            feed=feed,
            fetch_list=self.value_learn_output)[0]
        return value_loss
    # update the policy with PPO
    def policy_learn(self, obs, actions, advantages):
        """ Learn policy:

        1. Sync parameters of policy model to old policy model
        2. Fix old policy model, and learn policy model multi times
        3. if use KLPEN loss, Adjust kl loss coefficient: beta
        """
        self.alg.sync_old_policy()

        all_loss, all_kl = [], []
        for _ in range(self.policy_learn_times):
            loss, kl = self._batch_policy_learn(obs, actions, advantages)
            # print(loss)
            all_loss.append(loss)
            all_kl.append(kl)

        if self.loss_type == 'KLPEN':
            # Adaptive KL penalty coefficient
            if kl > self.kl_targ * 2:
                self.beta = 1.5 * self.beta
            elif kl < self.kl_targ / 2:
                self.beta = self.beta / 1.5

        return np.mean(all_loss), np.mean(all_kl)
    # fit the value function
    def value_learn(self, obs, value):
        """ Fit model to current data batch + previous data batch
        """
        data_size = obs.shape[0]

        if self.value_learn_buffer is None:
            obs_train, value_train = obs, value
        else:
            obs_train = np.concatenate([obs, self.value_learn_buffer[0]])
            value_train = np.concatenate([value, self.value_learn_buffer[1]])
        self.value_learn_buffer = (obs, value)

        all_loss = []
        for _ in range(self.value_learn_times):
            random_ids = np.arange(obs_train.shape[0])
            np.random.shuffle(random_ids)
            shuffle_obs_train = obs_train[random_ids]
            shuffle_value_train = value_train[random_ids]
            start = 0
            while start < data_size:
                end = start + self.value_batch_size
                value_loss = self._batch_value_learn(
                    shuffle_obs_train[start:end, :],
                    shuffle_value_train[start:end])
                all_loss.append(value_loss)
                start += self.value_batch_size
        return np.mean(all_loss)


class Scaler(object):
    """ Generate scale and offset based on running mean and stddev along axis=0

        offset = running mean
        scale = 1 / (stddev + 0.1) / 3 (i.e. 3x stddev = +/- 1.0)
    """

    def __init__(self, obs_dim):
        """
        Args:
            obs_dim: dimension of axis=1
        """
        self.vars = np.zeros(obs_dim)
        self.means = np.zeros(obs_dim)
        self.cnt = 0
        self.first_pass = True

    def update(self, x):
        """ Update running mean and variance (this is an exact method)
        Args:
            x: NumPy array, shape = (N, obs_dim)

        see: https://stats.stackexchange.com/questions/43159/how-to-calculate-pooled-variance-of-two-groups-given-known-group-variances-mean
        """
        if self.first_pass:
            self.means = np.mean(x, axis=0)
            self.vars = np.var(x, axis=0)
            self.cnt = x.shape[0]
            self.first_pass = False
        else:
            n = x.shape[0]
            new_data_var = np.var(x, axis=0)
            new_data_mean = np.mean(x, axis=0)
            new_data_mean_sq = np.square(new_data_mean)
            new_means = (
                (self.means * self.cnt) + (new_data_mean * n)) / (self.cnt + n)
            self.vars = (((self.cnt * (self.vars + np.square(self.means))) +
                          (n * (new_data_var + new_data_mean_sq))) /
                         (self.cnt + n) - np.square(new_means))
            self.vars = np.maximum(
                0.0, self.vars)  # occasionally goes negative, clip
            self.means = new_means
            self.cnt += n

    def get(self):
        """ returns 2-tuple: (scale, offset) """
        return 1 / (np.sqrt(self.vars) + 0.1) / 3, self.means
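
# A minimal usage sketch (shapes are hypothetical). Because update() uses exact
# pooled-mean/variance formulas, several update() calls give the same statistics
# as a single update() on the concatenated batches:
# s = Scaler(obs_dim=8)
# s.update(np.random.randn(100, 8))   # first batch
# s.update(np.random.randn(50, 8))    # pooled with the first batch
# scale, offset = s.get()             # scale = 1 / (3 * (stddev + 0.1)), offset = running mean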



# Run one training episode
def run_train_episode(env, agent, scaler):
    obs = env.reset()
    # print("obs = ", obs)
    observes, actions, rewards, unscaled_obs = [], [], [], []
    step = 1.0
    scale, offset = scaler.get()
    scale[-1] = 1.0  # don't scale time step feature
    offset[-1] = 0.0  # don't offset time step feature
    while True:
        obs = obs.reshape((1, -1))
        obs = np.append(obs, [[step]], axis=1)  # add time step feature
        unscaled_obs.append(obs)
        obs = (obs - offset) * scale  # center and scale observations
        obs = obs.astype('float32')
        observes.append(obs)
        reward = 0
        # try:
        action0 = agent.policy_sample(obs)
        # action0 = np.argmax(action0[-1])
        action1 = np.clip(np.random.normal(action0, 1.0), -1.0, 1.0)
        # action = np.clip(action, -1.0, 1.0)
        # This used to raise errors while training the quadrotor, but the
        # out-of-range values were never printed here..
        # for i in range(len(action1[0])):
        #     if action1[0][i] > 1:
        #         print('greater than 1', action1[0][i])
        #     elif action1[0][i] < -1:
        #         print('less than -1', action1[0][i])

        # even after clip, this could still raise an error
        action2 = action_mapping(action1, 1, 33)

        action1 = action1.reshape((1, -1)).astype('float32')
        # print("action = ", action)
        actions.append(action1)

        action = action2.reshape((1, -1)).astype('float32')
        obs, reward, done, _ = env.step(np.squeeze(action))
        # print(obs, reward, done, _)
        # reward = np.clip(reward, -1.0, 1.0)
        # reward = gosmall(reward, 17721088, -17721088)
        # except Exception as e:
        #     print(e)
        #     print("action error -- action0={}, action1={}, action2={}, action={}".format(action0, action1, action2, action))

        rewards.append(reward)
        step += 1e-3  # increment time step feature

        if done:
            break

    return (np.concatenate(observes), np.concatenate(actions),
            np.array(rewards, dtype='float32'), np.concatenate(unscaled_obs))


# Run one evaluation episode
def run_evaluate_episode(env, agent, scaler):
    obs = env.reset()
    print("evaluating")
    rewards = []
    step = 0.0
    scale, offset = scaler.get()
    scale[-1] = 1.0  # don't scale time step feature
    offset[-1] = 0.0  # don't offset time step feature
    # while True:
    obs = obs.reshape((1, -1))
    obs = np.append(obs, [[step]], axis=1)  # add time step feature
    obs = (obs - offset) * scale  # center and scale observations
    obs = obs.astype('float32')

    # try:
    action0 = agent.policy_sample(obs)
    # action0 = np.argmax(action0[-1])
    action1 = np.clip(action0, -1.0, 1.0)
    # action = np.clip(action, -1.0, 1.0)
    # This used to raise errors while training the quadrotor, but the
    # out-of-range values were never printed here..
    # for i in range(len(action1[0])):
    #     if action1[0][i] > 1:
    #         print('greater than 1', action1[0][i])
    #     elif action1[0][i] < -1:
    #         print('less than -1', action1[0][i])

    # even after clip, this could still raise an error
    action2 = action_mapping(action1, 1, 33)

    action = action2.reshape((1, -1)).astype('float32')
    # actions.append(action)

    obs, reward, done, _ = env.step(np.squeeze(action))
    # reward = np.clip(reward, -1.0, 1.0)
    # reward = gosmall(reward, 17720188, -17720188)
    # except Exception as e:
    #     print(e)
    #     print("action error -- action0={}, action1={}, action2={}, action={}".format(action0, action1, action2, action))
    #
    # action = agent.policy_predict(obs)
    # action = np.clip(action, -1.0, 1.0)
    # # for i in range(len(action[0])):
    # #     if action[0][i] > 1:
    # #         print('greater than 1', action[0][i])
    # #     elif action[0][i] < -1:
    # #         print('less than -1', action[0][i])
    # action = action_mapping(action, 1, 17721088)
    #
    # obs, reward, done, _ = env.step(np.squeeze(action))
    rewards.append(reward)

    step += 1e-3  # increment time step feature

        # if done :
        #     break
    return np.sum(rewards)


# Collect trajectories of experience
def collect_trajectories(env, agent, scaler, episodes):
    logger.info("collecting training data, episodes={}".format(episodes))
    trajectories, all_unscaled_obs = [], []
    for e in range(episodes):
        obs, actions, rewards, unscaled_obs = run_train_episode(
            env, agent, scaler)
        trajectories.append({
            'obs': obs,
            'actions': actions,
            'rewards': rewards,
        })
        all_unscaled_obs.append(unscaled_obs)
    # update running statistics for scaling observations
    logger.info("收集训练数据结束")
    scaler.update(np.concatenate(all_unscaled_obs))
    return trajectories


# Build the training data: advantages and discounted returns
def build_train_data(trajectories, agent):
    train_obs, train_actions, train_advantages, train_discount_sum_rewards = [], [], [], []
    for trajectory in trajectories:
        pred_values = agent.value_predict(trajectory['obs'])
        # print(pred_values)
        # scale rewards
        scale_rewards = trajectory['rewards'] * (1 - gamma)

        discount_sum_rewards = calc_discount_sum_rewards(
            scale_rewards, gamma).astype('float32')
        # print(pred_values)
        advantages = calc_gae(scale_rewards, pred_values, 0, gamma,
                              lam)

        # normalize advantages
        advantages = (advantages - advantages.mean()) / (
                advantages.std() + 1e-6)
        advantages = advantages.astype('float32')

        train_obs.append(trajectory['obs'])
        train_actions.append(trajectory['actions'])
        train_advantages.append(advantages)
        train_discount_sum_rewards.append(discount_sum_rewards)

    train_obs = np.concatenate(train_obs)
    train_actions = np.concatenate(train_actions)
    train_advantages = np.concatenate(train_advantages)
    train_discount_sum_rewards = np.concatenate(train_discount_sum_rewards)
    # print("train_obs={}, train_actions={}, train_advantages={}, train_discount_sum_rewards={}".format(train_obs, train_actions, train_advantages, train_discount_sum_rewards))
    return train_obs, train_actions, train_advantages, train_discount_sum_rewards
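

# calc_gae above comes from parl.utils.rl_utils. As a rough reference only, here
# is a sketch of standard GAE(lambda) for one trajectory, assuming `rewards` and
# `values` are 1-D numpy arrays of equal length and `final_value` is the value
# bootstrap for the state after the last step (this is not PARL's actual code):
def _gae_sketch(rewards, values, final_value, gamma, lam):
    # TD residuals: delta_t = r_t + gamma * V(s_{t+1}) - V(s_t)
    next_values = np.append(values[1:], final_value)
    deltas = rewards + gamma * next_values - values
    # advantages are the discounted sum of residuals with factor gamma * lam
    advantages = np.zeros_like(deltas)
    running = 0.0
    for t in reversed(range(len(deltas))):
        running = deltas[t] + gamma * lam * running
        advantages[t] = running
    return advantages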


def main(index_model):
    # env = ContinuousCartPoleEnv()
    env = RouletteEnv()
    # env = make_env(envs, task=task)
    obs_dim = 7
    act_dim = 7
    obs_dim += 1  # add 1 to the obs dim for the time step feature (probably to make it easier to introduce a decay factor)

    scaler = Scaler(obs_dim)

    model = PPOModel(obs_dim, act_dim)
    alg = PPO(
        model,
        act_dim=act_dim,
        policy_lr=model.policy_lr,
        value_lr=model.value_lr)
    agent = PPOAgent(
        alg, obs_dim, act_dim, kl_targ, loss_type=loss_type)

    # run a few episodes to initialize the scaler
    logger.info("pre-collecting data")
    collect_trajectories(env, agent, scaler, episodes=500)
    logger.info("finished pre-collecting data")
    test_flag = 0
    total_steps = 0
    # reload a saved model
    # index_model = index_model
    # agent.restore('./ormodel_dir/{}/policy_steps_{}.ckpt'.format(index_model, index_model), agent.policy_learn_program)
    # agent.restore('./ormodel_dir/{}/value_steps_{}.ckpt'.format(index_model, index_model), agent.value_learn_program)
    # print('restore ckpt success')
    logger.info("train_total_steps={}".format(train_total_steps))
    while total_steps < train_total_steps:

        trajectories = collect_trajectories(
            env, agent, scaler, episodes=episodes_per_batch)

        total_steps += sum([t['obs'].shape[0] for t in trajectories])
        total_train_rewards = sum([np.sum(t['rewards']) for t in trajectories])
        # build the training data
        # logger.info("building training data")
        train_obs, train_actions, train_advantages, train_discount_sum_rewards = build_train_data(
            trajectories, agent)
        # compute policy_loss and kl
        # logger.info("start learning")
        policy_loss, kl = agent.policy_learn(train_obs, train_actions,
                                             train_advantages)
        value_loss = agent.value_learn(train_obs, train_discount_sum_rewards)

        logger.info(
            'Steps {}, Train reward: {}, Policy loss: {}, KL: {}, Value loss: {}'
                .format(total_steps, total_train_rewards / episodes_per_batch,
                        policy_loss, kl, value_loss))
        if total_steps // test_every_steps >= test_flag:
            while total_steps // test_every_steps >= test_flag:
                test_flag += 1
            eval_reward = run_evaluate_episode(env, agent,scaler)
            logger.info('Steps {}, Evaluate reward: {}'.format(
                total_steps, eval_reward))
            print("保存模型_",str(int(total_steps/100000)))
            # 每评估一次,就保存一次模型,以训练的step数命名
            pckpt = 'ormodel_dir/{}/policy_steps_{}.ckpt'.format(int(total_steps/100000),int(total_steps/100000))
            agent.save(pckpt, agent.policy_learn_program)
            vckpt = 'ormodel_dir/{}/volicy_steps_{}.ckpt'.format(int(total_steps/100000),int(total_steps/100000))
            agent.save(vckpt, agent.value_learn_program)
            tmp_m = int(total_steps / 100000)
            try:
                # delete older checkpoint directories, keeping roughly the last 5
                if tmp_m > 5:
                    path = './ormodel_dir/' + str(tmp_m - 5)
                    shutil.rmtree(path)
            except OSError:
                print("failed to delete", tmp_m)


if __name__ == "__main__":
    main(12)

