Train Your Own Small Model
ChatGPT has already reached GPT-4o, so why train a small model yourself?
- If you can train a small model yourself, it means you have genuinely mastered the Transformer.
- Some scenarios simply call for a small model: small and beautiful, just as microcontrollers still have a market of their own.
- Training a small model is not the end in itself; the point is to walk through the basic workflow of large-model training.
To understand or write the code that trains a small model, you need some prerequisites: the Transformer architecture, machine learning, linear algebra, and related topics.
If you can't follow all of it, that's fine too; just run it and see what it produces.
On to the code.
!pip install numpy requests torch tiktoken matplotlib pandas
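Before the full script, it may help to see what the tokenizer does on its own. Here is a minimal sketch of tiktoken's cl100k_base encoding, which the script below uses; the sample sentence is just illustrative:

import tiktoken

enc = tiktoken.get_encoding("cl100k_base")  # the same BPE vocabulary used in the training script
ids = enc.encode("The salesperson greets the customer.")  # illustrative sentence
print(ids)              # a list of integer token IDs
print(enc.decode(ids))  # decodes back to the original string

These integer IDs are what the model actually trains on; it never sees raw characters.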
import os
import math
import requests
import tiktoken
import torch
import torch.nn as nn
from torch.nn import functional as F

# Hyperparameters
batch_size = 4  # Number of sequences per training batch
context_length = 16  # Length of the token chunk in each sample
d_model = 64  # The size of our model token embeddings
num_blocks = 8  # Number of transformer blocks
num_heads = 4  # Number of heads in multi-head attention
learning_rate = 1e-3  # 0.001
dropout = 0.1  # Dropout rate
max_iters = 5000  # Total number of training iterations <- change this to a smaller number for testing
eval_interval = 50  # How often to evaluate
eval_iters = 20  # Number of iterations to average for evaluation
device = 'cuda' if torch.cuda.is_available() else 'cpu'  # Use the GPU if it's available
TORCH_SEED = 1337
torch.manual_seed(TORCH_SEED)

# Download the training data on first run
if not os.path.exists('data/sales_textbook.txt'):
    os.makedirs('data', exist_ok=True)  # make sure the data directory exists
    url = 'https://huggingface.co/datasets/goendalf666/sales-textbook_for_convincing_and_selling/raw/main/sales_textbook.txt'
    with open('data/sales_textbook.txt', 'w') as f:
        f.write(requests.get(url).text)

with open('data/sales_textbook.txt', 'r', encoding='utf-8') as f:
    text = f.read()

# Use tiktoken (the same BPE vocabulary as GPT-3.5/GPT-4) to tokenize the source text
encoding = tiktoken.get_encoding("cl100k_base")
tokenized_text = encoding.encode(text)
max_token_value = max(tokenized_text) + 1  # the maximum value among the token IDs
tokenized_text = torch.tensor(tokenized_text, dtype=torch.long, device=device)  # put the tokenized text into a tensor

# Split into train and validation sets
split_idx = int(len(tokenized_text) * 0.9)
train_data = tokenized_text[:split_idx]
val_data = tokenized_text[split_idx:]

# Define the feed-forward network
class FeedForward(nn.Module):
    def __init__(self):
        super().__init__()
        self.d_model = d_model
        self.dropout = dropout
        self.ffn = nn.Sequential(
            nn.Linear(in_features=self.d_model, out_features=self.d_model * 4),
            nn.ReLU(),
            nn.Linear(in_features=self.d_model * 4, out_features=self.d_model),
            nn.Dropout(dropout),
        )

    def forward(self, x):
        return self.ffn(x)

# Define scaled dot-product attention (a single head)
class Attention(nn.Module):
    def __init__(self, head_size: int):
        super().__init__()
        self.d_model = d_model
        self.head_size = head_size
        self.context_length = context_length
        self.dropout = dropout

        self.key_layer = nn.Linear(in_features=self.d_model, out_features=self.head_size, bias=False)
        self.query_layer = nn.Linear(in_features=self.d_model, out_features=self.head_size, bias=False)
        self.value_layer = nn.Linear(in_features=self.d_model, out_features=self.head_size, bias=False)
        self.register_buffer('tril', torch.tril(
            torch.ones((self.context_length, self.context_length))))  # lower-triangular causal mask
        self.dropout_layer = nn.Dropout(self.dropout)

    def forward(self, x):
        B, T, C = x.shape  # batch size, time steps (current context_length), channels (dimensions)
        assert T <= self.context_length
        assert C == self.d_model
        q = self.query_layer(x)
        k = self.key_layer(x)
        v = self.value_layer(x)

        # Scaled dot-product attention: Q @ K^T / sqrt(d_k)
        weights = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))
        # Apply the causal mask so each position attends only to earlier positions
        weights = weights.masked_fill(self.tril[:T, :T] == 0, float('-inf'))
        weights = F.softmax(input=weights, dim=-1)
        weights = self.dropout_layer(weights)

        # Weighted sum of the values: weights @ V
        out = weights @ v
        return out

class MultiHeadAttention(nn.Module):
    def __init__(self, head_size: int):
        super().__init__()
        self.num_heads = num_heads
        self.head_size = head_size
        self.d_model = d_model
        self.context_length = context_length
        self.dropout = dropout
        self.heads = nn.ModuleList([Attention(head_size=self.head_size) for _ in range(self.num_heads)])
        self.projection_layer = nn.Linear(in_features=self.d_model, out_features=self.d_model)
        self.dropout_layer = nn.Dropout(dropout)

    def forward(self, x):
        out = torch.cat([h(x) for h in self.heads], dim=-1)
        out = self.projection_layer(out)
        out = self.dropout_layer(out)
        return out

class TransformerBlock(nn.Module):
    def __init__(self, num_heads: int):
        super().__init__()
        self.d_model = d_model
        self.context_length = context_length
        self.head_size = d_model // num_heads  # d_model must be divisible by num_heads
        self.num_heads = num_heads
        self.dropout = dropout

        self.multi_head_attention_layer = MultiHeadAttention(head_size=self.head_size)
        self.feed_forward_layer = FeedForward()
        self.layer_norm_1 = nn.LayerNorm(normalized_shape=self.d_model)
        self.layer_norm_2 = nn.LayerNorm(normalized_shape=self.d_model)

    def forward(self, x):
        # Note: the order of operations differs from the original Transformer paper (pre-norm rather than post-norm).
        # The order here is: LayerNorm -> multi-head attention -> LayerNorm -> feed-forward
        x = x + self.multi_head_attention_layer(self.layer_norm_1(x))  # residual connection
        x = x + self.feed_forward_layer(self.layer_norm_2(x))  # residual connection
        return x

class TransformerLanguageModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.d_model = d_model
        self.context_length = context_length
        self.num_heads = num_heads
        self.num_blocks = num_blocks
        self.dropout = dropout
        self.max_token_value = max_token_value
        # Set up the token-embedding look-up table
        self.token_embedding_lookup_table = nn.Embedding(num_embeddings=self.max_token_value + 1, embedding_dim=self.d_model)

        # Run all the transformer blocks.
        # Different from the original paper, here we add a final layer norm after all the blocks
        self.transformer_blocks = nn.Sequential(*(
            [TransformerBlock(num_heads=self.num_heads) for _ in range(self.num_blocks)]
            + [nn.LayerNorm(self.d_model)]
        ))
        self.language_model_out_linear_layer = nn.Linear(in_features=self.d_model, out_features=self.max_token_value)

    def forward(self, idx, targets=None):
        B, T = idx.shape
        # Set up the position-encoding look-up table,
        # following the same approach as the original Transformer paper (sine and cosine functions)
        position_encoding_lookup_table = torch.zeros(self.context_length, self.d_model)
        position = torch.arange(0, self.context_length, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, self.d_model, 2).float() * (-math.log(10000.0) / self.d_model))
        position_encoding_lookup_table[:, 0::2] = torch.sin(position * div_term)
        position_encoding_lookup_table[:, 1::2] = torch.cos(position * div_term)
        # Slice position_encoding_lookup_table from (context_length, d_model) down to (T, d_model)
        position_embedding = position_encoding_lookup_table[:T, :].to(device)
        x = self.token_embedding_lookup_table(idx) + position_embedding
        x = self.transformer_blocks(x)
        # The "logits" are the output values of our model before applying softmax
        logits = self.language_model_out_linear_layer(x)

        if targets is not None:
            B, T, C = logits.shape
            logits_reshaped = logits.view(B * T, C)
            targets_reshaped = targets.view(B * T)
            loss = F.cross_entropy(input=logits_reshaped, target=targets_reshaped)
        else:
            loss = None
        return logits, loss

    def generate(self, idx, max_new_tokens):
        # idx is a (B, T) array of indices in the current context
        for _ in range(max_new_tokens):
            # Crop idx to the max size of our positional-embedding table
            idx_crop = idx[:, -self.context_length:]
            # Get predictions
            logits, loss = self(idx_crop)
            # Take the last time step from logits, whose dimensions are (B, T, C)
            logits_last_timestep = logits[:, -1, :]
            # Apply softmax to get probabilities
            probs = F.softmax(input=logits_last_timestep, dim=-1)
            # Sample from the probability distribution
            idx_next = torch.multinomial(input=probs, num_samples=1)
            # Append the sampled index idx_next to idx
            idx = torch.cat((idx, idx_next), dim=1)
        return idx

# Initialize the model
model = TransformerLanguageModel()
model = model.to(device)

# Fetch a batch of inputs x and targets y (y is x shifted one token to the right)
def get_batch(split: str):
    data = train_data if split == 'train' else val_data
    idxs = torch.randint(low=0, high=len(data) - context_length, size=(batch_size,))
    x = torch.stack([data[idx:idx + context_length] for idx in idxs]).to(device)
    y = torch.stack([data[idx + 1:idx + context_length + 1] for idx in idxs]).to(device)
    return x, y

# Estimate the loss on both splits, averaged over eval_iters batches
@torch.no_grad()
def estimate_loss():
    out = {}
    model.eval()
    for split in ['train', 'valid']:
        losses = torch.zeros(eval_iters)
        for k in range(eval_iters):
            x_batch, y_batch = get_batch(split)
            logits, loss = model(x_batch, y_batch)
            losses[k] = loss.item()
        out[split] = losses.mean()
    model.train()
    return out

# Use the AdamW optimizer
optimizer = torch.optim.AdamW(params=model.parameters(), lr=learning_rate)
tracked_losses = list()
for step in range(max_iters):
    # Note: this evaluates every eval_iters (20) steps; eval_interval is defined above but unused
    if step % eval_iters == 0 or step == max_iters - 1:
        losses = estimate_loss()
        tracked_losses.append(losses)
        print('Step:', step, 'Training Loss:', round(losses['train'].item(), 3),
              'Validation Loss:', round(losses['valid'].item(), 3))

    xb, yb = get_batch('train')
    logits, loss = model(xb, yb)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()

# Save the model state dictionary
torch.save(model.state_dict(), 'model-ckpt.pt')

# Generate from the trained model
model.eval()
start = 'The salesperson'
start_ids = encoding.encode(start)
x = (torch.tensor(start_ids, dtype=torch.long, device=device)[None, ...])
y = model.generate(x, max_new_tokens=100)
print('---------------')
print(encoding.decode(y[0].tolist()))
print('---------------')
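Since the script saves a checkpoint (model-ckpt.pt), the trained weights can be reloaded later without retraining. A minimal sketch, assuming the classes, hyperparameters, and encoding from the script above are still in scope; the prompt 'The customer' is just an illustrative choice:

# Reload the saved weights and sample from a new prompt
model = TransformerLanguageModel()
model.load_state_dict(torch.load('model-ckpt.pt', map_location=device))
model = model.to(device)
model.eval()

prompt = 'The customer'  # illustrative prompt, not from the original script
idx = torch.tensor(encoding.encode(prompt), dtype=torch.long, device=device)[None, ...]
with torch.no_grad():
    out = model.generate(idx, max_new_tokens=50)
print(encoding.decode(out[0].tolist()))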
Output of a sample run. Evaluation runs every 20 steps; the log below keeps only every 500th step, with the intermediate entries omitted:

Step: 0 Training Loss: 11.663 Validation Loss: 11.716
Step: 500 Training Loss: 5.123 Validation Loss: 6.044
Step: 1000 Training Loss: 4.304 Validation Loss: 5.374
Step: 1500 Training Loss: 3.952 Validation Loss: 5.191
Step: 2000 Training Loss: 3.696 Validation Loss: 5.011
Step: 2500 Training Loss: 3.484 Validation Loss: 5.054
Step: 3000 Training Loss: 3.512 Validation Loss: 5.071
Step: 3500 Training Loss: 3.179 Validation Loss: 5.061
Step: 4000 Training Loss: 3.118 Validation Loss: 4.976
Step: 4500 Training Loss: 2.927 Validation Loss: 4.883
Step: 4999 Training Loss: 2.828 Validation Loss: 5.13

And the 100 tokens generated from the prompt "The salesperson":

---------------
The salesperson, the customer, the salesperson can effectively gather information, and ultimately increasing the likelihood of the sale. 1. Be mindful of reinforcing of-ended questions are identified persuasive and manipulative use can significantly impact, demonstrating genuine interest requires a deeper level that we have successfully suit their approach to share their responses. Some customers, while some the root cause for them, showcasing patterns, sales professionals can meet their concerns and increase their requirements. When faced with your product or service is not just about attacks but crafted situations
---------------
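In the log above, the validation loss flattens out near 5 while the training loss keeps falling; this is easier to see in a plot. The pip install at the top already pulls in matplotlib and pandas, although the training script itself never uses them. A minimal sketch, assuming the tracked_losses list from the training loop is still in memory:

import pandas as pd
import matplotlib.pyplot as plt

# tracked_losses is a list of {'train': tensor, 'valid': tensor} dicts from estimate_loss()
df = pd.DataFrame([{'train': l['train'].item(), 'valid': l['valid'].item()} for l in tracked_losses])
df.plot()
plt.title('Training vs. validation loss')
plt.xlabel('evaluation round (every 20 steps)')
plt.ylabel('cross-entropy loss')
plt.show()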
View or run it online: https://colab.research.google.com/drive/1hvgnvZhTNqJbfaHIN_29h0p9lqSx1IXU?usp=sharing
Originality notice: this article is my original work, first published on AI ONES (https://wuxiongwei.com). If you repost it, please keep this link. Thank you.