零基础学python-5.3 数字变量与除法

本文介绍了Python中变量的基本概念及使用注意事项,并对比了传统除法与地板除的区别。通过实例展示了变量赋值过程,强调变量使用前需先赋值的原则。同时,详细解释了在Python 2.7与Python 3.4中除法运算的不同表现。

1.回顾一下变量

1)变量在它第一次赋值时创建

2)变量在表达式中使用将被替换为它们的值

3)变量在表达式中使用以前必须已经赋值

4)变量像对象一样不需要在一开始进行声明

我们下面把正反两种例子都举出来:

2.传统除法与地板除

传统除法:使用/

地板除:使用//,等价于传统除法后使用math.floor方法

从上图我们可以得出以上结论

我们上面的结论是在python3.4上面实现的

我们对比一下python2.7与python3.4同样的表达式输出的结果

 

同样的表达式在python2.7与python3.4中输出的结果截然不同:在python2.7中,两个整数相除时/执行的是地板除;而在python3.4中/始终执行真除法(返回浮点结果)。这是我们后面需要注意的地方。

 


 

就说到这里,谢谢大家

------------------------------------------------------------------

点击跳转零基础学python-目录

# -*- coding: utf-8 -*-
"""Traffic-classification pipeline: data cleaning + XGBoost and MLP baselines.

Reads a feature CSV, repairs the inf/NaN artifacts in duration/rate features,
then trains and evaluates (a) a native-API XGBoost multiclass model and
(b) a small PyTorch MLP, both predicting the `second_id` label.

NOTE(review): this script was pasted as three unformatted lines; it has been
reformatted and the following defects fixed:
  * `second_id` labels are now LabelEncoder-encoded to contiguous 0..K-1,
    which `multi:softmax` and `CrossEntropyLoss` both require;
  * -inf is handled everywhere +inf was (replacement and counting);
  * `winsorize` output (a masked array) is converted before assignment;
  * the native `xgb.train` API takes `"seed"`, not `"random_state"`.
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, RobustScaler
from sklearn.metrics import classification_report, accuracy_score
import xgboost as xgb
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from scipy.stats.mstats import winsorize

# ---------------------- 1. 加载数据 ----------------------
df = pd.read_csv("../data/900_gather_feature_results.csv")  # 替换为您的文件路径
print(f"数据形状: {df.shape}")
print(f"标签类别分布:\n{df['second_id'].value_counts()}")

# ---------------------- 2. 特征标签分离 ----------------------
# Drop the label and the non-numeric columns (app_name, timestamp).
X = df.drop(["second_id", "app_name", "timestamp"], axis=1)
# FIX: encode labels to contiguous integers 0..K-1.  XGBoost "multi:softmax"
# and torch CrossEntropyLoss both require class ids in that exact range; raw
# second_id values may be arbitrary integers.  (If they already are 0..K-1,
# this encoding is the identity.)
label_encoder = LabelEncoder()
y = pd.Series(label_encoder.fit_transform(df["second_id"]), index=df.index)

# ---------------------- 3. 处理分类特征(protocol) ----------------------
categorical_cols = X.select_dtypes(include=["object"]).columns.tolist()
print(f"分类特征: {categorical_cols}")
for col in categorical_cols:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col])  # string -> integer codes

# ---------------------- 4. 关键特征修复:处理导致inf的根源问题 ----------------------
# 4.1 duration is the denominator of every rate feature: replace 0/±inf by the
#     column median so the ratio recomputation below cannot produce inf.
if "duration" in X.columns:
    duration_median = X["duration"].replace([0, np.inf, -np.inf], np.nan).median()
    X["duration"] = X["duration"].replace([0, np.inf, -np.inf], duration_median)
    X["duration"] = X["duration"].fillna(duration_median)  # remaining NaN

# 4.2 Recompute every rate feature from its numerator over the cleaned duration.
ratio_features = [
    "bytes_rate", "fwd_bytes_rate", "bwd_bytes_rate",
    "packets_rate", "fwd_packets_rate", "bwd_packets_rate",
]
for feature in ratio_features:
    if feature in X.columns:
        # NOTE(review): this maps "fwd_bytes_rate" -> "fwd_total_payload_bytes",
        # but also "packets_rate" -> "packets_total_payload_bytes", which is
        # almost certainly not the intended numerator for the packet-rate
        # features — confirm against the CSV's actual column names.  The
        # `if numerator in X.columns` guard just skips them silently.
        numerator = feature.split("_")[0] + "_total_payload_bytes"
        if numerator in X.columns and "duration" in X.columns:
            X[feature] = X[numerator] / X["duration"]
            # FIX: replace -inf as well as +inf with the column median.
            X[feature] = X[feature].replace([np.inf, -np.inf], X[feature].median())

# ---------------------- 5. 通用异常值缺失值处理 ----------------------
print("\n===== 数据异常值检查(修复前) =====")
print("NaN总数:", X.isnull().sum().sum())
# FIX: np.isinf counts both +inf and -inf; (X == np.inf) missed -inf.
print("inf总数:", np.isinf(X.to_numpy(dtype=float)).sum())

# 5.1 Fill every remaining NaN with the column median (robust to outliers).
for col in X.columns:
    X[col] = X[col].fillna(X[col].median())

# 5.2 Clip extreme values: 2.5% winsorization on each tail keeps 95% of data.
for col in X.columns:
    # FIX: winsorize returns a numpy masked array; convert it to a plain
    # ndarray before assigning back into the DataFrame column.
    X[col] = np.asarray(winsorize(X[col], limits=[0.025, 0.025]))

# Global ±inf sweep before the final verification (replace by column median).
for col in X.columns:
    col_median = X[col].median()
    X[col] = X[col].replace([np.inf, -np.inf], col_median)

# 5.3 Verify the matrix is clean.
print("\n===== 数据异常值检查(修复后) =====")
print("NaN总数:", X.isnull().sum().sum())
print("inf总数:", np.isinf(X.to_numpy(dtype=float)).sum())
print("特征数量:", X.shape)

# ---------------------- 6. 划分训练集测试集 ----------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y  # stratified split keeps class ratios
)

# ---------------------- 7. 特征标准化(鲁棒标准化) ----------------------
scaler = RobustScaler()  # median/IQR scaling — robust to residual outliers
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ---------------------- 8. XGBoost训练评估 ----------------------
dtrain = xgb.DMatrix(X_train_scaled, label=y_train)
dtest = xgb.DMatrix(X_test_scaled, label=y_test)
num_classes = len(y.unique())
params = {
    "objective": "multi:softmax",
    "num_class": num_classes,
    "max_depth": 5,  # shallow trees to limit overfitting
    "eta": 0.1,
    "eval_metric": "mlogloss",
    "verbosity": 0,
    # FIX: the native learning API (xgb.train) takes "seed"; "random_state"
    # is the sklearn-wrapper spelling and was silently ignored here.
    "seed": 42,
}
num_round = 100
model_xgb = xgb.train(params, dtrain, num_round)
y_pred_xgb = model_xgb.predict(dtest)  # multi:softmax returns class ids (as floats)
print(f"\nXGBoost 测试集准确率: {accuracy_score(y_test, y_pred_xgb):.4f}")
print("分类报告:\n", classification_report(y_test, y_pred_xgb))

# ---------------------- 9. 神经网络训练(可选) ----------------------
X_train_tensor = torch.FloatTensor(X_train_scaled)
y_train_tensor = torch.LongTensor(y_train.values)
X_test_tensor = torch.FloatTensor(X_test_scaled)
y_test_tensor = torch.LongTensor(y_test.values)
batch_size = 64
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)


class MLP(nn.Module):
    """Three-layer MLP classifier: Linear-ReLU-Linear-ReLU-Dropout-Linear."""

    def __init__(self, input_size, hidden_size, num_classes):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, hidden_size // 2)
        self.dropout = nn.Dropout(0.3)  # regularization against overfitting
        self.fc3 = nn.Linear(hidden_size // 2, num_classes)

    def forward(self, x):
        """Return raw class logits for a (batch, input_size) tensor."""
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc3(x)
        return x


input_size = X_train.shape[1]
hidden_size = 256
num_classes = len(y.unique())
model_nn = MLP(input_size, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()  # expects integer class targets 0..K-1
optimizer = optim.Adam(model_nn.parameters(), lr=0.001)
num_epochs = 50
for epoch in range(num_epochs):
    model_nn.train()
    running_loss = 0.0
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model_nn(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        # accumulate the sum of per-sample losses for the epoch average
        running_loss += loss.item() * batch_X.size(0)
    epoch_loss = running_loss / len(train_loader.dataset)
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")

# Evaluate the MLP: argmax over logits, dropout disabled by eval().
model_nn.eval()
y_pred_nn = []
with torch.no_grad():
    for batch_X, _ in DataLoader(test_dataset, batch_size=batch_size):
        outputs = model_nn(batch_X)
        _, predicted = torch.max(outputs.data, 1)
        y_pred_nn.extend(predicted.numpy())
print(f"\n神经网络 测试集准确率: {accuracy_score(y_test, y_pred_nn):.4f}")
最新发布
11-07
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值