GAN的实现

keras代码来源

生成器:由输入的噪音生成一个单通道图片

def generator_model():
    model = Sequential()
    model.add(Dense(input_dim=100, output_dim=1024))     # 100到1024全连接
    model.add(Activation('tanh'))
    model.add(Dense(128*7*7))                            # 全链接以用来生成128通道大小7*7的图片
    model.add(BatchNormalization())
    model.add(Activation('tanh'))
    model.add(Reshape((7, 7, 128), input_shape=(128*7*7,)))
    model.add(UpSampling2D(size=(2, 2)))                 # 放大图片14*14(插值或反卷积操作)
    model.add(Conv2D(64, (5, 5), padding='same'))        # 降维到64
    model.add(Activation('tanh'))
    model.add(UpSampling2D(size=(2, 2)))                 # 再放大图片28*28
    model.add(Conv2D(1, (5, 5), padding='same'))         # 降维到1
    model.add(Activation('tanh'))
    return model

判别器:输入图片输出一个标量,像一个CNN

def discriminator_model():
    model = Sequential()
    model.add(
            Conv2D(64, (5, 5),
            padding='same',
            input_shape=(28, 28, 1))
            )
    model.add(Activation('tanh'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(128, (5, 5)))
    model.add(Activation('tanh'))
    model.add(MaxPooling2D(pool_size=(2, 2))) # “表示层”,特征提取层
    model.add(Flatten())
    model.add(Dense(1024))
    model.add(Activation('tanh'))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    return model

训练方法

def generator_containing_discriminator(g, d):
    model = Sequential()
    model.add(g)
    d.trainable = False
    model.add(d)
    return model
def train(BATCH_SIZE):
	d = discriminator_model()
    g = generator_model()
    d_on_g = generator_containing_discriminator(g, d)
    for epoch in range(100):

        for index in range(int(X_train.shape[0]/BATCH_SIZE)):
            noise = np.random.uniform(-1, 1, size=(BATCH_SIZE, 100))
            image_batch = X_train[index*BATCH_SIZE:(index+1)*BATCH_SIZE]
            generated_images = g.predict(noise, verbose=0)
            
            X = np.concatenate((image_batch, generated_images)) # 真实样本,生成样本的数据混合
            y = [1] * BATCH_SIZE + [0] * BATCH_SIZE             # 设置标签,image_batch为1,generated_images为0
            d_loss = d.train_on_batch(X, y)                     # 对判别器进行训练
            
            noise = np.random.uniform(-1, 1, (BATCH_SIZE, 100))
            d.trainable = False                                 # 固定判别器参数
            g_loss = d_on_g.train_on_batch(noise, [1] * BATCH_SIZE)# 对g(非固定参数)+d(固定参数)进行训练
            d.trainable = True                                  # 解开判别器的限制
            
            if index % 10 == 9:
                g.save_weights('generator', True)
                d.save_weights('discriminator', True)

全部代码

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Reshape
from keras.layers.core import Activation
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import UpSampling2D
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers.core import Flatten
from keras.optimizers import SGD
from keras.datasets import mnist
import numpy as np
from PIL import Image
import argparse
import math


def generator_model():
    model = Sequential()
    model.add(Dense(input_dim=100, output_dim=1024))
    model.add(Activation('tanh'))
    model.add(Dense(128*7*7))
    model.add(BatchNormalization())
    model.add(Activation('tanh'))
    model.add(Reshape((7, 7, 128), input_shape=(128*7*7,)))
    model.add(UpSampling2D(size=(2, 2)))
    model.add(Conv2D(64, (5, 5), padding='same'))
    model.add(Activation('tanh'))
    model.add(UpSampling2D(size=(2, 2)))
    model.add(Conv2D(1, (5, 5), padding='same'))
    model.add(Activation('tanh'))
    return model


def discriminator_model():
    model = Sequential()
    model.add(
            Conv2D(64, (5, 5),
            padding='same',
            input_shape=(28, 28, 1))
            )
    model.add(Activation('tanh'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(128, (5, 5)))
    model.add(Activation('tanh'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(1024))
    model.add(Activation('tanh'))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    return model


def generator_containing_discriminator(g, d):
    model = Sequential()
    model.add(g)
    d.trainable = False
    model.add(d)
    return model


def combine_images(generated_images):
    num = generated_images.shape[0]
    width = int(math.sqrt(num))
    height = int(math.ceil(float(num)/width))
    shape = generated_images.shape[1:3]
    image = np.zeros((height*shape[0], width*shape[1]),
                     dtype=generated_images.dtype)
    for index, img in enumerate(generated_images):
        i = int(index/width)
        j = index % width
        image[i*shape[0]:(i+1)*shape[0], j*shape[1]:(j+1)*shape[1]] = \
            img[:, :, 0]
    return image


def train(BATCH_SIZE):
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    X_train = (X_train.astype(np.float32) - 127.5)/127.5
    X_train = X_train[:, :, :, None]
    X_test = X_test[:, :, :, None]
    # X_train = X_train.reshape((X_train.shape, 1) + X_train.shape[1:])
    d = discriminator_model()
    g = generator_model()
    d_on_g = generator_containing_discriminator(g, d)
    d_optim = SGD(lr=0.0005, momentum=0.9, nesterov=True)
    g_optim = SGD(lr=0.0005, momentum=0.9, nesterov=True)
    g.compile(loss='binary_crossentropy', optimizer="SGD")
    d_on_g.compile(loss='binary_crossentropy', optimizer=g_optim)
    d.trainable = True
    d.compile(loss='binary_crossentropy', optimizer=d_optim)
    for epoch in range(100):
        print("Epoch is", epoch)
        print("Number of batches", int(X_train.shape[0]/BATCH_SIZE))
        for index in range(int(X_train.shape[0]/BATCH_SIZE)):
            noise = np.random.uniform(-1, 1, size=(BATCH_SIZE, 100))
            image_batch = X_train[index*BATCH_SIZE:(index+1)*BATCH_SIZE]
            generated_images = g.predict(noise, verbose=0)
            if index % 20 == 0:
                image = combine_images(generated_images)
                image = image*127.5+127.5
                Image.fromarray(image.astype(np.uint8)).save(
                    str(epoch)+"_"+str(index)+".png")
            X = np.concatenate((image_batch, generated_images))
            y = [1] * BATCH_SIZE + [0] * BATCH_SIZE
            d_loss = d.train_on_batch(X, y)
            print("batch %d d_loss : %f" % (index, d_loss))
            noise = np.random.uniform(-1, 1, (BATCH_SIZE, 100))
            d.trainable = False
            g_loss = d_on_g.train_on_batch(noise, [1] * BATCH_SIZE)
            d.trainable = True
            print("batch %d g_loss : %f" % (index, g_loss))
            if index % 10 == 9:
                g.save_weights('generator', True)
                d.save_weights('discriminator', True)


def generate(BATCH_SIZE, nice=False):
    g = generator_model()
    g.compile(loss='binary_crossentropy', optimizer="SGD")
    g.load_weights('generator')
    if nice:
        d = discriminator_model()
        d.compile(loss='binary_crossentropy', optimizer="SGD")
        d.load_weights('discriminator')
        noise = np.random.uniform(-1, 1, (BATCH_SIZE*20, 100))
        generated_images = g.predict(noise, verbose=1)
        d_pret = d.predict(generated_images, verbose=1)
        index = np.arange(0, BATCH_SIZE*20)
        index.resize((BATCH_SIZE*20, 1))
        pre_with_index = list(np.append(d_pret, index, axis=1))
        pre_with_index.sort(key=lambda x: x[0], reverse=True)
        nice_images = np.zeros((BATCH_SIZE,) + generated_images.shape[1:3], dtype=np.float32)
        nice_images = nice_images[:, :, :, None]
        for i in range(BATCH_SIZE):
            idx = int(pre_with_index[i][1])
            nice_images[i, :, :, 0] = generated_images[idx, :, :, 0]
        image = combine_images(nice_images)
    else:
        noise = np.random.uniform(-1, 1, (BATCH_SIZE, 100))
        generated_images = g.predict(noise, verbose=1)
        image = combine_images(generated_images)
    image = image*127.5+127.5
    Image.fromarray(image.astype(np.uint8)).save(
        "generated_image.png")


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--mode", type=str)
    parser.add_argument("--batch_size", type=int, default=128)
    parser.add_argument("--nice", dest="nice", action="store_true")
    parser.set_defaults(nice=False)
    args = parser.parse_args()
    return args

if __name__ == "__main__":
    args = get_args()
    if args.mode == "train":
        train(BATCH_SIZE=args.batch_size)
    elif args.mode == "generate":
        generate(BATCH_SIZE=args.batch_size, nice=args.nice)

### 微调 DeepSeek 并实现 RAG #### 定义微调目标和数据准备 为了使 DeepSeek 获得特定领域的知识或新知识,可以通过细调来达成这一目的[^3]。这不仅能够提升模型在特定领域内的表现,还能优化其处理复杂查询的能力。 ```python from datasets import load_dataset, DatasetDict # 加载自定义的数据集并将其分割成训练集和测试集 data_files = {"train": "path_to_train_data", "test": "path_to_test_data"} dataset = load_dataset('csv', data_files=data_files) # 如果需要的话,可以对数据集进行预处理操作 def preprocess_function(examples): return tokenizer(examples['text'], truncation=True) tokenized_datasets = dataset.map(preprocess_function, batched=True) ``` #### 执行微调过程 完成数据准备工作之后,下一步就是设置微调参数,并启动实际的微调流程。此阶段涉及到选择合适的硬件资源、配置超参数以及监控训练进度等方面的工作。 ```python import transformers from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer model_name = 'deepseek-model-name' model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2) training_args = TrainingArguments( output_dir='./results', evaluation_strategy='epoch', learning_rate=2e-5, per_device_train_batch_size=8, per_device_eval_batch_size=8, num_train_epochs=3, weight_decay=0.01, ) trainer = Trainer( model=model, args=training_args, train_dataset=tokenized_datasets["train"], eval_dataset=tokenized_datasets["test"] ) trainer.train() ``` #### 整合 RAG 技术 当完成了上述微调工作后,接下来要做的便是引入检索增强生成技术(Retrieval-Augmented Generation),即RAG。该方法允许模型利用外部文档库中的信息作为补充材料,在生成回复时提供更加精准的内容支持[^1]。 ```python from langchain.chains.hybrid_search_chain import HybridSearchChain from langchain.vectorstores import FAISS from langchain.embeddings.openai import OpenAIEmbeddings embeddings = OpenAIEmbeddings() vectorstore = FAISS.load_local("faiss_index_directory", embeddings) rag_model = HybridSearchChain.from_llm_and_vectorstore(llm=model, vectorstore=vectorstore) ``` 通过这种方式,不仅可以保持原有优势——如无需频繁更新内部参数即可适应新的知识点;同时也克服了一些局限性,比如减少了对外部数据库依赖所带来的额外开销等问题[^2]。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值