Datawhale AI 夏令营 “AIGC”方向 Task3:进阶上分-实战优化

Task3:进阶上分-实战优化

提示:本文为对平台文档的解读注释和补充


代码阅读和理解

提示:使用通义千问辅助注释

下载并安装 ComfyUI

# 导入 pathlib 模块中的 Path 类
from pathlib import Path

# 定义选项字典,并允许用户通过参数设置是否执行某些操作
OPTIONS = {}

# 用户可配置选项
UPDATE_COMFY_UI = True  # 是否更新 ComfyUI
INSTALL_COMFYUI_MANAGER = True  # 是否安装 ComfyUI Manager
INSTALL_KOLORS = True  # 是否安装 KOLORS
INSTALL_CUSTOM_NODES_DEPENDENCIES = True  # 是否安装自定义节点依赖

# 将用户选择的选项保存到 OPTIONS 字典中
OPTIONS['UPDATE_COMFY_UI'] = UPDATE_COMFY_UI
OPTIONS['INSTALL_COMFYUI_MANAGER'] = INSTALL_COMFYUI_MANAGER
OPTIONS['INSTALL_KOLORS'] = INSTALL_KOLORS
OPTIONS['INSTALL_CUSTOM_NODES_DEPENDENCIES'] = INSTALL_CUSTOM_NODES_DEPENDENCIES

# 获取当前工作目录
current_dir = !pwd
# 设置 ComfyUI 的工作空间路径
WORKSPACE = f"{current_dir[0]}/ComfyUI"

# 切换到指定的工作空间目录
%cd /mnt/workspace/

# 如果 ComfyUI 文件夹不存在,则初始化并克隆 ComfyUI 仓库
![ ! -d $WORKSPACE ] && echo -= 初始设置 ComfyUI =- && git clone https://github.com/comfyanonymous/ComfyUI
%cd $WORKSPACE

# 如果用户选择了更新 ComfyUI,则执行 git pull 更新代码
if OPTIONS['UPDATE_COMFY_UI']:
  !echo "-= 更新 ComfyUI =-"
  !git pull

# 如果用户选择了安装 ComfyUI Manager,则进行安装
if OPTIONS['INSTALL_COMFYUI_MANAGER']:
  %cd custom_nodes
  # 如果 ComfyUI-Manager 文件夹不存在,则初始化并克隆 ComfyUI-Manager 仓库
  ![ ! -d ComfyUI-Manager ] && echo -= 初始设置 ComfyUI-Manager =- && git clone https://github.com/ltdrdata/ComfyUI-Manager
  %cd ComfyUI-Manager
  # 更新 ComfyUI-Manager
  !git pull

# 如果用户选择了安装 KOLORS,则进行安装
if OPTIONS['INSTALL_KOLORS']:
  %cd ../
  # 如果 ComfyUI-KwaiKolorsWrapper 文件夹不存在,则初始化并克隆 KwaiKolorsWrapper 仓库
  ![ ! -d ComfyUI-KwaiKolorsWrapper ] && echo -= 初始设置 KOLORS =- && git clone https://github.com/kijai/ComfyUI-KwaiKolorsWrapper.git
  %cd ComfyUI-KwaiKolorsWrapper
  # 更新 KwaiKolorsWrapper
  !git pull

# 返回到 ComfyUI 主目录
%cd $WORKSPACE

# 如果用户选择了安装自定义节点依赖,则执行安装脚本
if OPTIONS['INSTALL_CUSTOM_NODES_DEPENDENCIES']:
  # 输出当前工作目录
  !pwd
  # 安装自定义节点依赖
  !echo "-= 安装自定义节点依赖 =-"
  # 运行安装依赖的 Python 脚本(如果文件不存在则运行)
  ![ -f "custom_nodes/ComfyUI-Manager/scripts/colab-dependencies.py" ] && python "custom_nodes/ComfyUI-Manager/scripts/colab-dependencies.py"

# 下载 cloudflared 工具
!wget "https://modelscope.oss-cn-beijing.aliyuncs.com/resource/cloudflared-linux-amd64.deb"
# 安装下载的 deb 包
!dpkg -i cloudflared-linux-amd64.deb

下载模型

# Notebook cell (IPython): download the Kolors model files with shell commands.
# All target paths are relative, so the files land under ./models of whatever
# directory the notebook is in when this cell runs
# (presumably the ComfyUI root — confirm before running).

# Import Path (not used by any line of this cell)
from pathlib import Path

# Option dictionary (left empty; not read by any line of this cell)
OPTIONS = {}

# Download the UNet weights and config (`-c` resumes a partial download, `-P` sets the target directory)
!wget -c "https://modelscope.cn/models/Kwai-Kolors/Kolors/resolve/master/unet/diffusion_pytorch_model.fp16.safetensors" -P ./models/diffusers/Kolors/unet/
!wget -c "https://modelscope.cn/models/Kwai-Kolors/Kolors/resolve/master/unet/config.json" -P ./models/diffusers/Kolors/unet/

# Download the text encoder (ZhipuAI/chatglm3-6b-base) via the modelscope CLI
!modelscope download --model=ZhipuAI/chatglm3-6b-base --local_dir ./models/diffusers/Kolors/text_encoder/

# Download the VAE (variational autoencoder) weights
!wget -c "https://modelscope.cn/models/AI-ModelScope/sdxl-vae-fp16-fix/resolve/master/sdxl.vae.safetensors" -P ./models/vae/

# Download the scheduler configuration file
!wget -c "https://modelscope.cn/models/Kwai-Kolors/Kolors/resolve/master/scheduler/scheduler_config.json" -P ./models/diffusers/Kolors/scheduler/

# Download the model index file
!wget -c "https://modelscope.cn/models/Kwai-Kolors/Kolors/resolve/master/model_index.json" -P ./models/diffusers/Kolors/

安装 LoRA 节点

# `os` is needed for os.makedirs below; no visible cell imports it, so import
# it here to keep this cell runnable on its own.
import os

# Source code of a ComfyUI custom node (class LoadKolorsLoRA) that loads LoRA
# weights into a Kolors model. It is kept as a string and written verbatim to
# the custom node's __init__.py below, so its content (comments included) is
# runtime data and is left unchanged.
# NOTE(review): the prefix map inside the template contains a placeholder
# comment where further mappings appear to have been omitted — confirm against
# the upstream notebook before relying on it.
lora_node = """
import torch
from peft import LoraConfig, inject_adapter_in_model

class LoadKolorsLoRA:
    # 定义节点的输入类型
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "kolors_model": ("KOLORSMODEL", ),  # 输入的 Kolors 模型
                "lora_path": ("STRING", {"multiline": False, "default": "",}),  # LoRA 权重文件路径
                "lora_alpha": ("FLOAT", {"default": 2.0, "min": 0.0, "max": 4.0, "step": 0.01}),  # LoRA 的 alpha 参数
            },
        }

    # 定义节点的返回类型
    RETURN_TYPES = ("KOLORSMODEL",)
    RETURN_NAMES = ("kolors_model",)
    FUNCTION = "add_lora"  # 节点的主要功能函数
    CATEGORY = "KwaiKolorsWrapper"  # 节点的分类标签

    # 定义转换状态字典的方法
    def convert_state_dict(self, state_dict):
        # 定义需要转换的前缀和后缀映射
        prefix_rename_dict = {
            "blocks.7.transformer_blocks": "down_blocks.1.attentions.0.transformer_blocks",
            "blocks.10.transformer_blocks": "down_blocks.1.attentions.1.transformer_blocks",
            # ... 其他映射
            "blocks.41.transformer_blocks": "up_blocks.1.attentions.2.transformer_blocks",
        }
        suffix_rename_dict = {
            ".to_out.lora_A.default.weight": ".to_out.0.lora_A.default.weight",
            ".to_out.lora_B.default.weight": ".to_out.0.lora_B.default.weight",
        }
        state_dict_ = {}  # 新的状态字典
        for name, param in state_dict.items():
            # 重命名键名
            for prefix in prefix_rename_dict:
                if name.startswith(prefix):
                    name = name.replace(prefix, prefix_rename_dict[prefix])
            for suffix in suffix_rename_dict:
                if name.endswith(suffix):
                    name = name.replace(suffix, suffix_rename_dict[suffix])
            state_dict_[name] = param  # 将重命名后的键值对添加到新的状态字典中
        # 获取 LoRA 的秩
        lora_rank = state_dict_["up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_q.lora_A.default.weight"].shape[0]
        return state_dict_, lora_rank

    # 加载 LoRA 的方法
    def load_lora(self, model, lora_rank, lora_alpha, state_dict):
        # 定义 LoRA 配置
        lora_config = LoraConfig(
            r=lora_rank,
            lora_alpha=lora_alpha,
            init_lora_weights="gaussian",
            target_modules=["to_q", "to_k", "to_v", "to_out.0"],
        )
        # 注入 LoRA 适配器到模型中
        model = inject_adapter_in_model(lora_config, model)
        # 加载 LoRA 权重
        model.load_state_dict(state_dict, strict=False)
        return model

    # 主要的功能函数
    def add_lora(self, kolors_model, lora_path, lora_alpha):
        # 加载 LoRA 权重
        state_dict = torch.load(lora_path, map_location="cpu")
        # 转换状态字典
        state_dict, lora_rank = self.convert_state_dict(state_dict)
        # 应用 LoRA 到 U-Net
        kolors_model["pipeline"].unet = self.load_lora(kolors_model["pipeline"].unet, lora_rank, lora_alpha, state_dict)
        return (kolors_model,)

# 注册节点类和显示名称
NODE_CLASS_MAPPINGS = {
    "LoadKolorsLoRA": LoadKolorsLoRA,
}
NODE_DISPLAY_NAME_MAPPINGS = {
    "LoadKolorsLoRA": "Load Kolors LoRA",
}

# 定义导出的变量
__all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"]
""".strip()

# Create the custom node directory if it does not exist yet
os.makedirs("/mnt/workspace/ComfyUI/custom_nodes/ComfyUI-LoRA", exist_ok=True)

# Write the node source as the package's __init__.py
with open("/mnt/workspace/ComfyUI/custom_nodes/ComfyUI-LoRA/__init__.py", "w", encoding="utf-8") as f:
    f.write(lora_node)

启动 ComfyUI

# Switch to the ComfyUI working directory
%cd /mnt/workspace/ComfyUI

# Standard-library modules used by the launcher code in this cell
import subprocess
import threading
import time
import socket

def iframe_thread(port):
    """Wait for the local ComfyUI server, then expose it through cloudflared.

    Polls 127.0.0.1:<port> every 0.5 s until a TCP connection succeeds, then
    starts ``cloudflared tunnel --url http://127.0.0.1:<port>`` and prints the
    part of each cloudflared stderr line that carries the tunnel URL.

    Args:
        port: TCP port the ComfyUI server listens on.
    """
    # Poll until the ComfyUI server accepts connections
    while True:
        time.sleep(0.5)
        # The context manager closes the probe socket on every path; closing
        # only after a failed attempt leaked the socket that finally connected.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            if sock.connect_ex(('127.0.0.1', port)) == 0:
                break

    # Tell the user the server is up and the tunnel is about to start
    print("\nComfyUI 完成加载,尝试启动 cloudflared (如果卡在这里,可能是 cloudflared 出现问题)\n")

    # Start cloudflared to create a tunnel to the local server. Only stderr is
    # read below, so stdout is discarded rather than piped: an unread pipe can
    # fill up and block the child process.
    p = subprocess.Popen(
        ["cloudflared", "tunnel", "--url", f"http://127.0.0.1:{port}"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
    )

    # Scan cloudflared's stderr for the line containing the tunnel URL
    for line in p.stderr:
        text = line.decode()
        if "trycloudflare.com " in text:
            # Print from the first "http" to the end of the line
            print("这是访问 ComfyUI 的 URL:", text[text.find("http"):], end='')

# Run iframe_thread in a background daemon thread, watching port 8188
threading.Thread(target=iframe_thread, daemon=True, args=(8188,)).start()

# Start the ComfyUI server in the foreground of this cell (no nohup is used),
# passing --dont-print-server
!python main.py --dont-print-server

尝试ComfyUI工作流

ComfyUI 是一个基于节点的图形用户界面(GUI),专门为 Stable Diffusion 设计,用于创建复杂的图像生成工作流程。在这个系统中,用户可以通过拖拽和连接不同的节点来构建从加载模型到生成图像的整个流程,这样搭建出的节点图就是一个 ComfyUI 工作流。

我将Task2里使用通义千问来生成的提示词转变为了英文,并使用ComfyUI进行了生成图片参数的调整。

对于其参数,我也使用了通义千问进行辅助学习,得知参数分别含义如下:

Width & Height: 定义生成图像的分辨率。
Seed: 随机种子,用于保证每次生成的图像都是可复现的。给定相同的种子,生成的图像应该是相同的。
Control After Generate: 控制每次生成结束后随机种子(Seed)如何变化。“fixed” 表示种子保持不变;其他选项还包括 increment(递增)、decrement(递减)和 randomize(随机)。
Steps: 采样过程中的迭代次数。
cfg: 无分类器引导系数(Classifier-Free Guidance Scale)。较大的值会使生成的图像更接近输入的提示,较小的值则可能导致图像偏离提示。
Scheduler: 采样调度器。此次选择了 “EulerDiscreteScheduler”,这是用于控制采样过程的一种算法。其他可能的选择还包括 “DDIM”、“PNDM” 等。
Denoise Strength: 去噪强度,取值范围为 0 到 1。取 1.0 时完全从随机噪声开始生成;数值越低,保留的初始潜空间图像(latent)内容越多,生成结果与初始图像越接近。
上图为我设置的参数

下面是我按照它的生成结果整理提示词后生成的图片,故事剧情同上集:
prompt:

Anime, reality, whole body, medieval style, yellow eyes, a young woman with long golden hair wearing red and white armor, holding a long sword, stood in the center of the cavalry in front of the castle. She was holding a long sword to celebrate joining the cavalry, full of confidence and determination |
Anime, reality, whole body, medieval style, yellow eyes, a young woman with long blond hair wearing red and white armor, riding on a horse, holding a sword, followed the cavalry to the Devil Camp, with a desolate ruins and a gloomy castle in the background  |
Anime, reality, whole body, side, medieval style, yellow eyes, long blond hair, young women wearing red and white armor, standing on the battlefield with a long sword and cutting at the enemy, surrounded by defeated devil soldiers, her expression shows the ease and pride after victory |
Anime, reality, whole body, side, medieval style, yellow eyes, long blond hair young women wearing red and white armor, girls holding long swords stood vigilantly in the palace of the devil boss, standing side by side with the devil boss, facing each other. The demon boss is burly, with dark red skin and sharp horns, eyes emitting a dark green glow, wearing heavy black armor, and holding a huge axe. Both of them are ready to go to war |
Anime, reality, whole body, side, medieval style, yellow eyes, long blond hair girl wearing red and white armor, the girl holding a sword and the devil boss in the throne room fierce battle, the two people fight together, the flame is fierce, the expression is tense and focused, the battle is fierce. The demon boss is burly, with dark red skin and sharp horns, eyes emitting a dark green glow, wearing heavy black armor, and holding a huge axe |
Anime, reality, whole body, side, medieval style, yellow eyes, blond hair girl wearing red and white armor, girl lying on the floor of the throne room devil boss, expression pain, surrounded by traces of battle, devil boss stood opposite to celebrate. The demon boss is burly, with dark red skin and sharp horns, eyes emitting a dark green glow, wearing heavy black armor, and holding a huge axe |
Anime, reality, whole body, future technology style, golden eyes, blonde girl with long hair, wearing high-tech brain computer interface helmet, leaning against the bed in the hospital ward, touching the helmet, looking confused, recalling the previous battle |
The anime, reality and perspective come from the outside of the laboratory through the glass window. The future scientific and technological style, yellow eyes and scientific and technological style, the blonde girl with long hair wearing a high-tech brain computer interface helmet leans against the bed in the hospital ward and is observed by the laboratory staff through the glass window. The laboratory staff wear white coats and record data, with focused and professional expressions

negative prompt

baddream,EasyNegative,verybadimagenegative_v1.3, censored,lowres,bad anatomy,bad hands, text,error,missing fingers,extra digit,fewer digits,cropped,worst quality, low quality,signature,watermark,username,blurry,missing arms, long neck,humpbacked, bad feet

在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

在这里插入图片描述

可以看到,生成的图像在风格上整体比较符合提示词的要求,但连贯性很差,且对两个人物(及多个人物)的理解有很大问题。这方面的改进,我认为可以使用更多包含连贯故事的图片以及多人物图片的数据集来训练模型,这也是我后续改进的方向。


总结

本人在此次活动 Task3 中完成了对 ComfyUI 的了解和尝试,后续会继续从模型调参、改进数据集、优化提示词等方面进行学习和优化。
还是推荐给大家一个提示词网站:
http://www.atoolbox.net/Tool.php?Id=1101

  • 16
    点赞
  • 12
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值