赛事任务:
1.参赛者需在可图Kolors 模型的基础上训练LoRA 模型,生成无限风格,如水墨画风格、水彩风格、赛博朋克风格、日漫风格......
2.基于LoRA模型生成 8 张图片组成连贯故事,故事内容可自定义
使用ComfyUI,其通过模块化的设计,把图像生成的过程分解成了许多小的步骤,每个步骤都是一个节点。这些节点可以连接起来形成一个工作流程,这样就可以根据需要定制自己的图像,流程可视化且操作方便。
核心模块由模型加载器、提示词管理器、采样器、解码器组成
选择模型,构造工作流调整参数后即可执行生成图像
import subprocess
import threading
import time
import socket
import urllib.request
def iframe_thread(port):
while True:
time.sleep(0.5)
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
result = sock.connect_ex(('127.0.0.1', port))
if result == 0:
break
sock.close()
print("\nComfyUI finished loading, trying to launch cloudflared (if it gets stuck here cloudflared is having issues)\n")
p = subprocess.Popen(["cloudflared", "tunnel", "--url", "http://127.0.0.1:{}".format(port)], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
for line in p.stderr:
l = line.decode()
if "trycloudflare.com " in l:
print("This is the URL to access ComfyUI:", l[l.find("http"):], end='')
#print(l, end='')
threading.Thread(target=iframe_thread, daemon=True, args=(8188,)).start()
不断地尝试连接本地服务器 (127.0.0.1
) 上的指定端口(这里是8188
),每次尝试之间会等待0.5秒。通过 socket.connect_ex()
方法,判断端口是否开放。如果端口开放(返回值为0),循环停止,继续执行后续代码。如果端口未开放,关闭套接字并继续循环。
通过 subprocess.Popen()
启动 cloudflared
隧道,连接到指定的本地端口,并重定向输出和错误流。读取隧道启动过程中的标准错误输出 stderr
,如果在输出中找到包含 trycloudflare.com
的URL,就将该URL打印到控制台,用于访问 ComfyUI
。
import comfy.options
comfy.options.enable_args_parsing()
import os
import importlib.util
import folder_paths
import time
from comfy.cli_args import args
def execute_prestartup_script():
def execute_script(script_path):
module_name = os.path.splitext(script_path)[0]
try:
spec = importlib.util.spec_from_file_location(module_name, script_path)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)
return True
except Exception as e:
print(f"Failed to execute startup-script: {script_path} / {e}")
return False
if args.disable_all_custom_nodes:
return
node_paths = folder_paths.get_folder_paths("custom_nodes")
for custom_node_path in node_paths:
possible_modules = os.listdir(custom_node_path)
node_prestartup_times = []
for possible_module in possible_modules:
module_path = os.path.join(custom_node_path, possible_module)
if os.path.isfile(module_path) or module_path.endswith(".disabled") or module_path == "__pycache__":
continue
script_path = os.path.join(module_path, "prestartup_script.py")
if os.path.exists(script_path):
time_before = time.perf_counter()
success = execute_script(script_path)
node_prestartup_times.append((time.perf_counter() - time_before, module_path, success))
if len(node_prestartup_times) > 0:
print("\nPrestartup times for custom nodes:")
for n in sorted(node_prestartup_times):
if n[2]:
import_message = ""
else:
import_message = " (PRESTARTUP FAILED)"
print("{:6.1f} seconds{}:".format(n[0], import_message), n[1])
print()
execute_prestartup_script()
# Main code
import asyncio
import itertools
import shutil
import threading
import gc
import logging
if os.name == "nt":
logging.getLogger("xformers").addFilter(lambda record: 'A matching Triton is not available' not in record.getMessage())
if __name__ == "__main__":
if args.cuda_device is not None:
os.environ['CUDA_VISIBLE_DEVICES'] = str(args.cuda_device)
logging.info("Set cuda device to: {}".format(args.cuda_device))
if args.deterministic:
if 'CUBLAS_WORKSPACE_CONFIG' not in os.environ:
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ":4096:8"
import cuda_malloc
if args.windows_standalone_build:
try:
import fix_torch
except:
pass
import comfy.utils
import yaml
import execution
import server
from server import BinaryEventTypes
import nodes
import comfy.model_management
def cuda_malloc_warning():
device = comfy.model_management.get_torch_device()
device_name = comfy.model_management.get_torch_device_name(device)
cuda_malloc_warning = False
if "cudaMallocAsync" in device_name:
for b in cuda_malloc.blacklist:
if b in device_name:
cuda_malloc_warning = True
if cuda_malloc_warning:
logging.warning("\nWARNING: this card most likely does not support cuda-malloc, if you get \"CUDA error\" please run ComfyUI with: --disable-cuda-malloc\n")
def prompt_worker(q, server):
e = execution.PromptExecutor(server, lru_size=args.cache_lru)
last_gc_collect = 0
need_gc = False
gc_collect_interval = 10.0
while True:
timeout = 1000.0
if need_gc:
timeout = max(gc_collect_interval - (current_time - last_gc_collect), 0.0)
queue_item = q.get(timeout=timeout)
if queue_item is not None:
item, item_id = queue_item
execution_start_time = time.perf_counter()
prompt_id = item[1]
server.last_prompt_id = prompt_id
e.execute(item[2], prompt_id, item[3], item[4])
need_gc = True
q.task_done(item_id,
e.history_result,
status=execution.PromptQueue.ExecutionStatus(
status_str='success' if e.success else 'error',
completed=e.success,
messages=e.status_messages))
if server.client_id is not None:
server.send_sync("executing", { "node": None, "prompt_id": prompt_id }, server.client_id)
current_time = time.perf_counter()
execution_time = current_time - execution_start_time
logging.info("Prompt executed in {:.2f} seconds".format(execution_time))
flags = q.get_flags()
free_memory = flags.get("free_memory", False)
if flags.get("unload_models", free_memory):
comfy.model_management.unload_all_models()
need_gc = True
last_gc_collect = 0
if free_memory:
e.reset()
need_gc = True
last_gc_collect = 0
if need_gc:
current_time = time.perf_counter()
if (current_time - last_gc_collect) > gc_collect_interval:
comfy.model_management.cleanup_models()
gc.collect()
comfy.model_management.soft_empty_cache()
last_gc_collect = current_time
need_gc = False
async def run(server, address='', port=8188, verbose=True, call_on_start=None):
await asyncio.gather(server.start(address, port, verbose, call_on_start), server.publish_loop())
def hijack_progress(server):
def hook(value, total, preview_image):
comfy.model_management.throw_exception_if_processing_interrupted()
progress = {"value": value, "max": total, "prompt_id": server.last_prompt_id, "node": server.last_node_id}
server.send_sync("progress", progress, server.client_id)
if preview_image is not None:
server.send_sync(BinaryEventTypes.UNENCODED_PREVIEW_IMAGE, preview_image, server.client_id)
comfy.utils.set_progress_bar_global_hook(hook)
def cleanup_temp():
temp_dir = folder_paths.get_temp_directory()
if os.path.exists(temp_dir):
shutil.rmtree(temp_dir, ignore_errors=True)
def load_extra_path_config(yaml_path):
with open(yaml_path, 'r') as stream:
config = yaml.safe_load(stream)
for c in config:
conf = config[c]
if conf is None:
continue
base_path = None
if "base_path" in conf:
base_path = conf.pop("base_path")
for x in conf:
for y in conf[x].split("\n"):
if len(y) == 0:
continue
full_path = y
if base_path is not None:
full_path = os.path.join(base_path, full_path)
logging.info("Adding extra search path {} {}".format(x, full_path))
folder_paths.add_model_folder_path(x, full_path)
if __name__ == "__main__":
if args.temp_directory:
temp_dir = os.path.join(os.path.abspath(args.temp_directory), "temp")
logging.info(f"Setting temp directory to: {temp_dir}")
folder_paths.set_temp_directory(temp_dir)
cleanup_temp()
if args.windows_standalone_build:
try:
import new_updater
new_updater.update_windows_updater()
except:
pass
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
server = server.PromptServer(loop)
q = execution.PromptQueue(server)
extra_model_paths_config_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "extra_model_paths.yaml")
if os.path.isfile(extra_model_paths_config_path):
load_extra_path_config(extra_model_paths_config_path)
if args.extra_model_paths_config:
for config_path in itertools.chain(*args.extra_model_paths_config):
load_extra_path_config(config_path)
nodes.init_extra_nodes(init_custom_nodes=not args.disable_all_custom_nodes)
cuda_malloc_warning()
server.add_routes()
hijack_progress(server)
threading.Thread(target=prompt_worker, daemon=True, args=(q, server,)).start()
if args.output_directory:
output_dir = os.path.abspath(args.output_directory)
logging.info(f"Setting output directory to: {output_dir}")
folder_paths.set_output_directory(output_dir)
#These are the default folders that checkpoints, clip and vae models will be saved to when using CheckpointSave, etc.. nodes
folder_paths.add_model_folder_path("checkpoints", os.path.join(folder_paths.get_output_directory(), "checkpoints"))
folder_paths.add_model_folder_path("clip", os.path.join(folder_paths.get_output_directory(), "clip"))
folder_paths.add_model_folder_path("vae", os.path.join(folder_paths.get_output_directory(), "vae"))
if args.input_directory:
input_dir = os.path.abspath(args.input_directory)
logging.info(f"Setting input directory to: {input_dir}")
folder_paths.set_input_directory(input_dir)
if args.quick_test_for_ci:
exit(0)
call_on_start = None
if args.auto_launch:
def startup_server(scheme, address, port):
import webbrowser
if os.name == 'nt' and address == '0.0.0.0':
address = '127.0.0.1'
webbrowser.open(f"{scheme}://{address}:{port}")
call_on_start = startup_server
try:
loop.run_until_complete(server.setup())
loop.run_until_complete(run(server, address=args.listen, port=args.port, verbose=not args.dont_print_server, call_on_start=call_on_start))
except KeyboardInterrupt:
logging.info("\nStopped server")
cleanup_temp()
启动和管理ComfyUI服务器。它通过多线程、异步编程和动态模块加载的方式,实现了对任务的高效调度和资源管理(如 CUDA 设备和内存的优化使用)。同时,还支持加载自定义节点和扩展配置,是一个高度可配置和扩展的系统。
主要工作流程为:
1.初始化临时目录
2.独立构建更新检查
3. 实例化服务器对象 server.PromptServer
,并为其传入事件循环。实例化任务队列 execution.PromptQueue
,用于管理和调度任务。
4. 加载模型路径配置
5. 初始化自定义节点
6. CUDA 内存分配检查和报警
7. 添加服务器路由和进度钩子,设置全局进度条钩子 hijack_progress
,以便在任务执行时能够实时更新进度信息给客户端。。并启动任务执行线程,这使得任务处理与主事件循环并行进行。
8. 自动启动服务器(在服务器启动后自动在浏览器中打开服务器地址)再启动事件循环并运行服务器,使用 asyncio
的事件循环 loop
运行服务器,监听指定地址和端口,处理客户端请求。
LoRA(Low-Rank Adaptation)是一种用于神经网络模型参数高效调优的方法,特别适用于在预训练模型(如大型语言模型、视觉模型等)上进行微调的场景。它的核心思想是通过低秩矩阵分解来减少参数更新的复杂性,从而降低计算成本和内存需求。
LoRA 原理
LoRA 的基本思想是将神经网络中的部分权重矩阵进行低秩分解,将原始的权重矩阵W 表示为两个低秩矩阵 A和B 的乘积: W′=W+ΔW=W+A×B其中:
- W是预训练模型的权重矩阵,保持不变。
- ΔW是通过 A×B计算得到的低秩近似更新矩阵。
- AAA 和 BBB 是低秩矩阵,通常 A的维度是 (d,r)而 BBB 的维度是(r,k),其中 r是秩远小于d 和 k的值。
通过这种方式,LoRA 只需更新低秩矩阵 A和 B,而不需要更新完整的权重矩阵W,这显著减少了需要调优的参数量。
LoRA 的优势
-
参数效率高:LoRA 通过低秩分解大大减少了需要更新的参数数量,降低了计算和存储成本。
-
快速微调:由于参数量大幅减少,模型微调速度更快,适合资源受限的场景。
-
适应性强:LoRA 能够在不同任务中有效地微调预训练模型,而不需要重新训练整个模型。
-
节省内存:LoRA 只需存储低秩矩阵 A和 B的权重,相比于更新整个权重矩阵所需的内存开销更低。
-
可扩展性:LoRA 可以在各种预训练模型上应用,包括语言模型、视觉模型等。