QWEN-VL 图文数据标注工具

最新推荐文章于 2024-06-18 14:53:19 发布

zRezin

最新推荐文章于 2024-06-18 14:53:19 发布

阅读量1.2k

点赞数 13

分类专栏：大模型文章标签： json 大模型标注 QWEN-VL数据集

本文链接：https://blog.csdn.net/shuaikang9864/article/details/135170491

版权

大模型专栏收录该内容

3 篇文章 0 订阅

订阅专栏

达摩院发布的QWEN系列大模型确实不错：在我们针对工业场景/工业图文场景所做的基础模型判断测评中，它超过了MiniGPT-4、VisualGLM、mPLUG-Owl等一众多模态大模型，因此选定其作为开发基座较好。

github链接：GitHub - QwenLM/Qwen-VL: The official repo of Qwen-VL (通义千问-VL) chat & pretrained large vision language model proposed by Alibaba Cloud.

huggingface:https://huggingface.co/Qwen/Qwen-VL

模型部署部分csdn有，出现各种问题可能是transformers版本问题，这里后面再说。

在github中给出了微调数据集的模板：

[
  {
    "id": "identity_0",
    "conversations": [
      {
        "from": "user",
        "value": "你好"
      },
      {
        "from": "assistant",
        "value": "我是Qwen-VL,一个支持视觉输入的大模型。"
      }
    ]
  },
  {
    "id": "identity_1",
    "conversations": [
      {
        "from": "user",
        "value": "Picture 1: <img>https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg</img>\n图中的狗是什么品种？"
      },
      {
        "from": "assistant",
        "value": "图中是一只拉布拉多犬。"
      },
      {
        "from": "user",
        "value": "框出图中的格子衬衫"
      },
      {
        "from": "assistant",
        "value": "<ref>格子衬衫</ref><box>(588,499),(725,789)</box>"
      }
    ]
  },
  { 
    "id": "identity_2",
    "conversations": [
      {
        "from": "user",
        "value": "Picture 1: <img>assets/mm_tutorial/Chongqing.jpeg</img>\nPicture 2: <img>assets/mm_tutorial/Beijing.jpeg</img>\n图中都是哪"
      },
      {
        "from": "assistant",
        "value": "第一张图片是重庆的城市天际线，第二张图片是北京的天际线。"
      }
    ]
  }
]

可以选择手撸，为了方便，写了一个简便的标注工具

标注后会生成符合规定格式的json文件，并把对应的图片也一并收集到同一文件夹中。脚本如下（label.py）：

import tkinter as tk
from tkinter import filedialog, Label, simpledialog
from PIL import Image, ImageTk
import json
import random
import string
import os
import shutil
import subprocess
import sys

# Create the main application window.
root = tk.Tk()
root.title("Image Viewer with Dialogue")
root.geometry("1000x600")  # default window size

dialogue_entries = []  # (user_entry, assistant_entry) pairs appended by add_dialogue_boxes

# Load an image chosen by the user and show it in the preview frame.
def load_and_display_image():
    """Ask the user for an image file and display it scaled to 400 px high.

    On the first successful load this creates the preview label, the
    "Add Dialogue" and "Save" buttons and the two first-turn entry boxes
    (published as the globals ``input_box_1`` and ``input_box_2``).  Later
    loads only swap the displayed picture, so the controls are never
    duplicated.  Cancelling the file dialog changes nothing; in particular
    the previously selected ``image_path`` is kept.
    """
    global image_label, image_path, input_box_1, input_box_2

    file_path = filedialog.askopenfilename(
        # Space-separated patterns form a proper Tk list; the semicolon-joined
        # form is only reliable with the native Windows dialog.
        filetypes=[("Image files", "*.png *.jpg *.jpeg")]
    )
    if not file_path:
        # Dialog was cancelled: keep whatever image was selected before.
        return
    image_path = file_path

    with Image.open(file_path) as source:
        # Fix the height at 400 px and derive the width from the aspect ratio;
        # clamp to 1 so extremely narrow images do not produce a zero width.
        width = max(1, int((400 / source.height) * source.width))
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        resized = source.resize((width, 400), Image.LANCZOS)
    photo = ImageTk.PhotoImage(resized)

    if 'image_label' in globals():
        # Controls already exist from an earlier load: only replace the picture.
        image_label.config(image=photo)
        image_label.image = photo  # keep a reference so Tk does not drop the image
        return

    image_label = tk.Label(image_frame, image=photo)
    image_label.pack(padx=7, pady=7)
    image_label.image = photo  # keep a reference so Tk does not drop the image

    # Button that appends another User/Assistant entry pair.
    add_dialogue_button = tk.Button(root, text="Add Dialogue", command=add_dialogue_boxes)
    add_dialogue_button.pack(side="top", padx=(5, 0), pady=(7, 0))

    # Button that writes the annotation to disk.
    save_button = tk.Button(root, text="Save", command=save_to_json)
    save_button.pack(side="top", pady=(7, 0))

    # Entry boxes for the first turn (the one that references the picture).
    input_box_1 = create_labeled_input(root, "Input with the picture:")
    input_box_2 = create_labeled_input(root, "Assistant:")



# Frame on the left side of the window that hosts the image preview.
image_frame = tk.Frame(root)
image_frame.pack(side="left", anchor="nw", padx=7, pady=7)

# Button that opens the file picker and loads the chosen image.
open_button = tk.Button(root, text="Open Image", command=load_and_display_image)
open_button.pack(side="top", pady=(7, 0))

# Build a caption label followed by a single-line entry box.
def create_labeled_input(parent, label_text):
    """Pack a caption and a 50-character-wide entry into *parent*.

    Both widgets are packed with ``side="top"``; the entry widget is
    returned so the caller can read its text later.
    """
    tk.Label(parent, text=label_text).pack(side="top")
    field = tk.Entry(parent, width=50)
    field.pack(side="top")
    return field



# Append one more User/Assistant entry pair to the window.
def add_dialogue_boxes():
    """Create a "User:" and an "Assistant:" entry and register the pair.

    The pair is stored in ``dialogue_entries`` as ``(user_entry,
    assistant_entry)``.
    """
    # The generator is consumed left to right, so the user box is created
    # (and packed) before the assistant box.
    pair = tuple(
        create_labeled_input(root, caption) for caption in ("User:", "Assistant:")
    )
    dialogue_entries.append(pair)


# Store the selected image under the annotation's .jpg name as a real JPEG.
def _store_image_as_jpeg(source_path, target_path):
    """Write *source_path* to *target_path* as JPEG data.

    JPEG sources are copied unchanged (metadata included).  Any other format
    is re-encoded, so the ``.jpg`` file name never hides PNG bytes.
    """
    extension = os.path.splitext(source_path)[1].lower()
    if extension in (".jpg", ".jpeg"):
        shutil.copy2(source_path, target_path)
        return
    with Image.open(source_path) as picture:
        # JPEG has no alpha channel or palette mode, hence the RGB conversion.
        picture.convert("RGB").save(target_path, "JPEG")


# Save the annotated conversation as JSON and store the image next to it.
def save_to_json():
    """Write the current annotation to ``saves/<random>.json`` plus its image.

    The first user turn is ``Picture 1: <img><random>.jpg</img>`` followed by
    a newline and the text of ``input_box_1``; the first assistant turn is the
    text of ``input_box_2``.  Every pair in ``dialogue_entries`` adds one more
    user turn and one more assistant turn.  All turns use the keys ``from``
    and ``value``.  The image referenced by the global ``image_path`` is
    stored as ``saves/<random>.jpg``.

    After saving, a fresh instance of this script is started and the current
    process exits, so the next sample begins with an empty form.
    """
    save_dir = "saves"

    # A 10-character random stem shared by the JSON file and the image.
    random_filename = ''.join(random.choices(string.ascii_lowercase + string.digits, k=10))

    os.makedirs(save_dir, exist_ok=True)

    json_file_path = os.path.join(save_dir, random_filename + '.json')
    image_file_path = os.path.join(save_dir, random_filename + '.jpg')

    first_prompt = f"Picture 1: <img>{random_filename}.jpg</img>\n{input_box_1.get()}"
    conversations = [
        {"from": "user", "value": first_prompt},
        {"from": "assistant", "value": input_box_2.get()},
    ]
    for user_entry, assistant_entry in dialogue_entries:
        conversations.append({"from": "user", "value": user_entry.get()})
        # The reply text belongs under "value"; it used to be written under
        # an "assistant" key, which left follow-up answers without a value.
        conversations.append({"from": "assistant", "value": assistant_entry.get()})
    dialogue_data = {"conversations": conversations}

    with open(json_file_path, 'w', encoding='utf-8') as json_file:
        json.dump(dialogue_data, json_file, ensure_ascii=False, indent=4)

    if image_path and os.path.isfile(image_path):
        _store_image_as_jpeg(image_path, image_file_path)

    # Relaunch this very script with the running interpreter.  The previous
    # hard-coded "python test.py" only worked when the file had that name
    # (presumably the author's local name for this tool).
    subprocess.Popen([sys.executable, os.path.abspath(sys.argv[0])])
    sys.exit()




# Enter the Tk event loop; this call blocks until the application exits.
root.mainloop()

标注后，所有的标注文件和图片都保存在saves文件夹下，可以在代码中自行更改保存文件夹。

另外需要一个脚本来聚合json文件：merge.py

import os
import json
from collections import OrderedDict
import re

# Directory holding the freshly annotated data; merge.py is best placed one
# level above it.
directory_path = 'saves/'

# Collects the merged records.
merged_data = []

# Prefix put in front of every image file name.  Ideally the absolute path of
# the folder the training images will live in, so Qwen fine-tuning finds them.
image_path_prefix = 'pathtoyourimages/'

# Matches <img>*.jpg</img> and captures the file name.
img_pattern = re.compile(r'<img>(.*?\.jpg)</img>')


def _prefix_img_paths(text):
    """Return *text* with ``image_path_prefix`` added inside each matched tag."""
    # A callable replacement inserts the prefix literally, so backslashes in a
    # Windows-style prefix are not interpreted as regex escapes.
    return img_pattern.sub(
        lambda match: f'<img>{image_path_prefix}{match.group(1)}</img>', text)


def update_img_paths(obj):
    """Prefix every ``<img>*.jpg</img>`` path found anywhere inside *obj*.

    *obj* is modified in place.  Dicts (including ``OrderedDict``) and lists
    are walked recursively; string values are rewritten so the tag reads
    ``<img>`` + ``image_path_prefix`` + file name + ``</img>``.  Values of any
    other type are left untouched.

    Args:
        obj: A dict or list as produced by ``json.load``; anything else is
            ignored.
    """
    if isinstance(obj, dict):
        entries = list(obj.items())
    elif isinstance(obj, list):
        entries = list(enumerate(obj))
    else:
        return

    for key, value in entries:
        if isinstance(value, (dict, list)):
            update_img_paths(value)
        elif isinstance(value, str):
            obj[key] = _prefix_img_paths(value)

# Walk the annotation directory.  os.listdir returns names in arbitrary order,
# so sort them to make the merged file reproducible between runs.
for filename in sorted(os.listdir(directory_path)):
    # Only JSON files are annotation records; images live in the same folder.
    if not filename.endswith('.json'):
        continue

    file_path = os.path.join(directory_path, filename)
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            content = json.load(file, object_pairs_hook=OrderedDict)
    except json.JSONDecodeError as e:
        print(f"Error reading {filename}: {e}")
        continue

    # A record must be a JSON object; anything else cannot be merged with an
    # 'id' field and would previously crash the whole run.
    if not isinstance(content, dict):
        print(f"Skipping {filename}: top-level JSON value is not an object")
        continue

    # Build a new OrderedDict so that 'id' (the file stem) comes first.
    new_content = OrderedDict()
    new_content['id'] = os.path.splitext(filename)[0]
    new_content.update(content)
    update_img_paths(new_content)
    merged_data.append(new_content)

# Name of the merged JSON file.
output_filename = 'merged_data.json'
# Full output path; the file is written to the current working directory.
output_filepath = output_filename

# Write the merged records to the new JSON file.
with open(output_filepath, 'w', encoding='utf-8') as output_file:
    json.dump(merged_data, output_file, ensure_ascii=False, indent=4)

print(f"Merge complete. Combined file created at {output_filepath}")

使用以上文件时，建议将label.py、merge.py和saves文件夹放在同一级目录，合并输出的merged_data.json也会生成在这一级目录。之后把图片移动到训练数据集文件夹，将merge.py中的image_path_prefix改为该文件夹的路径并重新运行merge.py，即可完成所有标注文件的整理与储存。