背景
要提升大模型的图表识别能力,需要投喂大量数据,其中的关键是生成带有标签、样式随机多样的数据。我的想法是找一些图表模板,将其中的 value 等参数改成随机变量。
方法
import os
import random
# Folder that receives every generated gauge page. It is created up front
# (and tolerated if it already exists) so later file writes find it in place.
output_directory = "./gauge_html"
os.makedirs(name=output_directory, exist_ok=True)
# Stand-alone HTML page that renders a single ECharts gauge, written as a
# ``str.format`` template.
#
# * Single-brace names are placeholders filled in by ``html_template.format``:
#   max_value, progress_width, axis_line_width, axis_line_color,
#   axis_tick_length, split_line_length, split_line_width, split_line_color,
#   axis_label_distance, axis_label_color, axis_label_font_size, show_anchor,
#   anchor_size, anchor_border_width, show_detail, detail_font_size and
#   value_placeholder.
# * Doubled braces ({{ and }}) are literal JavaScript braces, escaped so that
#   ``str.format`` emits a single brace instead of treating them as fields.
# * show_anchor / show_detail are inserted unquoted into JavaScript, so they
#   must be supplied as the lowercase text ``true`` / ``false``.
html_template = """
<!DOCTYPE html>
<html lang="en" style="height: 100%">
<head>
<meta charset="utf-8">
</head>
<body style="height: 100%; margin: 0">
<div id="container" style="height: 100%"></div>
<script type="text/javascript" src="https://registry.npmmirror.com/echarts/5.5.0/files/dist/echarts.min.js"></script>
<script type="text/javascript">
var dom = document.getElementById('container');
var myChart = echarts.init(dom, null, {{
renderer: 'canvas',
useDirtyRect: false
}});
var app = {{}};
var option;
option = {{
animation: false,
series: [
{{
type: 'gauge',
max: {max_value},
progress: {{
show: true,
width: {progress_width}
}},
axisLine: {{
lineStyle: {{
width: {axis_line_width},
color: '{axis_line_color}'
}}
}},
axisTick: {{
show: true,
length: {axis_tick_length},
splitNumber: 5
}},
splitLine: {{
length: {split_line_length},
lineStyle: {{
width: {split_line_width},
color: '{split_line_color}'
}}
}},
axisLabel: {{
distance: {axis_label_distance},
color: '{axis_label_color}',
fontSize: {axis_label_font_size}
}},
anchor: {{
show: {show_anchor},
size: {anchor_size},
itemStyle: {{
borderWidth: {anchor_border_width}
}}
}},
title: {{
show: false
}},
detail: {{
show: {show_detail},
valueAnimation: true,
fontSize: {detail_font_size},
offsetCenter: [0, '70%'],
formatter: function (value) {{
return value.toFixed(2);
}}
}},
data: [
{{
value: {value_placeholder},
name: 'speed'
}}
]
}}
]
}};
myChart.setOption(option);
</script>
</body>
</html>
"""
上面的模板取自 ECharts 官网的示例,将 value 和其他需要变化的参数设置为随机变量,通过改变这些随机变量来得到多种样式的 HTML。
def get_random_style(show_detail):
    """Return one randomly drawn set of gauge styling options.

    Args:
        show_detail: Stored unchanged under the ``"show_detail"`` key; the
            caller decides whether the numeric read-out is displayed.

    Returns:
        A dict whose keys match the style placeholders of the HTML template.
        Sizes are picked from small fixed candidate lists, the three colours
        are random ``#rrggbb`` hex strings, and ``"show_anchor"`` is always
        ``True``.
    """
    pick = random.choice

    def random_hex_color():
        # Any 24-bit integer, zero-padded to six lowercase hex digits.
        return '#{0:06x}'.format(random.randint(0, 0xFFFFFF))

    # Entries are filled in a fixed order so that, for a given random seed,
    # the draws are consumed in the same sequence every time.
    style = {}
    style["progress_width"] = pick([15, 20, 25])
    style["axis_line_width"] = pick([12, 18, 22])
    style["axis_tick_length"] = pick([5, 8, 10])
    style["split_line_length"] = pick([10, 15, 20])
    style["split_line_width"] = pick([1, 2, 3])
    style["axis_label_distance"] = pick([15, 25, 35])
    style["axis_label_font_size"] = pick([15, 20, 25])
    style["show_anchor"] = True
    style["anchor_size"] = pick([15, 25, 30])
    style["anchor_border_width"] = pick([5, 10, 15])
    style["show_detail"] = show_detail
    style["detail_font_size"] = pick([60, 70, 80])
    style["axis_line_color"] = random_hex_color()
    style["split_line_color"] = random_hex_color()
    style["axis_label_color"] = random_hex_color()
    return style
# Upper end of the gauge scale; random readings are drawn from 0 up to it.
max_gauge_value = 100

# Size of the generated dataset and how it is split: one tenth of the pages
# go into the first group, the rest into the second.
total_files = 200
num_files_with_value = int(0.1 * total_files)
num_files_without_value = total_files - num_files_with_value
def generate_html_files(num_files, show_detail, prefix):
    """Write ``num_files`` randomized gauge pages into ``output_directory``.

    Each page gets its own random reading and its own random style, and is
    saved as ``{prefix}_{i}.html`` with ``i`` counting up from 1.

    Args:
        num_files: How many pages to generate; nothing is written when it
            is zero or negative.
        show_detail: Whether the numeric read-out below the gauge is shown.
        prefix: File-name prefix placed before the running index.
    """
    for i in range(1, num_files + 1):
        # Reading shown by the gauge: uniform over the scale, two decimals.
        value = round(random.uniform(0, max_gauge_value), 2)
        style = get_random_style(show_detail)

        # Start from the style dict and override/add the remaining fields.
        # The two booleans are spliced unquoted into JavaScript, which spells
        # them ``true`` / ``false``, hence the lowercase conversion.
        fields = dict(
            style,
            show_anchor=str(style["show_anchor"]).lower(),
            show_detail=str(style["show_detail"]).lower(),
            max_value=max_gauge_value,
            value_placeholder=value,
        )
        file_content = html_template.format(**fields)

        file_path = os.path.join(output_directory, f"{prefix}_{i}.html")
        # The page declares charset utf-8, so write it as UTF-8 explicitly
        # instead of relying on the platform's default encoding.
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(file_content)
# Produce both groups of the dataset: first the pages that display the
# numeric reading, then the pages that hide it.
generate_html_files(
    num_files=num_files_with_value,
    show_detail=True,
    prefix="gauge_with_value",
)
generate_html_files(
    num_files=num_files_without_value,
    show_detail=False,
    prefix="gauge_without_value",
)
结果
能够批量生成同一类型、不同样式的 HTML 数据。