#!/bin/bash
#
# Train a Tesseract language pack from a numbered set of page images.
#
# Usage: <script> lang font ftail filecount
#   lang      - language code; used as the prefix of every generated file
#   font      - font name; pass "" when the input file names have no font part
#   ftail     - extension of the input image files, without the leading dot
#   filecount - number of input pages, numbered exp1 .. exp<filecount>
#
# Input files are named lang.font.exp<i>.ftail (or lang.exp<i>.ftail when font
# is empty). The matching <prefix>.exp<i>.box files are expected to exist
# already, because the makebox step below is disabled.
#
# Output: ${lang}.traineddata in the current directory, which is then copied
# into the system tessdata directory.

# Language passed to tesseract via -l while producing the .tr files.
readonly BASE_LANG="chi_sim"
# Where the finished traineddata file is installed.
readonly TESSDATA_DIR="/usr/local/share/tessdata/"

# Print an error message to stderr and abort the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

main() {
  echo "args: lang font ftail filecount; format: lang.font.exp[1-filecount].ftail"

  local lang=${1-} font=${2-} ftail=${3-} num=${4-}

  [[ -n "$lang" ]] || die "missing argument: lang"
  [[ -n "$ftail" ]] || die "missing argument: ftail"
  [[ "$num" =~ ^[1-9][0-9]*$ ]] \
    || die "filecount must be a positive integer, got '${num}'"

  # File name prefix shared by all pages: "lang" or "lang.font".
  local pre
  if [[ -z "$font" ]]; then
    pre=$lang
  else
    pre="${lang}.${font}"
  fi

  # Arrays (not space-joined strings) so names survive word splitting.
  local -a boxlist=() trlist=()
  local i fn cpre
  for ((i = 1; i <= num; i++)); do
    cpre="${pre}.exp${i}"
    fn="${cpre}.${ftail}"
    # Disabled step that would generate the box file:
    # tesseract "$fn" "$cpre" -l chi_sim batch.nochop makebox
    # Produce the .tr training file for this page.
    tesseract "$fn" "$cpre" -l "$BASE_LANG" nobatch box.train \
      || die "tesseract box.train failed for ${fn}"
    boxlist+=("${cpre}.box")
    trlist+=("${cpre}.tr")
  done

  # Everything above produces the per-page box/tr files; everything below
  # merges them into the final traineddata. Because the merge works on lists
  # of files, multiple pages and files from earlier runs can be combined
  # (incremental training mixed with historical data).
  unicharset_extractor "${boxlist[@]}" \
    || die "unicharset_extractor failed" # writes ./unicharset
  mftraining -U unicharset -O "${lang}.unicharset" "${trlist[@]}" \
    || die "mftraining failed"
  cntraining "${trlist[@]}" \
    || die "cntraining failed" # writes ./normproto

  # combine_tessdata is given the "${lang}." prefix, so every component must
  # carry exactly that prefix. normproto previously used "${pre}", which left
  # it out of the bundle whenever a font name was supplied.
  mv -- inttemp "${lang}.inttemp" || die "cannot rename inttemp"
  mv -- pffmtable "${lang}.pffmtable" || die "cannot rename pffmtable"
  mv -- normproto "${lang}.normproto" || die "cannot rename normproto"
  # Rename Microfeat only when it is present, so its absence does not abort
  # the run.
  if [[ -e Microfeat ]]; then
    mv -- Microfeat "${lang}.Microfeat" || die "cannot rename Microfeat"
  fi

  combine_tessdata "${lang}." || die "combine_tessdata failed"
  # /bin/cp is called by absolute path, as in the original script.
  /bin/cp -f "${lang}.traineddata" "$TESSDATA_DIR" \
    || die "cannot install ${lang}.traineddata into ${TESSDATA_DIR}"
}

main "$@"