1、 grep vs awk 效果如下:
3 、awk的精准匹配及函数调用判断添加后的脚本如下:
#!/bin/bash
#======begin global variable definitions======#
# Inclusive range of input file indices: c${start_n}.txt .. c${end_n}.txt.
start_n=1
end_n=10
# Next numeric suffix for fifth-level classification IDs; incremented for
# every fifth-level row that is written.
fifthc_id=4721
# Next numeric suffix for sixth-level classification IDs; incremented for
# every sixth-level row that is written.
sixthc_id=1
# Next attribute KEY id; also the first number used when attribute lines are
# renumbered.
key_id=282
# Next attribute VALUE id; incremented for every value row that is written.
value_id=2145
# ID prefix of fifth-level rows; also passed to the check functions to select
# the fifth-level messages.
pre_fifth_n=555
# ID prefix of sixth-level rows; also passed to the check functions to select
# the sixth-level messages.
pre_sixth_n=666
#======end global variable definitions======#
#======begin function definitions======#
###Split the input files###
# For every index i in start_n..end_n whose c${i}.txt exists:
#   - read the top-level category number n from the "第几大分类:<n>" line;
#   - write every line that is not a fifth-level, sixth-level or attribute
#     line to ${i}.txt;
#   - append the fifth-level, sixth-level and attribute lines, each prefixed
#     with "<n>-", to fifth_classification.txt, sixth_classification.txt and
#     attribute.txt respectively.
# A missing c${i}.txt is reported and skipped.
# Globals: start_n, end_n (read); n (written).
File_split()
{
  local i
  for i in $(seq "${start_n}" "${end_n}"); do
    if [ ! -f "c${i}.txt" ]; then
      echo -e "\033[31;49;1m第c${i}.txt分类文件不存在!\033[39;39;0m"
      echo "+===================================+"
      continue
    fi

    n=$(grep -w 第几大分类 "c${i}.txt" | cut -d':' -f2)
    grep -v -e "五级分类\|属性\|六级分类" "c${i}.txt" > "${i}.txt"

    # Fifth-level lines.
    if grep 五级分类 "c${i}.txt" > "fifth_classification${i}.txt"; then
      echo "${n}"
      sed -i "s/^/${n}-/g" "fifth_classification${i}.txt"
      cat "fifth_classification${i}.txt" >> fifth_classification.txt
    else
      echo -e "\033[31;49;1mc${i}分类的五级分类不存在: \033[39;49;0m\n"
    fi
    rm -f "fifth_classification${i}.txt"

    # Sixth-level lines.
    if grep 六级分类 "c${i}.txt" > "sixth_classification${i}.txt"; then
      sed -i "s/^/${n}-/g" "sixth_classification${i}.txt"
      cat "sixth_classification${i}.txt" >> sixth_classification.txt
    else
      echo -e "\033[31;49;1mc${i}分类的六级分类不存在: \033[39;49;0m\n"
    fi
    rm -f "sixth_classification${i}.txt"

    # Attribute lines; no existence check, an empty match appends nothing.
    grep 属性 "c${i}.txt" > "attribute${i}.txt"
    sed -i "s/^/${n}-/g" "attribute${i}.txt"
    cat "attribute${i}.txt" >> attribute.txt
    # The cleanup must be its own command; as an argument to cat it would be
    # read as file names.
    rm -f "attribute${i}.txt"
  done
}
###Renumber the fifth-level classification lines sequentially###
# Reads fifth_classification.txt, whose lines look like
#   <prefix>-五级分类<k>——<parent>——<children>
# and appends to fifth_classification_new.txt the same lines with <k>
# replaced by the line number (1, 2, 3, ...) and all spaces and tabs removed.
# fifth_classification.txt is deleted afterwards.
# Does nothing when fifth_classification.txt is missing or empty.
fifth_classification_processing()
{
  [ -s fifth_classification.txt ] || return 0

  # One awk pass replaces the start.txt / xxm.txt / end.txt + paste pipeline,
  # so stale temporary files from an aborted run can no longer leak into the
  # result.
  awk -F'——' '{
    prefix = $1
    sub(/-.*/, "", prefix)            # text before the first ASCII "-"
    out = prefix "-五级分类" NR "——" $2 "——" $3
    gsub(/[ \t]/, "", out)
    print out
  }' fifth_classification.txt >> fifth_classification_new.txt

  rm -f fifth_classification.txt
}
###Renumber the sixth-level classification lines sequentially###
# Reads sixth_classification.txt, whose lines look like
#   <prefix>-六级分类<k>——<parent>——<children>
# and appends to sixth_classification_new.txt the same lines with <k>
# replaced by the line number (1, 2, 3, ...) and all spaces and tabs removed.
# sixth_classification.txt is deleted afterwards.
# Does nothing when sixth_classification.txt is missing or empty.
sixth_classification_processing()
{
  [ -s sixth_classification.txt ] || return 0

  # One awk pass replaces the start.txt / xxm.txt / end.txt + paste pipeline,
  # so stale temporary files from an aborted run can no longer leak into the
  # result.
  awk -F'——' '{
    prefix = $1
    sub(/-.*/, "", prefix)            # text before the first ASCII "-"
    out = prefix "-六级分类" NR "——" $2 "——" $3
    gsub(/[ \t]/, "", out)
    print out
  }' sixth_classification.txt >> sixth_classification_new.txt

  rm -f sixth_classification.txt
}
###Renumber the attribute lines sequentially###
# Reads attribute.txt, whose lines look like
#   <prefix>-属性<k>——<classification>——<key>——<values>
# and appends to attribute_new.txt the same lines with <k> replaced by
# key_id, key_id+1, ... and all spaces and tabs removed.
# attribute.txt is deleted afterwards. When attribute.txt is missing or
# holds no attribute line, a warning is printed instead.
# Globals: key_id (read).
attribute_processing()
{
  local total
  # A missing file counts as zero lines instead of breaking the numeric test.
  total=$(grep -c 属性 attribute.txt 2>/dev/null)
  if [ "${total:-0}" -eq 0 ]; then
    rm -f attribute.txt
    echo -e "\033[31;49;1m有分类的属性值不存在,请检查!!!: \033[39;49;0m\n"
    #exit
    return 0
  fi

  awk -F'——' -v first="${key_id}" '{
    prefix = $1
    sub(/-.*/, "", prefix)            # text before the first ASCII "-"
    out = prefix "-属性" (first + NR - 1) "——" $2 "——" $3 "——" $4
    gsub(/[ \t]/, "", out)
    print out
  }' attribute.txt >> attribute_new.txt

  rm -f attribute.txt
}
###Pre-process all input files###
# Removes the outputs of a previous run (${i}.txt for start_n..end_n and the
# three *_new.txt files), splits the raw inputs with File_split, then
# renumbers the fifth-level, sixth-level and attribute lines.
# A level with no lines at all is reported and its renumbering step skipped.
# Globals: start_n, end_n (read).
file_processing()
{
  local i fifth_total sixth_total

  for i in $(seq "${start_n}" "${end_n}"); do
    rm -f "${i}.txt"
  done
  rm -f fifth_classification_new.txt attribute_new.txt sixth_classification_new.txt

  File_split

  # The aggregate file is absent when no input had fifth-level lines; count
  # that as zero instead of letting the numeric test fail.
  fifth_total=$(grep -c 五级分类 fifth_classification.txt 2>/dev/null)
  if [ "${fifth_total:-0}" -eq 0 ]; then
    rm -f fifth_classification.txt
    # The colour sequence needs its terminating "m".
    echo -e "\033[31;49;1m此次处理的分类的五级分类不存在: \033[39;49;0m\n"
  else
    fifth_classification_processing
  fi

  sixth_total=$(grep -c 六级分类 sixth_classification.txt 2>/dev/null)
  if [ "${sixth_total:-0}" -eq 0 ]; then
    rm -f sixth_classification.txt
    echo -e "\033[31;49;1m此次处理的分类的六级分类不存在: \033[39;49;0m\n"
  else
    sixth_classification_processing
  fi

  attribute_processing
}
###First-level classification###
# Appends the first-level row "<n> <name> 1 0" (tab separated) to
# TD_CLASSIFY.txt, where <name> is the text after "——" on the "一级分类"
# line of $1.txt.
# Arguments: $1 - index of the split input file ($1.txt).
# Globals:   n (read) - top-level category number, used as the row ID.
First_classification()
{
  local name
  name=$(grep -w 一级分类 "$1.txt" | awk -F'——' '{print $2}')
  printf '%s\t%s\t1\t0\n' "${n}" "${name}" >> TD_CLASSIFY.txt
}
###Second-level classification###
# Splits the "、"-separated names on the "二级分类" line of $1.txt and appends
# one row per name to TD_CLASSIFY.txt:
#   <n><seq>  <name>  2  <n>
# where <seq> is the 1-based position of the name, zero padded.
# Arguments: $1 - index of the split input file ($1.txt).
# Globals:   n (read) - top-level category number.
Second_classification()
{
  # NOTE(review): the source had "seq -f g", which GNU seq rejects (no %
  # directive). A two-digit zero-padded counter is assumed because the old
  # code stripped leading zeros with 10#; confirm the intended width.
  local -r seq_format='%02d'
  local names idx seq_no

  # Word splitting is intentional: names are separated by whitespace after sed.
  names=($(grep 二级分类 "$1.txt" | awk -F'——' '{print $2}' | sed "s/、/ /g"))

  for ((idx = 0; idx < ${#names[@]}; idx++)); do
    printf -v seq_no "${seq_format}" "$((idx + 1))"
    printf '%s%s\t%s\t2\t%s\n' "${n}" "${seq_no}" "${names[idx]}" "${n}" >> TD_CLASSIFY.txt
  done
}
###Third-level classification###
# For every line "三级分类<i>——<parent>——<child>、<child>..." of the split
# input file, looks up the ID of <parent> in TD_CLASSIFY.txt (restricted to
# IDs starting with the current top-level number n) and appends one row per
# child:
#   <parent_id><seq>  <child>  3  <parent_id>
# Arguments: $1 - index of the split input file; falls back to the global
#                 raw_file when omitted.
# Globals:   n (read), raw_file (read as fallback).
Third_classification()
{
  # NOTE(review): the source had "seq -f g", which GNU seq rejects (no %
  # directive). A two-digit zero-padded counter is assumed because the old
  # code stripped leading zeros with 10#; confirm the intended width.
  local -r seq_format='%02d'
  local src="${1:-${raw_file}}"
  local total i line parent_name parent_id children idx seq_no

  total=$(grep -c '三级分类' "${src}.txt")
  for ((i = 1; i <= total; i++)); do
    # -w keeps "三级分类1" from also matching "三级分类10".
    line=$(grep -w "三级分类${i}" "${src}.txt")
    parent_name=$(awk -F'——' '{print $2}' <<< "${line}")
    parent_id=$(awk -v xm="${parent_name}" '$2 == xm {print $1}' TD_CLASSIFY.txt \
      | grep "^${n}")
    children=($(awk -F'——' '{print $3}' <<< "${line}" | sed "s/、/ /g"))

    for ((idx = 0; idx < ${#children[@]}; idx++)); do
      printf -v seq_no "${seq_format}" "$((idx + 1))"
      printf '%s%s\t%s\t3\t%s\n' "${parent_id}" "${seq_no}" "${children[idx]}" "${parent_id}" \
        >> TD_CLASSIFY.txt
    done
  done
}
###Fourth-level classification###
# For every line "四级分类<i>——<parent>——<child>、<child>..." of the split
# input file, looks up the ID of <parent> in TD_CLASSIFY.txt (restricted to
# IDs starting with the current top-level number n) and appends one row per
# child:
#   <parent_id><seq>  <child>  4  <parent_id>
# Arguments: $1 - index of the split input file; falls back to the global
#                 raw_file when omitted.
# Globals:   n (read), raw_file (read as fallback).
Fourth_clssification()
{
  # NOTE(review): the source had "seq -f g", which GNU seq rejects (no %
  # directive). A two-digit zero-padded counter is assumed because the old
  # code stripped leading zeros with 10#; confirm the intended width.
  local -r seq_format='%02d'
  local src="${1:-${raw_file}}"
  local total i line parent_name parent_id children idx seq_no

  total=$(grep -c '四级分类' "${src}.txt")
  for ((i = 1; i <= total; i++)); do
    # -w keeps "四级分类1" from also matching "四级分类10".
    line=$(grep -w "四级分类${i}" "${src}.txt")
    parent_name=$(awk -F'——' '{print $2}' <<< "${line}")
    parent_id=$(awk -v xm="${parent_name}" '$2 == xm {print $1}' TD_CLASSIFY.txt \
      | grep "^${n}")
    children=($(awk -F'——' '{print $3}' <<< "${line}" | sed "s/、/ /g"))

    for ((idx = 0; idx < ${#children[@]}; idx++)); do
      printf -v seq_no "${seq_format}" "$((idx + 1))"
      printf '%s%s\t%s\t4\t%s\n' "${parent_id}" "${seq_no}" "${children[idx]}" "${parent_id}" \
        >> TD_CLASSIFY.txt
    done
  done
}
###Fifth-level classification###
# For every line "<prefix>-五级分类<i>——<parent>——<child>、<child>..." of
# fifth_classification_new.txt, looks up the ID of <parent> in
# TD_CLASSIFY.txt (restricted to IDs starting with <prefix>) and appends one
# row per child:
#   <pre_fifth_n><fifthc_id>  <child>  5  <parent_id>
# Globals: pre_fifth_n (read); fifthc_id (read, incremented per row).
Fifth_classification()
{
  # NOTE(review): the source had "seq -f g", which GNU seq rejects (no %
  # directive), so the original zero-padding width of the running ID is
  # unknown. Width 2 is assumed; confirm against existing data.
  local -r id_format='%02d'
  local total i line parent_name prefix parent_id child id_suffix

  total=$(grep -c 五级分类 fifth_classification_new.txt)
  for ((i = 1; i <= total; i++)); do
    # Fetch the line once and derive every field from it.
    line=$(grep -w "五级分类${i}" fifth_classification_new.txt)
    parent_name=$(awk -F'——' '{print $2}' <<< "${line}")
    prefix=$(awk -F'-' '{print $1}' <<< "${line}")
    parent_id=$(awk -v xm="${parent_name}" '$2 == xm {print $1}' TD_CLASSIFY.txt \
      | grep "^${prefix}")

    for child in $(awk -F'——' '{print $3}' <<< "${line}" | sed "s/、/ /g"); do
      printf -v id_suffix "${id_format}" "${fifthc_id}"
      printf '%s%s\t%s\t5\t%s\n' "${pre_fifth_n}" "${id_suffix}" "${child}" "${parent_id}" \
        >> TD_CLASSIFY.txt
      fifthc_id=$((fifthc_id + 1))
    done
  done
}
###Sixth-level classification###
# For every line "<prefix>-六级分类<i>——<parent>——<child>、<child>..." of
# sixth_classification_new.txt, looks up the ID of <parent> in
# TD_CLASSIFY.txt (no prefix restriction) and appends one row per child:
#   <pre_sixth_n><sixthc_id>  <child>  6  <parent_id>
# Globals: pre_sixth_n (read); sixthc_id (read, incremented per row).
Sixth_classification()
{
  # NOTE(review): the source had "seq -f g", which GNU seq rejects (no %
  # directive), so the original zero-padding width of the running ID is
  # unknown. Width 2 is assumed; confirm against existing data.
  local -r id_format='%02d'
  local total i line parent_name parent_id child id_suffix

  total=$(grep -c 六级分类 sixth_classification_new.txt)
  for ((i = 1; i <= total; i++)); do
    # Fetch the line once and derive every field from it. The line prefix
    # was extracted by the old code but never used, so it is not read here.
    line=$(grep -w "六级分类${i}" sixth_classification_new.txt)
    parent_name=$(awk -F'——' '{print $2}' <<< "${line}")
    parent_id=$(awk -v xm="${parent_name}" '$2 == xm {print $1}' TD_CLASSIFY.txt)

    for child in $(awk -F'——' '{print $3}' <<< "${line}" | sed "s/、/ /g"); do
      printf -v id_suffix "${id_format}" "${sixthc_id}"
      printf '%s%s\t%s\t6\t%s\n' "${pre_sixth_n}" "${id_suffix}" "${child}" "${parent_id}" \
        >> TD_CLASSIFY.txt
      sixthc_id=$((sixthc_id + 1))
    done
  done
}
###Check for duplicated classifications###
# Reports every (ID, name) pair that occurs more than once in
# TD_CLASSIFY.txt, together with its occurrence count. The list is also
# written to /tmp/Repeated_classification_list (empty when nothing repeats).
# Arguments: $1 - top-level category number, or pre_fifth_n / pre_sixth_n to
#                 select the fifth-/sixth-level wording of the messages.
# Globals:   pre_fifth_n, pre_sixth_n (read).
check_classification_repeated()
{
  local pra=$1
  local -r report=/tmp/Repeated_classification_list
  local label dup_list

  if [ "${pra}" -eq "${pre_fifth_n}" ] 2>/dev/null; then
    label="五级分类"
  elif [ "${pra}" -eq "${pre_sixth_n}" ] 2>/dev/null; then
    label="六级分类"
  else
    label="第${pra}分类"
  fi

  # Keep only entries whose count exceeds 1. Filtering with "grep -v 1"
  # dropped every line containing the digit 1 anywhere, e.g. a duplicated
  # ID 101.
  dup_list=$(awk '{print $1,$2}' TD_CLASSIFY.txt | sort | uniq -c | awk '$1 > 1')

  if [ -n "${dup_list}" ]; then
    printf '%s\n' "${dup_list}" > "${report}"
    echo -e "\033[31;49;1m${label}有重复分类,详情如下: \033[39;49;0m\n"
    echo -e "\033[31;49;1m出现次数 分类名称\033[39;49;0m"
    printf '%s\n' "${dup_list}"
  else
    : > "${report}"
    echo -e "${label}没有重复类问题 ^_^\n"
  fi
}
###Check that every classification has a parent###
# Lists every row of TD_CLASSIFY.txt whose fourth column (parent ID) is
# empty. Prints a confirmation and a separator line when there is none.
# Arguments: $1 - top-level category number, or pre_fifth_n / pre_sixth_n to
#                 select the fifth-/sixth-level wording of the messages.
# Globals:   pre_fifth_n, pre_sixth_n (read).
check_classification_parentid()
{
  local pra=$1
  local label orphans

  if [ "${pra}" -eq "${pre_fifth_n}" ] 2>/dev/null; then
    label="五级分类"
  elif [ "${pra}" -eq "${pre_sixth_n}" ] 2>/dev/null; then
    label="六级分类"
  else
    label="第${pra}分类"
  fi

  # A row without a parent has fewer than four whitespace-separated fields.
  orphans=$(awk 'length($4) == 0 {print "分类"$1,$2}' TD_CLASSIFY.txt)

  if [ -n "${orphans}" ]; then
    echo -e "\033[31;49;1m${label}对应的上联分类不存在\033[39;49;0m"
    printf '%s\n' "${orphans}"
  else
    echo -e "${label}对应的上联分类均存在 ^_^\n"
    echo "+===================================+"
  fi
}
###Build the attribute KEY table###
# Recreates TD_ATTRIBUTE_KEY.txt with one row per attribute line of
# attribute_new.txt ("<prefix>-属性<id>——<classification>——<key>——<values>"):
#   <id>  <key>  <classification_id>
# The classification ID is looked up by name in TD_CLASSIFY.txt:
#   - exactly one row with that name: its ID;
#   - otherwise the IDs starting with <prefix>, or, when there is none, the
#     IDs starting with pre_fifth_n.
# Globals: key_id (read, incremented per row); pre_fifth_n (read).
get_attribute_key()
{
  local total i line class_name key_name prefix ids hits class_id

  rm -f TD_ATTRIBUTE_KEY.txt

  total=$(grep -c '属性' attribute_new.txt)
  for ((i = 0; i < total; i++)); do
    # Take every field from the matched line itself; a positional array of
    # key names would shift when one key name is empty.
    line=$(grep -w "属性${key_id}" attribute_new.txt)
    class_name=$(awk -F'——' '{print $2}' <<< "${line}")
    key_name=$(awk -F'——' '{print $3}' <<< "${line}")
    prefix=$(awk -F'-' '{print $1}' <<< "${line}")

    ids=$(awk -v xm="${class_name}" '$2 == xm {print $1}' TD_CLASSIFY.txt)
    hits=$(printf '%s' "${ids}" | grep -c '')
    if [ "${hits}" -eq 1 ]; then
      class_id=${ids}
    else
      # Use the configured fifth-level prefix rather than a literal 555.
      class_id=$(grep "^${prefix}" <<< "${ids}") \
        || class_id=$(grep "^${pre_fifth_n}" <<< "${ids}")
    fi

    printf '%s\t%s\t%s\n' "${key_id}" "${key_name}" "${class_id}" >> TD_ATTRIBUTE_KEY.txt
    key_id=$((key_id + 1))
  done
}
###Build the attribute VALUE table###
# Recreates TD_ATTRIBUTE_VALUE.txt. For every attribute id between the first
# and the last id listed in TD_ATTRIBUTE_KEY.txt, the matching line of
# attribute_new.txt
#   <prefix>-属性<id>——<classification>——<key>——<value>、<value>...
# yields one row per value:
#   <value_id>  <value>  <key_id>
# <key_id> is the first column of the TD_ATTRIBUTE_KEY.txt row whose key
# name and classification ID both match. The classification ID is resolved
# the same way as when the KEY table is built: a unique name match in
# TD_CLASSIFY.txt, else the IDs starting with <prefix>, else the IDs
# starting with pre_fifth_n.
# Globals: value_id (read, incremented per row); pre_fifth_n (read).
get_attribute_value()
{
  local first last i line class_name key_name prefix ids hits class_id key_ref
  local values value

  rm -f TD_ATTRIBUTE_VALUE.txt

  first=$(head -1 TD_ATTRIBUTE_KEY.txt | awk '{print $1}')
  last=$(tail -1 TD_ATTRIBUTE_KEY.txt | awk '{print $1}')
  # Nothing to do when the KEY table is missing or empty.
  if [ -z "${first}" ] || [ -z "${last}" ]; then
    return 0
  fi

  for ((i = first; i <= last; i++)); do
    line=$(grep -w "属性${i}" attribute_new.txt)
    class_name=$(awk -F'——' '{print $2}' <<< "${line}")
    key_name=$(awk -F'——' '{print $3}' <<< "${line}")
    prefix=$(awk -F'-' '{print $1}' <<< "${line}")

    ids=$(awk -v xm="${class_name}" '$2 == xm {print $1}' TD_CLASSIFY.txt)
    hits=$(printf '%s' "${ids}" | grep -c '')
    if [ "${hits}" -eq 1 ]; then
      class_id=${ids}
    else
      # Use the configured fifth-level prefix rather than a literal 555.
      class_id=$(grep "^${prefix}" <<< "${ids}") \
        || class_id=$(grep "^${pre_fifth_n}" <<< "${ids}")
    fi

    # Match "<key>_<classification_id>" directly against the KEY table; this
    # replaces the start.txt / end.txt / middle.txt temporary files.
    key_ref=$(awk -v xm="${key_name}_${class_id}" '($2 "_" $3) == xm {print $1}' \
      TD_ATTRIBUTE_KEY.txt)

    values=($(awk -F'——' '{print $4}' <<< "${line}" | sed "s/、/ /g"))
    for value in "${values[@]}"; do
      printf '%s\t%s\t%s\n' "${value_id}" "${value}" "${key_ref}" >> TD_ATTRIBUTE_VALUE.txt
      value_id=$((value_id + 1))
    done
  done
}
###Check that every attribute KEY has a classification###
# Lists every row of TD_ATTRIBUTE_KEY.txt whose third column (classification
# ID) is empty, prefixed with its line number. Prints a confirmation and a
# separator line when there is none.
check_attribute_key()
{
  local orphans
  orphans=$(awk 'length($3) == 0 {print NR,"属性KEY"$1,$2}' TD_ATTRIBUTE_KEY.txt)

  if [ -n "${orphans}" ]; then
    echo -e "\033[31;49;1m下列属性KEY对应的上联分类不存在 \033[39;49;0m\n"
    printf '%s\n' "${orphans}"
    #exit
  else
    echo -e "TD_ATTRIBUTE_KEY is ok ^_^\n"
    echo "+===================================+"
  fi
}
###Check that every attribute VALUE has an attribute KEY###
# Lists every row of TD_ATTRIBUTE_VALUE.txt whose third column (KEY id) is
# empty and then terminates the script with status 1. Prints a confirmation
# when there is none.
check_attribute_value()
{
  local orphans
  orphans=$(awk 'length($3) == 0 {print "属性VALUE_ID"$1,$2}' TD_ATTRIBUTE_VALUE.txt)

  if [ -n "${orphans}" ]; then
    echo -e "\033[31;49;1m下列属性VALUE对应的上联属性KEY不存在 \033[39;49;0m\n"
    printf '%s\n' "${orphans}"
    # A bare "exit" reported success (the status of the preceding command);
    # signal the failure explicitly.
    exit 1
  else
    echo -e "TD_ATTRIBUTE_VALUE is ok ^_^\n"
  fi
}
###Levels one to four###
# Starts TD_CLASSIFY.txt afresh, then for every existing ${raw_file}.txt in
# start_n..end_n reads its top-level category number into n, emits the
# first- to fourth-level rows and runs both consistency checks for n.
# Globals: start_n, end_n (read); raw_file, n (written).
First_to_Fourth_classification()
{
  [ ! -f TD_CLASSIFY.txt ] || rm -f TD_CLASSIFY.txt

  for raw_file in $(seq ${start_n} ${end_n}); do
    # Indices without a split file are skipped silently.
    [ -f "${raw_file}.txt" ] || continue

    ###Determine the top-level category number###
    n=$(grep -w 第几大分类 "${raw_file}.txt" | cut -d':' -f2)

    First_classification "${raw_file}"
    Second_classification "${raw_file}"
    Third_classification "${raw_file}"
    Fourth_clssification "${raw_file}"
    check_classification_repeated ${n}
    check_classification_parentid ${n}
  done
}
###Stand-alone fifth level###
# When fifth_classification_new.txt exists, emits the fifth-level rows and
# runs both consistency checks with the fifth-level prefix; otherwise does
# nothing.
# Globals: pre_fifth_n (read).
single_fifth_classification()
{
  [ -f fifth_classification_new.txt ] || return 0

  Fifth_classification
  check_classification_repeated ${pre_fifth_n}
  check_classification_parentid ${pre_fifth_n}
}
###Stand-alone sixth level###
# When sixth_classification_new.txt exists, emits the sixth-level rows and
# runs both consistency checks with the sixth-level prefix; otherwise does
# nothing.
# Globals: pre_sixth_n (read).
single_sixth_classification()
{
  [ -f sixth_classification_new.txt ] || return 0

  Sixth_classification
  check_classification_repeated ${pre_sixth_n}
  check_classification_parentid ${pre_sixth_n}
}
###Attribute KEY###
# Builds the attribute KEY table, then verifies it.
attribute_key()
{
  get_attribute_key
  check_attribute_key
}
###Attribute VALUE###
# Builds the attribute VALUE table, then verifies it.
attribute_value()
{
  get_attribute_value
  check_attribute_value
}
#======end function definitions======#
#======begin function calls======#
# Clean up previous outputs, split the raw cN.txt inputs and renumber the
# fifth-level, sixth-level and attribute lines.
file_processing;
# Build and check levels one to four of TD_CLASSIFY.txt.
First_to_Fourth_classification;
# Add and check the fifth level, if any fifth-level lines were found.
single_fifth_classification;
# Add and check the sixth level, if any sixth-level lines were found.
single_sixth_classification;
# Build and check TD_ATTRIBUTE_KEY.txt.
attribute_key;
# Build and check TD_ATTRIBUTE_VALUE.txt.
attribute_value
#======end function calls======#