linux awk高级脚本,shell脚本——awk的精准匹配强于grep -w

1、 grep vs awk 效果如下:

a4c26d1e5885305701be709a3d33442f.png

3、awk 的精准匹配及函数调用判断添加后的脚本如下:

#!/bin/bash

#======begin global variable definitions======#

# First index of the c<N>.txt / <N>.txt files the loops iterate over (seq start).
start_n=1

# Last index of the c<N>.txt / <N>.txt files the loops iterate over (seq end).
end_n=10

# Running counter appended to pre_fifth_n to form each level-5 ID;
# Fifth_classification increments it after every row it writes.
fifthc_id=4721

# Running counter appended to pre_sixth_n to form each level-6 ID;
# Sixth_classification increments it after every row it writes.
sixthc_id=1

# First attribute-key number: attribute_processing numbers the attribute lines
# starting here and get_attribute_key increments it once per key.
key_id=282

# Running attribute-value ID; get_attribute_value increments it per value.
value_id=2145

# Prefix of every level-5 ID; also the tag passed to the check_* functions
# to select the level-5 wording of their messages.
pre_fifth_n=555

# Prefix of every level-6 ID; also the tag passed to the check_* functions
# to select the level-6 wording of their messages.
pre_sixth_n=666

#======end global variable definitions======#

#======begin 函数定义======#

###Split the raw category files###

# Prefixes every line read from stdin with "$1-" and writes it to stdout.
_file_split_prefix()
{
  local line
  while IFS= read -r line; do
    printf '%s-%s\n' "$1" "${line}"
  done
}

#######################################
# Splits every c<i>.txt (i from start_n to end_n) by record type:
#   - lines mentioning none of 五级分类 / 六级分类 / 属性 go to <i>.txt;
#   - 五级分类, 六级分类 and 属性 lines are appended, each prefixed with
#     "<n>-", to fifth_classification.txt, sixth_classification.txt and
#     attribute.txt, where <n> is the value after ':' on the 第几大分类 line.
# Globals:   start_n, end_n (read)
# Arguments: none
# Outputs:   the category number (only when level-5 lines exist) and a red
#            notice for every missing input file or missing level on stdout.
#######################################
File_split()
{
  local i n matched

  for i in $(seq "${start_n}" "${end_n}"); do
    if [ ! -f "c${i}.txt" ]; then
      echo -e "\033[31;49;1m第c${i}.txt分类文件不存在!\033[39;39;0m"
      echo "+===================================+"
      continue
    fi

    n=$(grep -w 第几大分类 "c${i}.txt" | cut -d':' -f2)

    # Everything that is not a level-5 / level-6 / attribute record.
    grep -v -e 五级分类 -e 属性 -e 六级分类 "c${i}.txt" > "${i}.txt"

    if matched=$(grep 五级分类 "c${i}.txt"); then
      echo "${n}"
      printf '%s\n' "${matched}" | _file_split_prefix "${n}" \
        >> fifth_classification.txt
    else
      echo -e "\033[31;49;1mc${i}分类的五级分类不存在: \033[39;49;0m\n"
    fi

    if matched=$(grep 六级分类 "c${i}.txt"); then
      printf '%s\n' "${matched}" | _file_split_prefix "${n}" \
        >> sixth_classification.txt
    else
      echo -e "\033[31;49;1mc${i}分类的六级分类不存在: \033[39;49;0m\n"
    fi

    # attribute.txt is always created, even when no attribute line exists,
    # because attribute_processing counts its lines afterwards.
    grep 属性 "c${i}.txt" | _file_split_prefix "${n}" >> attribute.txt
  done
}

###Number the level-5 lines sequentially###
#######################################
# Rewrites every line of fifth_classification.txt as
#   <prefix>-五级分类<k>——<field2>——<field3>
# where <prefix> is the text before the first '-', <k> is the 1-based line
# number and the fields are split on '——'. Spaces and tabs are dropped,
# as the original sed filters did. The result is appended to
# fifth_classification_new.txt and the input file is removed.
# Done in a single awk pass, so no shared start.txt/xxm.txt/end.txt
# scratch files are needed (stale copies used to corrupt the output).
# Arguments: none
#######################################
fifth_classification_processing()
{
  awk '
    {
      prefix = $0
      sub(/-.*/, "", prefix)            # keep the text before the first "-"
      split($0, part, "——")
      rest = part[2] "——" part[3]
      gsub(/ /, "", prefix)
      gsub(/ /, "", rest)
      line = prefix "-五级分类" NR "——" rest
      gsub(/\t/, "", line)
      print line
    }
  ' fifth_classification.txt >> fifth_classification_new.txt

  rm -f fifth_classification.txt
}

###Number the level-6 lines sequentially###
#######################################
# Rewrites every line of sixth_classification.txt as
#   <prefix>-六级分类<k>——<field2>——<field3>
# where <prefix> is the text before the first '-', <k> is the 1-based line
# number and the fields are split on '——'. Spaces and tabs are dropped,
# as the original sed filters did. The result is appended to
# sixth_classification_new.txt and the input file is removed.
# Done in a single awk pass, so no shared start.txt/xxm.txt/end.txt
# scratch files are needed (stale copies used to corrupt the output).
# Arguments: none
#######################################
sixth_classification_processing()
{
  awk '
    {
      prefix = $0
      sub(/-.*/, "", prefix)            # keep the text before the first "-"
      split($0, part, "——")
      rest = part[2] "——" part[3]
      gsub(/ /, "", prefix)
      gsub(/ /, "", rest)
      line = prefix "-六级分类" NR "——" rest
      gsub(/\t/, "", line)
      print line
    }
  ' sixth_classification.txt >> sixth_classification_new.txt

  rm -f sixth_classification.txt
}

###Number the attribute lines sequentially###
#######################################
# Rewrites every line of attribute.txt as
#   <prefix>-属性<id>——<field2>——<field3>——<field4>
# where <prefix> is the text before the first '-', <id> starts at key_id
# and grows by one per line, and the fields are split on '——'. Spaces and
# tabs are dropped. The result is appended to attribute_new.txt and
# attribute.txt is removed.
# A missing or attribute-free attribute.txt only prints a red warning.
# Globals:   key_id (read, not modified)
# Arguments: none
#######################################
attribute_processing()
{
  local total=0

  if [ -f attribute.txt ]; then
    total=$(grep -c 属性 attribute.txt)
  fi

  if [ "${total:-0}" -eq 0 ]; then
    rm -f attribute.txt
    echo -e "\033[31;49;1m有分类的属性值不存在,请检查!!!: \033[39;49;0m\n"
    return 0
  fi

  awk -v first_id="${key_id}" '
    {
      prefix = $0
      sub(/-.*/, "", prefix)            # keep the text before the first "-"
      split($0, part, "——")
      rest = part[2] "——" part[3] "——" part[4]
      gsub(/ /, "", prefix)
      gsub(/ /, "", rest)
      line = prefix "-属性" (first_id + NR - 1) "——" rest
      gsub(/\t/, "", line)
      print line
    }
  ' attribute.txt >> attribute_new.txt

  rm -f attribute.txt
}

###Clean up, split and renumber the input files###
#######################################
# Removes the outputs of a previous run (<i>.txt for i in start_n..end_n and
# the three *_new.txt files), runs File_split, then renumbers the level-5,
# level-6 and attribute aggregates.
# A level whose aggregate file is missing or empty is reported in red and
# skipped instead of being processed.
# Globals:   start_n, end_n (read)
# Arguments: none
#######################################
file_processing()
{
  local i count

  for i in $(seq "${start_n}" "${end_n}"); do
    if [ -f "${i}.txt" ]; then
      rm -f "${i}.txt"
    fi
  done

  rm -f fifth_classification_new.txt attribute_new.txt \
    sixth_classification_new.txt

  File_split

  # File_split only creates an aggregate when a match exists, so a missing
  # file counts as zero rather than as an error.
  count=$(grep -c 五级分类 fifth_classification.txt 2>/dev/null)
  if [ "${count:-0}" -eq 0 ]; then
    rm -f fifth_classification.txt
    echo -e "\033[31;49;1m此次处理的分类的五级分类不存在: \033[39;49;0m\n"
  else
    fifth_classification_processing
  fi

  count=$(grep -c 六级分类 sixth_classification.txt 2>/dev/null)
  if [ "${count:-0}" -eq 0 ]; then
    rm -f sixth_classification.txt
    echo -e "\033[31;49;1m此次处理的分类的六级分类不存在: \033[39;49;0m\n"
  else
    sixth_classification_processing
  fi

  attribute_processing
}

###Level-1 category###
#######################################
# Appends the level-1 row "<n>\t<name>\t1\t0" to TD_CLASSIFY.txt, where
# <name> is the second '——'-separated field of the 一级分类 line.
# Globals:   n (read) - category number set by the caller
# Arguments: $1 - base name of the input file (<$1>.txt is read)
#######################################
First_classification()
{
  local name

  name=$(grep -w 一级分类 "$1.txt" | awk -F'——' '{print $2}')
  printf '%s\t%s\t1\t0\n' "${n}" "${name}" >> TD_CLASSIFY.txt
}

###Level-2 categories###
#######################################
# Appends one row "<n><kk>\t<name>\t2\t<n>" to TD_CLASSIFY.txt for every
# '、'-separated name in the second '——' field of the 二级分类 line(s).
# <kk> is the 1-based position of the name, zero-padded to two digits.
# NOTE(review): the published script calls `seq -f g`, which is not a valid
# seq format; "%02g" is assumed because the original strips leading zeros
# with 10# - confirm the intended width.
# Globals:   n (read) - category number set by the caller
# Arguments: $1 - base name of the input file (<$1>.txt is read)
#######################################
Second_classification()
{
  local name idx=0
  local -a names

  # read -d '' consumes the whole stream; the names are split on whitespace
  # without the glob expansion an unquoted $(...) would trigger.
  IFS=$' \t\n' read -r -d '' -a names < <(
    grep 二级分类 "$1.txt" | awk -F'——' '{print $2}' | sed "s/、/ /g"
  )

  for name in "${names[@]}"; do
    idx=$((idx + 1))
    printf '%s%02d\t%s\t2\t%s\n' "${n}" "${idx}" "${name}" "${n}" \
      >> TD_CLASSIFY.txt
  done
}

###Level-3 categories###
#######################################
# For every "三级分类<k>——<parent>——<a>、<b>..." line of <raw_file>.txt,
# looks up <parent> by name in TD_CLASSIFY.txt (keeping only IDs that start
# with n) and appends one row "<parent_id><kk>\t<child>\t3\t<parent_id>" per
# child, <kk> being the child's 1-based position padded to two digits.
# NOTE(review): the published script calls `seq -f g`, which is not a valid
# seq format; "%02g" is assumed because the original strips leading zeros
# with 10# - confirm the intended width.
# Globals:   raw_file (read) - base name of the input file
#            n (read)        - category number, used as ID prefix filter
# Arguments: none are read (callers pass the file name, which is ignored)
#######################################
Third_classification()
{
  local total row line parent_name parent_id child idx
  local -a children

  total=$(grep -c '三级分类' "${raw_file}.txt")

  for ((row = 1; row <= ${total:-0}; row++)); do
    line=$(grep -w "三级分类${row}" "${raw_file}.txt")
    parent_name=$(awk -F'——' '{print $2}' <<< "${line}")
    parent_id=$(awk -v xm="${parent_name}" '{if ($2 == xm) {print $1}}' \
      TD_CLASSIFY.txt | grep -- "^${n}")

    IFS=$' \t\n' read -r -d '' -a children < <(
      awk -F'——' '{print $3}' <<< "${line}" | sed "s/、/ /g"
    )

    idx=0
    for child in "${children[@]}"; do
      idx=$((idx + 1))
      printf '%s%02d\t%s\t3\t%s\n' \
        "${parent_id}" "${idx}" "${child}" "${parent_id}" >> TD_CLASSIFY.txt
    done
  done
}

###Level-4 categories###
#######################################
# For every "四级分类<k>——<parent>——<a>、<b>..." line of <raw_file>.txt,
# looks up <parent> by name in TD_CLASSIFY.txt (keeping only IDs that start
# with n) and appends one row "<parent_id><kk>\t<child>\t4\t<parent_id>" per
# child, <kk> being the child's 1-based position padded to two digits.
# The misspelled function name is kept because callers use it.
# NOTE(review): the published script calls `seq -f g`, which is not a valid
# seq format; "%02g" is assumed because the original strips leading zeros
# with 10# - confirm the intended width.
# Globals:   raw_file (read) - base name of the input file
#            n (read)        - category number, used as ID prefix filter
# Arguments: none are read (callers pass the file name, which is ignored)
#######################################
Fourth_clssification()
{
  local total row line parent_name parent_id child idx
  local -a children

  total=$(grep -c '四级分类' "${raw_file}.txt")

  for ((row = 1; row <= ${total:-0}; row++)); do
    line=$(grep -w "四级分类${row}" "${raw_file}.txt")
    parent_name=$(awk -F'——' '{print $2}' <<< "${line}")
    parent_id=$(awk -v xm="${parent_name}" '{if ($2 == xm) {print $1}}' \
      TD_CLASSIFY.txt | grep -- "^${n}")

    IFS=$' \t\n' read -r -d '' -a children < <(
      awk -F'——' '{print $3}' <<< "${line}" | sed "s/、/ /g"
    )

    idx=0
    for child in "${children[@]}"; do
      idx=$((idx + 1))
      printf '%s%02d\t%s\t4\t%s\n' \
        "${parent_id}" "${idx}" "${child}" "${parent_id}" >> TD_CLASSIFY.txt
    done
  done
}

###Level-5 categories###
#######################################
# For every "<prefix>-五级分类<k>——<parent>——<a>、<b>..." line of
# fifth_classification_new.txt, looks up <parent> by name in TD_CLASSIFY.txt
# (keeping only IDs that start with <prefix>) and appends one row
# "<pre_fifth_n><fifthc_id>\t<child>\t5\t<parent_id>" per child.
# NOTE(review): the published script formats the counter with `seq -f g`,
# which is not a valid seq format; "%02g" is assumed - confirm the width.
# Globals:   pre_fifth_n (read); fifthc_id (read, incremented per row)
# Arguments: none
#######################################
Fifth_classification()
{
  local total row line parent_name prefix parent_id child
  local -a children

  total=$(grep -c 五级分类 fifth_classification_new.txt)

  for ((row = 1; row <= ${total:-0}; row++)); do
    line=$(grep -w "五级分类${row}" fifth_classification_new.txt)
    parent_name=$(awk -F'——' '{print $2}' <<< "${line}")
    prefix=$(awk -F'-' '{print $1}' <<< "${line}")
    parent_id=$(awk -v xm="${parent_name}" '{if ($2 == xm) {print $1}}' \
      TD_CLASSIFY.txt | grep -- "^${prefix}")

    IFS=$' \t\n' read -r -d '' -a children < <(
      awk -F'——' '{print $3}' <<< "${line}" | sed "s/、/ /g"
    )

    for child in "${children[@]}"; do
      # 10# keeps a zero-padded counter from being read as octal.
      printf '%s%02d\t%s\t5\t%s\n' "${pre_fifth_n}" "$((10#${fifthc_id}))" \
        "${child}" "${parent_id}" >> TD_CLASSIFY.txt
      fifthc_id=$((10#${fifthc_id} + 1))
    done
  done
}

###Level-6 categories###
#######################################
# For every "<prefix>-六级分类<k>——<parent>——<a>、<b>..." line of
# sixth_classification_new.txt, looks up <parent> by name in TD_CLASSIFY.txt
# and appends one row "<pre_sixth_n><sixthc_id>\t<child>\t6\t<parent_id>"
# per child. Unlike the level-5 lookup, the parent ID is not filtered by
# <prefix> (the original computed the prefix but never used it).
# NOTE(review): the published script formats the counter with `seq -f g`,
# which is not a valid seq format; "%02g" is assumed - confirm the width.
# Globals:   pre_sixth_n (read); sixthc_id (read, incremented per row)
# Arguments: none
#######################################
Sixth_classification()
{
  local total row line parent_name parent_id child
  local -a children

  total=$(grep -c 六级分类 sixth_classification_new.txt)

  for ((row = 1; row <= ${total:-0}; row++)); do
    line=$(grep -w "六级分类${row}" sixth_classification_new.txt)
    parent_name=$(awk -F'——' '{print $2}' <<< "${line}")
    parent_id=$(awk -v xm="${parent_name}" '{if ($2 == xm) {print $1}}' \
      TD_CLASSIFY.txt)

    IFS=$' \t\n' read -r -d '' -a children < <(
      awk -F'——' '{print $3}' <<< "${line}" | sed "s/、/ /g"
    )

    for child in "${children[@]}"; do
      # 10# keeps a zero-padded counter from being read as octal.
      printf '%s%02d\t%s\t6\t%s\n' "${pre_sixth_n}" "$((10#${sixthc_id}))" \
        "${child}" "${parent_id}" >> TD_CLASSIFY.txt
      sixthc_id=$((10#${sixthc_id} + 1))
    done
  done
}

###Check for duplicated categories###
#######################################
# Reports (ID, name) pairs that occur more than once in TD_CLASSIFY.txt.
# The duplicate list ("<count> <id> <name>" lines) is also left in
# /tmp/Repeated_classification_list, as before.
# Duplicates are selected by their count (> 1); the previous
# `grep -v 1` filter dropped every line containing the digit 1 anywhere,
# so duplicated IDs such as 11 or 301 were never reported.
# Globals:   pre_fifth_n, pre_sixth_n (read)
# Arguments: $1 - category number, or pre_fifth_n / pre_sixth_n to select
#                 the level-5 / level-6 wording of the message
# Outputs:   the report on stdout
#######################################
check_classification_repeated()
{
  local pra=$1
  local label
  local list=/tmp/Repeated_classification_list

  if [ "${pra}" -eq "${pre_fifth_n}" ]; then
    label="五级分类"
  elif [ "${pra}" -eq "${pre_sixth_n}" ]; then
    label="六级分类"
  else
    label="第${pra}分类"
  fi

  awk '{print $1,$2}' TD_CLASSIFY.txt | sort | uniq -c | awk '$1 > 1' \
    > "${list}"

  if [ -s "${list}" ]; then
    echo -e "\033[31;49;1m${label}有重复分类,详情如下: \033[39;49;0m\n"
    echo -e "\033[31;49;1m出现次数 分类名称\033[39;49;0m"
    cat "${list}"
  else
    echo -e "${label}没有重复类问题 ^_^\n"
  fi
}

###Check that every category has a parent###
#######################################
# Lists the rows of TD_CLASSIFY.txt whose fourth column (parent ID) is
# empty, or confirms that every row has one.
# Globals:   pre_fifth_n, pre_sixth_n (read)
# Arguments: $1 - category number, or pre_fifth_n / pre_sixth_n to select
#                 the level-5 / level-6 wording of the message
# Outputs:   the report on stdout
#######################################
check_classification_parentid()
{
  local pra=$1
  local label orphans

  if [ "${pra}" -eq "${pre_fifth_n}" ]; then
    label="五级分类"
  elif [ "${pra}" -eq "${pre_sixth_n}" ]; then
    label="六级分类"
  else
    label="第${pra}分类"
  fi

  # One pass replaces the former "total lines vs. lines with a parent" count.
  orphans=$(awk '{if (length($4)==0) {print "分类"$1,$2}}' TD_CLASSIFY.txt)

  if [ -n "${orphans}" ]; then
    echo -e "\033[31;49;1m${label}对应的上联分类不存在\033[39;49;0m"
    printf '%s\n' "${orphans}"
  else
    echo -e "${label}对应的上联分类均存在 ^_^\n"
    echo "+===================================+"
  fi
}

###Build the attribute KEY table###
#######################################
# Regenerates TD_ATTRIBUTE_KEY.txt with one row
#   "<key_id>\t<key name>\t<category id>"
# per 属性 line of attribute_new.txt, walking the lines by their number
# (属性<key_id>, 属性<key_id+1>, ...).
# The category ID is looked up by name in TD_CLASSIFY.txt. When the name
# is not unique, IDs starting with the line's own prefix win; otherwise the
# IDs starting with pre_fifth_n are used.
# The key name is read from the matched line itself, so an empty field can
# no longer shift the names of all later keys.
# Globals:   key_id (read, incremented once per attribute line)
#            pre_fifth_n (read)
# Arguments: none
#######################################
get_attribute_key()
{
  local total idx line class_name key_name prefix class_id
  local -a class_ids

  rm -f TD_ATTRIBUTE_KEY.txt

  total=$(grep -c '属性' attribute_new.txt)

  for ((idx = 0; idx < ${total:-0}; idx++)); do
    line=$(grep -w "属性${key_id}" attribute_new.txt)
    class_name=$(awk -F'——' '{print $2}' <<< "${line}")
    key_name=$(awk -F'——' '{print $3}' <<< "${line}")
    prefix=$(awk -F'-' '{print $1}' <<< "${line}")

    IFS=$' \t\n' read -r -d '' -a class_ids < <(
      awk -v xm="${class_name}" '{if ($2 == xm) {print $1}}' TD_CLASSIFY.txt
    )

    if [ "${#class_ids[@]}" -eq 1 ]; then
      class_id=${class_ids[0]}
    else
      # Ambiguous (or unknown) name: prefer this category's own IDs, then
      # fall back to the level-5 IDs.
      class_id=$(printf '%s\n' "${class_ids[@]}" | grep -- "^${prefix}") \
        || class_id=$(printf '%s\n' "${class_ids[@]}" \
          | grep -- "^${pre_fifth_n}")
    fi

    printf '%s\t%s\t%s\n' "${key_id}" "${key_name}" "${class_id}" \
      >> TD_ATTRIBUTE_KEY.txt
    key_id=$((key_id + 1))
  done
}

###Build the attribute VALUE table###
#######################################
# Regenerates TD_ATTRIBUTE_VALUE.txt with one row
#   "<value_id>\t<value>\t<key id>"
# per '、'-separated value in the fourth '——' field of every 属性<id> line
# of attribute_new.txt, for <id> between the first and last key ID listed
# in TD_ATTRIBUTE_KEY.txt.
# The key ID is found by matching (key name, category ID) against
# TD_ATTRIBUTE_KEY.txt; the category ID is resolved exactly as in
# get_attribute_key. The lookup reads TD_ATTRIBUTE_KEY.txt directly, so the
# start.txt / end.txt / middle.txt / xxm_tmp.txt scratch files are gone.
# Globals:   value_id (read, incremented once per value)
#            pre_fifth_n (read)
# Arguments: none
#######################################
get_attribute_value()
{
  local first_key last_key id line class_name key_name prefix
  local class_id key_ref value
  local -a class_ids values

  rm -f TD_ATTRIBUTE_VALUE.txt

  first_key=$(head -n 1 TD_ATTRIBUTE_KEY.txt | awk '{print $1}')
  last_key=$(tail -n 1 TD_ATTRIBUTE_KEY.txt | awk '{print $1}')
  if [ -z "${first_key}" ] || [ -z "${last_key}" ]; then
    return 0 # no keys, hence no values to emit
  fi

  for id in $(seq "${first_key}" "${last_key}"); do
    line=$(grep -w "属性${id}" attribute_new.txt)
    class_name=$(awk -F'——' '{print $2}' <<< "${line}")
    key_name=$(awk -F'——' '{print $3}' <<< "${line}")
    prefix=$(awk -F'-' '{print $1}' <<< "${line}")

    IFS=$' \t\n' read -r -d '' -a class_ids < <(
      awk -v xm="${class_name}" '{if ($2 == xm) {print $1}}' TD_CLASSIFY.txt
    )

    if [ "${#class_ids[@]}" -eq 1 ]; then
      class_id=${class_ids[0]}
    else
      # Ambiguous (or unknown) name: prefer this category's own IDs, then
      # fall back to the level-5 IDs.
      class_id=$(printf '%s\n' "${class_ids[@]}" | grep -- "^${prefix}") \
        || class_id=$(printf '%s\n' "${class_ids[@]}" \
          | grep -- "^${pre_fifth_n}")
    fi

    key_ref=$(awk -F'\t' -v name="${key_name}" -v cid="${class_id}" \
      '$2 == name && $3 == cid {print $1}' TD_ATTRIBUTE_KEY.txt)

    IFS=$' \t\n' read -r -d '' -a values < <(
      awk -F'——' '{print $4}' <<< "${line}" | sed "s/、/ /g"
    )

    for value in "${values[@]}"; do
      printf '%s\t%s\t%s\n' "${value_id}" "${value}" "${key_ref}" \
        >> TD_ATTRIBUTE_VALUE.txt
      value_id=$((value_id + 1))
    done
  done
}

###Check that every attribute KEY has a parent category###
#######################################
# Lists the rows of TD_ATTRIBUTE_KEY.txt whose third column (category ID)
# is empty, each as "<row number> 属性KEY<key id> <key name>", or confirms
# that the table is complete. The script keeps running in both cases.
# Arguments: none
# Outputs:   the report on stdout
#######################################
check_attribute_key()
{
  local orphans

  orphans=$(awk '{if (length($3)==0) {print NR,"属性KEY"$1,$2}}' \
    TD_ATTRIBUTE_KEY.txt)

  if [ -n "${orphans}" ]; then
    echo -e "\033[31;49;1m下列属性KEY对应的上联分类不存在 \033[39;49;0m\n"
    printf '%s\n' "${orphans}"
  else
    echo -e "TD_ATTRIBUTE_KEY is ok ^_^\n"
    echo "+===================================+"
  fi
}

###Check that every attribute VALUE has a parent KEY###
#######################################
# Lists the rows of TD_ATTRIBUTE_VALUE.txt whose third column (key ID) is
# empty, each as "属性VALUE_ID<value id> <value>", and then terminates the
# script; otherwise confirms that the table is complete.
# The failure path now exits with status 1: the former bare `exit` reused
# the status of the preceding awk and therefore reported success.
# Arguments: none
# Outputs:   the report on stdout
#######################################
check_attribute_value()
{
  local orphans

  orphans=$(awk '{if (length($3)==0) {print "属性VALUE_ID"$1,$2}}' \
    TD_ATTRIBUTE_VALUE.txt)

  if [ -n "${orphans}" ]; then
    echo -e "\033[31;49;1m下列属性VALUE对应的上联属性KEY不存在 \033[39;49;0m\n"
    printf '%s\n' "${orphans}"
    exit 1
  else
    echo -e "TD_ATTRIBUTE_VALUE is ok ^_^\n"
  fi
}

###Level-1 to level-4 categories###
#######################################
# Starts TD_CLASSIFY.txt afresh, then for every existing <raw_file>.txt
# (raw_file from start_n to end_n) reads the category number into n, builds
# the level-1..4 rows and runs both consistency checks for that category.
# Globals:   start_n, end_n (read); raw_file, n (written, read by callees)
# Arguments: none
#######################################
First_to_Fourth_classification()
{
  [[ -f TD_CLASSIFY.txt ]] && rm -f TD_CLASSIFY.txt

  for raw_file in $(seq "${start_n}" "${end_n}"); do
    # Indexes without an input file are skipped silently.
    [[ -f ${raw_file}.txt ]] || continue

    # Category number: the text after ':' on the 第几大分类 line.
    n=$(grep -w 第几大分类 "${raw_file}.txt" | cut -d':' -f2)

    First_classification "${raw_file}"
    Second_classification "${raw_file}"
    Third_classification "${raw_file}"
    Fourth_clssification "${raw_file}"

    # n stays unquoted on purpose: an empty n must pass no argument at all.
    check_classification_repeated ${n}
    check_classification_parentid ${n}
  done
}

###Level-5 categories on their own###
#######################################
# Builds the level-5 rows and runs both consistency checks with the level-5
# tag, but only when fifth_classification_new.txt exists.
# Globals:   pre_fifth_n (read)
# Arguments: none
#######################################
single_fifth_classification()
{
  # Nothing to do when no level-5 input was produced.
  [ -f fifth_classification_new.txt ] || return 0

  Fifth_classification
  check_classification_repeated "${pre_fifth_n}"
  check_classification_parentid "${pre_fifth_n}"
}

###Level-6 categories on their own###
#######################################
# Builds the level-6 rows and runs both consistency checks with the level-6
# tag, but only when sixth_classification_new.txt exists.
# Globals:   pre_sixth_n (read)
# Arguments: none
#######################################
single_sixth_classification()
{
  # Nothing to do when no level-6 input was produced.
  [ -f sixth_classification_new.txt ] || return 0

  Sixth_classification
  check_classification_repeated "${pre_sixth_n}"
  check_classification_parentid "${pre_sixth_n}"
}

###Attribute KEY###
# Builds the attribute KEY table, then validates it.
attribute_key()
{
  get_attribute_key
  check_attribute_key
}

###Attribute VALUE###
# Builds the attribute VALUE table, then validates it.
attribute_value()
{
  get_attribute_value
  check_attribute_value
}

#======end 函数定义======#

#======begin function calls======#

# Runs the whole pipeline in order: split and renumber the inputs, build the
# level-1..4 rows, then levels 5 and 6, then the attribute KEY and VALUE tables.
main()
{
  file_processing
  First_to_Fourth_classification
  single_fifth_classification
  single_sixth_classification
  attribute_key
  attribute_value
}

main

#======end function calls======#

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值