awk实现distinct、count函数
#!/bin/bash
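# Usage from a Linux command line.
# Either source the file to define the functions in the current shell:
# . ./awkfunc.sh
# and then call the functions directly, or run the script with a function name and its arguments:
# ./awkfunc.sh distinct citynameage.txt 2 1 beijing
# which displays the different names in beijing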
# getvalues FILE [FIELD [KEYFIELD VALUE [KEYFIELD2 VALUE2]]]
#
# Print values from a tab-separated FILE:
#   1 argument : every line of FILE, unchanged
#   2 arguments: field FIELD of every line
#   4 arguments: field FIELD of the lines whose field KEYFIELD equals VALUE
#   6 arguments: as with 4, but field KEYFIELD2 must also equal VALUE2
# Any other argument count prints a note on stderr and one empty line on stdout.
#
# FIELD and KEYFIELD are spliced into the awk program right after `$`, so they
# must be trusted awk field selectors (normally a column number).
# VALUE is handed to awk with -v, so quotes inside it cannot break the program.
#
# example:
# getvalues citynameage.txt 2 1 beijing
# assume there are 3 fields in citynameage.txt, they are city, name, age
# to display the names of the persons in beijing (one line per matching record)
function getvalues
{
  local filename="$1"
  case $# in
    1)
      awk -F '\t' 'BEGIN{OFS="\t"}{print $0}' "${filename}"
      ;;
    2)
      local fieldid=$2
      awk -F '\t' "BEGIN{OFS=\"\t\"}{print \$${fieldid}}" "${filename}"
      ;;
    4)
      local fieldid=$2
      local conditionkeyid=$3
      local conditionvalue=$4
      # (cv "") forces a string comparison, as a quoted awk literal would.
      awk -F '\t' -v cv="${conditionvalue}" \
        "BEGIN{OFS=\"\t\"}\$${conditionkeyid}==(cv \"\"){print \$${fieldid}}" \
        "${filename}"
      ;;
    6)
      local fieldid=$2
      local conditionkeyid1=$3
      local conditionvalue1=$4
      local conditionkeyid2=$5
      local conditionvalue2=$6
      awk -F '\t' -v cv1="${conditionvalue1}" -v cv2="${conditionvalue2}" \
        "BEGIN{OFS=\"\t\"}\$${conditionkeyid1}==(cv1 \"\")&&\$${conditionkeyid2}==(cv2 \"\"){print \$${fieldid}}" \
        "${filename}"
      ;;
    *)
      printf '%s\n' 'getvalues: expected 1, 2, 4 or 6 arguments' >&2
      # The empty stdout line is kept for callers that relied on it.
      echo ''
      ;;
  esac
}
# groupcount FILE [FIELD [KEYFIELD VALUE [KEYFIELD2 VALUE2]]]
#
# Count how often each value occurs in a tab-separated FILE and print the
# result through `sort`:
#   1 argument : no counting, just the lines of FILE, sorted
#   2 arguments: "value<TAB>count" for every distinct value of field FIELD
#   4 arguments: the same, restricted to lines whose field KEYFIELD equals VALUE
#   6 arguments: as with 4, but field KEYFIELD2 must also equal VALUE2
# Any other argument count prints a note on stderr and one empty line on stdout.
#
# FIELD and KEYFIELD are spliced into the awk program right after `$`, so they
# must be trusted awk field selectors (normally a column number).
# VALUE is handed to awk with -v, so quotes inside it cannot break the program.
#
# example:
# groupcount citynameage.txt 2 1 beijing
# assume there are 3 fields in citynameage.txt, they are city, name, age
# to display the different names in beijing and how many persons have each name
function groupcount
{
  local filename="$1"
  case $# in
    1)
      awk -F '\t' '{print $0}' "${filename}" | sort
      ;;
    2)
      local fieldid=$2
      awk -F '\t' \
        "BEGIN{OFS=\"\t\"}{fieldvalue=\$${fieldid};a[fieldvalue]++}END{for(id in a) print id,a[id]}" \
        "${filename}" | sort
      ;;
    4)
      local fieldid=$2
      local conditionkeyid=$3
      local conditionvalue=$4
      # (cv "") forces a string comparison, as a quoted awk literal would.
      awk -F '\t' -v cv="${conditionvalue}" \
        "BEGIN{OFS=\"\t\"}\$${conditionkeyid}==(cv \"\"){fieldvalue=\$${fieldid};a[fieldvalue]++}END{for(id in a) print id,a[id]}" \
        "${filename}" | sort
      ;;
    6)
      local fieldid=$2
      local conditionkeyid1=$3
      local conditionvalue1=$4
      local conditionkeyid2=$5
      local conditionvalue2=$6
      awk -F '\t' -v cv1="${conditionvalue1}" -v cv2="${conditionvalue2}" \
        "BEGIN{OFS=\"\t\"}\$${conditionkeyid1}==(cv1 \"\")&&\$${conditionkeyid2}==(cv2 \"\"){fieldvalue=\$${fieldid};a[fieldvalue]++}END{for(id in a) print id,a[id]}" \
        "${filename}" | sort
      ;;
    *)
      printf '%s\n' 'groupcount: expected 1, 2, 4 or 6 arguments' >&2
      # The empty stdout line is kept for callers that relied on it.
      echo ''
      ;;
  esac
}
# distinct FILE [FIELD [KEYFIELD VALUE [KEYFIELD2 VALUE2]]]
#
# Forward all arguments to groupcount and print only the first column of
# every output line that has exactly two tab-separated fields, i.e. the
# distinct values without their counts.
#
# example:
# distinct citynameage.txt 2 1 beijing
# assume there are 3 fields in citynameage.txt, they are city, name, age
# to display the different names in beijing
function distinct
{
  # "$@" keeps each argument intact; unquoted $* would re-split values and
  # file names that contain whitespace and change the argument count.
  groupcount "$@" | awk -F '\t' 'NF==2{print $1}'
}
# countdistinct FILE [FIELD [KEYFIELD VALUE [KEYFIELD2 VALUE2]]]
#
# Forward all arguments to groupcount and print the number of lines it
# produced, as reported by `wc -l`.
#
# example:
# countdistinct citynameage.txt 2 1 beijing
# assume there are 3 fields in citynameage.txt, they are city, name, age
# to display how many different names there are in beijing
function countdistinct
{
  # "$@" keeps each argument intact; unquoted $* would re-split values and
  # file names that contain whitespace and change the argument count.
  groupcount "$@" | wc -l
}
# replacerecordseperator FILE [SEPARATOR]
#
# Print the records of FILE with awk:
#   1 argument : every line unchanged
#   2 arguments: every line followed by SEPARATOR instead of a newline
#                (SEPARATOR becomes awk's output record separator, ORS)
# Any other argument count prints a note on stderr and one empty line on stdout.
#
# SEPARATOR is handed to awk with -v, so quotes inside it cannot break the
# awk program.
#
# example:
# replacerecordseperator citynameage.txt " "
# to replace the record separator with " "
function replacerecordseperator
{
  # local, like the sibling functions, so the caller's variables are untouched.
  local filename="$1"
  case $# in
    1)
      awk '{print $0}' "${filename}"
      ;;
    2)
      local sep="$2"
      awk -v ORS="${sep}" '{print $0}' "${filename}"
      ;;
    *)
      printf '%s\n' 'replacerecordseperator: expected 1 or 2 arguments' >&2
      # The empty stdout line is kept for callers that relied on it.
      echo ''
      ;;
  esac
}
# Command-line dispatch: when the script is given at least two arguments and
# the first one names one of the functions defined in this file, call that
# function with the remaining arguments. Anything else is silently ignored.
if [[ $# -gt 1 ]]; then
  case "$1" in
    getvalues|groupcount|distinct|countdistinct|replacerecordseperator)
      # Call through "$@" rather than eval: the arguments reach the function
      # exactly as given (spaces and empty strings preserved) and are never
      # re-parsed as shell code.
      "$@"
      ;;
  esac
fi