您可以使用下面的 awk 程序:它以 ($1,$2,$4) 为键存储 file1 的 $5。
处理 file2 时,它按逗号拆分已存储的值,并对每个值调用 sub,在 $5 中匹配到的该值后面追加 *。
如果未找到该值(即 sub 返回 0),则把该值加上 !, 后添加到 $5 的开头:
awk 'NR==FNR {
   # First file argument (file1): remember field 5 under the composite key
   # (field 1, field 2, field 4). Field 2 is part of the key so that rows which
   # share fields 1 and 4 but differ in field 2 do not overwrite each other.
   map[$1,$2,$4] = $5
   next
}
($1,$2,$4) in map {
   # Second file argument (file2): split the stored comma-separated list into a[1..n].
   n = split(map[$1,$2,$4], a, /,/)
   # sub() treats a[i] as a regular expression and appends "*" to its first match
   # anywhere in $5; a return value of 0 means no match, in which case the value
   # is prepended to $5 followed by "!,".
   # NOTE(review): the match is a substring match, so a stored value that is a
   # substring of a longer entry in $5 also counts as found - confirm inputs.
   for (i=1; i<=n; ++i)
      if (sub(a[i], "&*", $5) == 0)
         $5 = a[i] "!," $5
} 1' file1 file2
输出:
chr1 123896 rs0987522 A T*,C,G
chr5 678452 rs8733521 G C!,A*,T
对于您给出的示例,请尝试下面的 awk 程序。
awk '
# Pass 1 (first file argument): remember field 5 keyed by fields 1, 2 and 4.
FNR==NR{
  arr1[$1,$2,$4]=$5
  next
}
# Pass 2: rewrite field 5 only for rows whose key was stored in pass 1.
# The membership test has to run before any arr1[...] reference, because merely
# referencing an awk array element creates it and would make the test always true.
(($1,$2,$4) in arr1){
  val=""
  delete arr2;delete arr3;delete arr4;delete arr5
  # arr2[1..num1]: values stored in pass 1; arr4: the same values as a lookup set.
  num1=split(arr1[$1,$2,$4],arr2,",")
  for(i=1;i<=num1;i++){ arr4[arr2[i]] }
  # arr3[1..num2]: values of the last field of the current row.
  # NOTE(review): the result is written to $5, so this presumably expects
  # five-field rows where $NF and $5 coincide - confirm against the input.
  num2=split($NF,arr3,",")
  # Current-row values first: "*" when also stored in pass 1, "!" otherwise.
  for(i=1;i<=num2;i++){
    val=(val?val ",":"")(arr3[i] in arr4?arr3[i]"*":arr3[i]"!")
    if(arr3[i] in arr4){ arr5[arr3[i]] }
  }
  # Then append, unmarked, the stored values that the current row did not contain.
  for(i=1;i<=num1;i++){
    if(!(arr2[i] in arr5)){
      val=val "," arr2[i]
    }
  }
  $5=val
}
# Print every row, rewritten or not.
1
' Input_file2 Input_file1
说明:下面是为上述程序逐行添加的详细注释。
awk '                                       ##Start of the awk program.
FNR==NR{                                    ##True only while the first file argument (Input_file2) is being read.
  arr1[$1,$2,$4]=$5                         ##Store field 5 in arr1 under the key built from fields 1, 2 and 4.
  next                                      ##Skip the remaining rules for this line.
}
(($1,$2,$4) in arr1){                       ##Run this rule only when the key of the current line was stored; test first, because referencing arr1[...] would create the element.
  val=""                                    ##Reset the output string.
  delete arr2;delete arr3;delete arr4;delete arr5   ##Clear the per-line work arrays.
  num1=split(arr1[$1,$2,$4],arr2,",")       ##Split the stored value on commas into arr2; num1 is the element count.
  for(i=1;i<=num1;i++){ arr4[arr2[i]] }     ##Build arr4 as a lookup set indexed by each stored value.
  num2=split($NF,arr3,",")                  ##Split the last field of the current line on commas into arr3; num2 is the element count.
  for(i=1;i<=num2;i++){                     ##Loop over the values of the current line.
    val=(val?val ",":"")(arr3[i] in arr4?arr3[i]"*":arr3[i]"!")   ##Append the value with "*" when it is also in arr4, otherwise with "!"; values are comma-joined.
    if(arr3[i] in arr4){ arr5[arr3[i]] }    ##Record values found in both inputs in arr5.
  }
  for(i=1;i<=num1;i++){                     ##Loop over the stored values.
    if(!(arr2[i] in arr5)){                 ##A stored value not recorded in arr5 has not been emitted yet.
      val=val "," arr2[i]                   ##Append it unmarked.
    }
  }
  $5=val                                    ##Replace field 5 with the assembled string.
}
1                                           ##Print every line, rewritten or not.
' Input_file2 Input_file1                   ##Input files; the first one listed is read first.
本文链接:https://www.f2er.com/5480.html