请尝试以下操作。我假设应将 Input_file1 中以 > 开头的行(去掉开头的 > 之后)
与 Input_file2 的第一列进行比较(由于示例数据令人困惑,以下代码是根据 OP 的尝试编写的)。
awk '
# Pass 1 (Input_file2): remember the start and end coordinate for each
# name found in column 1.
FNR == NR {
    start_point[$1] = $2
    end_point[$1] = $3
    next
}
# Pass 2 (Input_file1): a header line ">name" selects which coordinates
# apply to the sequence line that follows it.
/^>/ {
    sub(/^>/, "")
    val = $0
    next
}
# Any other line is sequence data: print the current name, then the slice.
{
    # substr() takes (string, start, LENGTH), not an end position, so the
    # end coordinate is converted to a length first. The end is treated as
    # exclusive here; use end_point[val] - start_point[val] + 1 instead if
    # the end coordinate should be included in the output.
    print val ORS substr($0, start_point[val], end_point[val] - start_point[val])
    val = ""    # forget the name so it is not reused for a later line
}
' Input_file2 Input_file1
说明:下面为上述代码逐行添加了注释。
awk '                         ##Start of the awk program.
FNR==NR{                      ##FNR==NR is TRUE only while the first file on the command line (Input_file2) is being read.
  start_point[$1]=$2          ##Array start_point, indexed by $1 of the current line, holds $2 (start coordinate).
  end_point[$1]=$3            ##Array end_point, indexed by $1 of the current line, holds $3 (end coordinate).
  next                        ##Skip all further statements for this line.
}
/^>/{                         ##If a line of Input_file1 starts with > then do the following.
  sub(/^>/,"")                ##Remove the leading > from the line.
  val=$0                      ##Keep the remaining text as the current name in val.
  next                        ##Skip all further statements for this line.
}
{
  ##Print val, a newline (ORS) and the slice of the current line that begins at start_point[val].
  ##The third argument of substr is a LENGTH, so it is computed as end minus start (end exclusive);
  ##add 1 to that length if the end coordinate should be included.
  print val ORS substr($0,start_point[val],end_point[val]-start_point[val])
  val=""                      ##Reset val so the name is not reused for a later line.
}
' Input_file2 Input_file1     ##Input file names: coordinates first, sequences second.
,
如果我的理解正确,请尝试以下操作:
awk '
# First file (input_file2): one region per line, as name / start / end.
NR == FNR {
    total = NR
    gene[total] = $1
    from[total] = $2
    span[total] = $3 - $2      # region width, later used as the substr length
    next
}
# Second file (input_file1): a ">genotype" header names the output file.
/^>/ {
    sub(/^>/, "")
    outfile = $0
    next
}
# Sequence line: write every region of this sequence to the current file.
{
    k = 0
    while (++k <= total) {
        print ">" gene[k] > outfile
        print substr($0, from[k], span[k]) >> outfile
    }
    # Release the file handle before the next genotype is processed.
    close(outfile)
}' input_file2 input_file1
input_file1:
>genotype1
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
>genotype2
bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
>genotype3
nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn
input_file2:
gene1 10 20
gene2 40 50
gene3 20 25
[结果]
genotype1:
>gene1
aaaaaaaaaa
>gene2
aaaaaaaaaa
>gene3
aaaaa
genotype2:
>gene1
bbbbbbbbbb
>gene2
bbbbbbbbbb
>gene3
bbbbb
genotype3:
>gene1
nnnnnnnnnn
>gene2
nnnnnnnnnn
>gene3
nnnnn
[EDIT]
如果要将输出文件存储到其他目录,
请改为尝试以下操作:
dir="./outdir"      # directory that receives the output files;
                    # change the name as needed
mkdir -p "$dir"
awk -v dir="$dir" '
# First file (input_file2): one region per line, as name / start / end.
FNR==NR {
    name[NR] = $1
    start[NR] = $2
    len[NR] = $3 - $2      # region width, used as the substr length below
    count = NR
    next
}
# Second file (input_file1): a ">genotype" header names the output file.
/^>/ {
    sub(/^>/,"")
    genotype=$0
    next
}
# Sequence line: write every region of this sequence to dir/genotype.
{
    # Build the path once. A bare concatenation after ">" is parsed
    # differently by different awk implementations, so a plain variable
    # is used as the redirection target instead.
    out = dir "/" genotype
    for (i = 1; i <= count; i++) {
        print ">" name[i] > out
        # substr needs both the start position and the length; passing only
        # one value would print from that position to the end of the line.
        # While the file stays open, ">" keeps appending to it.
        print substr($0, start[i], len[i]) > out
    }
    close(out)
}' input_file2 input_file1
- 前两行在 bash 中执行,用于定义目标目录并用 mkdir 创建该目录。
- 然后通过 awk 的 -v 选项将目录名称传递给 awk 程序。
希望这会有所帮助。
本文链接:https://www.f2er.com/3019390.html