此操作从数据库下载更新的fasta文件(蛋白质序列),与asyncio
相比,我使用requests
可以更快地工作,但是我不认为下载实际上是异步进行的
import os
import aiohttp
import aiofiles
import asyncio
folder = '~/base/fastas/proteomes/'
upos = {'UP000005640': 'Human_Homo_sapien','UP000002254': 'Dog_Boxer_Canis_Lupus_familiaris','UP000002311': 'Yeast_Saccharomyces_cerevisiae','UP000000589': 'Mouse_Mus_musculus','UP000006718': 'Monkey_Rhesus_macaque_Macaca_mulatta','UP000009130': 'Monkey_Cynomolgus_Macaca_fascicularis','UP000002494': 'Rat_Rattus_norvegicus','UP000000625': 'Escherichia_coli',}
#https://www.uniprot.org/uniprot/?query=proteome:UP000005640&format=fasta Example link
startline = r'https://www.uniprot.org/uniprot/?query=proteome:'
endline = r'&format=fasta&include=False' #include is true to include isoforms,make false for only canonical sequences
async def fetch(session,link,folderlocation,name):
async with session.get(link,timeout=0) as response:
try:
file = await aiofiles.open(folderlocation,mode='w')
file.write(await response.text())
await file.close()
print(name,'ended')
except FileNotFoundError:
loc = ''.join((r'/'.join((folderlocation.split('/')[:-1])),'/'))
command = ' '.join(('mkdir -p',loc))
os.system(command)
file = await aiofiles.open(folderlocation,'ended')
async def rfunc():
async with aiohttp.ClientSession() as session:
for upo,name in upos.items():
print(name,'started')
link = ''.join((startline,upo,endline))
folderlocation =''.join((folder,name,'.fasta'))
await fetch(session,name)
loop = asyncio.get_event_loop()
loop.run_until_complete(rfunc())
我的运行结果:
In [5]: runfile('~/base/Fasta Proteome Updater.py')
Human_Homo_sapien started
Human_Homo_sapien ended
Dog_Boxer_Canis_Lupus_familiaris started
Dog_Boxer_Canis_Lupus_familiaris ended
Yeast_Saccharomyces_cerevisiae started
Yeast_Saccharomyces_cerevisiae ended
Mouse_Mus_musculus started
Mouse_Mus_musculus ended
Monkey_Rhesus_macaque_Macaca_mulatta started
Monkey_Rhesus_macaque_Macaca_mulatta ended
Monkey_Cynomolgus_Macaca_fascicularis started
Monkey_Cynomolgus_Macaca_fascicularis ended
Rat_Rattus_norvegicus started
Rat_Rattus_norvegicus ended
Escherichia_coli started
Escherichia_coli ended
打印输出似乎表明一次下载一次,这里有什么问题吗?