使用Python从不同目录中的多个文件读取特定数据

如果我有许多这样的文件:

[

从不同目录python中的多个文件读取特定数据

每个文件夹中还有3个这样的文件:

[

从不同目录python中的多个文件读取特定数据

现在每个文件夹中都有一个.txt文件,如下所示:

[

从不同目录python中的多个文件读取特定数据

对于每个.txt文件,我需要获取文件第6列中的值(图中用红色圆圈标出),并且只关心以cope1、cope2、cope3、cope4和cope5开头的行(图中以蓝色突出显示)。其余内容都可以忽略。

我需要分别显示每个文件夹的数据

应该是:

[

从不同目录python中的多个文件读取特定数据

我需要从每个文件中读取相关数据并将其存储在合理的数据结构中。

我需要以这种方式显示所有数据,这样我才能对10个文件夹中每个文件夹里的3个子文件夹,分别求出FFA的cope1到cope5的平均值,依此类推。

大致表示为:

从不同目录python中的多个文件读取特定数据

问题有些冗长,还请见谅,我是Python新手!非常感谢大家的帮助。

import os
import csv
import statistics

def _collect_cope_values(dirpath, results):
    """Append the cope values found in every file of *dirpath* to *results*.

    Each file is parsed as space-delimited text.  A row is used when its
    second field contains "cope"; the second "/"-separated segment of that
    field becomes the key, and the seventh field, converted to float, is
    appended to the list stored under that key in *results*.
    """
    for filename in os.listdir(dirpath):
        path = os.path.join(dirpath, filename)
        with open(path, "r", newline="") as f:
            # NOTE(review): the field positions assume columns separated by a
            # single space -- confirm against the real report files.
            for row in csv.reader(f, delimiter=" "):
                # Blank or one-field lines cannot hold a cope entry.
                if len(row) < 2 or "cope" not in row[1]:
                    continue
                name = row[1].split("/")[1]
                results.setdefault(name, []).append(float(row[6]))


def main():
    """Print the mean FFA value of every cope name under fmriroi/roi_data.

    Walks ``fmriroi/roi_data/<subject>/<region>/`` relative to the current
    working directory, gathers the cope values of the ``ffa``,
    ``lingual_gyrus`` and ``ppa`` region folders, and prints a dict mapping
    each cope name seen in ``ffa`` to the mean of its collected values.
    """
    ffaResults = {}
    lingualResults = {}
    ppaResults = {}
    # The lingual_gyrus and ppa values are gathered but not reported here.
    results_by_region = {
        "ffa": ffaResults,
        "lingual_gyrus": lingualResults,
        "ppa": ppaResults,
    }

    root = os.path.join("fmriroi", "roi_data")
    for subdir in os.listdir(root):
        subdirpath = os.path.join(root, subdir)
        if not os.path.isdir(subdirpath):
            # Stray files next to the subject folders are skipped.
            continue
        for subsubdir in os.listdir(subdirpath):
            results = results_by_region.get(subsubdir)
            if results is not None:
                _collect_cope_values(
                    os.path.join(subdirpath, subsubdir), results
                )

    # Average over the values actually collected for each cope name.
    res = {}
    for k in ffaResults:
        res[k] = statistics.mean(ffaResults[k])
    print(res)


if __name__ == "__main__":
    main()

所需的输出:

从不同目录python中的多个文件读取特定数据

gao421184686 回答:从不同目录python中的多个文件读取特定数据

我同意评论中@Silveris的建议。您可以将我的代码用于单个文件:

import re

# For a single report file: walk the lines in order and, each time a line
# holds a token beginning with the next expected "copeN" (cope1, then cope2,
# ...), store that line's sixth whitespace-separated field under the matched
# token(s) and advance to the next N.
datas = []

with open('file.txt', 'r', encoding='utf-8') as f:
    data = {}
    expected = 1
    for line in f:
        fields = line.split()
        hits = [field for field in fields if re.match(rf'cope{expected}', field)]
        if not hits:
            continue
        data[''.join(hits)] = fields[5]
        expected += 1

datas.append(data)
,

以下是我添加到您的代码中的代码:

import os
import csv
import pprint
import statistics

def _collect_cope_values(dirpath, results):
    """Append the cope values found in every file of *dirpath* to *results*.

    Each file is parsed as space-delimited text.  A row is used when its
    second field contains "cope"; the second "/"-separated segment of that
    field becomes the key, and the seventh field, converted to float, is
    appended to the list stored under that key in *results*.
    """
    for filename in os.listdir(dirpath):
        path = os.path.join(dirpath, filename)
        with open(path, "r", newline="") as f:
            # NOTE(review): the field positions assume columns separated by a
            # single space -- confirm against the real report files.
            for row in csv.reader(f, delimiter=" "):
                # Blank or one-field lines cannot hold a cope entry.
                if len(row) < 2 or "cope" not in row[1]:
                    continue
                name = row[1].split("/")[1]
                results.setdefault(name, []).append(float(row[6]))


def main():
    """Pretty-print the mean value per cope name for each region.

    Walks ``fmriroi/roi_data/<subject>/<region>/`` relative to the current
    working directory, gathers the cope values of the ``ffa``,
    ``lingual_gyrus`` and ``ppa`` region folders, and pretty-prints a dict
    with the keys ``"ffa"``, ``"lingual"`` and ``"ppa"``, each mapping a cope
    name to the mean of the values collected for it.
    """
    ffaResults = {}
    lingualResults = {}
    ppaResults = {}
    results_by_region = {
        "ffa": ffaResults,
        "lingual_gyrus": lingualResults,
        "ppa": ppaResults,
    }

    root = os.path.join("fmriroi", "roi_data")
    for subdir in os.listdir(root):
        subdirpath = os.path.join(root, subdir)
        if not os.path.isdir(subdirpath):
            # Stray files next to the subject folders are skipped.
            continue
        for subsubdir in os.listdir(subdirpath):
            results = results_by_region.get(subsubdir)
            if results is not None:
                _collect_cope_values(
                    os.path.join(subdirpath, subsubdir), results
                )

    res = {"ffa": {}, "lingual": {}, "ppa": {}}
    for k in ffaResults:
        res["ffa"][k] = statistics.mean(ffaResults[k])
    for k in lingualResults:
        res["lingual"][k] = statistics.mean(lingualResults[k])
    for k in ppaResults:
        res["ppa"][k] = statistics.mean(ppaResults[k])

    pprint.pprint(res)


if __name__ == "__main__":
    main()

它提供以下输出:

{'ffa': {'cope1': 0.4376,'cope2': 0.3582,'cope3': 0.6315,'cope4': 0.1722,'cope5': 0.3518},'lingual': {'cope1': -0.08865060000000001,'cope2': -0.150985,'cope3': -0.162005,'cope4': -0.130845,'cope5': -0.126411},'ppa': {'cope1': 0.74836,'cope2': 0.9444,'cope3': 0.300482,'cope4': 1.12435,'cope5': 0.8332200000000001}}
本文链接:https://www.f2er.com/3051560.html

大家都在问