可以这样做:
>>> result = []
>>> for entry in mylist:
if entry.endswith('[country]'):
country= entry[:entry.rindex('[')]
result.append([country])
else:
result[-1].append(entry)
>>> result
[['Pakistan','Karachi','lahore','islamabad'],['UAE','dubai','sharjah'],['India','goa','chennai']]
,
您可以执行以下操作。
# Group the lines of data.txt into [country, city, ...] lists. A line
# carrying the "[country]" marker starts a new group; the marker itself is
# removed from the stored name.
results = []
tmp = []  # group currently being filled
with open("data.txt", "r") as file:
    for line in file:
        # Test for the full marker rather than the bare word "country", so an
        # ordinary entry that merely contains that word does not open a group.
        if "[country]" in line:
            if tmp:
                # Close the previous group before starting the next one.
                results.append(tmp)
            tmp = []
            line = line.replace("[country]", "")
        tmp.append(line.strip())
# Flush the final group; skip it when the file was empty so the result is
# [] rather than [[]].
if tmp:
    results.append(tmp)
print(results)
,
# Group the lines of data.txt into [country, city, ...] lists, collected in
# `superlist`. A line containing '[country]' starts a new group.
with open('data.txt', 'r') as data_file:
    # Strip the line terminators up front so the stored names are clean, and
    # let the context manager close the file.
    mylist = [line.rstrip('\n') for line in data_file]
superlist = []
countrylist = []  # group currently being filled
for entry in mylist:
    if '[country]' in entry:
        # Only flush a non-empty group: at the first marker nothing has been
        # collected yet, and appending then would leave a stray [] in front.
        if countrylist:
            superlist.append(countrylist)
        countrylist = [entry.replace('[country]', '')]
    else:
        countrylist.append(entry)
# Flush the last group, which no following marker would otherwise close.
if countrylist:
    superlist.append(countrylist)
,
from io import StringIO

import pandas as pd

# Sample input: one entry per line, country names tagged with "[country]".
# The triple-quoted literal already contains real newlines, so no explicit
# "\n" escapes are needed (they would only insert extra blank lines).
temp = StringIO("""
Pakistan[country]
Karachi
lahore
islamabad
UAE[country]
dubai
sharjah
India[country]
goa
chennai
""")
# Read the entries into a headerless frame, splitting fields on whitespace;
# the sample has one token per line, so everything lands in column 0.
# The separator is a raw string so that "\s" reaches the regex engine intact.
df = pd.read_csv(temp, sep=r'\s+', engine='python', header=None)
使用 re.split:
import re

# Flatten column 0 of `df` into a single space-separated string.
# NOTE(review): assumes `df` is the single-column frame of entries built by
# the preceding snippet.
country_l = ' '.join(df[0])
# Split at each whitespace character that is directly followed by a
# "<word>[country]" token. The lookahead consumes nothing, so every chunk
# still begins with its country name; the marker is then removed and the
# chunk is split into individual names.
groups = [
    chunk.replace('[country]', '').split()
    for chunk in re.split(r'\s(?=\w*\[country\])', country_l)
]
print(groups)
# Output for the sample data:
# [['Pakistan', 'Karachi', 'lahore', 'islamabad'], ['UAE', 'dubai', 'sharjah'], ['India', 'goa', 'chennai']]
,
# Group the lines of data.txt into [country, city, ...] lists in `result`.
# A line containing '[country]' starts a new group; the text before the
# marker is stored as the country name.
# Raises IndexError if a non-country line appears before any country line.
with open('data.txt', 'r') as data_file:
    # splitlines() removes the line terminators, including on a final line
    # that has no trailing newline (slicing at find('\n') would cut off that
    # line's last character instead).
    my_list = data_file.read().splitlines()
result = []
for country in my_list:
    if '[country]' in country:
        result.append([country[:country.find('[country]')]])
    else:
        # The newest group is always the last one, so no index counter is
        # needed.
        result[-1].append(country)
,
免责声明:此解决方案不适合胆小者,也不真正优先考虑可读性。
让我感到好奇的是,当前发布的答案都没有通过 .split("[country]") 来解决此问题。
下面是一个使用列表推导式、而不使用 for 循环的解决方案:
然后,如果您能够更改数据文件的格式,从而将标记 # read file this way to preserve line structure:
# Join/split trick: rebuild the country groups without an explicit for-loop.
# NOTE: "," and "*" serve as separators here, so entries that themselves
# contain either character would be split or altered incorrectly.
with open('data.txt', 'r') as f:
    data = f.read().strip().split("\n")
# First: move the "[country]" tag from behind to in front of the name.
# Also, replace "[country]" with "*" for no other reason than that it's shorter.
data = ",".join(["*" + x.replace("[country]", "") if x.endswith("[country]") else x for x in data])
# Then split on the marker, that is: ",*".
# Keep in mind that the very first country has the prefix "*", not ",*",
# which is why the leftover "*" is stripped from every chunk.
data = [x.replace("*", "").split(",") for x in data.split(",*")]
print(data)
# Output for the sample data:
# [['Pakistan', 'Karachi', 'lahore', 'islamabad'], ['UAE', 'dubai', 'sharjah'], ['India', 'goa', 'chennai']]
如果将标记 [country] 设置在国家名称的前面而不是后面,那么它会更简单一些,只需一行简单的代码即可解决。
本文链接:https://www.f2er.com/3138622.html