像这样吗?
# Load the Word document and keep the first entry of its body.
# NOTE(review): presumably body[0] is the first table of the document --
# confirm against the docx2python documentation.
Doc = docx2python('C:/Users/Sam/Data/Information.docx')
d = Doc.body[0]

# Sample data standing in for the document contents while testing: a list of
# lists of lists of strings. This deliberately overrides the value read above.
# Remove this override for actual production.
d = [
    [
        [
            'Event Info',
            '1)\tHalf (1 or 2)',
            '2)\tMinutes (on video)',
            '3)\tSeconds (on video)',
            '4)\tStaff,0 = N/A)',
        ],
    ],
]
# We'll need regular expressions.
import re
# Helper functions.
def startsWithADigit(x):
    """Check whether the string ``x`` begins with an ASCII digit (0-9).

    Returns the ``re.Match`` object for the leading digit (truthy) when
    ``x`` starts with one, otherwise ``None`` (falsy), so the result can be
    used directly in a boolean context.
    """
    # re.match only matches at the start of the string, so no "^" is needed.
    return re.match(r"[0-9]", x)
def getStuffAfterPotentialTabCharacter(x):
    """Return the part of ``x`` that follows its last tab character.

    If ``x`` contains no tab, ``x`` is returned unchanged. If ``x`` ends
    with a tab, the result is the empty string.
    """
    # rpartition yields ('', '', x) when no tab is present, so index 2 is
    # always the text after the last tab (or the whole string).
    return x.rpartition("\t")[2]
def getFirstWord(x):
    """Return the first run of ASCII letters found in ``x``.

    Any text before or after that run (punctuation, digits, whitespace,
    further lines) is discarded. If ``x`` contains no ASCII letters at all,
    ``x`` is returned unchanged.
    """
    # Search for the word directly instead of substituting around it, so
    # leading non-letter characters and later lines cannot leak into the
    # result.
    match = re.search(r"[a-zA-Z]+", x)
    return match.group(0) if match else x
# Get rid of indented lists: take the innermost list of record strings
# (the first element of the first element of d).
l = d[0][0]
# Get stuff after potential tab characters (drops prefixes such as "1)").
p = [getStuffAfterPotentialTabCharacter(x) for x in l]
# Get the first word in each record, as that seems to be requested.
q = [getFirstWord(x) for x in p]
# Print the result, one word per line.
for x in q:
    print(x)
本文链接:https://www.f2er.com/2721452.html