Skip to content
Snippets Groups Projects
Commit f0e4d76e authored by Nina Brolich's avatar Nina Brolich
Browse files

added commentary

parent 88cfb557
No related branches found
No related tags found
No related merge requests found
......@@ -4,7 +4,7 @@ import re
import os
from tqdm import tqdm
#helper-methode zum oeffnen der xml-files
def openXML(filepath):
with open(filepath, mode='r', encoding='utf-8') as file:
content = file.read()
......@@ -12,7 +12,7 @@ def openXML(filepath):
doc = BeautifulSoup(content, 'xml')
return doc
#helper-methode zum schreiben der outputfiles in xml-format
def writeXML(outputpath, outputdir, doc):
if not os.path.isdir(outputdir):
os.makedirs(outputdir)
......@@ -20,6 +20,7 @@ def writeXML(outputpath, outputdir, doc):
file.write(str(doc))
file.close()
#helper-methode zum schreiben der outputfiles in txt-format
def writeTXT(outputpath, outputdir, doc):
if not os.path.isdir(outputdir):
os.makedirs(outputdir)
......@@ -28,6 +29,7 @@ def writeTXT(outputpath, outputdir, doc):
file.write(str(doc))
file.close()
#umwandlung in plain text
def toPlainText(doc):
# hand arzt
for hand in doc.findAll("add", hand="arzt"):
......@@ -72,12 +74,14 @@ def toPlainText(doc):
return doctext.strip()
#helper-methode um eckige klammern hinzuzufuegen
def addbrackets(tag, symbol):
    """Surround the contents of *tag* with a bracketed marker.

    The marker is the string ``"[" + symbol + "]"``. It is inserted at
    position 0 of *tag* and appended again at its end, so the existing
    children end up enclosed between two identical markers. *tag* is
    modified in place; nothing is returned.
    """
    marker = "".join(("[", symbol, "]"))
    # Opening marker goes in front of the existing children ...
    tag.insert(0, marker)
    # ... and the same marker closes the content at the end.
    tag.append(marker)
#beginn main
#current timestamp, used to name the output folder
dt = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
cwd = os.getcwd()
......@@ -85,6 +89,7 @@ dir = os.path.join(cwd, "teis")
outputdir = os.path.join(cwd, "teis_converted" + "_" + dt )
directorylist = os.listdir(dir)
#alle input directories durchgehen, fuer alle files umwandeln und in output-directory schreiben
for directory in tqdm(directorylist):
directorypath = os.path.join(dir, directory)
filelist = os.listdir(directorypath)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment