Skip to content
Snippets Groups Projects
Commit 88cfb557 authored by Nina Brolich's avatar Nina Brolich
Browse files

added datetime to directory name

parent 41c2d466
No related branches found
No related tags found
No related merge requests found
from bs4 import BeautifulSoup
from datetime import datetime
import re
import os
from tqdm import tqdm
......@@ -19,6 +20,13 @@ def writeXML(outputpath, outputdir, doc):
file.write(str(doc))
file.close()
def writeTXT(outputpath, outputdir, doc):
    """Write ``doc`` as UTF-8 text to ``outputpath`` with a ``.txt`` extension.

    Args:
        outputpath: Target file path; its final extension (e.g. ``.xml``)
            is replaced by ``.txt``.
        outputdir: Directory that must exist before writing; it is created
            (including missing parents) when absent.
        doc: Object to serialise; it is converted with ``str()``.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
    """
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(outputdir, exist_ok=True)
    # splitext only strips the extension of the last path component, so dots
    # in directory names (or a leading "./") no longer truncate the path the
    # way ``outputpath.split('.')[0]`` did.
    outputpath_txt = os.path.splitext(outputpath)[0] + ".txt"
    # The context manager closes the file; no explicit close() is needed.
    with open(outputpath_txt, mode='w', encoding='utf-8') as file:
        file.write(str(doc))
def toPlainText(doc):
# hand arzt
......@@ -70,9 +78,11 @@ def addbrackets(tag, symbol):
tag.insert(0, rep)
tag.append(rep)
# Timestamp (year-month-day-hour-minute-second) appended to the output
# directory name below.
dt = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
# Input and output locations are resolved relative to the current working
# directory.
cwd = os.getcwd()
# NOTE(review): `dir` shadows the Python builtin of the same name.
dir = os.path.join(cwd, "teis")
# NOTE(review): this assignment is overwritten by the next line; it looks
# like the pre-change version of the statement — confirm and remove.
outputdir = os.path.join(cwd, "teis_converted")
# Output directory name carries the timestamp: "teis_converted_<dt>".
outputdir = os.path.join(cwd, "teis_converted" + "_" + dt )
# Entries of the input directory; iterated by the loop that follows.
directorylist = os.listdir(dir)
for directory in tqdm(directorylist):
......@@ -85,4 +95,4 @@ for directory in tqdm(directorylist):
outputpath = os.path.join(outputdir, directory, file)
doc = openXML(filepath)
res = toPlainText(doc)
writeXML(outputpath, os.path.join(outputdir, directory), res)
writeTXT(outputpath, os.path.join(outputdir, directory), res)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment