Commit 4c0b94bc authored by Tom Kunze
Browse files

util/readMetaData: fix parsing of SS17 pdfs

An inverted comma (apostrophe) was added to the term.
parent 0ffecd03
......@@ -45,10 +45,10 @@ def readMeta(pdf, isFileName=True):
### this works for pdfs that have nice unicode dots in the header, which are
### to my knowledge those of 12 SS and later
ss_long_filter_regex = re.compile("^SS ?20(..) • ([\S ]+) • ([\S ]+) • ([\S ]+)", re.MULTILINE | re.UNICODE)
ss_short_filter_regex = re.compile("^SS ?(..) • ([\S ]+) • ([\S ]+) • ([\S ]+)", re.MULTILINE | re.UNICODE)
ws_long_filter_regex = re.compile("^WS ?..(..)/.. ? • ([\S ]+) • ([\S ]+) • ([\S ]+)", re.MULTILINE | re.UNICODE)
ws_short_filter_regex = re.compile("^WS ?(..)/.. • ([\S ]+) • ([\S ]+) • ([\S ]+)", re.MULTILINE | re.UNICODE)
ss_long_filter_regex = re.compile("^SS(?: |')?20(..) • ([\S ]+) • ([\S ]+) • ([\S ]+)", re.MULTILINE | re.UNICODE)
ss_short_filter_regex = re.compile("^SS(?: |')?(..) • ([\S ]+) • ([\S ]+) • ([\S ]+)", re.MULTILINE | re.UNICODE)
ws_long_filter_regex = re.compile("^WS(?: |')?..(..)/.. ? • ([\S ]+) • ([\S ]+) • ([\S ]+)", re.MULTILINE | re.UNICODE)
ws_short_filter_regex = re.compile("^WS(?: |')?(..)/.. • ([\S ]+) • ([\S ]+) • ([\S ]+)", re.MULTILINE | re.UNICODE)
meta = {}
match = None
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment