Commit 3ceabebc authored by Tom Kunze

readMetaData: Fix parsing of WS16/17 pdfs

Closes #102
parent 1e4f4bbf
......@@ -45,10 +45,10 @@ def readMeta(pdf, isFileName=True):
### This works for PDFs that have nice Unicode bullet dots (•) in the header,
### which, to my knowledge, are those from SS 12 and later.
ss_long_filter_regex = re.compile("^SS 20(..) • ([\S ]+) • ([\S ]+) • ([\S ]+)", re.MULTILINE | re.UNICODE)
ss_short_filter_regex = re.compile("^SS (..) • ([\S ]+) • ([\S ]+) • ([\S ]+)", re.MULTILINE | re.UNICODE)
ws_long_filter_regex = re.compile("^WS ..(..)/.. ? • ([\S ]+) • ([\S ]+) • ([\S ]+)", re.MULTILINE | re.UNICODE)
ws_short_filter_regex = re.compile("^WS (..)/.. • ([\S ]+) • ([\S ]+) • ([\S ]+)", re.MULTILINE | re.UNICODE)
ss_long_filter_regex = re.compile("^SS ?20(..) • ([\S ]+) • ([\S ]+) • ([\S ]+)", re.MULTILINE | re.UNICODE)
ss_short_filter_regex = re.compile("^SS ?(..) • ([\S ]+) • ([\S ]+) • ([\S ]+)", re.MULTILINE | re.UNICODE)
ws_long_filter_regex = re.compile("^WS ?..(..)/.. ? • ([\S ]+) • ([\S ]+) • ([\S ]+)", re.MULTILINE | re.UNICODE)
ws_short_filter_regex = re.compile("^WS ?(..)/.. • ([\S ]+) • ([\S ]+) • ([\S ]+)", re.MULTILINE | re.UNICODE)
meta = {}
match = None
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment