Commit 3ceabebc authored by Tom Kunze
Browse files

readMetaData: Fix parsing of WS16/17 pdfs

Closes #102
parent 1e4f4bbf
...@@ -45,10 +45,10 @@ def readMeta(pdf, isFileName=True): ...@@ -45,10 +45,10 @@ def readMeta(pdf, isFileName=True):
### this works for pdfs that have nice unicode dots in the header, which are ### this works for pdfs that have nice unicode dots in the header, which are
### to my knowledge those of 12 SS and later ### to my knowledge those of 12 SS and later
ss_long_filter_regex = re.compile("^SS 20(..) • ([\S ]+) • ([\S ]+) • ([\S ]+)", re.MULTILINE | re.UNICODE) ss_long_filter_regex = re.compile("^SS ?20(..) • ([\S ]+) • ([\S ]+) • ([\S ]+)", re.MULTILINE | re.UNICODE)
ss_short_filter_regex = re.compile("^SS (..) • ([\S ]+) • ([\S ]+) • ([\S ]+)", re.MULTILINE | re.UNICODE) ss_short_filter_regex = re.compile("^SS ?(..) • ([\S ]+) • ([\S ]+) • ([\S ]+)", re.MULTILINE | re.UNICODE)
ws_long_filter_regex = re.compile("^WS ..(..)/.. ? • ([\S ]+) • ([\S ]+) • ([\S ]+)", re.MULTILINE | re.UNICODE) ws_long_filter_regex = re.compile("^WS ?..(..)/.. ? • ([\S ]+) • ([\S ]+) • ([\S ]+)", re.MULTILINE | re.UNICODE)
ws_short_filter_regex = re.compile("^WS (..)/.. • ([\S ]+) • ([\S ]+) • ([\S ]+)", re.MULTILINE | re.UNICODE) ws_short_filter_regex = re.compile("^WS ?(..)/.. • ([\S ]+) • ([\S ]+) • ([\S ]+)", re.MULTILINE | re.UNICODE)
meta = {} meta = {}
match = None match = None
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment