| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292 |
- #!/usr/bin/env python2
- # vim: set fdm=marker:
- # imports {{{1
- from sys import version_info
- from subprocess import check_output
- import json
- import re
- try:
- from vim_pandoc.bib.collator import SourceCollator
- from vim_pandoc.bib.util import flatten
- except:
- from collator import SourceCollator
- from util import flatten
# _bib_extensions {{{1
# Bibliography file extensions that citeproc.py will attempt to parse.
_bib_extensions = [
    "bib",
    "bibtex",
    "ris",
    "mods",
    "json",
    "enl",
    "wos",
    "medline",
    "copac",
    "xml",
]
# _significant_tags {{{1
# Tags that citeproc.py searches when matching a query, each mapped to a
# scaling factor for its relative importance. CSLItem.relevance adds up
# the factors of every tag that matched.
_significant_tags = {
    "id": 0.5,
    "author": 1.0,
    "issued": 1.0,
    "title": 1.0,
    "publisher": 1.0,
    "abstract": 0.1,
}
# _variable_type {{{1
# Map of tags -> types. The tags are listed grouped by type; the mapping
# is expanded to one "tag": "type" entry per tag, in the order written.
_variable_type = {
    tag: type_name
    for type_name, tags in (
        ("plain", (
            "abstract",
            "annote",
            "archive",
            "archive_location",
            "archive-place",
            "authority",
            "call-number",
            "citation-label",
            "citation-number",
            "collection-title",
            "container-title",
            "container-title-short",
            "dimensions",
            "doi",
            "event",
            "event-place",
            "first-reference-note-number",
            "genre",
            "isbn",
            "issn",
            "jurisdiction",
            "keyword",
            "locator",
            "medium",
            "note",
            "original-publisher",
            "original-publisher-place",
            "original-title",
            "page",
            "page-first",
            "pmcid",
            "pmid",
            "publisher",
            "publisher-place",
            "references",
            "reviewed-title",
            "scale",
            "section",
            "source",
            "status",
            "title",
            "title-short",
            "url",
            "version",
            "year-suffix",
        )),
        ("number", (
            "chapter-number",
            "collection-number",
            "edition",
            "issue",
            "number",
            "number-of-pages",
            "number-of-volumes",
            "volume",
        )),
        ("date", (
            "accessed",
            "container",
            "event-date",
            "issued",
            "original-date",
            "submitted",
        )),
        ("name", (
            "author",
            "collection-editor",
            "composer",
            "container-author",
            "director",
            "editor",
            "editorial-director",
            "illustrator",
            "interviewer",
            "original-author",
            "recipient",
            "reviewed-author",
            "translator",
        )),
    )
    for tag in tags
}
class CSLItem(object): #{{{1
    """Helper methods for one CSL-JSON formatted bibliography entry.

    The entry passed to the constructor is kept unchanged in ``self.data``
    and is read with ``get`` lookups keyed by CSL variable name.
    """

    def __init__(self, entry): #{{{2
        """Wrap *entry*, a single bibliography entry."""
        self.data = entry

    @staticmethod
    def _text(value): #{{{2
        """Return *value* as a text string on both Python 2 and Python 3."""
        if version_info.major == 2:
            return unicode(value)  # noqa: F821 - builtin on Python 2 only
        return str(value)

    @classmethod
    def _plain_tokens(cls, variable_contents): #{{{2
        """Split the contents of a 'plain' variable into one token per line.

        The value is converted to text first, so a non-string value (for
        example a numeric id) yields a token instead of raising.
        """
        return cls._text(variable_contents).split('\n')

    @classmethod
    def _number_tokens(cls, variable_contents): #{{{2
        """Return the contents of a 'number' variable as a single text token."""
        return [cls._text(variable_contents)]

    @staticmethod
    def _name_tokens(variable_contents): #{{{2
        """Parse a 'name' variable and return one token list per name.

        A name with a "literal" key becomes ``[literal]``; it is kept whole
        because a literal name is presumably literal for a reason. Any other
        name becomes ``[surname, given]``, where the surname is the dropping
        particle, non-dropping particle and family name joined by single
        spaces.
        """
        names = []
        for author in variable_contents:
            if "literal" in author:
                names.append([author.get("literal", "").strip()])
                continue
            # Leave out absent parts so a missing particle cannot produce a
            # double space inside the surname.
            surname = " ".join(
                part for part in (
                    author.get("dropping-particle", ""),
                    author.get("non-dropping-particle", ""),
                    author.get("family", ""),
                ) if part
            ).strip()
            names.append([surname, author.get("given", "").strip()])
        return names

    @classmethod
    def _date_tokens(cls, variable_contents): #{{{2
        """Parse a 'date' variable and return a list of strings to match on.

        Each "date-parts" element is rendered by joining its components with
        "-" exactly as given (no zero padding). "literal" and "raw" values
        are returned as they are. "season" and "circa" are recognised but
        contribute nothing yet, and keys without a handler are skipped.
        """
        def date_parts(date_parts_contents):
            return ["-".join(cls._text(part) for part in raw_date)
                    for raw_date in date_parts_contents]

        def literal(date_string):
            return [date_string]

        def unsupported(_value):
            # Placeholder for "season" and "circa": nothing to search in.
            return []

        date_function_lookup = {"date-parts": date_parts,
                                "season": unsupported,
                                "circa": unsupported,
                                "literal": literal,
                                "raw": literal}
        response = []
        for element in variable_contents:
            handler = date_function_lookup.get(element)
            if handler is not None:
                response.extend(handler(variable_contents[element]))
        return response

    def as_array(self, variable_name): #{{{2
        """Return the searchable tokens of *variable_name* for this entry.

        The variable's type is looked up in ``_variable_type`` (defaulting
        to "plain") and selects the parser. A missing or falsy value gives
        an empty list.
        """
        variable_contents = self.data.get(variable_name, False)
        if not variable_contents:
            return []
        # Explicit dispatch table instead of eval() on the type name.
        parsers = {"plain": self._plain_tokens,
                   "number": self._number_tokens,
                   "name": self._name_tokens,
                   "date": self._date_tokens}
        variable_type = _variable_type.get(variable_name, "plain")
        return parsers.get(variable_type, self._plain_tokens)(variable_contents)

    def match(self, query): #{{{2
        """Matching engine. Return 1 if a match is found, 0 otherwise.

        Expects *query* to be a compiled regexp. Every token of every tag in
        ``_significant_tags`` is passed through ``flatten`` and searched; the
        scan stops at the first hit. Numbers are returned rather than
        booleans so a ranking value could be provided later.
        """
        # NOTE(review): flatten is imported from util; presumably it turns a
        # nested name token into one string - confirm there.
        for variable in _significant_tags:
            for token in self.as_array(variable):
                if query.search(flatten(token)):
                    return 1
        return 0

    def matches(self, query): #{{{2
        """Return True if *query* (a compiled regexp) matches this entry."""
        return self.match(query) != 0

    def relevance(self, query): #{{{2
        """Return the relevance of this entry for *query* as a float.

        *query* is compiled case-insensitively here. The result is the sum
        of the ``_significant_tags`` factors of every tag with at least one
        matching token, or 0.0 when nothing matches.
        """
        pattern = re.compile(query, re.I)
        relevance = 0.0
        for tag, weight in _significant_tags.items():
            if any(pattern.search(flatten(token))
                   for token in self.as_array(tag)):
                relevance += weight
        return relevance
class CiteprocSource(object): #{{{1
    """Bibliography entries of one file, obtained from ``pandoc-citeproc -j``.

    ``self.data`` is a list with one CSLItem per entry in the command's
    JSON output, or an empty list when the command could not be run or
    exited with an error. Iterating the source yields those items.
    """

    def __init__(self, bib): #{{{2
        """Run ``pandoc-citeproc -j`` on the file *bib* and wrap each entry."""
        try:
            citeproc_output = check_output(["pandoc-citeproc", "-j", bib])
        except Exception:
            # Best effort: a missing executable or a failing conversion
            # gives an empty source. Unlike a bare except, this lets
            # KeyboardInterrupt and SystemExit propagate.
            print('pandoc: pandoc-citeproc had an error. maybe the bibliography file is corrupted?')
            self.data = []
            return
        # check_output returns bytes on Python 3 and str on Python 2; both
        # decode to text, so no version switch is needed.
        raw_bib = json.loads(citeproc_output.decode("utf-8"))
        self.data = [CSLItem(entry) for entry in raw_bib]

    def __iter__(self): #{{{2
        """Iterate over the wrapped entries in order."""
        return iter(self.data)
class CiteprocCollator(SourceCollator): #{{{1
    """Collator that gathers matching entries through CiteprocSource."""

    def collate(self): #{{{2
        """Return the data of all entries matching ``self.query``.

        Every file from ``self.find_bibfiles()`` is read with CiteprocSource.
        Entries matching the query (case-insensitively) are kept once each
        and returned most relevant first, as the raw entry data.
        """
        # Compile once up front rather than once per entry.
        pattern = re.compile(self.query, re.I)
        items = []
        seen_entries = []
        for bib in self.find_bibfiles():
            for item in CiteprocSource(bib):
                # Compare the entry data: every CSLItem is a fresh object,
                # so comparing the items themselves never finds duplicates.
                if item.matches(pattern) and item.data not in seen_entries:
                    seen_entries.append(item.data)
                    items.append(item)
        items.sort(key=lambda item: item.relevance(self.query), reverse=True)
        return [item.data for item in items]
|