ruby - 如何自动将 BibTex 引文转换为 Zotero 可解析的内容？

Question

我有一个引用系统，可以将用户注释发布到 wiki ( Researchr )。以编程方式，我可以访问每个条目的完整 BibTeX 记录，并且我还在各个页面上显示它（例如- 单击 BibTeX）。这是为了方便其他引文管理器的用户自动导入他们感兴趣的论文的引文。我还希望其他引文管理器，尤其是 Zotero，能够自动检测和导入引文。

Zotero列出了许多公开它可以理解的元数据的方法，包括带有 RDF、COiNS、Dublin Core 和 unAPI 的元标记。是否有用于将 BibTeX 自动转换为任何这些标准的 Ruby 库 - 或 Javascript 库？我可能可以创建一些东西，但如果存在一些东西，它会更加健壮（BibTeX 有这么多的出版物类型和字段等）。

score 2 · Accepted Answer

2

这里有一个 BibTeX2RDF 转换器，可能是你想要的。

于 2012-03-08T02:00:15.870 回答

score 1 · Accepted Answer

unAPI 不是数据标准——它是一种（向 Zotero 和其他程序）提供数据的方式。Zotero 可以导入 BibTeX，因此通过 unAPI 提供 BibTeX 数据就可以了。Inspire 是一个例子：http://inspirehep.net/

score 0 · Accepted Answer

现在可以直接在 Zotero 中导入 .bib（BibTeX）类型的文件。但是，我注意到我的 BibTeX 文件通常不如 Zotero 中的条目完整（特别是经常缺少 DOI），而且我在 Zotero 中没有找到（基于 BibTeX 条目中的数据的）“自动补全”功能。

所以我先用 Zotero 导入 .bib 文件，以确保所有条目都在里面。然后我运行一个 Python 脚本，为该 .bib 文件中的条目获取它能找到的所有缺失的 DOI，并将它们导出到一个以空格分隔的 .txt 文件中：

# pip install habanero
from habanero import Crossref
import re


def _normalize_title(text):
    """Lower-case *text* and strip every non-word character for comparison."""
    # Lower-casing first keeps the result identical to the previous
    # "lower, then strip" order; spaces are covered by \W as well.
    return re.sub(r"\W", "", text.lower())


def titletodoi(keyword):
    """Return the DOI of the Crossref work whose title matches *keyword*.

    The title is sent to Crossref as a free-text query and only the first
    returned item is considered. Its title parts are concatenated and
    compared with *keyword* after both are lower-cased and stripped of
    non-word characters.

    :param keyword: title of the publication to look up
    :returns: the DOI of the first hit when its title matches, ``None`` when
        there is no hit, the titles differ, or the hit carries no DOI
    """
    cr = Crossref()
    result = cr.works(query=keyword)
    items = result["message"]["items"]
    if not items:
        # No hit at all: previously this raised IndexError on items[0].
        return None

    best_hit = items[0]
    # The code treats "title" as an iterable of string parts; a hit without
    # a title is treated as an empty title rather than raising KeyError.
    hit_title = "".join(best_hit.get("title") or [])

    if _normalize_title(keyword) != _normalize_title(hit_title):
        return None
    return best_hit.get("DOI")


def get_dois(titles):
    """Look up a DOI for every title and return the ones that were found.

    Lookups are best-effort: a title whose lookup raises is reported and
    skipped so that one failure does not abort the whole batch.

    :param titles: iterable of publication titles
    :returns: list of DOIs, in input order, for the titles that resolved
    """
    dois = []
    for title in titles:
        try:
            doi = titletodoi(title)
        except Exception as exc:
            # Catch Exception rather than everything so Ctrl-C and
            # SystemExit still stop the run, and say what went wrong
            # instead of hiding it.
            print(f"lookup failed for title={title}: {exc!r}")
            continue
        print(f"doi={doi}, title={title}")
        if doi is not None:
            dois.append(doi)
    print(f"dois={dois}")
    return dois


def read_titles_from_file(filepath):
    """Read *filepath* and return its titles as a flat list.

    The file content is cut into lines, and every line is further split by
    ``splits_lines`` so that several titles may share one line.

    :param filepath: path of the text file holding the titles
    :returns: list of title strings
    """
    with open(filepath) as handle:
        content = handle.read()
    return splits_lines(content.splitlines())


def splits_lines(lines):
    """Split every line on ``;`` and return all pieces as one flat list.

    :param lines: iterable of strings
    :returns: list of the ``;``-separated pieces, in their original order
    """
    return [piece for line in lines for piece in line.split(";")]


def write_dois_to_file(dois, filename, separation_char):
    """Write every DOI to *filename*, each followed by *separation_char*.

    The file is overwritten. The separator is written after every DOI,
    including the last one.

    :param dois: iterable of DOI strings
    :param filename: path of the file to (over)write
    :param separation_char: string appended after each DOI
    """
    # The context manager closes the file even when a write fails, which
    # the explicit open()/close() pair did not.
    with open(filename, "w") as textfile:
        textfile.writelines(doi + separation_char for doi in dois)


# Script entry: read the titles, resolve their DOIs once, then write the
# same DOI list in two layouts (space-separated and one per line).
filepath = "list_of_titles.txt"
titles = read_titles_from_file(filepath)
dois = get_dois(titles)
for output_name, separator in (
    ("dois_space.txt", " "),
    ("dois_per_line.txt", "\n"),
):
    write_dois_to_file(dois, output_name, separator)

然后将这个包含 DOI 的 .txt 文件输入 Zotero 的魔杖功能。接下来，我（手动）删除重复项，保留最新添加的条目（因为它来自魔杖，数据最完整）。

之后，我运行另一个脚本，将我的 .tex 和 .bib 文件中的所有引用 ID 更新为 Zotero 生成的 ID：

# Importing library
import bibtexparser
from bibtexparser.bparser import BibTexParser
from bibtexparser.customization import *
import os, fnmatch

import Levenshtein as lev


# Let's define a function to customize our entries.
# It takes a record and return this record.
def customizations(record):
    """Run a record through a fixed chain of library customizations.

    Each step receives the record returned by the previous one.

    :param record: a record
    :returns: -- customized record
    """
    # NOTE(review): these names, including ``type``, are presumably the
    # functions provided by ``from bibtexparser.customization import *``
    # rather than builtins -- confirm against the file's imports.
    steps = (
        type,
        author,
        editor,
        journal,
        keyword,
        link,
        page_double_hyphen,
        doi,
    )
    for step in steps:
        record = step(record)
    return record


def get_references(filepath):
    """Parse the BibTeX file at *filepath* with the customized parser.

    :param filepath: path of the ``.bib`` file to load
    :returns: the database object returned by ``bibtexparser.load``
    """
    # Configure the parser first so every loaded record goes through
    # ``customizations``.
    bib_parser = BibTexParser()
    bib_parser.customization = customizations
    with open(filepath) as bibtex_file:
        return bibtexparser.load(bibtex_file, parser=bib_parser)


def get_reference_mapping(main_filepath, sub_filepath, threshold=0.85):
    """Pair entries of two BibTeX files whose titles are nearly identical.

    Every entry of the main file is compared with every entry of the sub
    file. Titles are compared case-insensitively after
    ``remove_curly_braces``; a pair matches when the Levenshtein ratio is
    strictly greater than *threshold*. Only matching pairs whose IDs differ
    are recorded. Entries without a ``title`` field cannot be compared and
    are skipped.

    :param main_filepath: path of the ``.bib`` file whose IDs should win
    :param sub_filepath: path of the ``.bib`` file whose IDs get replaced
    :param threshold: minimum (exclusive) Levenshtein ratio for a match
    :returns: tuple ``(main_into_sub, found_main, found_sub,
        main_entries, sub_entries)`` where ``main_into_sub`` is a list of
        ``[main_entry, sub_entry]`` pairs, ``found_main`` / ``found_sub``
        list the entries of those pairs, and the last two items are the
        full entry lists of both files
    """
    main_references = get_references(main_filepath)
    sub_references = get_references(sub_filepath)

    # Normalise each sub title once instead of once per main entry.
    sub_titles = [
        (sub_entry, remove_curly_braces(sub_entry["title"]).lower())
        for sub_entry in sub_references.entries
        if "title" in sub_entry
    ]

    main_into_sub = []
    found_main = []
    found_sub = []
    for main_entry in main_references.entries:
        if "title" not in main_entry:
            # Nothing to compare against; previously this raised KeyError.
            continue
        main_title = remove_curly_braces(main_entry["title"]).lower()

        for sub_entry, sub_title in sub_titles:
            # Match the reference ID if sufficiently similar titles are detected
            lev_ratio = lev.ratio(main_title, sub_title)
            if lev_ratio <= threshold:
                continue
            print(f"lev_ratio={lev_ratio}")

            if main_entry["ID"] != sub_entry["ID"]:
                print(f'replace: {sub_entry["ID"]} with: {main_entry["ID"]}')
                main_into_sub.append([main_entry, sub_entry])

                # Keep track of which entries have been found
                found_sub.append(sub_entry)
                found_main.append(main_entry)
    return (
        main_into_sub,
        found_main,
        found_sub,
        main_references.entries,
        sub_references.entries,
    )


def remove_curly_braces(string):
    """Return *string* with every ``{`` and ``}`` removed.

    :param string: text that may contain curly braces
    :returns: the text without any curly braces
    """
    # The second replace used to target "{" again, leaving every "}" in place.
    return string.replace("{", "").replace("}", "")


def replace_references(main_into_sub, directory):
    """Print the ID replacement implied by every matched pair.

    As written this is a dry run: it only prints, because the calls that
    would rewrite files are commented out. *directory* is not used by the
    active code.

    :param main_into_sub: list of ``[main_entry, sub_entry]`` pairs
    :param directory: root directory of the files to rewrite (unused)
    """
    for pair in main_into_sub:
        new_id, old_id = pair[0]["ID"], pair[1]["ID"]
        print(f"replace: {old_id} with: {new_id}")

        # UNCOMMENT IF YOU WANT TO ACTUALLY DO THE PRINTED REPLACEMENT
        # findReplace(latex_root_dir, sub, main, "*.tex")
        # findReplace(latex_root_dir, sub, main, "*.bib")


def findReplace(directory, find, replace, filePattern):
    """Replace *find* with *replace* in all matching files under *directory*.

    The directory tree is walked recursively. Every file whose name matches
    the ``fnmatch`` pattern *filePattern* is read, and rewritten only when
    the replacement actually changes its content.

    :param directory: root directory to walk
    :param find: substring to search for; an empty string is a no-op
    :param replace: replacement text
    :param filePattern: ``fnmatch``-style file name pattern, e.g. ``"*.tex"``
    """
    if not find:
        # str.replace("", x) inserts x between every character, which
        # would corrupt every matching file.
        return

    for path, _dirs, files in os.walk(os.path.abspath(directory)):
        for filename in fnmatch.filter(files, filePattern):
            filepath = os.path.join(path, filename)
            with open(filepath) as f:
                original = f.read()
            updated = original.replace(find, replace)
            if updated == original:
                # Leave untouched files alone instead of truncating and
                # rewriting them for nothing.
                continue
            with open(filepath, "w") as f:
                f.write(updated)


def list_missing(main_references, sub_references):
    """Print the ID of every sub entry whose ID is absent from the main entries.

    :param main_references: iterable of entries, each with an ``"ID"`` key
    :param sub_references: iterable of entries, each with an ``"ID"`` key
    """
    # Collect the main IDs once; the list used to be rebuilt for every
    # sub entry.
    main_ids = {entry["ID"] for entry in main_references}
    for sub in sub_references:
        if sub["ID"] not in main_ids:
            print(f'the following reference has a changed title:{sub["ID"]}')


# Script entry: match the entries of the two .bib files by title, print the
# ID replacements that follow from the matches, then report the sub IDs
# that have no counterpart in the main file.
latex_root_dir = "some_path/"
main_filepath = f"{latex_root_dir}latex/Literature_study/zotero.bib"
sub_filepath = f"{latex_root_dir}latex/Literature_study/references.bib"
main_into_sub, found_main, found_sub, main_references, sub_references = (
    get_reference_mapping(main_filepath, sub_filepath)
)
replace_references(main_into_sub, latex_root_dir)
list_missing(main_references, sub_references)


# For those references which have levenshtein ratio below 85 you can specify a manual swap:
manual_swap = []  # main into sub
# manual_swap.append(["cantley_impact_2021","cantley2021impact"])
# manual_swap.append(["widemann_envision_2021","widemann2020envision"])
for pair in manual_swap:
    # Each pair is [main_id, sub_id]: the sub ID is the one to be replaced.
    main, sub = pair[0], pair[1]
    print(f"replace: {sub} with: {main}")

    # UNCOMMENT IF YOU WANT TO ACTUALLY DO THE PRINTED REPLACEMENT
    # findReplace(latex_root_dir, sub, main, "*.tex")
    # findReplace(latex_root_dir, sub, main, "*.bib")

ruby - 如何自动将 BibTex 引文转换为 Zotero 可解析的内容？

3 回答 3

Related

Reference