Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ build/
### Linux ###
*~

### Python ###
__pycache__/
*.py[cod]

# KDE directory preferences
.directory

Expand Down
46 changes: 34 additions & 12 deletions bin/arxiv_to_publications_correct.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,17 @@

import bibtexparser
from bibtexparser.bwriter import BibTexWriter
from requests.exceptions import RequestException


def fetch_doi_content(url, accept_header, description):
    """Fetch ``url`` via HTTP GET, asking for a specific content type.

    Args:
        url: Address to request.
        accept_header: Value sent as the ``Accept`` request header.
        description: Short label for what is being fetched; it only appears
            in the message printed on failure.

    Returns:
        The response object when the request succeeds and
        ``raise_for_status()`` does not raise; ``None`` when a
        ``RequestException`` is raised by either call (a message is printed
        in that case instead of propagating the error).
    """
    request_headers = {'Accept': accept_header}
    try:
        # A 30 second timeout is passed so the request cannot wait forever.
        reply = requests.get(url, headers=request_headers, timeout=30)
        reply.raise_for_status()
    except RequestException as exc:
        print(f'Ignoring {url}, failed to fetch {description}: {exc}\n\n')
        return None
    else:
        return reply


if __name__ == '__main__':
Expand All @@ -21,25 +32,36 @@

for url, id_db in zip(doi_list, id_list):
print(f'Working on {id_db} with URL {url}')
req = requests.get(url, headers={'Accept': 'application/x-bibtex'})
if not req.status_code == 200:
print(f'Ignoring {url}, got status code {req.status_code}\n\n')
req = fetch_doi_content(url, 'application/x-bibtex', 'BibTeX')
if req is None:
continue
bib = req.content.decode()
req = requests.get(url, headers={'Accept': 'application/json'})
if not req.status_code == 200:
print(f'Ignoring {url}, got status code {req.status_code}\n\n')
req = fetch_doi_content(url, 'application/json', 'metadata')
if req is None:
continue
try:
data = req.json()
except ValueError as exc:
print(f'Ignoring {url}, invalid metadata response: {exc}\n\n')
continue
data = req.json()

if len(data['author']) > 1:
id = data['author'][0]['family'] + 'EtAl' + str(data['issued']['date-parts'][0][0])
else:
id = data['author'][0]['family'] + str(data['issued']['date-parts'][0][0])
try:
if len(data['author']) > 1:
id = data['author'][0]['family'] + 'EtAl' + str(data['issued']['date-parts'][0][0])
else:
id = data['author'][0]['family'] + str(data['issued']['date-parts'][0][0])
except (KeyError, IndexError, TypeError) as exc:
print(f'Ignoring {url}, incomplete metadata response: {exc}\n\n')
continue
id = id.replace(" ", "_")

entries = db.get_entry_dict()
assert entries[id_db]["ENTRYTYPE"] == 'unpublished', "original entry in bib file was NOT unpublished !"
if id_db not in entries:
print(f'Ignoring {id_db}, entry not found in bibliography.\n\n')
continue
if entries[id_db]["ENTRYTYPE"] != 'unpublished':
print(f'Ignoring {id_db}, original entry in bib file was not unpublished.\n\n')
continue
db.entries.remove(entries[id_db])

# Check for duplicate keys in the remaining database and add letter suffixes if needed
Expand Down