Coverage for src/ptf_tools/doi.py: 18%
305 statements
« prev ^ index » next coverage.py v7.8.2, created at 2025-10-31 09:10 +0000
« prev ^ index » next coverage.py v7.8.2, created at 2025-10-31 09:10 +0000
1import os
2from datetime import datetime
4import requests
5from django.conf import settings
6from django.core.exceptions import ObjectDoesNotExist
7from django.template.loader import render_to_string
8from lxml import etree
9from ptf.cmds.xml import xml_utils
10from ptf.display.resolver import find_id_type
11from ptf.templatetags.helpers import search_license
13from mersenne_tools.models import DOIBatch
def get_doibatch(resource):
    """
    Return the DOIBatch attached to a resource.

    @param resource: object exposing a ``doibatch`` attribute
    @return: ``resource.doibatch``, or None if reading it raises ObjectDoesNotExist
    """
    try:
        return resource.doibatch
    except ObjectDoesNotExist:
        return None
def checkDOIExistence(resource):
    """
    Check that the DOI of a resource redirects to the expected landing page.

    The DOI URL (settings.DOI_BASE_URL + resource.doi) is requested without following
    redirects, and the "Location" header of the answer is compared with the URL
    expected for the resource.

    @param resource: models.Resource
    @return: False if the answer is a 302 redirect to another URL than the expected
        one, True otherwise
    """

    # check DOI
    url = settings.DOI_BASE_URL + resource.doi
    r = requests.get(url, allow_redirects=False)
    # An answer that is not a redirect may carry no "Location" header: read it with
    # .get() so that such an answer does not raise a KeyError.
    location = r.headers.get("Location")
    resource_url = resource.get_url_absolute()

    if resource.classname == "Collection":
        # get_url_absolute returns <site_url>/item/<COLID> but doi.org redirects to <site_url>
        resource_url = resource.extlink_set.get(rel="website", metadata="website").location
    elif location is not None and resource_url != location and "/item/" in location:
        # Old DOIs were recorded with /item/<PID>
        resource_url = (
            resource.get_collection().extlink_set.get(rel="website", metadata="website").location
        )
        resource_url += f"/item/{resource.pid}"

    # NOTE(review): any answer other than a 302 is still reported as True, as before;
    # confirm that this is what callers expect when the DOI does not redirect.
    return not (r.status_code == 302 and resource_url != location)
59# recordDOI par resource (article)
60# problématique liée à l'enregistrement des DOI chez CROSSREF :
61# - pour enregistrer un DOI, on utilise le DOI du journal comme référence : CROSSREF prend ça comme une demande d'enregistrement/modification !
62# du DOI du journal...
63# ce qui se passe lorsque l'on envoie plusieurs requêtes les unes à la suite des autres (Record all DOIs), c'est que l'ordre de traitement est
64# différent (aléatoire) de l'ordre d'envoi et on obtient ces erreurs :
65# "Record not processed because submitted version: 201810150907372216 is less or equal to previously submitted version {1}"
66# ( MAIS le record impliqué ici est celui du journal, celui de l'article ne pose globalement pas de pb)
67# car il y a un timestamp dans chaque requête
68#
69# pour contrer ces erreurs (avant on ne diagnostiquait que le nombre de failure_count et donc il y en avait une) il faut interpréter le xml de retour ::
70# <record_diagnostic status="Success">
71# <doi>10.5802/alco.21</doi>
72# <msg>Successfully updated</msg>
73# C'est ce qui est retenu (dans checkDOIBatch).
74#
def recordDOI(resource, testing=False):
    """
    Build the Crossref registration XML of a resource and submit it to Crossref.

    Any DOIBatch already attached to the resource is deleted and replaced by a new one
    (status "En cours") that stores the rendered XML. For a book container, a DOIBatch
    "En cours" is also (re)created for each of its parts.

    @param resource: Article, Container or TranslatedArticle to register
    @param testing: Boolean set to True when testing (the XML is rendered but not sent)
    @return: data {"status": 200 if Crossref answered SUCCESS else 404,
        "message": beginning of the Crossref answer (absent when testing)},
        or {"message": ...} alone for a Container without DOI
    """

    doibatch = get_doibatch(resource)
    if doibatch:
        doibatch.delete()

    doibatch = DOIBatch(resource=resource, status="En cours")
    doibatch.save()
    context = {}
    context["doi_batch_id"] = f"{doibatch.pk:04d}"
    # https://data.crossref.org/reports/help/schema_doc/4.4.2/schema_4_4_2.html#timestamp
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")  # len = 20, must be 19
    context["timestamp"] = timestamp[0:19]
    context["mail"] = settings.CROSSREF_MAIL
    template = f"crossref/{resource.classname.lower()}_doi_register.xml"

    # Hack to work out the publication date of the resource
    if resource.classname == "Article":
        # An article without contributors is registered as a posted-content of type other
        # https://data.crossref.org/reports/help/schema_doc/4.4.2/schema_4_4_2.html#posted_content
        if not resource.get_author_contributions():
            template = "crossref/posted-content.xml"

        if not resource.date_published and not resource.date_online_first:
            # No date on the article: extrapolate it from the year of its container
            try:
                datetime.strptime(resource.my_container.year, "%Y")
                resource.DOIdate = "<year>%s</year>" % resource.my_container.year
            except ValueError:
                # The year is assumed to be a range such as 2010-2011: keep its 2nd year
                year = resource.my_container.year.split("-")[1]
                resource.DOIdate = "<year>%s</year>" % year
                resource.my_container.year = year
        elif resource.date_published:
            # Format the date the way CROSSREF expects it
            resource.DOIdate = resource.date_published.strftime(
                "<month>%m</month><day>%d</day><year>%Y</year>"
            )

            # The year of the container is checked as well
            try:
                datetime.strptime(resource.my_container.year, "%Y")
            except ValueError:
                # The year is assumed to be a range such as 2010-2011: keep its 2nd year
                year = resource.my_container.year.split("-")[1]
                resource.my_container.year = year
        else:
            # Online First
            # TODO: Is it possible to send 2 dates to Crossref ?
            # You can send multiple <publication_date> but it is for multiple media_type (print vs online)
            resource.DOIdate = resource.date_online_first.strftime(
                "<month>%m</month><day>%d</day><year>%Y</year>"
            )

            # The year of the container is '0'

    elif resource.classname == "Container":
        if not resource.doi:
            return {"message": "Erreur, le numéro n'a pas de doi."}
        if resource.ctype.startswith("book"):
            # NB: book chapters are not handled here, the template takes care of them
            # when the book is registered. The template depends on the collection.
            if resource.my_collection.issn or resource.my_collection.e_issn:
                template = "crossref/book_series_metadata.xml"
            else:
                template = "crossref/book_set_metadata.xml"
            # A standalone book (outside of any series) would need a book_metadata template
            context["book_type"] = resource.ctype[5:].replace("-", "_")
            # The batches of the parts get their own name: rebinding "doibatch" here made
            # the XML, the uploaded file name and the error status land on the batch of
            # the last part instead of the batch of the book itself.
            for bookpart in resource.article_set.all():
                part_batch = get_doibatch(bookpart)
                if part_batch:
                    part_batch.delete()
                DOIBatch(resource=bookpart, status="En cours").save()

        elif resource.ctype.startswith("issue_special"):
            template = "crossref/issue_doi_register.xml"
            context["title"] = resource.title_html

        try:
            datetime.strptime(resource.year, "%Y")
            resource.DOIdate = "<year>%s</year>" % resource.year
        except ValueError:
            # The year is assumed to be a range such as 2010-2011: keep its 2nd year
            year = resource.year.split("-")[1]
            resource.DOIdate = "<year>%s</year>" % year

    elif resource.classname == "TranslatedArticle":
        _append_record_doi_log(resource.doi)

        resource.DOIdate = resource.date_published.strftime(
            "<month>%m</month><day>%d</day><year>%Y</year>"
        )
        context["collection"] = resource.original_article.get_top_collection()

    context["resource"] = resource

    preprint_id = preprint_type = None
    qs = resource.extid_set.filter(id_type="preprint")
    if qs:
        preprint_id = qs.first().id_value
        preprint_type = find_id_type(preprint_id)
        # crossref allows "doi" and "arxiv", but not "hal"
        if preprint_type == "hal":
            preprint_type = "other"
    context["preprint_id"] = preprint_id
    context["preprint_type"] = preprint_type
    abstract = resource.abstract_set.filter(lang=resource.lang, tag="abstract").first()
    if abstract:
        context["abstract"] = xml_utils.get_crossref_jats_from_xml_with_formula(
            abstract.value_xml, with_mathml=True
        )
    # The guard used to test the attribute "licence" while reading "license", so the
    # license was never added to the context.
    if getattr(resource, "license", None) is not None:
        context["license"] = search_license(resource)

    rdoi = None
    qs = resource.extid_set.filter(id_type="rdoi")
    if qs:
        rdoi = qs.first().id_value
    context["rdoi"] = rdoi

    try:
        xml = render_to_string(template_name=template, context=context)
        doibatch.xml = xml
        doibatch.save()
    except Exception as e:
        if resource.classname == "TranslatedArticle":
            _append_record_doi_log(str(e))
        raise

    files = {"file": (f"{doibatch.pk}.xml", xml)}

    data = {"status": 404}
    if not testing:
        if resource.classname == "TranslatedArticle":
            crossref_user, crossref_pwd = get_user_pwd_crossref(resource.original_article)
            _append_record_doi_log("Call crossref")
        elif resource.classname == "Container" and resource.ctype.startswith("book"):
            # No DOI at the container level: the Crossref credentials are looked up
            # from the first book part
            crossref_user, crossref_pwd = get_user_pwd_crossref(resource.article_set.first())
        else:
            crossref_user, crossref_pwd = get_user_pwd_crossref(resource)

        crossref_batch_url = settings.CROSSREF_BATCHURL_TPL % (crossref_user, crossref_pwd)

        r = requests.post(crossref_batch_url, files=files)
        body = r.text.encode("utf8")
        if r.status_code == 200:
            titles = etree.XML(body).xpath("//*/title")
            if titles and titles[0].text == "SUCCESS":
                data["status"] = r.status_code
        elif r.status_code == 401:
            doibatch.status = "Erreur"
            doibatch.log = "Pb d'authentification"
            doibatch.save()
        else:
            doibatch.status = "Erreur"
            doibatch.save()
        # Cutting the bytes at 1000 may split a multi-byte character: ignore the
        # incomplete tail instead of raising UnicodeDecodeError.
        data["message"] = body[:1000].decode("utf-8", errors="ignore")

    if resource.classname == "TranslatedArticle":
        _append_record_doi_log(doibatch.status)
    return data


def _append_record_doi_log(line):
    """Append one line to the record_doi.log file of settings.LOG_DIR."""
    with open(
        os.path.join(settings.LOG_DIR, "record_doi.log"), "a", encoding="utf-8"
    ) as file_:
        file_.write(line + "\n")
def recordPendingPublication(resource, testing=False):
    """
    Build the Crossref "pending publication" XML of a resource and submit it to Crossref.

    Any DOIBatch already attached to the resource is deleted and replaced by a new one
    (status "En cours") that stores the rendered XML.

    @param resource: resource to register; for an Article, date_accepted is split into
        month_accepted, day_accepted and year_accepted before rendering
    @param testing: Boolean set to True when testing (the XML is rendered but not sent)
    @return: data {"status": 200 if Crossref answered SUCCESS else 404,
        "message": beginning of the Crossref answer (absent when testing)}
    """

    doibatch = get_doibatch(resource)
    if doibatch:
        doibatch.delete()

    doibatch = DOIBatch(resource=resource, status="En cours")
    doibatch.save()

    # https://data.crossref.org/reports/help/schema_doc/4.4.2/schema_4_4_2.html#timestamp
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")  # len = 20, must be 19
    context = {
        "doi_batch_id": f"{doibatch.pk:04d}",
        "timestamp": timestamp[0:19],
        "mail": settings.CROSSREF_MAIL,
    }
    template = f"crossref/{resource.classname.lower()}_pending_publication_register.xml"

    if resource.classname == "Article":
        date = resource.date_accepted
        resource.month_accepted = date.month
        resource.day_accepted = date.day
        resource.year_accepted = date.year

    context["resource"] = resource

    # A rendering or saving error propagates to the caller unchanged
    xml = render_to_string(template_name=template, context=context)
    doibatch.xml = xml
    doibatch.save()

    files = {"file": (f"{doibatch.pk}.xml", xml)}

    data = {"status": 404}
    if not testing:
        crossref_user, crossref_pwd = get_user_pwd_crossref(resource)
        crossref_batch_url = settings.CROSSREF_BATCHURL_TPL % (crossref_user, crossref_pwd)

        r = requests.post(crossref_batch_url, files=files)
        body = r.text.encode("utf8")
        if r.status_code == 200:
            titles = etree.XML(body).xpath("//*/title")
            if titles and titles[0].text == "SUCCESS":
                data["status"] = r.status_code
        elif r.status_code == 401:
            doibatch.status = "Erreur"
            doibatch.log = "Pb d'authentification"
            doibatch.save()
        else:
            doibatch.status = "Erreur"
            doibatch.save()
        # Cutting the bytes at 1000 may split a multi-byte character: ignore the
        # incomplete tail instead of raising UnicodeDecodeError.
        data["message"] = body[:1000].decode("utf-8", errors="ignore")

    if resource.classname == "TranslatedArticle":
        with open(
            os.path.join(settings.LOG_DIR, "record_doi.log"), "a", encoding="utf-8"
        ) as file_:
            file_.write(doibatch.status + "\n")
    return data
def get_user_pwd_crossref(resource):
    """
    Look up the CROSSREF credentials matching the DOI prefix of a resource.

    The part of the DOI prefix located after its first dot (e.g. "5802" for a DOI
    starting with "10.5802/") selects the settings CROSSREF_USER_<part> and
    CROSSREF_PWD_<part>. If one of them is missing, both values fall back to the
    5802 credentials.

    @param resource: resource with a doi; for a Collection, the doi of its top collection is used
    @return: (crossref_user, crossref_pwd)
    """
    owner = resource.get_top_collection() if resource.classname == "Collection" else resource
    md_prefix = owner.doi.split("/")[0].split(".")[1]
    try:
        credentials = (
            getattr(settings, "CROSSREF_USER_" + md_prefix),
            getattr(settings, "CROSSREF_PWD_" + md_prefix),
        )
    except AttributeError:
        credentials = (settings.CROSSREF_USER_5802, settings.CROSSREF_PWD_5802)
    return credentials
def checkDOI(resource):
    """
    Refresh the DOI status of a resource through its DOIBatch.

    A DOIBatch with status "En cours" is created and saved if the resource has none,
    then the batch is passed to checkDOIBatch.

    @param resource: the resource to check
    @return: the DOIBatch returned by checkDOIBatch, or None for an Article whose
        container is a book other than "book-lecture-notes"
    """
    # For a book part, the registration is done at the container level: the batch linked
    # to the book part cannot be queried, as it was only created to display "En cours"
    # at the book-part level
    if resource.classname == "Article":
        container_type = resource.my_container.ctype
        if container_type.startswith("book") and container_type != "book-lecture-notes":
            return None

    batch = get_doibatch(resource)
    if batch is None:
        batch = DOIBatch(resource=resource, status="En cours")
        batch.save()

    return checkDOIBatch(batch)
def checkDOIBatch(doibatch):
    """
    check DOI batch status by HTTP request

    The check URL is built from settings.CROSSREF_BASE_CHECKBATCH_URL_TPL, the
    credentials returned by get_user_pwd_crossref for the resource of the batch, and
    the primary key of the batch. The status and log of the batch are updated from
    the answer, then the batch is saved.

    @param doibatch: DOIBatch
    @return: DOIBatch with status and log updated
    """

    # For a book part, the registration is done at the container level: the batch linked
    # to the book part cannot be queried, as it was only created to display "En cours"
    # at the book-part level
    # if resource.classname == "Article" and resource.my_container.ctype.startswith("book"):
    #     return None
    #
    # doibatch = get_doibatch(resource)
    # if doibatch is not None:
    #     doibatch = checkDOIBatch(doibatch)
    #
    # return doibatch

    resource = doibatch.resource
    crossref_user, crossref_pwd = get_user_pwd_crossref(resource)
    # The template is filled in two steps: credentials with %, then the batch pk with format()
    url = settings.CROSSREF_BASE_CHECKBATCH_URL_TPL % (crossref_user, crossref_pwd)
    url = url.format(doibatch.pk)
    r = requests.get(url)
    if r.status_code == 200:
        # parse the XML of the answer
        dataXml = r.text.encode("utf8")
        tree = etree.XML(dataXml)
        elem = tree.xpath("/doi_batch_diagnostic")[0]
        batch_status = elem.attrib["status"]
        if batch_status == "completed":
            # the batch has been processed; these defaults are kept if the answer
            # holds no record_diagnostic
            doibatch.status = "batch terminé"
            doibatch.log = "Pas de DOI associé dans le batch : voir le xml"
            diags = tree.xpath("//*/record_diagnostic")
            # Each diagnostic overwrites the status: the last one of the answer wins
            for diag in diags:
                doi = diag.xpath("doi")[0].text
                log = diag.xpath("msg")[0].text
                status = diag.attrib["status"]
                if doi == doibatch.resource.doi:
                    if status == "Success":
                        doibatch.status = "Enregistré"
                    else:
                        doibatch.status = "Erreur"
                else:
                    # diagnostic about another DOI than the one of the resource
                    doibatch.status = "Erreur"
                # NOTE(review): assumed to run for every diagnostic, so the message of
                # the last one is kept — confirm the intended nesting
                doibatch.log = log

        elif batch_status == "in_process" or batch_status == "queued":
            doibatch.status = "En cours"
            doibatch.log = "batch en cours de traitement"
        else:  # refreshed too early after Record DOI
            doibatch.status = "Erreur"
            doibatch.log = (
                f"Attention, il se peut qu'il faille rafraichir un peu plus tard {r.text} "
            )
    else:
        # Any HTTP status other than 200: keep the raw answer as log
        doibatch.status = "Erreur"
        doibatch.log = r.text
    doibatch.save()
    return doibatch
def removeOldDataInCrossref(article, testing=False):
    """
    The CRAS 2002-2019 articles were registered by Elsevier
    To remove some metadata in Crossref, we need to provide a separate XML with the fields to remove

    Any DOIBatch already attached to the article is deleted and replaced by a new one
    (status "En cours") that stores the rendered XML.

    @param article: article whose old Crossref metadata must be removed
    @param testing: Boolean set to True when testing (the XML is printed and not sent)
    @return: data {"status": 200 if Crossref answered SUCCESS else 404,
        "message": beginning of the Crossref answer (absent when testing)}
    """

    doibatch = get_doibatch(article)
    if doibatch:
        doibatch.delete()

    doibatch = DOIBatch(resource=article, status="En cours")
    doibatch.save()

    context = {"resource": article, "doi_batch_id": f"{doibatch.pk:04d}"}

    timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")  # len = 20, must be 19
    context["timestamp"] = timestamp[0:19]

    context["mail"] = settings.CROSSREF_MAIL
    template = "crossref/article_remove_old_data.xml"

    if article.date_published:
        article.DOIdate = article.date_published.strftime(
            "<month>%m</month><day>%d</day><year>%Y</year>"
        )

    # A rendering or saving error propagates to the caller unchanged
    xml = render_to_string(template_name=template, context=context)

    if testing:
        print(xml)

    doibatch.xml = xml
    doibatch.save()

    files = {"file": (f"{doibatch.pk}.xml", xml)}

    data = {"status": 404}
    if not testing:
        crossref_user, crossref_pwd = get_user_pwd_crossref(article)
        crossref_batch_url = settings.CROSSREF_BATCHURL_TPL % (crossref_user, crossref_pwd)

        r = requests.post(crossref_batch_url, files=files)
        body = r.text.encode("utf8")
        if r.status_code == 200:
            titles = etree.XML(body).xpath("//*/title")
            if titles and titles[0].text == "SUCCESS":
                data["status"] = r.status_code
        elif r.status_code == 401:
            doibatch.status = "Erreur"
            doibatch.log = "Pb d'authentification"
            doibatch.save()
        else:
            doibatch.status = "Erreur"
            doibatch.save()
        # Cutting the bytes at 1000 may split a multi-byte character: ignore the
        # incomplete tail instead of raising UnicodeDecodeError.
        data["message"] = body[:1000].decode("utf-8", errors="ignore")

    return data