Coverage for src/ptf_tools/doi.py: 21%
256 statements
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-25 10:06 +0000
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-25 10:06 +0000
1import os
2from datetime import datetime
4import requests
5from django.conf import settings
6from django.core.exceptions import ObjectDoesNotExist
7from django.template.loader import render_to_string
8from lxml import etree
9from ptf.cmds.xml import xml_utils
10from ptf.display.resolver import find_id_type
11from ptf.templatetags.helpers import search_license
13from mersenne_tools.models import DOIBatch
def get_doibatch(resource):
    """
    Return the DOIBatch reachable through ``resource.doibatch``.

    @param resource: object exposing a ``doibatch`` attribute
    @return: the DOIBatch, or None when the attribute access raises ObjectDoesNotExist
    """
    try:
        return resource.doibatch
    except ObjectDoesNotExist:
        # The lookup failed: report "no batch" instead of propagating the error.
        return None
def get_or_create_doibatch(resource):
    """
    Refresh (or create) the DOIBatch of a resource from the current state of its DOI.

    The DOI URL is requested without following redirects. A 302 answer decides the
    status directly; any other answer falls back to querying the existing batch
    through checkDOIBatch.

    @param resource: resource exposing ``doi``, ``get_url_absolute()`` and ``classname``
    @return: new or updated doibatch, or None when there is nothing to report
    """
    response = requests.get(settings.DOI_BASE_URL + resource.doi, allow_redirects=False)

    if response.status_code == 302:
        # The DOI resolves: compare the redirect target with the URL of the resource.
        if resource.get_url_absolute() == response.headers["Location"]:
            status, log = "Enregistré", "Vérifié sur CROSSREF"
        else:
            status, log = "Erreur", "Mauvaise URL pour le DOI !!!/à réenregistrer"

        doibatch = get_doibatch(resource)
        if doibatch:
            doibatch.status = status
            doibatch.log = log
        else:
            doibatch = DOIBatch(resource=resource, status=status, log=log)
        doibatch.save()
        return doibatch

    doibatch = get_doibatch(resource)
    if not doibatch:
        return doibatch

    # For a book part the registration is done at container level, so the batch linked
    # to the book part cannot be queried: it was only created to display "En cours"
    # at book-part level.
    is_book_part = resource.classname == "Article" and resource.my_container.ctype.startswith(
        "book"
    )
    if is_book_part:
        doibatch.delete()
        return None

    return checkDOIBatch(doibatch)
# recordDOI works per resource (article).
# Issue with registering DOIs at CROSSREF:
# - to register a DOI, the journal DOI is used as a reference: CROSSREF treats this as a request
#   to register/modify the journal DOI as well...
# When several requests are sent one after another (Record all DOIs), they are processed in a
# different (random) order from the order in which they were sent, and errors like this one appear:
# "Record not processed because submitted version: 201810150907372216 is less or equal to previously submitted version {1}"
# (BUT the record involved here is the journal's; the article's record is generally not a problem)
# because every request carries a timestamp.
#
# To work around these errors (previously only failure_count was inspected, and it always
# reported one), the returned XML has to be interpreted:
# <record_diagnostic status="Success">
# <doi>10.5802/alco.21</doi>
# <msg>Successfully updated</msg>
# This is what is retained (in checkDOIBatch).
#
# Date layout expected by the Crossref templates for a full publication date.
_CROSSREF_FULL_DATE_FORMAT = "<month>%m</month><day>%d</day><year>%Y</year>"


def _append_record_doi_log(message):
    """Append ``message`` as one line to ``record_doi.log`` in ``settings.LOG_DIR``."""
    log_path = os.path.join(settings.LOG_DIR, "record_doi.log")
    with open(log_path, "a", encoding="utf-8") as file_:
        file_.write(message + "\n")


def _single_year(year):
    """Return ``year`` when it parses as ``%Y``, else the second year of a ``2010-2011`` range."""
    try:
        datetime.strptime(year, "%Y")
    except ValueError:
        # Assume the value is a range such as 2010-2011 and keep its second year.
        return year.split("-")[1]
    return year


def recordDOI(resource, testing=False):
    """
    Render the Crossref deposit XML of a resource and, unless testing, post it to Crossref.

    Any existing DOIBatch of the resource is deleted and replaced by a new one with
    status "En cours"; the rendered XML is saved on that new batch. For a book container,
    the batch of every book part is also replaced by a fresh "En cours" batch.

    @param resource: Article, Container or TranslatedArticle (dispatch on ``resource.classname``)
    @param testing: Boolean set to True when testing; the XML is rendered and saved but not sent
    @return: dict. ``status`` is 200 when Crossref answers 200 with a "SUCCESS" title and
             404 otherwise. ``message`` holds the beginning of the response body when the
             answer is neither 200 nor 401. A Container without DOI returns only ``message``.
    """
    previous_batch = get_doibatch(resource)
    if previous_batch:
        previous_batch.delete()

    doibatch = DOIBatch(resource=resource, status="En cours")
    doibatch.save()

    # https://data.crossref.org/reports/help/schema_doc/4.4.2/schema_4_4_2.html#timestamp
    # %f appends 6 digits (20 characters in total); only the first 19 are kept.
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")
    context = {
        "doi_batch_id": f"{doibatch.pk:04d}",
        "timestamp": timestamp[0:19],
        "mail": settings.CROSSREF_MAIL,
    }
    template = f"crossref/{resource.classname.lower()}_doi_register.xml"

    # Hack to determine the publication date of the resource.
    if resource.classname == "Article":
        # An article without contributors is registered as a posted-content of type other
        # https://data.crossref.org/reports/help/schema_doc/4.4.2/schema_4_4_2.html#posted_content
        if not resource.get_author_contributions():
            template = "crossref/posted-content.xml"

        if not resource.date_published and not resource.date_online_first:
            # No date on the article: extrapolate it from the year of the volume.
            container = resource.my_container
            year = _single_year(container.year)
            resource.DOIdate = f"<year>{year}</year>"
            # Only changes the value when the year was a range.
            container.year = year
        elif resource.date_published:
            # Date in the layout expected by CROSSREF.
            resource.DOIdate = resource.date_published.strftime(_CROSSREF_FULL_DATE_FORMAT)
            # The year of the container is checked as well.
            container = resource.my_container
            container.year = _single_year(container.year)
        else:
            # Online First
            # TODO: Is it possible to send 2 dates to Crossref ?
            # You can send multiple <publication_date> but it is for multiple media_type (print vs online)
            resource.DOIdate = resource.date_online_first.strftime(_CROSSREF_FULL_DATE_FORMAT)

    elif resource.classname == "Container":
        if not resource.doi:
            # NOTE(review): the "En cours" batch created above is left untouched and the
            # result carries no "status" key - confirm that callers expect this.
            return {"message": "Erreur, le numéro n'a pas de doi."}

        if resource.ctype.startswith("book"):
            # NB: book chapters are not handled here, everything is done in the template
            # when the book is registered. The template depends on the collection.
            if resource.my_collection.issn or resource.my_collection.e_issn:
                template = "crossref/book_series_metadata.xml"
            else:
                template = "crossref/book_set_metadata.xml"
            context["book_type"] = resource.ctype[5:].replace("-", "_")

            # Every book part gets a fresh "En cours" batch. A dedicated name is used so
            # the container batch (whose pk is already in doi_batch_id) is not replaced:
            # the XML and any error status below belong to the container batch.
            for bookpart in resource.article_set.all():
                part_batch = get_doibatch(bookpart)
                if part_batch:
                    part_batch.delete()
                DOIBatch(resource=bookpart, status="En cours").save()

        elif resource.ctype.startswith("issue_special"):
            template = "crossref/issue_doi_register.xml"
            context["title"] = resource.title_html

        resource.DOIdate = f"<year>{_single_year(resource.year)}</year>"

    elif resource.classname == "TranslatedArticle":
        _append_record_doi_log(resource.doi)
        resource.DOIdate = resource.date_published.strftime(_CROSSREF_FULL_DATE_FORMAT)
        context["collection"] = resource.original_article.get_top_collection()

    context["resource"] = resource

    preprint_id = preprint_type = None
    qs = resource.extid_set.filter(id_type="preprint")
    if qs:
        preprint_id = qs.first().id_value
        preprint_type = find_id_type(preprint_id)
        # crossref allows "doi" and "arxiv", but not "hal"
        if preprint_type == "hal":
            preprint_type = "other"
    context["preprint_id"] = preprint_id
    context["preprint_type"] = preprint_type

    abstract = resource.abstract_set.filter(lang=resource.lang, tag="abstract").first()
    if abstract:
        context["abstract"] = xml_utils.get_crossref_jats_from_xml_with_formula(
            abstract.value_xml, with_mathml=True
        )
    if resource.license:
        context["license"] = search_license(resource)

    rdoi = None
    qs = resource.extid_set.filter(id_type="rdoi")
    if qs:
        rdoi = qs.first().id_value
    context["rdoi"] = rdoi

    try:
        xml = render_to_string(template_name=template, context=context)
        doibatch.xml = xml
        doibatch.save()
    except Exception as e:
        if resource.classname == "TranslatedArticle":
            _append_record_doi_log(str(e))
        raise

    files = {"file": (f"{doibatch.pk}.xml", xml)}

    data = {"status": 404}
    if not testing:
        if resource.classname == "TranslatedArticle":
            crossref_user, crossref_pwd = get_user_pwd_crossref(resource.original_article)
            _append_record_doi_log("Call crossref")
        elif resource.classname == "Container" and resource.ctype.startswith("book"):
            # The Crossref credentials are derived from the first book part.
            crossref_user, crossref_pwd = get_user_pwd_crossref(resource.article_set.first())
        else:
            crossref_user, crossref_pwd = get_user_pwd_crossref(resource)

        crossref_batch_url = settings.CROSSREF_BATCHURL_TPL % (crossref_user, crossref_pwd)

        r = requests.post(crossref_batch_url, files=files)
        body = r.text.encode("utf8")
        if r.status_code == 200:
            title = etree.XML(body).xpath("//*/title")[0].text
            if title == "SUCCESS":
                data["status"] = r.status_code
        elif r.status_code == 401:
            doibatch.status = "Erreur"
            doibatch.log = "Pb d'authentification"
            doibatch.save()
        else:
            doibatch.status = "Erreur"
            doibatch.save()
            # The 1000-byte cut may fall inside a multi-byte UTF-8 sequence: drop the
            # incomplete tail instead of raising UnicodeDecodeError.
            data["message"] = body[:1000].decode("utf-8", errors="ignore")

    if resource.classname == "TranslatedArticle":
        _append_record_doi_log(doibatch.status)
    return data
def get_user_pwd_crossref(resource):
    """
    Look up the CROSSREF credentials matching the DOI prefix of a resource.

    For a DOI such as 10.5802/alco.21 the settings CROSSREF_USER_5802 and
    CROSSREF_PWD_5802 are read. When either setting is missing for the prefix,
    the 5802 credentials are used instead.

    @param resource: object exposing a ``doi`` string
    @return: tuple (crossref_user, crossref_pwd)
    """
    registrant = resource.doi.split("/")[0]
    member_code = registrant.split(".")[1]
    try:
        credentials = (
            getattr(settings, f"CROSSREF_USER_{member_code}"),
            getattr(settings, f"CROSSREF_PWD_{member_code}"),
        )
    except AttributeError:
        credentials = (settings.CROSSREF_USER_5802, settings.CROSSREF_PWD_5802)
    return credentials
def checkDOIBatch(doibatch):
    """
    Query CROSSREF over HTTP for the state of a batch and store the outcome on it.

    @param doibatch: DOIBatch
    @return: the same DOIBatch, saved with its status and log updated
    """
    crossref_user, crossref_pwd = get_user_pwd_crossref(doibatch.resource)
    check_url = settings.CROSSREF_BASE_CHECKBATCH_URL_TPL % (crossref_user, crossref_pwd)
    response = requests.get(check_url.format(doibatch.pk))

    if response.status_code != 200:
        doibatch.status = "Erreur"
        doibatch.log = response.text
        doibatch.save()
        return doibatch

    # Parse the returned XML.
    tree = etree.XML(response.text.encode("utf8"))
    batch_status = tree.xpath("/doi_batch_diagnostic")[0].attrib["status"]

    if batch_status == "completed":
        # The batch has been processed; these values stay when it holds no record.
        doibatch.status = "batch terminé"
        doibatch.log = "Pas de DOI associé dans le batch : voir le xml"
        # NOTE(review): status and log are overwritten for every record_diagnostic,
        # so the last record of the answer decides the final values.
        for diagnostic in tree.xpath("//*/record_diagnostic"):
            record_doi = diagnostic.xpath("doi")[0].text
            message = diagnostic.xpath("msg")[0].text
            record_status = diagnostic.attrib["status"]
            registered = record_doi == doibatch.resource.doi and record_status == "Success"
            doibatch.status = "Enregistré" if registered else "Erreur"
            doibatch.log = message
    elif batch_status in ("in_process", "queued"):
        doibatch.status = "En cours"
        doibatch.log = "batch en cours de traitement"
    else:
        # Refreshed too soon after Record DOI.
        doibatch.status = "Erreur"
        doibatch.log = (
            f"Attention, il se peut qu'il faille rafraichir un peu plus tard {response.text} "
        )

    doibatch.save()
    return doibatch
def removeOldDataInCrossref(article, testing=False):
    """
    The CRAS 2002-2019 articles were registered by Elsevier.
    To remove some metadata in Crossref, we need to provide a separate XML with the fields to remove.

    Any existing DOIBatch of the article is deleted and replaced by a new one with
    status "En cours", on which the rendered XML is saved.

    @param article: article whose old Crossref metadata must be removed
    @param testing: Boolean set to True when testing; the XML is printed and nothing is sent
    @return: dict. ``status`` is 200 when Crossref answers 200 with a "SUCCESS" title and
             404 otherwise. ``message`` holds the beginning of the response body when the
             answer is neither 200 nor 401.
    """
    previous_batch = get_doibatch(article)
    if previous_batch:
        previous_batch.delete()

    doibatch = DOIBatch(resource=article, status="En cours")
    doibatch.save()

    # %f appends 6 digits (20 characters in total); only the first 19 are kept.
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")
    context = {
        "resource": article,
        "doi_batch_id": f"{doibatch.pk:04d}",
        "timestamp": timestamp[0:19],
        "mail": settings.CROSSREF_MAIL,
    }
    template = "crossref/article_remove_old_data.xml"

    if article.date_published:
        article.DOIdate = article.date_published.strftime(
            "<month>%m</month><day>%d</day><year>%Y</year>"
        )

    # Rendering or saving errors propagate to the caller unchanged.
    xml = render_to_string(template_name=template, context=context)
    if testing:
        print(xml)
    doibatch.xml = xml
    doibatch.save()

    files = {"file": (f"{doibatch.pk}.xml", xml)}

    data = {"status": 404}
    if not testing:
        crossref_user, crossref_pwd = get_user_pwd_crossref(article)
        crossref_batch_url = settings.CROSSREF_BATCHURL_TPL % (crossref_user, crossref_pwd)

        r = requests.post(crossref_batch_url, files=files)
        body = r.text.encode("utf8")
        if r.status_code == 200:
            title = etree.XML(body).xpath("//*/title")[0].text
            if title == "SUCCESS":
                data["status"] = r.status_code
        elif r.status_code == 401:
            doibatch.status = "Erreur"
            doibatch.log = "Pb d'authentification"
            doibatch.save()
        else:
            doibatch.status = "Erreur"
            doibatch.save()
            # The 1000-byte cut may fall inside a multi-byte UTF-8 sequence: drop the
            # incomplete tail instead of raising UnicodeDecodeError.
            data["message"] = body[:1000].decode("utf-8", errors="ignore")

    return data