Coverage for src/ptf_tools/doaj.py: 61%
195 statements
« prev ^ index » next coverage.py v7.7.0, created at 2025-04-03 12:11 +0000
« prev ^ index » next coverage.py v7.7.0, created at 2025-04-03 12:11 +0000
1import json
2import re
4import requests
6from django.conf import settings
7from django.db.models import Q
8from django.http import Http404
10from mersenne_tools.models import DOAJBatch
11from ptf import model_helpers
12from ptf.cmds.xml.xml_utils import remove_html
13from ptf.models import Container
def has_date_online_first(document):
    """Return ``document.date_online_first`` if the attribute exists, else ``False``.

    The result is intended to be used for its truthiness: a missing attribute
    and an empty/``None`` date are both falsy.
    """
    if not hasattr(document, "date_online_first"):
        return False
    return document.date_online_first
def has_date_published(document):
    """Return ``document.date_published`` if the attribute exists, else ``False``.

    The result is intended to be used for its truthiness: a missing attribute
    and an empty/``None`` date are both falsy.
    """
    if not hasattr(document, "date_published"):
        return False
    return document.date_published
def has_publication_date(document):
    """Return a truthy value when the document has an online-first or a published date.

    The online-first date takes precedence; the published date is only
    consulted when the online-first check is falsy.
    """
    online_first = has_date_online_first(document)
    if online_first:
        return online_first
    return has_date_published(document)
def is_published(document):
    """Return ``True`` unless the document carries a truthy ``do_not_publish`` flag.

    Documents without a ``do_not_publish`` attribute are considered published.
    """
    if hasattr(document, "do_not_publish"):
        return not document.do_not_publish
    return True
def get_names(resource, role):
    """Collect the contributors of ``resource`` that have the given ``role``.

    Args:
        resource: object exposing ``contributions.all()``; each contribution
            provides ``role``, ``orcid`` and ``contribaddress_set.all()``.
        role: role string a contribution must match to be included.

    Returns:
        A list of dicts, one per matching contribution, each with:
        ``name`` (``str(contribution)``), ``affiliation`` (non-empty addresses
        joined with ``"; "``, present only when the contribution has address
        rows) and ``orcid_id`` (full orcid.org URL, present only when the
        stored ORCID has the expected ``dddd-dddd-dddd-ddd[dX]`` form).
    """
    people = []
    for contrib in resource.contributions.all():
        if contrib.role != role:
            continue

        entry = {"name": str(contrib)}

        address_rows = contrib.contribaddress_set.all()
        if address_rows:
            filled = [row.address for row in address_rows if row.address]
            entry["affiliation"] = "; ".join(filled)

        if contrib.orcid:
            # Drop surrounding whitespace and any non-ASCII characters before
            # validating the identifier.
            cleaned = contrib.orcid.strip().encode("ascii", "ignore").decode("utf-8")
            if re.match(r"^\d{4}-\d{4}-\d{4}-\d{3}(\d|X)$", cleaned):
                entry["orcid_id"] = "https://orcid.org/" + cleaned

        people.append(entry)
    return people
def get_token(colid):
    """Return the DOAJ API token configured for a collection, or ``None``.

    Args:
        colid: collection pid. ``"PCJ"`` and ``"OJMO"`` have dedicated tokens;
            any pid starting with ``"CR"`` and longer than two characters uses
            the shared ``DOAJ_TOKEN_CR`` setting.

    Returns:
        The matching value from Django settings, or ``None`` when the
        collection has no DOAJ token.
    """
    if colid == "PCJ":
        return settings.DOAJ_TOKEN_PCJ
    if colid == "OJMO":
        return settings.DOAJ_TOKEN_OJMO
    if colid.startswith("CR") and len(colid) > 2:
        return settings.DOAJ_TOKEN_CR
    return None
def doaj_pid_register(pid):
    """Send the publishable articles of container ``pid`` to DOAJ in one bulk request.

    An article is submitted when it is published, has a publication date and
    ``doaj_resource_register`` produces a payload for it. The outcome is
    recorded in ``DOAJBatch`` rows: one for the container and, on success, one
    per article that was actually submitted.

    Args:
        pid: pid of the resource to register; it must resolve to a Container.

    Returns:
        A ``(data, response)`` tuple.
        ``response`` is the ``requests`` response of the bulk call, or ``None``
        when nothing was sent (no token for the collection, or no payload).
        ``data`` is, on HTTP 201, a dict with ``doaj_status``, ``doaj_message``,
        ``doaj_id`` and ``doaj_location`` built from the DOAJ answer.
        Otherwise it is whatever the last ``doaj_resource_register`` call
        returned (``None`` if no article was examined); this is kept as-is
        for backward compatibility.

    Raises:
        Http404: if the resource does not exist, is not a Container, or has no
            collection.
        requests.exceptions.RequestException: if the bulk request fails at the
            transport level, including a timeout.
    """
    # Generous upper bound for the bulk upload so a stalled connection cannot
    # block the caller forever.
    bulk_timeout = 120

    resource = model_helpers.get_resource(pid)
    if not resource:
        raise Http404

    container = resource.container if resource.classname == "Container" else None
    if not container:
        raise Http404

    collection = container.get_collection()
    if not collection:
        raise Http404

    results = []
    # Articles whose payload is part of the bulk request; only these may be
    # flagged as registered afterwards.
    submitted_articles = []
    data, response = None, None
    token = get_token(collection.pid)
    if token:
        for article in container.article_set.all():
            if is_published(article) and has_publication_date(article):
                data = doaj_resource_register(article)
                if data:
                    results.append(data)
                    submitted_articles.append(article)

    if results:
        url = f"https://doaj.org/api/bulk/articles?api_key={token}"
        response = requests.post(url, json=results, timeout=bulk_timeout)
        container_batch = DOAJBatch.objects.get_or_create(resource=resource)[0]
        if response.status_code == 201:
            container_batch.status = DOAJBatch.REGISTERED
            created = response.json()
            data = {
                "doaj_status": response.status_code,
                "doaj_message": [r["status"] for r in created],
                "doaj_id": [r["id"] for r in created],
                "doaj_location": [r["location"] for r in created],
            }
            for article in submitted_articles:
                article_batch = DOAJBatch.objects.get_or_create(resource=article)[0]
                article_batch.status = DOAJBatch.REGISTERED
                article_batch.save()
        else:
            container_batch.status = DOAJBatch.ERROR
            if response.text:
                container_batch.log = response.text
        container_batch.save()
    return data, response
def doaj_resource_register(resource):
    """Build the DOAJ article payload for an Article or a Container.

    Args:
        resource: resource whose ``classname`` is ``"Article"`` or
            ``"Container"``; any other class yields ``None``.

    Returns:
        A dict with an ``admin`` section (``publisher_record_id``) and a
        ``bibjson`` section (title, dates, abstract, authors, keywords, link,
        identifiers and journal data), or ``None`` when the resource has no
        container, no DOI, no collection, or belongs to a ``CR*`` collection
        while its DOI does not start with ``10.5802/cr``.
    """
    kind = resource.classname
    if kind == "Article":
        document = resource.article
        container = document.my_container
        fpage, lpage = document.fpage, document.lpage
    elif kind == "Container":
        document = container = resource.container
        fpage = lpage = ""
    else:
        container = None

    if not container:
        return None

    doi = resource.doi
    collection = container.get_collection()
    if not (doi and collection):
        return None

    if collection.pid.startswith("CR") and not doi.startswith("10.5802/cr"):
        return None

    # Default to the container year (last part of a "YYYY-YYYY" range); a
    # document-level date, when present, overrides both month and year.
    month = ""
    year = container.year.split("-")[-1] if container.year != "0" else ""

    if has_date_online_first(document):
        stamp = document.date_online_first
    elif has_date_published(document):
        stamp = document.date_published
    else:
        stamp = None
    if stamp is not None:
        month = stamp.strftime("%B")
        year = stamp.strftime("%Y")

    volume = number = ""
    if not container.to_appear():
        is_cr = container.is_cr()
        if container.volume:
            volume = container.volume
        # For CR containers, a number starting with "G" is not exported.
        if container.number and (not is_cr or container.number[0] != "G"):
            number = container.number

    eissn = collection.e_issn
    pissn = ""  # collection.issn
    colid = collection.pid.lower()
    domain = settings.SITE_REGISTER[colid]["site_domain"]
    if colid == "pcj":
        domain = "peercommunityjournal.org"

    url = f"https://{domain}/articles/{doi}/"

    lang = resource.lang
    if not lang or lang == "und":
        lang = ""

    authors = get_names(resource, "author")
    publisher = container.my_publisher
    pub_name = (publisher.pub_name or "") if publisher else ""

    title = remove_html(document.title_tex)

    keywords = [
        kwd.value for kwd in document.kwd_set.all() if kwd.type != "msc" and kwd.lang == lang
    ]

    # With the "lang" ordering, an "en" abstract is picked before an "und" one.
    abstract = (
        document.abstract_set.all().filter(Q(lang="en") | Q(lang="und")).order_by("lang").first()
    )
    abstract_text = remove_html(abstract.value_tex) if abstract else ""

    identifiers = [{"type": "doi", "id": doi}]
    if eissn:
        identifiers.append({"type": "eissn", "id": eissn})
    if pissn:
        identifiers.append({"type": "pissn", "id": pissn})
    if colid == "pcj" and not eissn:
        identifiers.append({"type": "eissn", "id": "2804-3871"})

    return {
        "admin": {"publisher_record_id": doi},
        "bibjson": {
            "journal": {
                "country": "FR",
                "title": collection.title_tex,
                "start_page": fpage,
                "end_page": lpage,
                "language": [lang],
                "number": number,
                "volume": volume,
                "publisher": pub_name,
            },
            "title": title,
            "month": month,
            "year": year,
            "abstract": abstract_text,
            "author": authors,
            "keywords": keywords,
            "link": [{"url": url, "type": "fulltext", "content_type": "HTML"}],
            "identifier": identifiers,
        },
    }
def doaj_delete_article(doi):
    """Look up an article on DOAJ by DOI and delete it.

    The collection of the local resource (if any) selects the API token. The
    article is first searched on DOAJ; the first hit's id is then used for the
    delete call.

    Args:
        doi: DOI of the article to remove.

    Returns:
        A status string: ``"<doi> deleted"`` when DOAJ answers 204,
        ``"<doi> not found or article already deleted"`` when the search
        succeeds but returns no result, and ``"<doi> deletion failed"`` in
        every other case (search error, missing token, delete refused).

    Raises:
        requests.exceptions.RequestException: if a request fails at the
            transport level, including a timeout.
    """
    # Upper bound for each HTTP call so a stalled connection cannot hang the caller.
    request_timeout = 30

    colid = ""
    resource = model_helpers.get_resource_by_doi(doi)
    if resource:
        colid = resource.article.my_container.get_collection().pid
    token = get_token(colid)

    search_url = f"https://doaj.org/api/search/articles/{doi}"
    response = requests.get(search_url, timeout=request_timeout)
    if response.status_code == 200:
        results = response.json().get("results")
        if not results:
            return doi + " not found or article already deleted"
        # Without a token the delete call cannot be authenticated; skip it
        # rather than sending the literal "api_key=None".
        if token:
            article_id = results[0].get("id", "")
            delete_url = f"https://doaj.org/api/articles/{article_id}?api_key={token}"
            response = requests.delete(delete_url, timeout=request_timeout)
            if response.status_code == 204:
                return doi + " deleted"
    return doi + " deletion failed"
def doaj_delete_articles_in_collection(colid, check_published=True):
    """Delete from DOAJ the articles of every container whose pid starts with ``colid``.

    Args:
        colid: pid prefix used to select the containers.
        check_published: when true, only articles that are published and have
            a publication date are deleted; when false, every article is.

    Each container is printed as it is processed. Any exception raised while
    handling an article is printed and the loop moves on to the next article.
    """
    matching_containers = Container.objects.filter(pid__startswith=colid)
    for container in matching_containers:
        print(container)
        for article in container.article_set.all():
            try:
                eligible = not check_published or (
                    is_published(article) and has_publication_date(article)
                )
                if eligible:
                    doaj_delete_article(article.doi)
            except Exception as ex:
                print(ex)
def doaj_retrieve_applications():
    """Fetch a fixed list of DOAJ records and dump each one to a JSON file.

    For every hard-coded id, the DOAJ ``search/journals`` endpoint is queried.
    When it answers 200 with a non-empty ``results`` list, that list is written
    to ``<title>.json`` in the current working directory, where ``<title>`` is
    the ``bibjson.title`` of the first result with path separators replaced by
    ``_``. Ids that yield no result are skipped silently.

    Raises:
        requests.exceptions.RequestException: if a request fails at the
            transport level, including a timeout.
    """
    # Upper bound for each HTTP call so a stalled connection cannot hang the caller.
    request_timeout = 30

    application_ids = [
        "798d4f21a22d43579cea322bed8a560e",
        "7a0889a89de64979a3d5e26aace31db7",
        "0e30bf1ac2514bcda8d1cc0855237cd4",
        "4bcd45d13d23475bb246cbce9eaed9ee",
        "d85467c6c5914759886aa29481cce4b4",
        "11b60f2f3dd64ec087510dff3d82e0ab",
        "71951ece12524e45abac7628de6a8d22",
    ]

    for app_id in application_ids:
        response = requests.get(
            "https://doaj.org/api/search/journals/" + app_id, timeout=request_timeout
        )
        if response.status_code != 200:
            continue
        results = response.json().get("results")
        if not results:
            continue
        title = results[0]["bibjson"]["title"]
        # The title comes from a remote service: neutralise path separators so
        # the file is always created in the current directory.
        filename = title.replace("/", "_").replace("\\", "_") + ".json"
        with open(filename, "w", encoding="utf-8") as fio:
            json.dump(results, fio)
260 json.dump(results, fio)