Coverage for src/ptf_tools/doaj.py: 61%
195 statements
« prev ^ index » next coverage.py v7.8.2, created at 2025-09-08 12:26 +0000
« prev ^ index » next coverage.py v7.8.2, created at 2025-09-08 12:26 +0000
1import json
2import re
4import requests
5from django.conf import settings
6from django.db.models import Q
7from django.http import Http404
8from ptf import model_helpers
9from ptf.cmds.xml.xml_utils import remove_html
10from ptf.models import Container
11from ptf.utils import is_cr
13from mersenne_tools.models import DOAJBatch
def has_date_online_first(document):
    """Tell whether ``document`` carries an online-first date.

    The result is meant for boolean use: it is ``False`` when the object has
    no ``date_online_first`` attribute, otherwise the attribute value itself
    (which may be falsy, e.g. ``None``).
    """
    if not hasattr(document, "date_online_first"):
        return False
    return document.date_online_first
def has_date_published(document):
    """Tell whether ``document`` carries a publication date.

    The result is meant for boolean use: it is ``False`` when the object has
    no ``date_published`` attribute, otherwise the attribute value itself
    (which may be falsy, e.g. ``None``).
    """
    if not hasattr(document, "date_published"):
        return False
    return document.date_published
def has_publication_date(document):
    """Tell whether ``document`` has an online-first or a publication date.

    The online-first date is checked first; the publication date is only
    looked at when there is no online-first date. The result is meant for
    boolean use (it is the truthy date found, or a falsy value).
    """
    online_first = has_date_online_first(document)
    if online_first:
        return online_first
    return has_date_published(document)
def is_published(document):
    """Return ``True`` unless ``document`` is explicitly flagged ``do_not_publish``.

    Objects without a ``do_not_publish`` attribute are considered published.
    """
    return not getattr(document, "do_not_publish", False)
def get_names(resource, role):
    """Build the DOAJ contributor entries of ``resource`` for one role.

    Args:
        resource: object whose ``contributions.all()`` yields its contributions.
        role: only contributions whose ``role`` equals this value are kept.

    Returns:
        A list of dicts, one per matching contribution, in iteration order.
        Each dict has a ``name`` (``str(contribution)``) and, when available:
        ``affiliation`` (the non-empty addresses joined with "; ") and
        ``orcid_id`` (the ORCID as a full ``https://orcid.org/`` URL). An
        ORCID that does not look like ``dddd-dddd-dddd-ddd[dX]`` is omitted.
    """
    names = []
    for contribution in resource.contributions.all():
        if contribution.role != role:
            continue

        person = {"name": str(contribution)}

        addresses = contribution.contribaddress_set.all()
        if addresses:
            person["affiliation"] = "; ".join([c.address for c in addresses if c.address])

        if contribution.orcid:
            # Remove non-ASCII characters (e.g. zero-width spaces) *before*
            # stripping: stripping first leaves behind any whitespace that was
            # hidden by such a character, which either rejected a valid ORCID
            # or let a trailing newline slip into the generated URL.
            orcid = contribution.orcid.encode("ascii", "ignore").decode("ascii").strip()
            if re.fullmatch(r"\d{4}-\d{4}-\d{4}-\d{3}[\dX]", orcid):
                person["orcid_id"] = "https://orcid.org/" + orcid

        names.append(person)
    return names
def get_token(colid):
    """Return the DOAJ API token configured for a collection, or ``None``.

    ``PCJ`` and ``OJMO`` have their own token; every collection whose id
    starts with ``CR`` and is longer than two characters shares the CR token.
    Any other id (including the empty string) has no token.
    """
    if colid == "PCJ":
        return settings.DOAJ_TOKEN_PCJ
    if colid == "OJMO":
        return settings.DOAJ_TOKEN_OJMO
    if colid.startswith("CR") and len(colid) > 2:
        return settings.DOAJ_TOKEN_CR
    return None
def doaj_pid_register(pid):
    """Register the published articles of an issue in DOAJ with one bulk request.

    Only articles that are published, have a publication date and for which
    ``doaj_resource_register`` produces a record are sent. The outcome is
    stored in ``DOAJBatch`` rows: one for the container and, on success, one
    per article that was actually sent.

    Args:
        pid: PID of a ``Container`` resource.

    Returns:
        A ``(data, response)`` tuple. ``response`` is the ``requests`` response
        of the bulk call, or ``None`` when nothing was sent (no API token for
        the collection, or no eligible article). ``data`` summarises the DOAJ
        answer (``doaj_status``, ``doaj_message``, ``doaj_id``,
        ``doaj_location``) when DOAJ answered 201 and is ``None`` otherwise.

    Raises:
        Http404: if the PID is unknown, is not a container, or the container
            has no collection.
    """
    resource = model_helpers.get_resource(pid)
    if not resource:
        raise Http404

    container = resource.container if resource.classname == "Container" else None
    if not container:
        raise Http404

    collection = container.get_collection()
    if not collection:
        raise Http404

    data, response = None, None
    token = get_token(collection.pid)

    # Keep every payload paired with its article so that, after the upload,
    # only the articles that were really sent get flagged as registered
    # (doaj_resource_register() skips some, e.g. articles without a DOI).
    submitted = []
    if token:
        for article in container.article_set.all():
            if is_published(article) and has_publication_date(article):
                payload = doaj_resource_register(article)
                if payload:
                    submitted.append((article, payload))

    if submitted:
        url = f"https://doaj.org/api/bulk/articles?api_key={token}"
        response = requests.post(url, json=[payload for _, payload in submitted])
        container_batch = DOAJBatch.objects.get_or_create(resource=resource)[0]
        if response.status_code == 201:
            container_batch.status = DOAJBatch.REGISTERED
            results = response.json()
            data = {
                "doaj_status": response.status_code,
                "doaj_message": [r["status"] for r in results],
                "doaj_id": [r["id"] for r in results],
                "doaj_location": [r["location"] for r in results],
            }
            for article, _ in submitted:
                article_batch = DOAJBatch.objects.get_or_create(resource=article)[0]
                article_batch.status = DOAJBatch.REGISTERED
                article_batch.save()
        else:
            container_batch.status = DOAJBatch.ERROR
            if response.text:
                # Keep the DOAJ error body for later inspection.
                container_batch.log = response.text
        container_batch.save()
    return data, response
def doaj_resource_register(resource):
    """Build the DOAJ record (``admin`` + ``bibjson``) describing a resource.

    Args:
        resource: an ``Article`` or ``Container`` resource (told apart by its
            ``classname``).

    Returns:
        The record as a dict ready to be serialised to JSON, or ``None`` when
        the resource cannot be described: unsupported class, no container,
        no DOI, no collection, or a ``CR*`` collection whose DOI does not
        start with ``10.5802/cr``.
    """
    classname = resource.classname
    container = None
    if classname == "Article":
        document = resource.article
        container = document.my_container
        fpage, lpage = document.fpage, document.lpage
    elif classname == "Container":
        document = container = resource.container
        fpage = lpage = ""

    if not container:
        return None

    doi = resource.doi
    collection = container.get_collection()
    if not (doi and collection):
        return None

    if collection.pid.startswith("CR") and not doi.startswith("10.5802/cr"):
        return None

    # Start from the (last) year of the container; a real publication date,
    # online-first taking precedence, overrides it and also provides the month.
    month = ""
    year = container.year.split("-")[-1] if container.year != "0" else ""
    if has_date_online_first(document):
        pub_date = document.date_online_first
    elif has_date_published(document):
        pub_date = document.date_published
    else:
        pub_date = None
    if pub_date is not None:
        month = pub_date.strftime("%B")
        year = pub_date.strftime("%Y")

    # Volume and number are only reported once the container is no longer
    # "to appear"; on a CR site, numbers starting with "G" are left out.
    volume = number = ""
    if not container.to_appear():
        volume = container.volume or ""
        if container.number and (not is_cr() or container.number[0] != "G"):
            number = container.number

    eissn = collection.e_issn
    pissn = ""  # the print ISSN (collection.issn) is deliberately not sent
    colid = collection.pid.lower()
    domain = settings.SITE_REGISTER[colid]["site_domain"]
    if colid == "pcj":
        domain = "peercommunityjournal.org"

    url = f"https://{domain}/articles/{doi}/"

    lang = resource.lang
    if not lang or lang == "und":
        lang = ""

    authors = get_names(resource, "author")

    publisher = container.my_publisher
    pub_name = ""
    if publisher and publisher.pub_name:
        pub_name = publisher.pub_name

    title = remove_html(document.title_tex)

    # Keywords in the resource language, MSC codes excluded.
    keywords = []
    for keyword in document.kwd_set.all():
        if keyword.type != "msc" and keyword.lang == lang:
            keywords.append(keyword.value)

    # First abstract whose language is "en" or "und" ("en" sorts first).
    abstract = (
        document.abstract_set.all().filter(Q(lang="en") | Q(lang="und")).order_by("lang").first()
    )
    abstract_text = remove_html(abstract.value_tex) if abstract else ""

    identifiers = [{"type": "doi", "id": doi}]
    if eissn:
        identifiers.append({"type": "eissn", "id": eissn})
    if pissn:
        identifiers.append({"type": "pissn", "id": pissn})
    if not eissn and colid == "pcj":
        # Fallback e-ISSN used for PCJ when the collection has none.
        identifiers.append({"type": "eissn", "id": "2804-3871"})

    journal = {
        "country": "FR",
        "title": collection.title_tex,
        "start_page": fpage,
        "end_page": lpage,
        "language": [lang],
        "number": number,
        "volume": volume,
        "publisher": pub_name,
    }
    return {
        "admin": {"publisher_record_id": doi},
        "bibjson": {
            "journal": journal,
            "title": title,
            "month": month,
            "year": year,
            "abstract": abstract_text,
            "author": authors,
            "keywords": keywords,
            "link": [{"url": url, "type": "fulltext", "content_type": "HTML"}],
            "identifier": identifiers,
        },
    }
def doaj_delete_article(doi):
    """Delete from DOAJ the first article returned by a search on ``doi``.

    The API token is chosen from the collection of the local resource that
    owns the DOI; the DOAJ search itself is anonymous.

    Args:
        doi: DOI of the article to delete.

    Returns:
        A human-readable message: ``"<doi> deleted"`` when DOAJ answered 204,
        ``"<doi> not found or article already deleted"`` when the search
        returned nothing, ``"<doi> deletion failed"`` in every other case.
    """
    colid = ""
    resource = model_helpers.get_resource_by_doi(doi)
    if resource:
        colid = resource.article.my_container.get_collection().pid
    token = get_token(colid)

    response = requests.get(f"https://doaj.org/api/search/articles/{doi}")
    if response.status_code == 200:
        results = response.json().get("results")
        if not results:
            return doi + " not found or article already deleted"

        article_id = results[0].get("id", "")
        # Without a token (DOI unknown locally, or collection without DOAJ
        # credentials) or without an article id the DELETE can only be
        # rejected: report the failure instead of sending "api_key=None".
        if token and article_id:
            response = requests.delete(
                f"https://doaj.org/api/articles/{article_id}?api_key={token}"
            )
            if response.status_code == 204:
                return doi + " deleted"
    return doi + " deletion failed"
def doaj_delete_articles_in_collection(colid, check_published=True):
    """Delete from DOAJ the articles of every container whose PID starts with ``colid``.

    Args:
        colid: PID prefix used to select the containers.
        check_published: when true (default), only articles that are published
            and have a publication date are deleted; when false, all of them.

    Each container is printed as it is processed. The deletion is best
    effort: an exception raised for one article is printed and the loop
    moves on to the next article.
    """
    for container in Container.objects.filter(pid__startswith=colid):
        print(container)
        for article in container.article_set.all():
            try:
                eligible = not check_published or (
                    is_published(article) and has_publication_date(article)
                )
                if eligible:
                    doaj_delete_article(article.doi)
            except Exception as ex:
                print(ex)
def doaj_retrieve_applications():
    """Dump the DOAJ search results of a fixed list of ids to JSON files.

    For each hard-coded id, the DOAJ journal search is queried; when it
    answers 200 with at least one result, the whole result list is written
    to ``<title of the first result>.json`` (relative path, so in the current
    working directory). Ids without results are silently skipped.
    """
    application_ids = (
        "798d4f21a22d43579cea322bed8a560e",
        "7a0889a89de64979a3d5e26aace31db7",
        "0e30bf1ac2514bcda8d1cc0855237cd4",
        "4bcd45d13d23475bb246cbce9eaed9ee",
        "d85467c6c5914759886aa29481cce4b4",
        "11b60f2f3dd64ec087510dff3d82e0ab",
        "71951ece12524e45abac7628de6a8d22",
    )

    for app_id in application_ids:
        response = requests.get("https://doaj.org/api/search/journals/" + app_id)
        if response.status_code != 200:
            continue

        results = response.json().get("results")
        if not results:
            continue

        filename = results[0]["bibjson"]["title"] + ".json"
        with open(filename, "w") as fio:
            json.dump(results, fio)