Coverage for src/ptf_tools/doaj.py: 61%
195 statements
« prev ^ index » next coverage.py v7.7.0, created at 2025-04-09 14:54 +0000
« prev ^ index » next coverage.py v7.7.0, created at 2025-04-09 14:54 +0000
1import json
2import re
4import requests
5from django.conf import settings
6from django.db.models import Q
7from django.http import Http404
8from ptf import model_helpers
9from ptf.cmds.xml.xml_utils import remove_html
10from ptf.models import Container
12from mersenne_tools.models import DOAJBatch
def has_date_online_first(document):
    """Return the document's online-first date, for use in truth tests.

    The result is ``False`` when ``document`` has no ``date_online_first``
    attribute; otherwise it is the attribute's value, which may itself be
    falsy (for instance ``None``).
    """
    if not hasattr(document, "date_online_first"):
        return False
    return document.date_online_first
def has_date_published(document):
    """Return the document's publication date, for use in truth tests.

    The result is ``False`` when ``document`` has no ``date_published``
    attribute; otherwise it is the attribute's value, which may itself be
    falsy (for instance ``None``).
    """
    if not hasattr(document, "date_published"):
        return False
    return document.date_published
def has_publication_date(document):
    """Tell whether ``document`` carries an online-first or a published date.

    The online-first date takes precedence: when it is truthy it is returned
    as is, otherwise the result of ``has_date_published`` is returned.
    """
    online_first = has_date_online_first(document)
    if online_first:
        return online_first
    return has_date_published(document)
def is_published(document):
    """Return ``True`` unless the document is explicitly flagged ``do_not_publish``.

    Documents without a ``do_not_publish`` attribute count as published.
    """
    if hasattr(document, "do_not_publish"):
        return not document.do_not_publish
    return True
def get_names(resource, role):
    """Describe the contributors of ``resource`` that hold ``role``.

    Returns a list with one dict per matching contribution. Each dict has a
    ``name`` key (``str()`` of the contribution), an ``affiliation`` key when
    the contribution has addresses (the non-empty ones joined with ``"; "``),
    and an ``orcid_id`` key (full ``https://orcid.org/`` URL) when the
    contribution's ORCID matches the ``dddd-dddd-dddd-ddd[d|X]`` pattern.
    """
    people = []
    for contrib in resource.contributions.all():
        if contrib.role != role:
            continue

        entry = {"name": str(contrib)}

        address_set = contrib.contribaddress_set.all()
        if address_set:
            entry["affiliation"] = "; ".join(a.address for a in address_set if a.address)

        if contrib.orcid:
            # Strip surrounding blanks and drop any non-ASCII character before
            # validating the identifier.
            candidate = contrib.orcid.strip().encode("ascii", "ignore").decode("utf-8")
            if re.match(r"^\d{4}-\d{4}-\d{4}-\d{3}(\d|X)$", candidate):
                entry["orcid_id"] = "https://orcid.org/" + candidate

        people.append(entry)
    return people
def get_token(colid):
    """Return the DOAJ API token configured for the collection ``colid``.

    ``PCJ`` and ``OJMO`` each have a dedicated token in the Django settings;
    collection ids that start with ``CR`` and are longer than two characters
    share ``DOAJ_TOKEN_CR``. Any other id yields ``None``.
    """
    if colid == "PCJ":
        return settings.DOAJ_TOKEN_PCJ
    if colid == "OJMO":
        return settings.DOAJ_TOKEN_OJMO
    if colid.startswith("CR") and len(colid) > 2:
        return settings.DOAJ_TOKEN_CR
    return None
def doaj_pid_register(pid):
    """Submit the published articles of the container ``pid`` to DOAJ.

    Every article of the container that is published and has a publication
    date is converted with ``doaj_resource_register``; the resulting payloads
    are sent in a single request to the DOAJ bulk-articles endpoint. The
    outcome is recorded in ``DOAJBatch`` rows: one for the container and, on
    success, one for each article that was actually part of the upload.

    Args:
        pid: identifier of the container whose articles are registered.

    Returns:
        A ``(data, response)`` tuple. ``response`` is the ``requests``
        response of the bulk upload, or ``None`` when nothing was sent (no
        token for the collection, or no article produced a payload). On a 201
        response ``data`` is a dict with the keys ``doaj_status``,
        ``doaj_message``, ``doaj_id`` and ``doaj_location``.

    Raises:
        Http404: ``pid`` does not resolve to a resource, the resource is not a
            ``Container``, or the container has no collection.
        requests.exceptions.Timeout: DOAJ did not answer within the timeout.
    """
    resource = model_helpers.get_resource(pid)
    if not resource:
        raise Http404

    container = resource.container if resource.classname == "Container" else None
    if not container:
        raise Http404

    collection = container.get_collection()
    if not collection:
        raise Http404

    data, response = None, None
    token = get_token(collection.pid)
    if not token:
        return data, response

    payloads = []
    # Articles whose payload is part of the upload. Only these may be flagged
    # as registered afterwards: an article rejected by doaj_resource_register
    # (it returned None) is never sent to DOAJ.
    submitted = []
    for article in container.article_set.all():
        if is_published(article) and has_publication_date(article):
            data = doaj_resource_register(article)
            if data:
                payloads.append(data)
                submitted.append(article)

    if not payloads:
        return data, response

    url = f"https://doaj.org/api/bulk/articles?api_key={token}"
    # (connect, read) timeout in seconds: without one a stalled DOAJ
    # connection would block the calling worker indefinitely.
    response = requests.post(url, json=payloads, timeout=(10, 120))

    container_batch = DOAJBatch.objects.get_or_create(resource=resource)[0]
    if response.status_code == 201:
        container_batch.status = DOAJBatch.REGISTERED
        created = response.json()
        data = {
            "doaj_status": response.status_code,
            "doaj_message": [r["status"] for r in created],
            "doaj_id": [r["id"] for r in created],
            "doaj_location": [r["location"] for r in created],
        }
        for article in submitted:
            article_batch = DOAJBatch.objects.get_or_create(resource=article)[0]
            article_batch.status = DOAJBatch.REGISTERED
            article_batch.save()
    else:
        # NOTE(review): on failure ``data`` is still whatever the last loop
        # iteration produced (a single article payload or None). Kept as is
        # because callers may rely on it; confirm before changing.
        container_batch.status = DOAJBatch.ERROR
        if response.text:
            container_batch.log = response.text
    container_batch.save()
    return data, response
def doaj_resource_register(resource):
    """Build the DOAJ article payload describing ``resource``.

    ``resource`` may be an ``Article`` (pages are taken from the article) or a
    ``Container`` (pages are left empty).

    Returns:
        A dict with an ``admin`` section (``publisher_record_id``) and a
        ``bibjson`` section (title, month, year, abstract, authors, keywords,
        full-text link, identifiers and journal details), or ``None`` when:

        * the resource is neither an Article nor a Container, or no container
          can be determined;
        * the resource has no DOI or the container has no collection;
        * the collection pid starts with ``CR`` but the DOI does not start
          with ``10.5802/cr``.
    """
    kind = resource.classname
    if kind == "Article":
        document = resource.article
        container = document.my_container
        fpage, lpage = document.fpage, document.lpage
    elif kind == "Container":
        document = container = resource.container
        fpage = lpage = ""
    else:
        container = None

    if not container:
        return None

    doi = resource.doi
    collection = container.get_collection()
    if not (doi and collection):
        return None

    if collection.pid.startswith("CR") and not doi.startswith("10.5802/cr"):
        return None

    # The container year (last component of a "YYYY-YYYY" range) is only a
    # fallback: a date carried by the document itself overrides it.
    month = ""
    year = "" if container.year == "0" else container.year.split("-")[-1]
    if has_date_online_first(document):
        reference_date = document.date_online_first
    elif has_date_published(document):
        reference_date = document.date_published
    else:
        reference_date = None
    if reference_date is not None:
        month = reference_date.strftime("%B")
        year = reference_date.strftime("%Y")

    volume = number = ""
    if not container.to_appear():
        is_cr = container.is_cr()
        volume = container.volume or ""
        # For CR containers, numbers starting with "G" are not reported.
        if container.number and (not is_cr or container.number[0] != "G"):
            number = container.number

    eissn = collection.e_issn
    pissn = ""  # collection.issn
    colid = collection.pid.lower()
    domain = settings.SITE_REGISTER[colid]["site_domain"]
    if colid == "pcj":
        domain = "peercommunityjournal.org"

    url = f"https://{domain}/articles/{doi}/"

    lang = resource.lang
    if not lang or lang == "und":
        lang = ""

    authors = get_names(resource, "author")

    publisher = container.my_publisher
    pub_name = ""
    if publisher and publisher.pub_name:
        pub_name = publisher.pub_name

    title = remove_html(document.title_tex)

    # Keywords in the resource language, MSC classification codes excluded.
    keywords = []
    for kwd in document.kwd_set.all():
        if kwd.type != "msc" and kwd.lang == lang:
            keywords.append(kwd.value)

    abstracts = document.abstract_set.all().filter(Q(lang="en") | Q(lang="und"))
    abstract = abstracts.order_by("lang").first()
    abstract_text = remove_html(abstract.value_tex) if abstract else ""

    identifiers = [{"type": "doi", "id": doi}]
    if eissn:
        identifiers.append({"type": "eissn", "id": eissn})
    if pissn:
        identifiers.append({"type": "pissn", "id": pissn})
    if not eissn and colid == "pcj":
        identifiers.append({"type": "eissn", "id": "2804-3871"})

    return {
        "admin": {"publisher_record_id": doi},
        "bibjson": {
            "journal": {
                "country": "FR",
                "title": collection.title_tex,
                "start_page": fpage,
                "end_page": lpage,
                "language": [lang],
                "number": number,
                "volume": volume,
                "publisher": pub_name,
            },
            "title": title,
            "month": month,
            "year": year,
            "abstract": abstract_text,
            "author": authors,
            "keywords": keywords,
            "link": [{"url": url, "type": "fulltext", "content_type": "HTML"}],
            "identifier": identifiers,
        },
    }
def doaj_delete_article(doi):
    """Delete from DOAJ the article found by searching for ``doi``.

    The DOAJ article id is looked up through the public search endpoint, then
    a DELETE request is issued with the API token of the article's
    collection (as returned by ``get_token``).

    Args:
        doi: DOI of the article to remove.

    Returns:
        A human-readable status string: ``"<doi> deleted"`` on a 204 answer,
        ``"<doi> not found or article already deleted"`` when the search
        returns no result, and ``"<doi> deletion failed"`` otherwise.

    Raises:
        requests.exceptions.Timeout: DOAJ did not answer within the timeout.
    """
    # Seconds; without a timeout a stalled DOAJ connection would block the
    # caller indefinitely.
    timeout = 30

    colid = ""
    resource = model_helpers.get_resource_by_doi(doi)
    if resource:
        colid = resource.article.my_container.get_collection().pid
    token = get_token(colid)

    response = requests.get(f"https://doaj.org/api/search/articles/{doi}", timeout=timeout)
    if response.status_code != 200:
        return doi + " deletion failed"

    results = response.json().get("results")
    if not results:
        return doi + " not found or article already deleted"

    # NOTE(review): the first search hit is trusted to be the article with
    # this DOI; it is not checked against the hit's identifiers — confirm.
    article_id = results[0].get("id", "")
    response = requests.delete(
        f"https://doaj.org/api/articles/{article_id}?api_key={token}", timeout=timeout
    )
    if response.status_code == 204:
        return doi + " deleted"
    return doi + " deletion failed"
def doaj_delete_articles_in_collection(colid, check_published=True):
    """Delete from DOAJ the articles of every container whose pid starts with ``colid``.

    Args:
        colid: prefix matched against the container pids.
        check_published: when true, only articles that are published and have
            a publication date are deleted; when false, every article is.

    The operation is best effort: each container is printed as it is
    processed, and an exception raised for one article is printed without
    interrupting the loop.
    """
    matching_containers = Container.objects.filter(pid__startswith=colid)
    for container in matching_containers:
        print(container)
        for article in container.article_set.all():
            try:
                eligible = not check_published or (
                    is_published(article) and has_publication_date(article)
                )
                if eligible:
                    doaj_delete_article(article.doi)
            except Exception as ex:
                print(ex)
def doaj_retrieve_applications(application_ids=None):
    """Download DOAJ journal records and dump each one to a JSON file.

    For every id, the DOAJ journal search endpoint is queried; when it
    answers 200 with at least one result, the result list is written to
    ``<title>.json`` in the current working directory, where ``<title>`` is
    the ``bibjson.title`` of the first result.

    Args:
        application_ids: ids to query. Defaults to the built-in list of ids
            when omitted.

    Raises:
        requests.exceptions.Timeout: DOAJ did not answer within the timeout.
    """
    if application_ids is None:
        application_ids = [
            "798d4f21a22d43579cea322bed8a560e",
            "7a0889a89de64979a3d5e26aace31db7",
            "0e30bf1ac2514bcda8d1cc0855237cd4",
            "4bcd45d13d23475bb246cbce9eaed9ee",
            "d85467c6c5914759886aa29481cce4b4",
            "11b60f2f3dd64ec087510dff3d82e0ab",
            "71951ece12524e45abac7628de6a8d22",
        ]

    for app_id in application_ids:
        # Timeout in seconds, so a stalled connection cannot hang the run.
        response = requests.get("https://doaj.org/api/search/journals/" + app_id, timeout=30)
        if response.status_code != 200:
            continue

        results = response.json().get("results")
        if not results:
            continue

        title = results[0]["bibjson"]["title"]
        # The title comes from the remote service: a path separator in it
        # would make open() fail or write outside the current directory.
        safe_title = title.replace("/", "_").replace("\\", "_")
        with open(safe_title + ".json", "w", encoding="utf-8") as fio:
            json.dump(results, fio)