Coverage for src/ptf_tools/doaj.py: 61%

1import json

2import re

4import requests

5from django.conf import settings

6from django.db.models import Q

7from django.http import Http404

8from ptf import model_helpers

9from ptf.cmds.xml.xml_utils import remove_html

10from ptf.models import Container

12from mersenne_tools.models import DOAJBatch

def has_date_online_first(document):
    """Tell whether ``document`` carries a usable ``date_online_first``.

    Returns ``False`` when the attribute does not exist; otherwise the
    attribute value itself is returned, so callers should rely on its
    truthiness rather than on a strict boolean.
    """
    if not hasattr(document, "date_online_first"):
        return False
    return document.date_online_first

def has_date_published(document):
    """Tell whether ``document`` carries a usable ``date_published``.

    Returns ``False`` when the attribute does not exist; otherwise the
    attribute value itself is returned (truthy when a date is set).
    """
    return document.date_published if hasattr(document, "date_published") else False

def has_publication_date(document):
    """Tell whether ``document`` has an online-first date or a published date.

    The online-first date takes precedence: its (truthy) value is returned
    when present, otherwise the result of ``has_date_published`` is returned.
    """
    online_first = has_date_online_first(document)
    if online_first:
        return online_first
    return has_date_published(document)

def is_published(document):
    """Return ``True`` unless ``document`` is flagged with ``do_not_publish``.

    A document without a ``do_not_publish`` attribute is treated as published.
    """
    return not getattr(document, "do_not_publish", False)

def get_names(resource, role):
    """Collect the contributors of ``resource`` that have the given ``role``.

    Returns a list of dicts, one per matching contribution, in iteration
    order. Each dict always has a ``"name"`` key (``str(contribution)``) and
    optionally:

    * ``"affiliation"``: the non-empty addresses joined with ``"; "``; set
      whenever the contribution has at least one address record.
    * ``"orcid_id"``: the ORCID as an ``https://orcid.org/`` URL, only when the
      stored value matches the ``dddd-dddd-dddd-ddd[dX]`` format.
    """
    orcid_pattern = r"^\d{4}-\d{4}-\d{4}-\d{3}(\d|X)$"
    people = []
    for contrib in resource.contributions.all():
        if contrib.role != role:
            continue

        entry = {"name": str(contrib)}

        address_set = contrib.contribaddress_set.all()
        if address_set:
            non_empty = [item.address for item in address_set if item.address]
            entry["affiliation"] = "; ".join(non_empty)

        raw_orcid = contrib.orcid
        if raw_orcid:
            # Trim surrounding whitespace, then drop any non-ASCII character
            # before validating the identifier format.
            cleaned = raw_orcid.strip().encode("ascii", "ignore").decode("utf-8")
            if re.match(orcid_pattern, cleaned):
                entry["orcid_id"] = "https://orcid.org/" + cleaned

        people.append(entry)
    return people

def get_token(colid):
    """Return the DOAJ token configured in settings for collection ``colid``.

    ``"PCJ"`` and ``"OJMO"`` have dedicated tokens; every collection id that
    starts with ``"CR"`` and is longer than two characters shares
    ``settings.DOAJ_TOKEN_CR``. Any other id yields ``None``.
    """
    if colid == "PCJ":
        return settings.DOAJ_TOKEN_PCJ
    if colid == "OJMO":
        return settings.DOAJ_TOKEN_OJMO
    if colid.startswith("CR") and len(colid) > 2:
        return settings.DOAJ_TOKEN_CR
    return None

def doaj_pid_register(pid):
    """Send the published articles of the Container ``pid`` to the DOAJ bulk API.

    Only articles that are published, have a publication date and for which
    ``doaj_resource_register`` produces a payload are submitted. The outcome
    is recorded in ``DOAJBatch`` rows: one for the container and, on success,
    one for each article that was actually submitted.

    Args:
        pid: identifier of the resource to register; it must resolve to a
            resource whose ``classname`` is ``"Container"``.

    Returns:
        A ``(data, response)`` tuple. ``response`` is the ``requests``
        response of the bulk upload, or ``None`` when nothing was sent (no
        token for the collection, or no eligible article). ``data`` is a
        summary dict (``doaj_status``, ``doaj_message``, ``doaj_id``,
        ``doaj_location``) when DOAJ answered 201, and ``None`` otherwise.

    Raises:
        Http404: if the resource does not exist, is not a Container, or has
            no collection.
    """
    resource = model_helpers.get_resource(pid)
    if not resource:
        raise Http404

    container = resource.container if resource.classname == "Container" else None
    if not container:
        raise Http404

    collection = container.get_collection()
    if not collection:
        raise Http404

    data, response = None, None
    token = get_token(collection.pid)

    # Keep every submitted article next to its payload so that, on success,
    # only the articles that were really sent get flagged as registered.
    # A dedicated loop variable also keeps per-article payloads from leaking
    # into the returned ``data``.
    submitted = []
    if token:
        for article in container.article_set.all():
            if is_published(article) and has_publication_date(article):
                payload = doaj_resource_register(article)
                if payload:
                    submitted.append((article, payload))

    if submitted:
        # NOTE(review): no timeout is set on this request; a stalled DOAJ
        # endpoint blocks the caller indefinitely.
        url = f"https://doaj.org/api/bulk/articles?api_key={token}"
        response = requests.post(url, json=[payload for _, payload in submitted])
        container_batch = DOAJBatch.objects.get_or_create(resource=resource)[0]
        if response.status_code == 201:
            container_batch.status = DOAJBatch.REGISTERED
            results = response.json()
            data = {
                "doaj_status": response.status_code,
                "doaj_message": [r["status"] for r in results],
                "doaj_id": [r["id"] for r in results],
                "doaj_location": [r["location"] for r in results],
            }
            for article, _ in submitted:
                article_batch = DOAJBatch.objects.get_or_create(resource=article)[0]
                article_batch.status = DOAJBatch.REGISTERED
                article_batch.save()
        else:
            container_batch.status = DOAJBatch.ERROR
            if response.text:
                container_batch.log = response.text
        container_batch.save()
    return data, response

111

112

def doaj_resource_register(resource):
    """Build the DOAJ article record (``admin`` + ``bibjson``) for ``resource``.

    ``resource`` must have a ``classname`` of ``"Article"`` or ``"Container"``.

    Returns:
        The payload dict, or ``None`` when the resource cannot be exported:
        unsupported class, no container, no DOI, no collection, or a
        collection whose pid starts with ``"CR"`` combined with a DOI that
        does not start with ``10.5802/cr``.
    """
    kind = resource.classname
    if kind == "Article":
        document = resource.article
        container = document.my_container
        fpage, lpage = document.fpage, document.lpage
    elif kind == "Container":
        document = container = resource.container
        fpage, lpage = "", ""
    else:
        container = None

    if not container:
        return None

    doi = resource.doi
    collection = container.get_collection()
    if not (doi and collection):
        return None

    if collection.pid.startswith("CR") and not doi.startswith("10.5802/cr"):
        return None

    # Default to the last component of the container year (it is split on
    # "-"); an actual online-first / published date overrides it below.
    month = ""
    year = "" if container.year == "0" else container.year.split("-")[-1]

    if has_date_online_first(document):
        reference_date = document.date_online_first
    elif has_date_published(document):
        reference_date = document.date_published
    else:
        reference_date = None
    if reference_date is not None:
        month = reference_date.strftime("%B")
        year = reference_date.strftime("%Y")

    volume = number = ""
    if not container.to_appear():
        is_cr = container.is_cr()
        volume = container.volume or ""
        issue_number = container.number
        # For CR containers, numbers starting with "G" are not exported.
        if issue_number and (not is_cr or issue_number[0] != "G"):
            number = issue_number

    eissn = collection.e_issn
    pissn = ""  # collection.issn
    colid = collection.pid.lower()
    domain = settings.SITE_REGISTER[colid]["site_domain"]
    if colid == "pcj":
        domain = "peercommunityjournal.org"

    url = f"https://{domain}/articles/{doi}/"

    lang = resource.lang
    if not lang or lang == "und":
        lang = ""

    authors = get_names(resource, "author")

    publisher = container.my_publisher
    pub_name = ""
    if publisher and publisher.pub_name:
        pub_name = publisher.pub_name

    title = remove_html(document.title_tex)

    # Keywords: everything except MSC codes, in the language of the resource.
    keywords = [
        keyword.value
        for keyword in document.kwd_set.all()
        if keyword.type != "msc" and keyword.lang == lang
    ]

    # First abstract in "en" or "und", ordered by language code.
    abstract = (
        document.abstract_set.all().filter(Q(lang="en") | Q(lang="und")).order_by("lang").first()
    )
    abstract_text = remove_html(abstract.value_tex) if abstract else ""

    identifiers = [{"type": "doi", "id": doi}]
    if eissn:
        identifiers.append({"type": "eissn", "id": eissn})
    if pissn:
        identifiers.append({"type": "pissn", "id": pissn})
    if not eissn and colid == "pcj":
        # Fallback e-ISSN used for PCJ when the collection has none.
        identifiers.append({"type": "eissn", "id": "2804-3871"})

    journal = {
        "country": "FR",
        "title": collection.title_tex,
        "start_page": fpage,
        "end_page": lpage,
        "language": [lang],
        "number": number,
        "volume": volume,
        "publisher": pub_name,
    }
    bibjson = {
        "journal": journal,
        "title": title,
        "month": month,
        "year": year,
        "abstract": abstract_text,
        "author": authors,
        "keywords": keywords,
        "link": [{"url": url, "type": "fulltext", "content_type": "HTML"}],
        "identifier": identifiers,
    }
    return {"admin": {"publisher_record_id": doi}, "bibjson": bibjson}

203

204

def doaj_delete_article(doi):
    """Look up the article ``doi`` on DOAJ and delete it.

    The token is chosen from the collection of the local resource that has
    this DOI; when no local resource is found the lookup is done with an
    empty collection id.

    Returns:
        A human-readable status string: ``"<doi> deleted"``,
        ``"<doi> not found or article already deleted"`` or
        ``"<doi> deletion failed"``.
    """
    resource = model_helpers.get_resource_by_doi(doi)
    colid = resource.article.my_container.get_collection().pid if resource else ""
    token = get_token(colid)

    search = requests.get(f"https://doaj.org/api/search/articles/{doi}")
    if search.status_code != 200:
        return doi + " deletion failed"

    hits = search.json().get("results")
    if not hits:
        return doi + " not found or article already deleted"

    # Only the first search hit is deleted.
    article_id = hits[0].get("id", "")
    deletion = requests.delete(f"https://doaj.org/api/articles/{article_id}?api_key={token}")
    if deletion.status_code == 204:
        return doi + " deleted"
    return doi + " deletion failed"

225

226

def doaj_delete_articles_in_collection(colid, check_published=True):
    """Delete from DOAJ the articles of every container whose pid starts with ``colid``.

    Args:
        colid: prefix matched against ``Container.pid``.
        check_published: when true (default), only articles that are published
            and have a publication date are deleted; when false, every article
            of the matching containers is deleted.

    Each container is printed as it is processed. Errors raised for an
    article are printed and do not stop the loop (best-effort clean-up).
    """
    for container in Container.objects.filter(pid__startswith=colid):
        print(container)
        for article in container.article_set.all():
            try:
                eligible = not check_published or (
                    is_published(article) and has_publication_date(article)
                )
                if eligible:
                    doaj_delete_article(article.doi)
            except Exception as ex:
                print(ex)

239

240

def doaj_retrieve_applications():
    """Fetch a fixed list of DOAJ journal records and dump each one to a JSON file.

    For every hard-coded id, the DOAJ journal search endpoint is queried; when
    the request succeeds and returns results, the full result list is written
    to ``<title>.json`` (title of the first result) relative to the current
    working directory. Ids that fail or return nothing are skipped silently.
    """
    application_ids = [
        "798d4f21a22d43579cea322bed8a560e",
        "7a0889a89de64979a3d5e26aace31db7",
        "0e30bf1ac2514bcda8d1cc0855237cd4",
        "4bcd45d13d23475bb246cbce9eaed9ee",
        "d85467c6c5914759886aa29481cce4b4",
        "11b60f2f3dd64ec087510dff3d82e0ab",
        "71951ece12524e45abac7628de6a8d22",
    ]

    for app_id in application_ids:
        response = requests.get("https://doaj.org/api/search/journals/" + app_id)
        if response.status_code != 200:
            continue

        results = response.json().get("results")
        if not results:
            continue

        # The title comes from a remote service: neutralise path separators so
        # it can never point outside the working directory (or to a missing
        # sub-directory, which would abort the whole loop).
        title = results[0]["bibjson"]["title"]
        filename = title.replace("/", "_").replace("\\", "_") + ".json"
        with open(filename, "w") as fio:
            json.dump(results, fio)