Coverage for src/ptf_tools/doaj.py: 61%

195 statements  

« prev     ^ index     » next       coverage.py v7.7.0, created at 2025-04-09 14:54 +0000

1import json 

2import re 

3 

4import requests 

5from django.conf import settings 

6from django.db.models import Q 

7from django.http import Http404 

8from ptf import model_helpers 

9from ptf.cmds.xml.xml_utils import remove_html 

10from ptf.models import Container 

11 

12from mersenne_tools.models import DOAJBatch 

13 

14 

def has_date_online_first(document):
    """Tell whether ``document`` carries an online-first date.

    The result is meant to be used as a truth value: it is ``False`` when the
    object has no ``date_online_first`` attribute at all, otherwise it is the
    attribute's own value (which may itself be empty or ``None``).
    """
    if not hasattr(document, "date_online_first"):
        return False
    return document.date_online_first

17 

18 

def has_date_published(document):
    """Tell whether ``document`` carries a publication date.

    The result is meant to be used as a truth value: it is ``False`` when the
    object has no ``date_published`` attribute at all, otherwise it is the
    attribute's own value (which may itself be empty or ``None``).
    """
    if not hasattr(document, "date_published"):
        return False
    return document.date_published

21 

22 

def has_publication_date(document):
    """Tell whether ``document`` has an online-first date or a publication date.

    The online-first date is checked first; its (truthy) value is returned when
    present, otherwise the result of the publication-date check is returned.
    """
    online_first = has_date_online_first(document)
    if online_first:
        return online_first
    return has_date_published(document)

25 

26 

def is_published(document):
    """Return ``True`` unless ``document`` is explicitly flagged ``do_not_publish``.

    Objects that do not define ``do_not_publish`` are considered published.
    """
    return not getattr(document, "do_not_publish", False)

31 

32 

def get_names(resource, role):
    """Collect the contributors of ``resource`` that hold the given ``role``.

    Each contributor becomes a dict with:

    * ``name`` - ``str()`` of the contribution;
    * ``affiliation`` - the non-empty addresses joined with ``"; "`` (only set
      when the contribution has at least one address record);
    * ``orcid_id`` - the full ``https://orcid.org/...`` URL (only set when the
      stored ORCID matches the ``dddd-dddd-dddd-ddd[d|X]`` pattern).

    Returns the list of dicts in the order the contributions are iterated.
    """
    people = []
    for contrib in resource.contributions.all():
        if contrib.role != role:
            continue

        entry = {"name": str(contrib)}

        contrib_addresses = contrib.contribaddress_set.all()
        if contrib_addresses:
            non_empty = [item.address for item in contrib_addresses if item.address]
            entry["affiliation"] = "; ".join(non_empty)

        if contrib.orcid:
            # Trim surrounding whitespace and silently drop any non-ASCII
            # character before validating the identifier.
            candidate = contrib.orcid.strip().encode("ascii", "ignore").decode("utf-8")
            if re.match(r"^\d{4}-\d{4}-\d{4}-\d{3}(\d|X)$", candidate):
                entry["orcid_id"] = "https://orcid.org/" + candidate

        people.append(entry)
    return people

48 

49 

def get_token(colid):
    """Return the DOAJ API token configured for collection ``colid``.

    ``"PCJ"`` and ``"OJMO"`` each have their own token in the Django settings;
    every id that starts with ``"CR"`` and is longer than two characters shares
    ``DOAJ_TOKEN_CR``. Any other id yields ``None``.
    """
    if colid == "PCJ":
        return settings.DOAJ_TOKEN_PCJ
    if colid == "OJMO":
        return settings.DOAJ_TOKEN_OJMO
    if colid.startswith("CR") and len(colid) > 2:
        return settings.DOAJ_TOKEN_CR
    return None

59 

60 

def doaj_pid_register(pid):
    """Send the publishable articles of an issue to DOAJ in one bulk request.

    ``pid`` must resolve to a Container that belongs to a collection. Every
    article of the container that is published and has a publication date is
    converted with ``doaj_resource_register``; the non-empty payloads are
    posted together to the DOAJ bulk endpoint, and the outcome is recorded in
    ``DOAJBatch`` rows.

    Returns:
        A ``(data, response)`` tuple.

        * ``response`` is the ``requests`` response of the bulk POST, or
          ``None`` when nothing was sent (no token for the collection, or no
          article produced a payload).
        * On HTTP 201, ``data`` summarises the DOAJ answer (``doaj_status``,
          ``doaj_message``, ``doaj_id``, ``doaj_location``). Otherwise ``data``
          is whatever the last ``doaj_resource_register`` call returned
          (possibly ``None``).

    Raises:
        Http404: ``pid`` is unknown, is not a Container, or the container has
            no collection.
    """
    resource = model_helpers.get_resource(pid)
    if not resource:
        raise Http404

    container = None
    if resource.classname == "Container":
        container = resource.container

    if not container:
        raise Http404

    collection = container.get_collection()
    if not collection:
        raise Http404

    results = []
    # Articles whose payload is really part of the bulk request. Only these may
    # be flagged as registered afterwards: doaj_resource_register() rejects
    # some articles (e.g. no DOI), and those never reach DOAJ.
    submitted_articles = []
    data, response = None, None
    token = get_token(collection.pid)
    if token:
        for article in resource.container.article_set.all():
            if is_published(article) and has_publication_date(article):
                data = doaj_resource_register(article)
                if data:
                    results.append(data)
                    submitted_articles.append(article)

    if results:
        url = f"https://doaj.org/api/bulk/articles?api_key={token}"
        response = requests.post(url, json=results)
        container_batch = DOAJBatch.objects.get_or_create(resource=resource)[0]
        if response.status_code == 201:
            container_batch.status = DOAJBatch.REGISTERED
            results = response.json()
            data = {
                "doaj_status": response.status_code,
                "doaj_message": [r["status"] for r in results],
                "doaj_id": [r["id"] for r in results],
                "doaj_location": [r["location"] for r in results],
            }
            for article in submitted_articles:
                article_batch = DOAJBatch.objects.get_or_create(resource=article)[0]
                article_batch.status = DOAJBatch.REGISTERED
                article_batch.save()
        else:
            container_batch.status = DOAJBatch.ERROR
            if response.text:
                # Keep the DOAJ error body so the failure can be diagnosed.
                container_batch.log = response.text
        container_batch.save()
    return data, response

111 

112 

def doaj_resource_register(resource):
    """Build the DOAJ article payload describing ``resource``.

    ``resource`` is expected to be an Article or a Container (decided from its
    ``classname``). Nothing is sent over the network here; the function only
    assembles the ``{"admin": ..., "bibjson": ...}`` dict.

    Returns:
        The payload dict, or ``None`` when the resource cannot be described:
        unsupported ``classname``, no container, no DOI, no collection, or a
        collection whose pid starts with ``"CR"`` combined with a DOI that
        does not start with ``"10.5802/cr"``.
    """
    kind = resource.classname
    if kind == "Article":
        document = resource.article
        container = document.my_container
        fpage, lpage = document.fpage, document.lpage
    elif kind == "Container":
        document = container = resource.container
        fpage, lpage = "", ""
    else:
        container = None

    if not container:
        return None

    doi = resource.doi
    collection = container.get_collection()
    if not (doi and collection):
        return None

    in_cr_collection = collection.pid.startswith("CR")
    if in_cr_collection and not doi.startswith("10.5802/cr"):
        return None

    # Default year: last component of the container year (e.g. "2019-2020"
    # gives "2020"), unless the container year is the "0" placeholder.
    year = "" if container.year == "0" else container.year.split("-")[-1]
    month = ""

    # A document-level date overrides the container year; online-first wins
    # over the publication date.
    if has_date_online_first(document):
        reference_date = document.date_online_first
    elif has_date_published(document):
        reference_date = document.date_published
    else:
        reference_date = None
    if reference_date is not None:
        month = reference_date.strftime("%B")
        year = reference_date.strftime("%Y")

    volume = ""
    number = ""
    if not container.to_appear():
        is_cr = container.is_cr()
        if container.volume:
            volume = container.volume
        # For CR containers, a number starting with "G" is not reported.
        if container.number and (not is_cr or container.number[0] != "G"):
            number = container.number

    eissn = collection.e_issn
    pissn = ""  # collection.issn
    colid = collection.pid.lower()
    registered_domain = settings.SITE_REGISTER[colid]["site_domain"]
    domain = "peercommunityjournal.org" if colid == "pcj" else registered_domain
    url = f"https://{domain}/articles/{doi}/"

    lang = resource.lang
    if not lang or lang == "und":
        lang = ""

    authors = get_names(resource, "author")

    publisher = container.my_publisher
    pub_name = ""
    if publisher and publisher.pub_name:
        pub_name = publisher.pub_name

    title = remove_html(document.title_tex)

    # Keywords: everything except MSC codes, restricted to the resource language.
    keywords = [
        kwd.value for kwd in document.kwd_set.all() if kwd.type != "msc" and kwd.lang == lang
    ]

    # Ordering by lang puts an "en" abstract ahead of an "und" one.
    abstract = (
        document.abstract_set.all().filter(Q(lang="en") | Q(lang="und")).order_by("lang").first()
    )
    abstract_text = remove_html(abstract.value_tex) if abstract else ""

    identifiers = [{"type": "doi", "id": doi}]
    if eissn:
        identifiers.append({"type": "eissn", "id": eissn})
    if pissn:
        identifiers.append({"type": "pissn", "id": pissn})
    if not eissn and colid == "pcj":
        # Fallback e-ISSN used for PCJ when the collection does not provide one.
        identifiers.append({"type": "eissn", "id": "2804-3871"})

    return {
        "admin": {"publisher_record_id": doi},
        "bibjson": {
            "journal": {
                "country": "FR",
                "title": collection.title_tex,
                "start_page": fpage,
                "end_page": lpage,
                "language": [lang],
                "number": number,
                "volume": volume,
                "publisher": pub_name,
            },
            "title": title,
            "month": month,
            "year": year,
            "abstract": abstract_text,
            "author": authors,
            "keywords": keywords,
            "link": [{"url": url, "type": "fulltext", "content_type": "HTML"}],
            "identifier": identifiers,
        },
    }

203 

204 

def doaj_delete_article(doi):
    """Look an article up on DOAJ by ``doi`` and delete its record.

    The collection of the local resource (when one exists for ``doi``) selects
    the API token used for the DELETE call.

    Returns:
        A human-readable status string starting with ``doi``:
        ``" deleted"`` on HTTP 204, ``" not found or article already deleted"``
        when the search succeeds but returns no result, and
        ``" deletion failed"`` in every other case.
    """
    resource = model_helpers.get_resource_by_doi(doi)
    colid = resource.article.my_container.get_collection().pid if resource else ""
    token = get_token(colid)

    search_response = requests.get(f"https://doaj.org/api/search/articles/{doi}")
    if search_response.status_code == 200:
        hits = search_response.json().get("results")
        # NOTE(review): the "not found" message is taken to be the outcome of
        # an empty search result - confirm against the upstream file.
        if not hits:
            return doi + " not found or article already deleted"

        article_id = hits[0].get("id", "")
        delete_response = requests.delete(
            f"https://doaj.org/api/articles/{article_id}?api_key={token}"
        )
        if delete_response.status_code == 204:
            return doi + " deleted"
    return doi + " deletion failed"

225 

226 

def doaj_delete_articles_in_collection(colid, check_published=True):
    """Delete from DOAJ the articles of every container whose pid starts with ``colid``.

    With ``check_published`` left to ``True`` only the articles that are
    published and have a publication date are processed; with ``False`` every
    article is. Each container is printed as it is visited, and any exception
    raised for an article is printed and does not stop the loop.
    """
    for container in Container.objects.filter(pid__startswith=colid):
        print(container)
        for article in container.article_set.all():
            try:
                # The publication checks only run when filtering is requested.
                eligible = not check_published or (
                    is_published(article) and has_publication_date(article)
                )
                if eligible:
                    doaj_delete_article(article.doi)
            except Exception as ex:
                print(ex)

239 

240 

# Identifiers queried when doaj_retrieve_applications() is called without arguments.
_DEFAULT_DOAJ_APPLICATION_IDS = (
    "798d4f21a22d43579cea322bed8a560e",
    "7a0889a89de64979a3d5e26aace31db7",
    "0e30bf1ac2514bcda8d1cc0855237cd4",
    "4bcd45d13d23475bb246cbce9eaed9ee",
    "d85467c6c5914759886aa29481cce4b4",
    "11b60f2f3dd64ec087510dff3d82e0ab",
    "71951ece12524e45abac7628de6a8d22",
)


def doaj_retrieve_applications(application_ids=None):
    """Download DOAJ journal search results and dump each one to a JSON file.

    For every identifier, the DOAJ journal search endpoint is queried; when the
    answer is HTTP 200 and contains results, the whole ``results`` list is
    written to ``<title of the first result>.json`` in the current working
    directory. Identifiers that fail or return nothing are skipped silently.

    Args:
        application_ids: Iterable of identifiers to query. Defaults to the
            built-in list, so existing no-argument calls behave as before.
    """
    if application_ids is None:
        application_ids = _DEFAULT_DOAJ_APPLICATION_IDS

    for app_id in application_ids:
        response = requests.get("https://doaj.org/api/search/journals/" + app_id)
        if response.status_code != 200:
            continue

        results = response.json().get("results")
        if not results:
            continue

        filename = results[0]["bibjson"]["title"] + ".json"
        # Explicit encoding so the file does not depend on the platform default.
        with open(filename, "w", encoding="utf-8") as fio:
            json.dump(results, fio)