Coverage for src/ptf_tools/indexingChecker.py: 32%
94 statements
« prev ^ index » next coverage.py v7.8.2, created at 2025-10-23 12:08 +0000
« prev ^ index » next coverage.py v7.8.2, created at 2025-10-23 12:08 +0000
1from abc import ABC
3from django.db.models import Q
4from ptf import model_helpers
5from ptf.external.ads import AdsQuery
6from ptf.model_data import ArticleData
7from ptf.models import Article
class ReferencingChecker(ABC):
    """Base class that lists the articles of a collection missing from an external index.

    Subclasses provide two hooks:

    * ``_getPresent`` -- a mapping keyed by DOI of what the external index holds;
    * ``_get_search_site`` -- the display name of that index.
    """

    def __init__(self):
        self._file = None
        self._journals = {}
        self._collection = None

    def check_references(self, collect_id, file=None):
        """Build a ``Journal`` holding the articles the external index does not know.

        Args:
            collect_id: pid of the collection to check.
            file: optional input stored on ``self._file`` for the
                ``_getPresent`` hook to read.

        Returns:
            A ``Journal`` whose ``articles()`` list contains every article of the
            collection (DOI starting with ``10.5802`` or ``10.24072``, ordered
            by DOI) for which ``_getPresent()`` has no truthy entry, or ``None``
            when the collection cannot be found.
        """
        self._file = file
        self._collection = model_helpers.get_collection(collect_id)
        if self._collection is None:
            # Same convention as ReferencingCheckerWos.make_journal: an unknown
            # collection yields None instead of an AttributeError further down.
            return None

        journal = Journal(self._collection.title_sort, collect_id, self._get_search_site())
        present = self._getPresent()
        articles = Article.objects.filter(
            Q(doi__startswith="10.5802") | Q(doi__startswith="10.24072"),
            my_container__my_collection__pid=collect_id,
        ).order_by("doi")
        for article in articles:
            if not present.get(article.doi):
                print(article.doi)
                journal.articles().append(article)
        return journal

    def _getPresent(self) -> dict:
        """Return the mapping (keyed by DOI) of what the external index holds.

        Raises:
            NotImplementedError: always; subclasses must override this hook.
        """
        # Returning None here would only fail later, on ``present.get(...)``.
        raise NotImplementedError(f"{type(self).__name__} must implement _getPresent()")

    def _get_search_site(self):
        """Return the display name of the external index (``None`` by default)."""
        return None
class ReferencingCheckerAds(ReferencingChecker):
    """Checker whose list of known articles comes from an ``AdsQuery``."""

    def _getPresent(self):
        """Return the result of ``AdsQuery.query_referencing`` for the collection's ISSN."""
        return AdsQuery().query_referencing(self._collection.issn)

    def _get_search_site(self):
        """Return the display name of the index."""
        return "Astrophysics data system"
class ReferencingCheckerWos(ReferencingChecker):
    """Checker whose list of known articles is parsed from an export file.

    The file is read line by line: ``TI`` lines carry a title and ``DO`` lines
    carry a DOI, the value starting at column 6 (presumably an RIS-style
    ``"TI  - ..."`` layout -- confirm against a real export).
    """

    WOS_URL = "https://www-webofscience-com.insmi.bib.cnrs.fr/wos/woscc/summary/"

    # URL suffix appended to WOS_URL, per collection pid.
    # Collections with no suffix registered yet: ACIRM, AFST, AHL, AMBP, ART,
    # CCIRM, CML, CR, IGT, JEDP, JSE, JTNB, MRR, MSIA, OGEO, OJMO, PMB, PMIHES,
    # ROIA, SMAI-JCM, TSG, SLSEDP, WBLN, XUPS, MALSM, HOUCHES.
    _journal_urls = {
        "AIF": "7be6ad27-c4e8-4d06-953f-de920648d6a1-017e12baba/relevance/1",
        "ALCO": "d746bded-1c8e-471b-8dad-fcf6ebf4d78a-0151682211/relevance/1",
        "CRBIOL": "80c0d659-b891-49bc-a75c-d99cd0d3829e-017e12e4d7/relevance/1",
        "CRGEOS": "d9839690-0a66-4f25-b548-7bac6eb7733e-017e12ead4/relevance/1",
        "CRMATH": "0597e745-d8ea-4302-929d-531a63336b9c-017e12f042/relevance/1",
        "CRCHIM": "ccbc06d0-cec5-4f11-af58-7cf65a382ff0-017e1474bf/relevance/1",
        "CRMECA": "6a085310-b201-41fe-a8f6-a54ce21a8f70-017e147a3b/relevance/1",
        "CRPHYS": "c1c357d6-1311-435b-8fed-fd053950812d-017e14b2dc/relevance/1",
        "JEP": "981b6ca8-1571-44cc-9398-6990014fa0ea-017df92ec6/relevance/1",
        "PCJ": "1d3e3435-5434-437a-b791-61e9e55b785b-017e359369/relevance/1",
    }

    def __init__(self):
        super().__init__()
        # Per-instance state: a class-level dict would be shared by every
        # checker and keep DOIs parsed for previously checked files.
        self._present = {}
        # Title of the record currently being read (set by a "TI" line).
        self._current_title = ""

    def make_journal(self, collect_id):
        """Build an empty ``Journal`` for a collection.

        Args:
            collect_id: pid of the collection.

        Returns:
            A ``Journal`` carrying the search URL when a suffix is registered
            for ``collect_id`` in ``_journal_urls``, a ``Journal`` without
            search URL otherwise, or ``None`` when the collection is not found.
        """
        self._collection = model_helpers.get_collection(collect_id)
        if self._collection is None:
            return None

        suffix = self._journal_urls.get(collect_id)
        search_url = self.WOS_URL + suffix if suffix else None
        return Journal(
            self._collection.title_sort,
            collect_id,
            self._get_search_site(),
            search_url,
        )

    def _parseline(self, line):
        """Consume one decoded line of the export file.

        A ``TI`` line records the title of the current record; a ``DO`` line
        registers the DOI in ``self._present`` together with that title; an
        ``ER`` line forgets the title so the next record cannot inherit it.
        """
        if line.startswith("TI"):
            # Remember the title until the DOI line of the same record shows up.
            self._current_title = line[6:].rstrip()
        elif line.startswith("DO"):
            doi = line[6:].rstrip()
            self._present[doi] = ArticleData(doi, self._current_title)
        elif line.startswith("ER"):
            # NOTE(review): assumes "ER" marks the end of a record -- confirm.
            self._current_title = ""

    def _getPresent(self):
        """Parse ``self._file`` and return the mapping DOI -> ``ArticleData``.

        Lines may be ``bytes`` (decoded with the default codec) or ``str``.
        """
        self._current_title = ""
        for raw_line in self._file:
            line = raw_line.decode() if isinstance(raw_line, bytes) else raw_line
            self._parseline(line)
        return self._present

    def _get_search_site(self):
        """Return the display name of the index."""
        return "Web of Science"
class Journal:
    """Result holder for one collection checked against one external index.

    Stores the collection title and pid, the display name of the index, an
    optional search URL and a list of articles that starts empty.
    """

    # Annotations that are not plain builtin types are quoted so they are
    # never evaluated when the class body runs.
    _title: str
    _collection_pid: str
    _search_site: str
    _search_url: "str | None" = None
    _articles: "list[Article]"

    def __init__(self, title, collection_pid, search_site, search_url=None):
        """Create a journal with an empty article list.

        Args:
            title: title of the collection.
            collection_pid: pid of the collection.
            search_site: display name of the external index.
            search_url: optional URL of the search on that index; any falsy
                value (``None``, ``""``) is stored as ``None``.
        """
        self._title = title
        self._collection_pid = collection_pid
        self._search_site = search_site
        self._search_url = search_url or None
        self._articles = []

    def collection_pid(self) -> str:
        """Return the pid of the collection."""
        return self._collection_pid

    def title(self) -> str:
        """Return the title of the collection."""
        return self._title

    def articles(self) -> "list[Article]":
        """Return the live (mutable) list of articles; callers append to it."""
        return self._articles

    def search_site(self) -> str:
        """Return the display name of the external index."""
        return self._search_site

    def search_url(self) -> "str | None":
        """Return the search URL, or ``None`` when none was given."""
        return self._search_url