Coverage for src/ptf_tools/indexingChecker.py: 32%

94 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2025-10-23 12:08 +0000

1from abc import ABC 

2 

3from django.db.models import Q 

4from ptf import model_helpers 

5from ptf.external.ads import AdsQuery 

6from ptf.model_data import ArticleData 

7from ptf.models import Article 

8 

9 

class ReferencingChecker(ABC):
    """Base class for listing the articles of a collection that a site does not index.

    Subclasses provide two hooks: ``_get_search_site`` (name of the site) and
    ``_getPresent`` (DOIs the site already knows, keyed by DOI).
    """

    def __init__(self):
        self._file = None
        self._journals = {}
        self._collection = None

    def check_references(self, collect_id, file=None):
        """Build a ``Journal`` holding the collection's articles missing from the site.

        Args:
            collect_id: pid of the collection to check.
            file: optional file object, stored on ``self._file`` so that
                ``_getPresent`` implementations can read it.

        Returns:
            A ``Journal`` whose ``articles()`` list contains every article of
            the collection whose DOI has no truthy entry in the mapping
            returned by ``_getPresent``; ``None`` when
            ``model_helpers.get_collection`` returns ``None``.
        """
        self._file = file
        self._collection = model_helpers.get_collection(collect_id)
        # Same guard as ReferencingCheckerWos.make_journal: without it an
        # unknown pid fails below with an AttributeError on ``title_sort``.
        if self._collection is None:
            return None

        journal = Journal(self._collection.title_sort, collect_id, self._get_search_site())
        present = self._getPresent()

        # Only articles whose DOI starts with one of these two prefixes are checked.
        candidates = Article.objects.filter(
            Q(doi__startswith="10.5802") | Q(doi__startswith="10.24072"),
            my_container__my_collection__pid=collect_id,
        ).order_by("doi")

        missing = journal.articles()
        for article in candidates:
            if not present.get(article.doi):
                # Missing DOIs are echoed to stdout, as before.
                print(article.doi)
                missing.append(article)
        return journal

    def _getPresent(self) -> dict:
        """Return the entries known to the site, keyed by DOI.

        Subclasses must override this hook; ``check_references`` calls
        ``.get(doi)`` on the returned object.
        """
        raise NotImplementedError(f"{type(self).__name__} must implement _getPresent()")

    def _get_search_site(self):
        """Return the display name of the site being checked.

        Subclasses must override this hook.
        """
        raise NotImplementedError(f"{type(self).__name__} must implement _get_search_site()")

37 

38 

class ReferencingCheckerAds(ReferencingChecker):
    """Referencing checker backed by an ``AdsQuery`` lookup."""

    def _getPresent(self):
        """Return what ``AdsQuery.query_referencing`` reports for the collection's ISSN.

        NOTE(review): the result is presumably a DOI-keyed mapping, since the
        base class calls ``.get(doi)`` on it — confirm against ``AdsQuery``.
        """
        return AdsQuery().query_referencing(self._collection.issn)

    def _get_search_site(self):
        """Return the display name of the search site."""
        site_name = "Astrophysics data system"
        return site_name

47 

48 

class ReferencingCheckerWos(ReferencingChecker):
    """Referencing checker that reads the known DOIs from an uploaded export file.

    The export is parsed line by line: lines starting with ``TI`` carry a
    title and lines starting with ``DO`` carry a DOI, the value beginning at
    the seventh character.
    NOTE(review): that layout matches RIS-style tags such as ``TI  - `` —
    confirm against a real export.
    """

    WOS_URL = "https://www-webofscience-com.insmi.bib.cnrs.fr/wos/woscc/summary/"

    # Path appended to WOS_URL for each collection pid that has one recorded.
    # Pids that were listed without a usable value in the previous version of
    # this table: ACIRM, AFST, AHL, AMBP, ART, CCIRM, CML, CR, IGT, JEDP, JSE,
    # JTNB, MRR, MSIA, OGEO, OJMO, PMB, PMIHES, ROIA, SMAI-JCM, TSG, SLSEDP,
    # WBLN, XUPS, MALSM, HOUCHES.
    _journal_urls = {
        "AIF": "7be6ad27-c4e8-4d06-953f-de920648d6a1-017e12baba/relevance/1",
        "ALCO": "d746bded-1c8e-471b-8dad-fcf6ebf4d78a-0151682211/relevance/1",
        "CRBIOL": "80c0d659-b891-49bc-a75c-d99cd0d3829e-017e12e4d7/relevance/1",
        "CRGEOS": "d9839690-0a66-4f25-b548-7bac6eb7733e-017e12ead4/relevance/1",
        "CRMATH": "0597e745-d8ea-4302-929d-531a63336b9c-017e12f042/relevance/1",
        "CRCHIM": "ccbc06d0-cec5-4f11-af58-7cf65a382ff0-017e1474bf/relevance/1",
        "CRMECA": "6a085310-b201-41fe-a8f6-a54ce21a8f70-017e147a3b/relevance/1",
        "CRPHYS": "c1c357d6-1311-435b-8fed-fd053950812d-017e14b2dc/relevance/1",
        "JEP": "981b6ca8-1571-44cc-9398-6990014fa0ea-017df92ec6/relevance/1",
        "PCJ": "1d3e3435-5434-437a-b791-61e9e55b785b-017e359369/relevance/1",
    }

    def __init__(self):
        super().__init__()
        # Per-instance state. A class-level dict here would be shared by every
        # checker, so DOIs parsed from one file would leak into later checks.
        self._present = {}
        # Title of the record currently being parsed (set by a "TI" line).
        self._current_title = ""

    def make_journal(self, collect_id):
        """Return an empty ``Journal`` for ``collect_id``.

        The journal carries a search URL (``WOS_URL`` + recorded path) when
        ``_journal_urls`` has a non-empty entry for the pid, and no URL
        otherwise.

        Returns:
            The ``Journal``, or ``None`` when ``model_helpers.get_collection``
            returns ``None``.
        """
        self._collection = model_helpers.get_collection(collect_id)
        if self._collection is None:
            return None

        search_path = self._journal_urls.get(collect_id)
        search_url = self.WOS_URL + search_path if search_path else None
        return Journal(
            self._collection.title_sort,
            collect_id,
            self._get_search_site(),
            search_url,
        )

    def _parseline(self, line):
        """Update the parsing state from one decoded line of the export.

        A ``TI`` line records the current title; a ``DO`` line stores an
        ``ArticleData`` for that DOI together with the last title seen.
        """
        if line.startswith("TI"):
            self._current_title = line[6:].rstrip()
        elif line.startswith("DO"):
            doi = line[6:].rstrip()
            self._present[doi] = ArticleData(doi, self._current_title)
        elif line.startswith("ER"):
            # NOTE(review): "ER" presumably ends a record (RIS convention);
            # forgetting the title keeps it from sticking to a later record
            # that has no "TI" line of its own.
            self._current_title = ""

    def _getPresent(self):
        """Parse ``self._file`` and return the DOI -> ``ArticleData`` mapping.

        Lines may be ``bytes`` (decoded with the default codec) or ``str``.
        Entries accumulate on the instance across calls.
        """
        self._current_title = ""
        for raw_line in self._file:
            text = raw_line.decode() if isinstance(raw_line, bytes) else raw_line
            self._parseline(text)
        return self._present

    def _get_search_site(self):
        """Return the display name of the search site."""
        return "Web of Science"

135 

136 

class Journal:
    """Result holder: a collection, the site it was checked against, and a list of articles.

    The list starts empty; callers fill it through ``articles()``, which
    returns the underlying list itself (not a copy).
    """

    # Annotations are quoted so they are never evaluated at class creation.
    _title: "str"
    _collection_pid: "str"
    _search_site: "str"
    _search_url: "str | None" = None
    _articles: "list[Article]"

    def __init__(self, title, collection_pid, search_site, search_url=None):
        """Store the journal description.

        Args:
            title: title of the collection.
            collection_pid: pid of the collection.
            search_site: display name of the site that was queried.
            search_url: optional URL of the search on that site; any falsy
                value is stored as ``None``.
        """
        self._title = title
        self._collection_pid = collection_pid
        self._search_site = search_site
        self._search_url = search_url or None
        self._articles = []

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}(title={self._title!r}, "
            f"collection_pid={self._collection_pid!r}, "
            f"search_site={self._search_site!r}, "
            f"search_url={self._search_url!r}, "
            f"articles={len(self._articles)})"
        )

    def collection_pid(self) -> "str":
        """Return the pid of the collection."""
        return self._collection_pid

    def title(self) -> "str":
        """Return the title of the collection."""
        return self._title

    def articles(self) -> "list[Article]":
        """Return the mutable list of articles attached to this journal."""
        return self._articles

    def search_site(self) -> "str":
        """Return the display name of the site that was queried."""
        return self._search_site

    def search_url(self) -> "str | None":
        """Return the search URL, or ``None`` when none was given."""
        return self._search_url