55from typing import Optional
66from lxml import etree
77
8- MATERIALSDBINDEXURL = "http://www.materialsdb.org/download/ProducerIndex.xml"
# Index documents hosted on materialsdb.org. The update routine iterates over
# this list by default and caches every index under its own file name.
MATERIALSDBINDEXURLLIST = [
    "http://www.materialsdb.org/download/ProducerIndex.xml",
    "http://www.materialsdb.org/download/generic/GenericIndex.xml",
]
912
1013
1114def get_cache_folder ():
@@ -20,12 +23,12 @@ def get_cache_folder():
2023 return cache_dir
2124
2225
23- def get_cached_index_path () -> pathlib .Path :
24- return get_cache_folder () / "ProducerIndex.xml"
def get_cached_index_path(index: str) -> pathlib.Path:
    """Return the location in the cache folder where *index* is stored.

    The cached file is named after the last path component of ``index``,
    so every index document gets its own cache file.

    Args:
        index: URL (or plain filesystem path) of an index document.

    Returns:
        ``get_cache_folder()`` joined with the file name derived from
        ``index``.

    Raises:
        ValueError: If no file name can be derived from ``index`` (for
            example a URL ending in ``/``).
    """
    from urllib.parse import urlsplit

    parts = urlsplit(str(index))
    if parts.scheme and parts.netloc:
        # Real URL: use only the URL path so that a query string or a
        # fragment never ends up in the cache file name.
        file_name = pathlib.PurePosixPath(parts.path).name
    else:
        # Not a URL: keep treating the value as a filesystem path.
        file_name = pathlib.Path(index).name
    if not file_name:
        # Without a name the result would be the cache folder itself.
        raise ValueError(
            "cannot derive a cache file name from {!r}".format(index)
        )
    return get_cache_folder() / file_name
2528
2629
27- def parse_cached_index () -> etree ._ElementTree :
28- path = get_cached_index_path ()
30+ def parse_cached_index (index ) -> etree ._ElementTree :
31+ path = get_cached_index_path (index )
2932 if path .exists ():
3033 return etree .parse (str (path ))
3134 root = etree .Element ("root" )
@@ -54,10 +57,25 @@ def get_producers_dir() -> pathlib.Path:
5457 return producer_path
5558
5659
57- def update_producers_data ():
58- cached_index = parse_cached_index ()
# Outcome of an update run. Each field holds a list; "updated" collects the
# paths of producer files that were downloaded. "existing" and "deleted" are
# presumably lists of producer paths as well -- TODO confirm against the code
# that fills them.
Report = namedtuple("Report", ["existing", "updated", "deleted"])
61+
62+
def update_producers_data(url_list=None):
    """Update the producer data from every index in *url_list*.

    Each index is processed with ``update_producers_from_index`` and the
    per-index reports are merged into a single one.

    Args:
        url_list: Iterable of index locations to process. When omitted
            (``None``), ``MATERIALSDBINDEXURLLIST`` is used; it is looked up
            at call time rather than captured when the function is defined.

    Returns:
        A ``Report`` whose ``existing``, ``updated`` and ``deleted`` lists
        are the concatenation, in ``url_list`` order, of the corresponding
        lists of every per-index report.
    """
    if url_list is None:
        url_list = MATERIALSDBINDEXURLLIST
    existing, updated, deleted = [], [], []
    for index in url_list:
        report = update_producers_from_index(index)
        existing.extend(report.existing)
        updated.extend(report.updated)
        deleted.extend(report.deleted)
    return Report(existing, updated, deleted)
73+
74+
75+ def update_producers_from_index (index ):
76+ cached_index = parse_cached_index (index )
5977 cached_root = cached_index .getroot ()
60- new_index = etree .parse (MATERIALSDBINDEXURL )
78+ new_index = etree .parse (index )
6179 new_root = new_index .getroot ()
6280 producers_dir = get_producers_dir ()
6381 has_index_update = False
@@ -78,8 +96,7 @@ def update_producers_data():
7896 urllib .request .urlretrieve (company .get ("href" ), producer_path )
7997 updated .append (producer_path )
8098 if has_index_update :
81- new_index .write (str (get_cached_index_path ()))
82- Report = namedtuple ("Report" , ["existing" , "updated" , "deleted" ])
99+ new_index .write (str (get_cached_index_path (index )))
83100 return Report (existing , updated , deleted )
84101
85102
0 commit comments