-
-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathsync_catalog.py
More file actions
87 lines (70 loc) · 2.6 KB
/
sync_catalog.py
File metadata and controls
87 lines (70 loc) · 2.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://aboutcode.org for more information about nexB OSS projects.
#
import json
import sys
from datetime import datetime
from datetime import timezone
from pathlib import Path
from urllib.parse import urljoin
import requests
from aboutcode.pipeline import BasePipeline
from aboutcode.pipeline import LoopProgress
# Directory that contains this script; every mirrored file is stored beneath it.
ROOT_PATH = Path(__file__).parent
# Root directory of the local catalog copy.
CATALOG_PATH = ROOT_PATH.joinpath("catalog")
# Local copy of the catalog index document.
CATALOG_INDEX = CATALOG_PATH.joinpath("index.json")
# Directory that receives the downloaded catalog page documents.
PAGE_DIRECTORY = CATALOG_PATH.joinpath("pages")
class NuGetCatalogMirror(BasePipeline):
    """
    Mirror the NuGet v3 catalog index and its pages to local JSON files.

    The pipeline runs in two steps: ``check_new_catalog`` compares the locally
    stored index with the upstream one to decide which pages to download, and
    ``collect_new_catalog`` downloads those pages and then stores the new
    index.
    """

    # Base URL of the catalog; ``index.json`` and ``page<N>.json`` are resolved
    # against it with ``urljoin``.
    url = "https://api.nuget.org/v3/catalog0/"

    @classmethod
    def steps(cls):
        """Return the pipeline steps in the order they must run."""
        return (
            cls.check_new_catalog,
            cls.collect_new_catalog,
        )

    def check_new_catalog(self):
        """
        Fetch the upstream index and compute the range of pages to collect.

        Sets ``self.latest_index`` to the freshly fetched index document and
        ``self.pages_to_collect`` to the ``range`` of page numbers to download.

        The index is deliberately NOT written to disk here: the local index is
        the only record of what has been mirrored, so it must only advance once
        the pages it describes are stored (see ``collect_new_catalog``).
        Otherwise a failed page download would be skipped for good on the next
        run.
        """
        mirrored_count = self.get_catalog_page_count()
        self.latest_index = self.fetch(urljoin(self.url, "index.json"))
        upstream_count = self.latest_index.get("count", 0)

        # Per the NuGet catalog resource documentation, new catalog items are
        # appended to the most recent page until a new page is started, so the
        # newest page mirrored by the previous run may have grown since then.
        # Re-download it together with the pages that are entirely new.
        first_page = max(mirrored_count - 1, 0)
        self.pages_to_collect = range(first_page, upstream_count)

    def collect_new_catalog(self):
        """
        Download every page in ``self.pages_to_collect``, then store the index.

        Each page is written to ``PAGE_DIRECTORY / "page<N>.json"``. The index
        fetched by ``check_new_catalog`` is written last so that an exception
        raised while downloading leaves the previous index in place and the
        next run retries the same pages.
        """
        latest_pages = list(self.pages_to_collect)
        page_count = len(latest_pages)

        self.log(f"Collecting {page_count:,d} latest catalog pages.")
        progress = LoopProgress(
            total_iterations=page_count,
            logger=self.log,
        )
        for page in progress.iter(latest_pages):
            page_id = f"page{page}.json"
            self.fetch_and_write(
                url=urljoin(self.url, page_id),
                path=PAGE_DIRECTORY / page_id,
            )

        # ``latest_index`` is only set by ``check_new_catalog``; tolerate a
        # caller that assigned ``pages_to_collect`` directly.
        latest_index = getattr(self, "latest_index", None)
        if latest_index is not None:
            self.write_json(latest_index, CATALOG_INDEX)

    def log(self, message):
        """Print ``message`` prefixed with a local-time millisecond timestamp."""
        now_local = datetime.now(timezone.utc).astimezone()
        # ``%f`` yields microseconds; dropping the last three digits leaves
        # milliseconds.
        timestamp = now_local.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
        message = f"{timestamp} {message}"
        print(message)

    def fetch(self, url):
        """
        Return the JSON document served at ``url``.

        The request uses a 5 second timeout and ``raise_for_status`` raises for
        HTTP error responses. A falsy payload is returned as an empty dict.
        """
        response = requests.get(url, timeout=5)
        response.raise_for_status()
        return response.json() or {}

    def get_catalog_page_count(self):
        """
        Return the ``count`` value of the locally stored catalog index.

        Returns 0 when no local index exists yet or when it has no ``count``
        key.
        """
        if not CATALOG_INDEX.exists():
            return 0

        with CATALOG_INDEX.open("r", encoding="utf-8") as f:
            index = json.load(f)
        return index.get("count", 0)

    def write_json(self, data, path):
        """Write ``data`` as indented JSON to ``path``, creating parent directories."""
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)

    def fetch_and_write(self, url, path):
        """Fetch the JSON document at ``url`` and store it at ``path``."""
        self.write_json(self.fetch(url), path)
if __name__ == "__main__":
    # Run all pipeline steps, print the message returned by ``execute`` and
    # hand the returned status code to the calling shell.
    pipeline = NuGetCatalogMirror()
    exit_code, message = pipeline.execute()
    print(message)
    sys.exit(exit_code)