|
3 | 3 | import json |
4 | 4 | import os |
5 | 5 | import sys |
| 6 | +import time |
6 | 7 |
|
7 | 8 | import fsspec |
| 9 | +import httpx |
8 | 10 | import pandas as pd |
9 | | -import requests |
10 | 11 |
|
11 | 12 |
|
12 | 13 | def generate_path(*, date: datetime.date, bucket: str, category: str) -> str: |
@@ -77,43 +78,110 @@ def load_files_from_json(file_path: str) -> list[dict[str, str]]: |
77 | 78 | sys.exit(1) |
78 | 79 |
|
79 | 80 |
|
| 81 | +def _get_api_key(env: str) -> str: |
| 82 | + var = 'OFFSETS_DB_API_KEY_PRODUCTION' if env == 'production' else 'OFFSETS_DB_API_KEY_STAGING' |
| 83 | + key = os.environ.get(var) |
| 84 | + if key is None: |
| 85 | + raise ValueError(f'{var} environment variable not set') |
| 86 | + return key |
| 87 | + |
| 88 | + |
| 89 | +def _request(method: str, url: str, headers: dict, timeout: float = 30, **kwargs) -> httpx.Response: |
| 90 | + try: |
| 91 | + return httpx.request(method, url, headers=headers, timeout=timeout, **kwargs) |
| 92 | + except httpx.ConnectError: |
| 93 | + print(f'Error: could not connect to {url}') |
| 94 | + print('Is the API server running?') |
| 95 | + sys.exit(1) |
| 96 | + except httpx.TimeoutException: |
| 97 | + print(f'Error: request to {url} timed out after {timeout:.0f}s') |
| 98 | + sys.exit(1) |
| 99 | + |
| 100 | + |
| 101 | +def _poll_until_complete( |
| 102 | + *, |
| 103 | + base_url: str, |
| 104 | + file_ids: list[int], |
| 105 | + headers: dict, |
| 106 | + initial_delay: float = 2.0, |
| 107 | + max_delay: float = 30.0, |
| 108 | + timeout: float = 600.0, |
| 109 | +) -> list[dict]: |
| 110 | + """Poll file statuses with exponential backoff until all leave pending state.""" |
| 111 | + pending = set(file_ids) |
| 112 | + results: dict[int, dict] = {} |
| 113 | + deadline = time.monotonic() + timeout |
| 114 | + delay = initial_delay |
| 115 | + |
| 116 | + print(f'\nPolling status for {len(file_ids)} file(s)...') |
| 117 | + |
| 118 | + while pending: |
| 119 | + if time.monotonic() > deadline: |
| 120 | + timed_out = [str(i) for i in pending] |
| 121 | + print(f'Timed out waiting for file(s): {", ".join(timed_out)}') |
| 122 | + sys.exit(1) |
| 123 | + |
| 124 | + time.sleep(delay) |
| 125 | + delay = min(delay * 2, max_delay) |
| 126 | + |
| 127 | + for file_id in list(pending): |
| 128 | + resp = _request('GET', f'{base_url.rstrip("/")}/{file_id}', headers=headers) |
| 129 | + if not resp.is_success: |
| 130 | + print(f' [{file_id}] HTTP {resp.status_code} polling status — skipping') |
| 131 | + continue |
| 132 | + file = resp.json() |
| 133 | + if file['status'] != 'pending': |
| 134 | + pending.discard(file_id) |
| 135 | + results[file_id] = file |
| 136 | + icon = '✓' if file['status'] == 'success' else '✗' |
| 137 | + error = f' error: {file["error"]}' if file.get('error') else '' |
| 138 | + print( |
| 139 | + f' {icon} [{file_id}] {file["category"]:8s} {file["status"]:8s} {file["url"]}{error}' |
| 140 | + ) |
| 141 | + |
| 142 | + return list(results.values()) |
| 143 | + |
| 144 | + |
80 | 145 | def post_data_to_environment( |
81 | 146 | *, |
82 | 147 | env: str, |
83 | 148 | url: str, |
84 | 149 | files: list[dict[str, str]], |
| 150 | + post_timeout: float = 300, |
85 | 151 | ) -> None: |
86 | | - """Post file definitions to the API.""" |
87 | | - # Get API key from environment |
88 | | - if env == 'production': |
89 | | - api_key = os.environ.get('OFFSETS_DB_API_KEY_PRODUCTION') |
90 | | - if api_key is None: |
91 | | - raise ValueError('OFFSETS_DB_API_KEY_PRODUCTION environment variable not set') |
92 | | - else: |
93 | | - api_key = os.environ.get('OFFSETS_DB_API_KEY_STAGING') |
94 | | - if api_key is None: |
95 | | - raise ValueError('OFFSETS_DB_API_KEY_STAGING environment variable not set') |
96 | | - |
97 | 152 | headers = { |
98 | 153 | 'accept': 'application/json', |
99 | 154 | 'Content-Type': 'application/json', |
100 | | - 'X-API-KEY': api_key, |
| 155 | + 'X-API-KEY': _get_api_key(env), |
101 | 156 | } |
102 | 157 |
|
103 | | - print(f'\nSending {len(files)} files to {url}:') |
| 158 | + print(f'\nSending {len(files)} file(s) to {url}:') |
104 | 159 | for file in files: |
105 | 160 | print(f'- {file["category"]}: {file["url"]}') |
106 | 161 |
|
107 | | - # Send the request |
108 | | - response = requests.post(url, headers=headers, data=json.dumps(files)) |
| 162 | + response = _request('POST', url, headers=headers, json=files, timeout=post_timeout) |
| 163 | + if not response.is_success: |
| 164 | + print(f'\nFailed in {env}: HTTP {response.status_code} {response.reason_phrase}') |
| 165 | + if body := response.text.strip(): |
| 166 | + print(body) |
| 167 | + sys.exit(1) |
| 168 | + |
| 169 | + queued = response.json() |
| 170 | + file_ids = [f['id'] for f in queued] |
| 171 | + print(f'Queued {len(file_ids)} file(s) with ids: {file_ids}') |
109 | 172 |
|
110 | | - # Log the response |
111 | | - if response.ok: |
112 | | - print(f'\nSuccess in {env}:', response.json()) |
113 | | - else: |
114 | | - print(f'\nFailed in {env}:', response.text) |
| 173 | + results = _poll_until_complete(base_url=url, file_ids=file_ids, headers=headers) |
| 174 | + |
| 175 | + if failures := [f for f in results if f['status'] == 'failure']: |
| 176 | + print(f'\n{len(failures)} file(s) failed in {env}:') |
| 177 | + for f in failures: |
| 178 | + print(f' - [{f["id"]}] {f["url"]}') |
| 179 | + if f.get('error'): |
| 180 | + print(f' {f["error"]}') |
115 | 181 | sys.exit(1) |
116 | 182 |
|
| 183 | + print(f'\nAll {len(results)} file(s) processed successfully in {env}.') |
| 184 | + |
117 | 185 |
|
118 | 186 | def main(): |
119 | 187 | parser = argparse.ArgumentParser( |
|
0 commit comments