1+ /* eslint-disable prefer-destructuring */
12import type { ParsedUrlQuery } from 'querystring'
23import { findSimilarStrings } from 'src/helpers/string'
4+ import { axiosHeadOrUndefined } from 'src/io/axiosFetch'
5+ import { isGithubUrlOrShortcut , parseGitHubRepoUrlOrShortcut } from 'src/io/fetchSingleDatasetFromGithub'
36
47import { Dataset } from 'src/types'
58import {
@@ -10,9 +13,11 @@ import {
1013} from 'src/io/fetchDatasetsIndex'
1114import { getQueryParamMaybe } from 'src/io/getQueryParamMaybe'
1215import { useRecoilValue , useSetRecoilState } from 'recoil'
13- import { datasetCurrentAtom , datasetsAtom , datasetServerUrlAtom , datasetUpdatedAtom } from 'src/state/dataset.state'
16+ import { datasetCurrentAtom , datasetsAtom , datasetUpdatedAtom } from 'src/state/dataset.state'
1417import { useQuery } from 'react-query'
1518import { isNil } from 'lodash'
19+ import urljoin from 'url-join'
20+ import { URL_GITHUB_DATA_RAW } from 'src/constants'
1621
1722export async function getDatasetFromUrlParams ( urlQuery : ParsedUrlQuery , datasets : Dataset [ ] ) {
1823 // Retrieve dataset-related URL params and try to find a dataset based on these params
@@ -41,8 +46,62 @@ export async function getDatasetFromUrlParams(urlQuery: ParsedUrlQuery, datasets
4146 return dataset
4247}
4348
44- export async function initializeDatasets ( urlQuery : ParsedUrlQuery , datasetServerUrlDefault : string ) {
45- const datasetServerUrl = getQueryParamMaybe ( urlQuery , 'dataset-server' ) ?? datasetServerUrlDefault
/**
 * Resolves the dataset server URL that corresponds to the current branch of the data repo on GitHub.
 *
 * The URL is built from `URL_GITHUB_DATA_RAW`, the `BRANCH_NAME` env var and the `data_output` directory.
 * It is returned only if `axiosHeadOrUndefined` yields a truthy result for the `index.json` file under
 * that URL (presumably meaning the file exists and is reachable; confirm against the helper).
 *
 * @returns the GitHub-hosted dataset server URL, or `undefined` when `BRANCH_NAME` is unset or empty,
 *   or when the probe of `index.json` yields nothing
 */
export async function getGithubDatasetServerUrl(): Promise<string | undefined> {
  // Read via direct member access rather than destructuring (the file disables `prefer-destructuring`
  // for this; presumably because env vars are substituted textually at build time, TODO confirm).
  const branchName = process.env.BRANCH_NAME

  if (branchName) {
    const serverUrl = urljoin(URL_GITHUB_DATA_RAW, branchName, 'data_output')
    const indexJsonUrl = urljoin(serverUrl, 'index.json')

    // Only advertise the server URL if its index file could be probed
    const probeResult = await axiosHeadOrUndefined(indexJsonUrl)
    return probeResult ? serverUrl : undefined
  }

  return undefined
}
66+
/**
 * Converts a root-relative URL (one beginning with `/`) into an absolute URL on the app's current origin.
 *
 * When `window` is not defined (e.g. outside of a browser), or when the URL does not begin with `/`,
 * the input is returned unchanged.
 *
 * @param url - URL or path to convert
 * @returns `window.location.origin` joined with `url` when the conversion applies, otherwise `url` as given
 */
export function toAbsoluteUrl(url: string): string {
  // Check only the leading character. Comparing `url.slice(0)` (the whole string) against '/' would
  // match nothing but the bare root path, leaving paths such as `/data` unconverted.
  if (typeof window !== 'undefined' && url.startsWith('/')) {
    return urljoin(window.location.origin, url)
  }
  return url
}
73+
/**
 * Decides which dataset server URL the app should use.
 *
 * Resolution order:
 *  1. If the `dataset-server` URL param is a GitHub URL or GitHub URL shortcut, it is converted to a
 *     `raw.githubusercontent.com` URL and returned without any further checking.
 *  2. If GitHub-hosted datasets are requested, either through the `DATA_TRY_GITHUB_BRANCH=1` env var or
 *     through `dataset-server=gh` / `dataset-server=github`, and `getGithubDatasetServerUrl()` yields
 *     a URL, that URL is used.
 *  3. Otherwise the `dataset-server` param is used as given, unless it is one of the `gh` / `github` keywords.
 *  4. Otherwise the `DATA_FULL_DOMAIN` env var is used, falling back to `/`.
 *
 * The result of steps 2-4 is passed through `toAbsoluteUrl()`.
 *
 * @param urlQuery - parsed query params of the current URL
 * @returns the dataset server URL
 */
export async function getDatasetServerUrl(urlQuery: ParsedUrlQuery): Promise<string> {
  // Get dataset URL from query URL params.
  const requestedUrl = getQueryParamMaybe(urlQuery, 'dataset-server')

  // If the URL is formatted as a GitHub URL or as a GitHub URL shortcut, use it without any checking
  if (requestedUrl && isGithubUrlOrShortcut(requestedUrl)) {
    const { owner, repo, branch, path } = await parseGitHubRepoUrlOrShortcut(requestedUrl)
    return urljoin('https://raw.githubusercontent.com', owner, repo, branch, path)
  }

  // `gh` and `github` are keywords requesting the GitHub-hosted datasets, not URLs. They must never be
  // used as the server URL themselves, otherwise a failed GitHub lookup would leave the app fetching
  // from the literal location `gh` instead of falling back to the default.
  const isGithubKeyword = Boolean(requestedUrl) && ['gh', 'github'].includes(requestedUrl as string)
  let datasetServerUrl = isGithubKeyword ? undefined : requestedUrl

  // If requested to try GitHub-hosted datasets either using `DATA_TRY_GITHUB_BRANCH` env var (e.g. from
  // `.env` file), or using `&dataset-server=gh` or `&dataset-server=github` URL parameters, then check if the
  // corresponding branch in the default data repo on GitHub contains an `index.json` file. And if yes, use it.
  if (process.env.DATA_TRY_GITHUB_BRANCH === '1' || isGithubKeyword) {
    const githubDatasetServerUrl = await getGithubDatasetServerUrl()
    if (githubDatasetServerUrl) {
      datasetServerUrl = githubDatasetServerUrl
    }
  }

  // If none of the above, use hardcoded default URL (from `.env` file).
  // If the URL happens to be a relative path, then convert to absolute URL (on the app's current host).
  return toAbsoluteUrl(datasetServerUrl ?? process.env.DATA_FULL_DOMAIN ?? '/')
}
102+
103+ export async function initializeDatasets ( urlQuery : ParsedUrlQuery ) {
104+ const datasetServerUrl = await getDatasetServerUrl ( urlQuery )
46105
47106 const datasetsIndexJson = await fetchDatasetsIndex ( datasetServerUrl )
48107
@@ -57,11 +116,10 @@ export async function initializeDatasets(urlQuery: ParsedUrlQuery, datasetServer
57116/** Refetch dataset index periodically and update the local copy of it */
58117export function useUpdatedDatasetIndex ( ) {
59118 const setDatasetsState = useSetRecoilState ( datasetsAtom )
60- const datasetServerUrl = useRecoilValue ( datasetServerUrlAtom )
61119 useQuery (
62120 'refetchDatasetIndex' ,
63121 async ( ) => {
64- const { currentDataset : _ , ...datasetsState } = await initializeDatasets ( { } , datasetServerUrl )
122+ const { currentDataset : _ , ...datasetsState } = await initializeDatasets ( { } )
65123 setDatasetsState ( datasetsState )
66124 } ,
67125 {
@@ -89,10 +147,9 @@ export function useUpdatedDataset() {
89147 'currentDatasetState' ,
90148 async ( ) => {
91149 const path = datasetCurrent ?. path
92- const refAccession = datasetCurrent ?. attributes . reference . value
93150 const updatedAt = datasetCurrent ?. version ?. updatedAt
94- if ( ! isNil ( refAccession ) && ! isNil ( updatedAt ) ) {
95- const candidateDatasets = filterDatasets ( datasets , path , refAccession )
151+ if ( ! isNil ( updatedAt ) ) {
152+ const candidateDatasets = filterDatasets ( datasets , path )
96153 const updatedDataset = candidateDatasets . find ( ( candidate ) => {
97154 const candidateTag = candidate . version ?. updatedAt
98155 return candidateTag && candidateTag > updatedAt
0 commit comments