1+ /* eslint-disable prefer-destructuring */
12import type { ParsedUrlQuery } from 'querystring'
23import { findSimilarStrings } from 'src/helpers/string'
4+ import { axiosHeadOrUndefined } from 'src/io/axiosFetch'
5+ import { isGithubUrlOrShortcut , parseGitHubRepoUrlOrShortcut } from 'src/io/fetchSingleDatasetFromGithub'
36
47import { Dataset } from 'src/types'
58import {
@@ -10,9 +13,11 @@ import {
1013} from 'src/io/fetchDatasetsIndex'
1114import { getQueryParamMaybe } from 'src/io/getQueryParamMaybe'
1215import { useRecoilValue , useSetRecoilState } from 'recoil'
13- import { datasetCurrentAtom , datasetsAtom , datasetServerUrlAtom , datasetUpdatedAtom } from 'src/state/dataset.state'
16+ import { datasetCurrentAtom , datasetsAtom , datasetUpdatedAtom } from 'src/state/dataset.state'
1417import { useQuery } from 'react-query'
1518import { isNil } from 'lodash'
19+ import urljoin from 'url-join'
20+ import { URL_GITHUB_DATA_RAW } from 'src/constants'
1621
1722export async function getDatasetFromUrlParams ( urlQuery : ParsedUrlQuery , datasets : Dataset [ ] ) {
1823 // Retrieve dataset-related URL params and try to find a dataset based on these params
@@ -41,8 +46,60 @@ export async function getDatasetFromUrlParams(urlQuery: ParsedUrlQuery, datasets
4146 return dataset
4247}
4348
44- export async function initializeDatasets ( urlQuery : ParsedUrlQuery , datasetServerUrlDefault : string ) {
45- const datasetServerUrl = getQueryParamMaybe ( urlQuery , 'dataset-server' ) ?? datasetServerUrlDefault
/**
 * Builds the URL of the GitHub-hosted dataset server for the branch named by the
 * `BRANCH_NAME` environment variable, and probes it for a datasets index.
 *
 * @returns `URL_GITHUB_DATA_RAW` joined with the branch name when `BRANCH_NAME` is set and the
 *   probe of `<that URL>/data_output/index.json` yields a truthy result; `undefined` otherwise.
 */
export async function getGithubDatasetServerUrl(): Promise<string | undefined> {
  // NOTE(review): read via direct member access (not destructuring), as in the original — the file
  // disables `prefer-destructuring`, presumably so the bundler can inline the value; confirm.
  const branchName = process.env.BRANCH_NAME

  if (branchName) {
    const serverUrl = urljoin(URL_GITHUB_DATA_RAW, branchName)
    const indexJsonUrl = urljoin(serverUrl, 'data_output', 'index.json')

    // Presumably an HTTP HEAD request that resolves to `undefined` on failure — verify against the helper.
    const probeResult = await axiosHeadOrUndefined(indexJsonUrl)
    if (probeResult) {
      return serverUrl
    }
  }

  return undefined
}
64+
/**
 * Converts a root-relative path (e.g. `/` or `/data`) into an absolute URL on the app's current origin.
 *
 * @param url - URL or path to normalize.
 * @returns `window.location.origin` joined with `url` when running in a browser and `url` is
 *   root-relative; otherwise `url` unchanged (no `window` available, already-absolute URLs,
 *   protocol-relative `//host/...` URLs, and paths not starting with `/`).
 */
export function toAbsoluteUrl(url: string): string {
  // `startsWith` checks the first character only. The previous `url.slice(0) === '/'` compared the
  // WHOLE string with '/', so any path other than the bare '/' was never converted.
  // Protocol-relative URLs (`//host/path`) already name a host and must not be re-rooted.
  const isRootRelative = url.startsWith('/') && !url.startsWith('//')

  if (typeof window !== 'undefined' && isRootRelative) {
    return urljoin(window.location.origin, url)
  }
  return url
}
71+
/**
 * Decides which dataset server URL the app should use.
 *
 * Resolution order:
 *  1. `dataset-server` URL query parameter formatted as a GitHub URL or GitHub URL shortcut:
 *     converted to a `raw.githubusercontent.com` URL and returned without any checking.
 *  2. GitHub-hosted datasets for the current branch (see `getGithubDatasetServerUrl()`), when requested
 *     through the `DATA_TRY_GITHUB_BRANCH=1` env var or the `dataset-server=gh` / `dataset-server=github`
 *     URL parameter, and the branch actually contains an index file.
 *  3. Any other value of the `dataset-server` URL query parameter.
 *  4. The `DATA_FULL_DOMAIN` env var, and finally `/`.
 *
 * @param urlQuery - Parsed URL query parameters of the current page.
 * @returns The dataset server URL; a relative path is converted to an absolute URL on the app's current host.
 */
export async function getDatasetServerUrl(urlQuery: ParsedUrlQuery): Promise<string> {
  // Get dataset URL from query URL params.
  const requestedUrl = getQueryParamMaybe(urlQuery, 'dataset-server')

  // If the URL is formatted as a GitHub URL or as a GitHub URL shortcut, use it without any checking
  if (requestedUrl && isGithubUrlOrShortcut(requestedUrl)) {
    const { owner, repo, branch, path } = await parseGitHubRepoUrlOrShortcut(requestedUrl)
    return urljoin('https://raw.githubusercontent.com', owner, repo, branch, path)
  }

  // `gh` and `github` are keywords, not URLs: they only request the GitHub branch lookup below.
  // They must never be used as the server URL themselves, so that a failed lookup falls back to
  // the default instead of returning the literal string 'gh' / 'github'.
  const isGithubKeyword = requestedUrl === 'gh' || requestedUrl === 'github'
  let datasetServerUrl = isGithubKeyword ? undefined : requestedUrl

  // If requested to try GitHub-hosted datasets either using `DATA_TRY_GITHUB_BRANCH` env var (e.g. from
  // `.env` file), or using `&dataset-server=gh` or `&dataset-server=github` URL parameters, then check if the
  // corresponding branch in the default data repo on GitHub contains an `index.json` file. And if yes, use it.
  if (process.env.DATA_TRY_GITHUB_BRANCH === '1' || isGithubKeyword) {
    const githubDatasetServerUrl = await getGithubDatasetServerUrl()
    if (githubDatasetServerUrl) {
      datasetServerUrl = githubDatasetServerUrl
    }
  }

  // If none of the above, use hardcoded default URL (from `.env` file)
  datasetServerUrl = datasetServerUrl ?? process.env.DATA_FULL_DOMAIN ?? '/'

  // If the URL happens to be a relative path, then convert to absolute URL (on the app's current host)
  return toAbsoluteUrl(datasetServerUrl)
}
100+
101+ export async function initializeDatasets ( urlQuery : ParsedUrlQuery ) {
102+ const datasetServerUrl = await getDatasetServerUrl ( urlQuery )
46103
47104 const datasetsIndexJson = await fetchDatasetsIndex ( datasetServerUrl )
48105
@@ -57,11 +114,10 @@ export async function initializeDatasets(urlQuery: ParsedUrlQuery, datasetServer
57114/** Refetch dataset index periodically and update the local copy of it */
58115export function useUpdatedDatasetIndex ( ) {
59116 const setDatasetsState = useSetRecoilState ( datasetsAtom )
60- const datasetServerUrl = useRecoilValue ( datasetServerUrlAtom )
61117 useQuery (
62118 'refetchDatasetIndex' ,
63119 async ( ) => {
64- const { currentDataset : _ , ...datasetsState } = await initializeDatasets ( { } , datasetServerUrl )
120+ const { currentDataset : _ , ...datasetsState } = await initializeDatasets ( { } )
65121 setDatasetsState ( datasetsState )
66122 } ,
67123 {
@@ -89,10 +145,9 @@ export function useUpdatedDataset() {
89145 'currentDatasetState' ,
90146 async ( ) => {
91147 const path = datasetCurrent ?. path
92- const refAccession = datasetCurrent ?. attributes . reference . value
93148 const updatedAt = datasetCurrent ?. version ?. updatedAt
94- if ( ! isNil ( refAccession ) && ! isNil ( updatedAt ) ) {
95- const candidateDatasets = filterDatasets ( datasets , path , refAccession )
149+ if ( ! isNil ( updatedAt ) ) {
150+ const candidateDatasets = filterDatasets ( datasets , path )
96151 const updatedDataset = candidateDatasets . find ( ( candidate ) => {
97152 const candidateTag = candidate . version ?. updatedAt
98153 return candidateTag && candidateTag > updatedAt
0 commit comments