Skip to content
126 changes: 91 additions & 35 deletions src/commands/run.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import type { ExecaError } from 'execa';
import mime from 'mime';
import { minVersion } from 'semver';

import { APIFY_ENV_VARS } from '@apify/consts';
import { ACTOR_ENV_VARS, APIFY_ENV_VARS } from '@apify/consts';
import { validateInputSchema, validateInputUsingValidator } from '@apify/input_schema';

import { ApifyCommand, StdinMode } from '../lib/command-framework/apify-command.js';
Expand All @@ -26,6 +26,7 @@ import { useActorConfig } from '../lib/hooks/useActorConfig.js';
import { ProjectLanguage, useCwdProject } from '../lib/hooks/useCwdProject.js';
import { useModuleVersion } from '../lib/hooks/useModuleVersion.js';
import { getAjvValidator, getDefaultsFromInputSchema, readInputSchema } from '../lib/input_schema.js';
import { CRAWLEE_INPUT_KEY_ENV, resolveInputKey, TEMP_INPUT_KEY_PREFIX } from '../lib/input-key.js';
import { error, info, warning } from '../lib/outputs.js';
import { replaceSecretsValue } from '../lib/secrets.js';
import {
Expand All @@ -42,6 +43,19 @@ import {
purgeDefaultQueue,
} from '../lib/utils.js';

interface TempInputResult {
tempInputKey: string;
tempInputFilePath: string;
}

interface OverwrittenInputResult {
existingInput: ReturnType<typeof getLocalInput>;
inputFilePath: string;
writtenAt: number;
}

type ValidateAndStoreInputResult = TempInputResult | OverwrittenInputResult;

enum RunType {
DirectFile = 0,
Module = 1,
Expand Down Expand Up @@ -124,6 +138,7 @@ export class RunCommand extends ApifyCommand<typeof RunCommand> {
const { config: localConfig } = localConfigResult.unwrap();

const actualStoragePath = getLocalStorageDir();
const resolvedInputKey = resolveInputKey();

const projectRuntimeResult = await useCwdProject({ cwd });

Expand Down Expand Up @@ -233,13 +248,17 @@ export class RunCommand extends ApifyCommand<typeof RunCommand> {
CRAWLEE_PURGE_ON_START = '1';

if (crawleeVersion.isNone()) {
await Promise.all([purgeDefaultQueue(), purgeDefaultKeyValueStore(), purgeDefaultDataset()]);
await Promise.all([
purgeDefaultQueue(),
purgeDefaultKeyValueStore(resolvedInputKey),
purgeDefaultDataset(),
]);
info({ message: 'All default local stores were purged.' });
}
}

if (!this.flags.purge) {
const isStorageEmpty = await checkIfStorageIsEmpty();
const isStorageEmpty = await checkIfStorageIsEmpty(resolvedInputKey);

if (!isStorageEmpty && !this.flags.resurrect) {
warning({
Expand All @@ -258,13 +277,35 @@ export class RunCommand extends ApifyCommand<typeof RunCommand> {
return;
}

const storedInputResults = await this.validateAndStoreInput(inputOverride);
const storedInputResults = await this.validateAndStoreInput(inputOverride, resolvedInputKey);

// When a temp input file was created, disable crawlee's purge so it doesn't
// delete the temp file (its name doesn't match the input key regex that purge skips).
// Also determine the effective input key for env vars (temp key overrides resolved key).
let effectiveInputKey = resolvedInputKey;
if (storedInputResults && 'tempInputKey' in storedInputResults) {
if (this.flags.purge && crawleeVersion.isSome()) {
// Crawlee would have purged on start, but we need to disable that to protect
// the temp file. Purge from CLI side instead, preserving both input files.
await Promise.all([
purgeDefaultQueue(),
purgeDefaultKeyValueStore(resolvedInputKey, storedInputResults.tempInputKey),
purgeDefaultDataset(),
]);
}
CRAWLEE_PURGE_ON_START = '0';
effectiveInputKey = storedInputResults.tempInputKey;
}

// Attach env vars from local config files
// Attach env vars from local config files.
// Set all three input key env vars so both Node.js and Python SDKs pick up the resolved key.
const localEnvVars: Record<string, string> = {
[APIFY_ENV_VARS.LOCAL_STORAGE_DIR]: actualStoragePath,
CRAWLEE_STORAGE_DIR: actualStoragePath,
CRAWLEE_PURGE_ON_START,
[ACTOR_ENV_VARS.INPUT_KEY]: effectiveInputKey,
[APIFY_ENV_VARS.INPUT_KEY]: effectiveInputKey,
[CRAWLEE_INPUT_KEY_ENV]: effectiveInputKey,
};

if (proxy && proxy.password) localEnvVars[APIFY_ENV_VARS.PROXY_PASSWORD] = proxy.password;
Expand All @@ -279,8 +320,8 @@ export class RunCommand extends ApifyCommand<typeof RunCommand> {
Object.assign(localEnvVars, updatedEnv);
}

// NOTE: User can overwrite env vars
const env = Object.assign(localEnvVars, process.env);
// localEnvVars must take priority so the CLI can redirect the SDK to temp input files
const env = { ...process.env, ...localEnvVars };

if (!userId) {
warning({
Expand Down Expand Up @@ -393,7 +434,10 @@ export class RunCommand extends ApifyCommand<typeof RunCommand> {
}
} finally {
if (storedInputResults) {
if (storedInputResults.existingInput) {
if ('tempInputKey' in storedInputResults) {
// Temp input file: just delete it, user's INPUT.json was never touched
await deleteFile(storedInputResults.tempInputFilePath);
} else if (storedInputResults.existingInput) {
// Check if the input file was modified since we modified it. If it was, we abort the re-overwrite and warn the user
const stats = await stat(storedInputResults.inputFilePath);

Expand All @@ -420,10 +464,17 @@ export class RunCommand extends ApifyCommand<typeof RunCommand> {
}

/**
* Ensures the input that the actor will be ran with locally matches the input schema (and prefills default values if missing)
* Validates the input against the input schema and writes to disk only when necessary.
* When the user already has an input file and no override is provided, it writes the
* merged defaults to a separate temp file so the user's file is never touched.
* The caller redirects the SDK to the temp file via the ACTOR_INPUT_KEY env var.
* @param inputOverride Optional input received through command flags
* @param resolvedInputKey The input key resolved from env vars (default "INPUT")
*/
private async validateAndStoreInput(inputOverride?: { input: Record<string, unknown>; source: string }) {
private async validateAndStoreInput(
inputOverride?: { input: Record<string, unknown>; source: string },
resolvedInputKey = 'INPUT',
): Promise<ValidateAndStoreInputResult | null> {
const { inputSchema } = await readInputSchema({ cwd: process.cwd() });

if (!inputSchema) {
Expand All @@ -432,22 +483,18 @@ export class RunCommand extends ApifyCommand<typeof RunCommand> {
}

// We cannot validate input schema if it is not found -> default to no validation and overriding if flags are given
const existingInput = getLocalInput(process.cwd());
// Write the override to a temp file so the user's input file is never touched.
const defaultStorePath = join(process.cwd(), getLocalKeyValueStorePath());
await mkdir(defaultStorePath, { recursive: true });

// Prepare the file path for where we'll temporarily store the validated input
const inputFilePath = join(
process.cwd(),
getLocalKeyValueStorePath(),
existingInput?.fileName ?? 'INPUT.json',
);
const tempInputKey = `${TEMP_INPUT_KEY_PREFIX}${resolvedInputKey}`;
const tempInputFilePath = join(defaultStorePath, `${tempInputKey}.json`);

await mkdir(dirname(inputFilePath), { recursive: true });
await writeFile(inputFilePath, JSON.stringify(inputOverride.input, null, 2));
await writeFile(tempInputFilePath, JSON.stringify(inputOverride.input, null, 2));

return {
existingInput,
inputFilePath,
writtenAt: Date.now(),
tempInputKey,
tempInputFilePath,
};
}

Expand All @@ -458,11 +505,15 @@ export class RunCommand extends ApifyCommand<typeof RunCommand> {
const defaults = getDefaultsFromInputSchema(inputSchema);
const compiledInputSchema = getAjvValidator(inputSchema, validator);

// Step 2: try to fetch the existing INPUT from the local storage
const existingInput = getLocalInput(process.cwd());
// Step 2: try to fetch the existing input from the local storage
const existingInput = getLocalInput(process.cwd(), resolvedInputKey);

// Prepare the file path for where we'll temporarily store the validated input
const inputFilePath = join(process.cwd(), getLocalKeyValueStorePath(), existingInput?.fileName ?? 'INPUT.json');
const inputFilePath = join(
process.cwd(),
getLocalKeyValueStorePath(),
existingInput?.fileName ?? `${resolvedInputKey}.json`,
);

let errorHeader: string;

Expand Down Expand Up @@ -501,13 +552,16 @@ export class RunCommand extends ApifyCommand<typeof RunCommand> {
);
}

// Write to a temp file so the user's input file is never touched.
const tempInputKey = `${TEMP_INPUT_KEY_PREFIX}${resolvedInputKey}`;
const tempInputFilePath = join(dirname(inputFilePath), `${tempInputKey}.json`);

await mkdir(dirname(inputFilePath), { recursive: true });
await writeFile(inputFilePath, JSON.stringify(fullInputOverride, null, 2));
await writeFile(tempInputFilePath, JSON.stringify(fullInputOverride, null, 2));

return {
existingInput,
inputFilePath,
writtenAt: Date.now(),
tempInputKey,
tempInputFilePath,
};
}

Expand Down Expand Up @@ -546,14 +600,16 @@ export class RunCommand extends ApifyCommand<typeof RunCommand> {
);
}

// Step 4: store the input
await mkdir(dirname(inputFilePath), { recursive: true });
await writeFile(inputFilePath, JSON.stringify(fullInput, null, 2));
// Write merged input to a temp file so the user's INPUT.json is never touched.
// The SDK is redirected to this file via the ACTOR_INPUT_KEY env var.
const tempInputKey = `${TEMP_INPUT_KEY_PREFIX}${resolvedInputKey}`;
const tempInputFilePath = join(dirname(inputFilePath), `${tempInputKey}.json`);

await writeFile(tempInputFilePath, JSON.stringify(fullInput, null, 2));

return {
existingInput,
inputFilePath,
writtenAt: Date.now(),
tempInputKey,
tempInputFilePath,
};
}

Expand Down
4 changes: 1 addition & 3 deletions src/lib/consts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import { homedir } from 'node:os';
import { join } from 'node:path';

import { KEY_VALUE_STORE_KEYS, META_ORIGINS } from '@apify/consts';
import { META_ORIGINS } from '@apify/consts';

import pkg from '../../package.json' with { type: 'json' };

Expand Down Expand Up @@ -49,8 +49,6 @@ export const LOCAL_CONFIG_NAME = 'actor.json';

export const LOCAL_CONFIG_PATH = join(ACTOR_SPECIFICATION_FOLDER, LOCAL_CONFIG_NAME);

export const INPUT_FILE_REG_EXP = new RegExp(`(^${KEY_VALUE_STORE_KEYS.INPUT}(?:\\.[^.]+)?$)`);

export const SUPPORTED_NODEJS_VERSION = pkg.engines.node;

export const APIFY_CLIENT_DEFAULT_HEADERS = { 'X-Apify-Request-Origin': META_ORIGINS.CLI };
Expand Down
28 changes: 28 additions & 0 deletions src/lib/input-key.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import escapeStringRegexp from 'escape-string-regexp';

import { ACTOR_ENV_VARS, APIFY_ENV_VARS } from '@apify/consts';

export const CRAWLEE_INPUT_KEY_ENV = 'CRAWLEE_INPUT_KEY';

export const TEMP_INPUT_KEY_PREFIX = '__CLI_';

/**
* Resolves the input key from environment variables in priority order:
* ACTOR_INPUT_KEY > APIFY_INPUT_KEY > CRAWLEE_INPUT_KEY > "INPUT"
*/
export function resolveInputKey(): string {
return (
process.env[ACTOR_ENV_VARS.INPUT_KEY] ||
process.env[APIFY_ENV_VARS.INPUT_KEY] ||
process.env[CRAWLEE_INPUT_KEY_ENV] ||
'INPUT'
);
}

/**
* Creates a RegExp that matches the given key with an optional file extension.
* e.g. inputFileRegExp('INPUT') matches 'INPUT', 'INPUT.json', 'INPUT.bin'
*/
export function inputFileRegExp(key: string): RegExp {
return new RegExp(`(^${escapeStringRegexp(key)}(?:\\.[^.]+)?$)`);
}
20 changes: 10 additions & 10 deletions src/lib/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,12 @@ import {
CommandExitCodes,
DEFAULT_LOCAL_STORAGE_DIR,
GLOBAL_CONFIGS_FOLDER,
INPUT_FILE_REG_EXP,
LOCAL_CONFIG_PATH,
MINIMUM_SUPPORTED_PYTHON_VERSION,
SUPPORTED_NODEJS_VERSION,
} from './consts.js';
import { deleteFile, ensureFolderExistsSync, rimrafPromised } from './files.js';
import { inputFileRegExp, TEMP_INPUT_KEY_PREFIX } from './input-key.js';
import type { AuthJSON } from './types.js';
import { cliDebugPrint } from './utils/cliDebugPrint.js';

Expand Down Expand Up @@ -504,15 +504,15 @@ export const createActZip = async (zipName: string, pathsToZip: string[], cwd: s
/**
* Get Actor input from local store
*/
export const getLocalInput = (cwd: string) => {
export const getLocalInput = (cwd: string, inputKey?: string) => {
const defaultLocalStorePath = getLocalKeyValueStorePath();

const folderExists = existsSync(join(cwd, defaultLocalStorePath));

if (!folderExists) return;

const files = readdirSync(join(cwd, defaultLocalStorePath));
const inputName = files.find((file) => !!file.match(INPUT_FILE_REG_EXP));
const inputName = files.find((file) => !!file.match(inputFileRegExp(inputKey ?? 'INPUT')));

// No input file
if (!inputName) return;
Expand All @@ -536,16 +536,17 @@ export const purgeDefaultDataset = async () => {
}
};

export const purgeDefaultKeyValueStore = async () => {
export const purgeDefaultKeyValueStore = async (...inputKeys: string[]) => {
const defaultKeyValueStorePath = getLocalKeyValueStorePath();
if (!existsSync(getLocalStorageDir()) || !existsSync(defaultKeyValueStorePath)) {
return;
}
const filesToDelete = readdirSync(defaultKeyValueStorePath);
const preserveRegExps = (inputKeys.length > 0 ? inputKeys : ['INPUT']).map(inputFileRegExp);

const deletePromises: Promise<void>[] = [];
filesToDelete.forEach((file) => {
if (!file.match(INPUT_FILE_REG_EXP)) {
if (!preserveRegExps.some((re) => re.test(file))) {
deletePromises.push(deleteFile(join(defaultKeyValueStorePath, file)));
}
});
Expand Down Expand Up @@ -626,13 +627,12 @@ export const getNpmCmd = (): string => {
/**
* Returns true if apify storage is empty (expect INPUT.*)
*/
export const checkIfStorageIsEmpty = async () => {
export const checkIfStorageIsEmpty = async (inputKey?: string) => {
const key = inputKey || KEY_VALUE_STORE_KEYS.INPUT;
const filesWithoutInput = await glob([
`${getLocalStorageDir()}/**`,
// Omit INPUT.* file
`!${getLocalKeyValueStorePath()}/${KEY_VALUE_STORE_KEYS.INPUT}.*`,
// Omit INPUT_CLI-* files
`!${getLocalKeyValueStorePath()}/${KEY_VALUE_STORE_KEYS.INPUT}_CLI-*`,
`!${getLocalKeyValueStorePath()}/${key}.*`,
`!${getLocalKeyValueStorePath()}/${TEMP_INPUT_KEY_PREFIX}${key}.*`,
]);

return filesWithoutInput.length === 0;
Expand Down
11 changes: 7 additions & 4 deletions test/local/commands/crawlee/run.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,22 +68,25 @@ describe('apify run', () => {
expect(lastErrorMessage()).toMatch(/Field awesome is required/i);
});

it('prefills input with defaults', async () => {
it('validates input without modifying file', async () => {
await writeFile(inputPath, originalInput);

await testRunCommand(RunCommand, {});

const output = JSON.parse(await readFile(outputPath, 'utf8'));
expect(output).toStrictEqual({ awesome: true, help: 'this_maze_is_not_meant_for_you' });

const inputAfterRun = await readFile(inputPath, 'utf8');
expect(inputAfterRun).toBe(originalInput);
});

it('should restore the original input file after run', async () => {
it('does not modify input file during run', async () => {
await writeFile(inputPath, originalInputWithExtraField);

await testRunCommand(RunCommand, {});

const input = JSON.parse(await readFile(inputPath, 'utf8'));
expect(input).toStrictEqual({ awesome: true, extra: 'field' });
const inputAfterRun = await readFile(inputPath, 'utf8');
expect(inputAfterRun).toBe(originalInputWithExtraField);

const output = JSON.parse(await readFile(outputPath, 'utf8'));
expect(output).toStrictEqual({ awesome: true, help: 'this_maze_is_not_meant_for_you', extra: 'field' });
Expand Down
Loading