Source code for azure.ai.ml.entities._job.parallel.run_function

# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------


from typing import Any, Optional, Union

from azure.ai.ml.constants import ParallelTaskType
from azure.ai.ml.entities._assets.environment import Environment

from .parallel_task import ParallelTask


class RunFunction(ParallelTask):
    """Run Function.

    :param code: A local or remote path pointing at source code.
    :type code: str
    :param entry_script: User script which will be run in parallel on multiple nodes. This is specified
        as a local file path. The entry_script should contain two functions:
        ``init()``: this function should be used for any costly or common preparation for subsequent
        inferences, e.g., deserializing and loading the model into a global object.
        ``run(mini_batch)``: The method to be parallelized. Each invocation will have one mini-batch.
        'mini_batch': Batch inference will invoke the run method and pass either a list or a Pandas
        DataFrame as an argument to the method. Each entry in mini_batch will be a filepath if the input
        is a FileDataset, or a Pandas DataFrame if the input is a TabularDataset.
        The run() method should return a Pandas DataFrame or an array. For the append_row output_action,
        these returned elements are appended into the common output file. For summary_only, the contents
        of the elements are ignored. For all output actions, each returned output element indicates one
        successful inference of an input element in the input mini-batch.
        Each parallel worker process will call `init` once and then loop over the `run` function until
        all mini-batches are processed.
    :type entry_script: str
    :param program_arguments: The arguments of the parallel task.
    :type program_arguments: str
    :param model: The model of the parallel task.
    :type model: str
    :param append_row_to: All values output by run() method invocations will be aggregated into one
        unique file which is created in the output location. If it is not set, 'summary_only' is used,
        which means the user script is expected to store the output itself.
    :type append_row_to: str
    :param environment: Environment that the training job will run in.
    :type environment: Union[Environment, str]
    """

    def __init__(
        self,
        *,
        code: Optional[str] = None,
        entry_script: Optional[str] = None,
        program_arguments: Optional[str] = None,
        model: Optional[str] = None,
        append_row_to: Optional[str] = None,
        environment: Optional[Union[Environment, str]] = None,
        **kwargs: Any,
    ):
        super().__init__(
            code=code,
            entry_script=entry_script,
            program_arguments=program_arguments,
            model=model,
            append_row_to=append_row_to,
            environment=environment,
            type=ParallelTaskType.RUN_FUNCTION,
        )
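
# Example (not part of the SDK source): a minimal sketch of an entry script
# satisfying the init()/run(mini_batch) contract described in the docstring
# above. The helpers `load_model` and `score_file`, the model filename, and
# the file-based input are illustrative assumptions, not azure-ai-ml APIs.
#
#     import pandas as pd
#
#     model = None
#
#     def init():
#         # One-time, costly preparation shared by all run() calls in this
#         # worker process, e.g. deserializing a model into a global object.
#         global model
#         model = load_model("model.pkl")  # hypothetical helper
#
#     def run(mini_batch):
#         # Called once per mini-batch. For a file-based input, each entry is
#         # a file path; return one output element per successfully processed
#         # input element (a Pandas DataFrame or an array).
#         results = []
#         for file_path in mini_batch:
#             results.append(score_file(model, file_path))  # hypothetical helper
#         return pd.DataFrame(results)
#
# Usage sketch, assuming the entry script above is saved as ./src/score.py,
# an environment named "my-env" exists in the workspace, and the job defines
# an output named job_output_file (all names are illustrative):
#
#     task = RunFunction(
#         code="./src",
#         entry_script="score.py",
#         program_arguments="--verbose",
#         append_row_to="${{outputs.job_output_file}}",
#         environment="azureml:my-env:1",
#     )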