From 5f197cbbafbe4b7731280521691b0c0fc19f9a3e Mon Sep 17 00:00:00 2001 From: Rita Agarwala Date: Wed, 29 Apr 2026 14:12:29 +0530 Subject: [PATCH 01/19] Improving external SDK for function --- src/datacustomcode/cli.py | 86 +++++- src/datacustomcode/deploy.py | 41 +++ .../function/feature_types/chunking.py | 23 +- src/datacustomcode/function_utils.py | 245 ++++++++++++++++++ src/datacustomcode/run.py | 62 ++++- src/datacustomcode/template.py | 30 ++- src/datacustomcode/templates/__init__.py | 0 .../function/chunking/payload/config.json | 1 + .../function/chunking/payload/entrypoint.py | 74 ++++++ 9 files changed, 532 insertions(+), 30 deletions(-) create mode 100644 src/datacustomcode/function_utils.py create mode 100644 src/datacustomcode/templates/__init__.py create mode 100644 src/datacustomcode/templates/function/chunking/payload/config.json create mode 100644 src/datacustomcode/templates/function/chunking/payload/entrypoint.py diff --git a/src/datacustomcode/cli.py b/src/datacustomcode/cli.py index c6e9c5c..33a064c 100644 --- a/src/datacustomcode/cli.py +++ b/src/datacustomcode/cli.py @@ -74,6 +74,29 @@ def _configure_client_credentials( ) +def _generate_function_test_file(entrypoint_path: str) -> Optional[str]: + """Generate test.json file for a function. 
+ + Args: + entrypoint_path: Path to the function's entrypoint.py + + Returns: + Path to generated test.json, or None if generation failed + """ + from datacustomcode.template import generate_test_json + + tests_dir = os.path.join(os.path.dirname(entrypoint_path), "tests") + os.makedirs(tests_dir, exist_ok=True) + test_json_path = os.path.join(tests_dir, "test.json") + + try: + generate_test_json(entrypoint_path, test_json_path) + return test_json_path + except Exception as e: + logger.warning(f"Could not generate test.json: {e}") + return None + + @cli.command() @click.option("--profile", default="default", help="Credential profile name") @click.option( @@ -162,7 +185,6 @@ def zip(path: str, network: str): Choose based on your workload requirements.""", ) -@click.option("--function-invoke-opt") @click.option( "--sf-cli-org", default=None, @@ -176,13 +198,14 @@ def deploy( cpu_size: str, profile: str, network: str, - function_invoke_opt: str, sf_cli_org: Optional[str], ): from datacustomcode.deploy import ( COMPUTE_TYPES, CodeExtensionMetadata, + USE_IN_FEATURE_MAPPING_FOR_CONNECT_API, deploy_full, + infer_use_in_feature, ) from datacustomcode.token_provider import ( CredentialsTokenProvider, @@ -211,15 +234,21 @@ def deploy( ) if package_type == "function": - if not function_invoke_opt: + # Infer use_in_feature from function signature + entrypoint_path = os.path.join(path, "entrypoint.py") + use_in_feature = infer_use_in_feature(entrypoint_path) + if use_in_feature: + logger.info(f"Inferred use_in_feature: {use_in_feature}") + else: click.secho( - "Error: Function invoke options are required for function package type", + "Error: Could not infer function invoke options. 
Please provide --use-in-feature", fg="red", ) raise click.Abort() - else: - function_invoke_options = function_invoke_opt.split(",") - metadata.functionInvokeOptions = function_invoke_options + + # Map user-provided feature names to API names + mapped_feature = USE_IN_FEATURE_MAPPING_FOR_CONNECT_API.get(use_in_feature, use_in_feature) + metadata.functionInvokeOptions = [mapped_feature] try: if sf_cli_org: @@ -238,19 +267,29 @@ def deploy( @click.option( "--code-type", default="script", type=click.Choice(["script", "function"]) ) -def init(directory: str, code_type: str): +@click.option( + "--use-in-feature", + default="SearchIndexChunking", + help="Feature to invoke the function (only applicable for functions). If not provided, will be inferred from function signature.", +) +def init(directory: str, code_type: str, use_in_feature: Optional[str]): from datacustomcode.scan import ( dc_config_json_from_file, update_config, write_sdk_config, ) - from datacustomcode.template import copy_function_template, copy_script_template + from datacustomcode.template import ( + copy_function_template, + copy_script_template, + ) click.echo("Copying template to " + click.style(directory, fg="blue", bold=True)) if code_type == "script": copy_script_template(directory) elif code_type == "function": - copy_function_template(directory) + # Default to SearchIndexChunking if not provided + feature = use_in_feature + copy_function_template(directory, feature) entrypoint_path = os.path.join(directory, "payload", "entrypoint.py") config_location = os.path.join(os.path.dirname(entrypoint_path), "config.json") @@ -265,6 +304,7 @@ def init(directory: str, code_type: str): updated_config_json = update_config(entrypoint_path) with open(config_location, "w") as f: json.dump(updated_config_json, f, indent=2) + click.echo( "Start developing by updating the code in " + click.style(entrypoint_path, fg="blue", bold=True) @@ -275,6 +315,23 @@ def init(directory: str, code_type: str): + " to 
automatically update config.json when you make changes to your code" ) + # Generate test.json for functions + if code_type == "function": + test_json_path = _generate_function_test_file(entrypoint_path) + if test_json_path: + click.echo( + "Generated test file at " + + click.style(test_json_path, fg="blue", bold=True) + ) + click.echo( + "Test your function locally with " + + click.style( + f"datacustomcode run {entrypoint_path} --test_with {test_json_path}", + fg="blue", + bold=True, + ) + ) + @cli.command() @click.argument("filename") @@ -312,6 +369,12 @@ def scan(filename: str, config: str, dry_run: bool, no_requirements: bool): @click.option("--config-file", default=None) @click.option("--dependencies", default=[], multiple=True) @click.option("--profile", default="default") +@click.option( + "--test_with", + default=None, + type=click.Path(exists=True), + help="Path to test JSON file for function testing", +) @click.option( "--sf-cli-org", default=None, @@ -322,10 +385,11 @@ def run( config_file: Union[str, None], dependencies: List[str], profile: str, + test_with: Optional[str], sf_cli_org: Optional[str], ): from datacustomcode.run import run_entrypoint run_entrypoint( - entrypoint, config_file, dependencies, profile, sf_cli_org=sf_cli_org + entrypoint, config_file, dependencies, profile, test_file=test_with, sf_cli_org=sf_cli_org ) diff --git a/src/datacustomcode/deploy.py b/src/datacustomcode/deploy.py index 114252a..4249a59 100644 --- a/src/datacustomcode/deploy.py +++ b/src/datacustomcode/deploy.py @@ -65,6 +65,47 @@ def _sanitize_api_name(name: str) -> str: return sanitized +# Mapping from user-facing feature names to internal API names +USE_IN_FEATURE_MAPPING_FOR_CONNECT_API = { + "SearchIndexChunking": "UnstructuredChunking", +} + +# Mapping from Pydantic request/response types to feature names +REQUEST_TYPE_TO_FEATURE = { + "SearchIndexChunkingV1Request": "SearchIndexChunking", + "SearchIndexChunkingV1Response": "SearchIndexChunking", +} + +def 
infer_use_in_feature(entrypoint_path: str) -> Union[str, None]: + """Infer the use_in_feature from function signature. + + Checks both the request parameter type and return type annotation. + Both must map to the same feature for a valid inference. + + Args: + entrypoint_path: Path to the entrypoint.py file + + Returns: + The feature name if both request and response match, None otherwise + """ + from datacustomcode.function_utils import inspect_function_types + + request_type_name, response_type_name = inspect_function_types(entrypoint_path) + + if not request_type_name or not response_type_name: + return None + + # Look up features for both types + request_feature = REQUEST_TYPE_TO_FEATURE.get(request_type_name) + response_feature = REQUEST_TYPE_TO_FEATURE.get(response_type_name) + + # Both must be present and must match + if request_feature and response_feature and request_feature == response_feature: + return request_feature + + return None + + class CodeExtensionMetadata(BaseModel): name: str version: str diff --git a/src/datacustomcode/function/feature_types/chunking.py b/src/datacustomcode/function/feature_types/chunking.py index bdf0d91..53b9860 100644 --- a/src/datacustomcode/function/feature_types/chunking.py +++ b/src/datacustomcode/function/feature_types/chunking.py @@ -28,7 +28,7 @@ from pydantic import BaseModel, Field -class DocElement(BaseModel): +class SearchIndexDocElement(BaseModel): """Document element to be chunked""" text: str = Field(..., description="Text content to be chunked") @@ -37,7 +37,7 @@ class DocElement(BaseModel): ) -class ChunkOutput(BaseModel): +class SearchIndexChunkOutput(BaseModel): """Output chunk from the chunking process""" chunk_id: str = Field(..., description="UUID for this chunk") @@ -55,20 +55,17 @@ class ChunkOutput(BaseModel): ) -class StatusResponse(BaseModel): +class SearchIndexStatusResponse(BaseModel): """Status response for operation""" status_type: str = Field(..., description="'success' or 'error'") 
status_message: str = Field(..., description="Human-readable status") -class UdsChunkingV1BatchRequest(BaseModel): +class SearchIndexChunkingV1Request(BaseModel): """Batch request for UDS chunking""" - version: Literal["v1"] = Field( - default="v1", description="API version, must be 'v1'" - ) - input: List[DocElement] = Field( + input: List[SearchIndexDocElement] = Field( ..., min_length=1, description="List of documents (min 1)" ) max_characters: int = Field(..., description="Max chars per chunk (default: 100)") @@ -77,13 +74,9 @@ class UdsChunkingV1BatchRequest(BaseModel): ) -class UdsChunkingV1BatchResponse(BaseModel): +class SearchIndexChunkingV1Response(BaseModel): """Batch response for UDS chunking""" - - version: Literal["v1"] = Field( - default="v1", description="API version, must be 'v1'" - ) - output: List[ChunkOutput] = Field( + output: List[SearchIndexChunkOutput] = Field( default_factory=list, description="Flat list of chunks from all docs" ) - status: StatusResponse = Field(..., description="Overall operation status") + status: SearchIndexStatusResponse = Field(..., description="Overall operation status") diff --git a/src/datacustomcode/function_utils.py b/src/datacustomcode/function_utils.py new file mode 100644 index 0000000..18d623c --- /dev/null +++ b/src/datacustomcode/function_utils.py @@ -0,0 +1,245 @@ +# Copyright (c) 2025, Salesforce, Inc. +# SPDX-License-Identifier: Apache-2 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Utilities for inspecting and working with function entrypoints.""" + +import importlib.util +import inspect +import json +import sys +import typing +from typing import Any, Optional, Tuple + + +def load_function_module(entrypoint_path: str, module_name: str = "function_module"): + """Load a function entrypoint as a Python module. + + Args: + entrypoint_path: Path to the entrypoint.py file + module_name: Name to assign to the module + + Returns: + The loaded module + + Raises: + ImportError: If the module cannot be loaded + """ + spec = importlib.util.spec_from_file_location(module_name, entrypoint_path) + if spec is None or spec.loader is None: + raise ImportError(f"Could not load module from {entrypoint_path}") + + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def get_function_callable(module): + """Get the 'function' callable from a module. + + Args: + module: The module to extract the function from + + Returns: + The function callable + + Raises: + AttributeError: If module doesn't have a 'function' attribute + """ + if not hasattr(module, "function"): + raise AttributeError(f"Module does not have a 'function' callable") + return module.function + + +def get_type_name(type_annotation: Any) -> Optional[str]: + """Extract the type name from a type annotation. + + Args: + type_annotation: A type annotation object + + Returns: + The type name as a string, or None if it cannot be determined + """ + if type_annotation == inspect.Parameter.empty: + return None + + if hasattr(type_annotation, "__name__"): + return type_annotation.__name__ + + return str(type_annotation) + + +def get_function_signature_types( + function_callable, +) -> Tuple[Optional[Any], Optional[Any], Optional[str], Optional[str]]: + """Extract request and response types from a function signature. 
+ + Args: + function_callable: The function to inspect + + Returns: + Tuple of (request_type, response_type, request_type_name, response_type_name) + Any of these can be None if not found + """ + sig = inspect.signature(function_callable) + params = list(sig.parameters.values()) + + request_type = None + request_type_name = None + if len(params) >= 1: + request_type = params[0].annotation + request_type_name = get_type_name(request_type) + + response_type = sig.return_annotation + response_type_name = get_type_name(response_type) + + return request_type, response_type, request_type_name, response_type_name + + +def inspect_function_types( + entrypoint_path: str, +) -> Tuple[Optional[str], Optional[str]]: + """Inspect a function entrypoint and extract type names. + + Args: + entrypoint_path: Path to the entrypoint.py file + + Returns: + Tuple of (request_type_name, response_type_name) + Either can be None if not found or on error + + Example: + >>> request_name, response_name = inspect_function_types("payload/entrypoint.py") + >>> print(request_name) # "SearchIndexChunkingV1Request" + >>> print(response_name) # "SearchIndexChunkingV1Response" + """ + try: + module = load_function_module(entrypoint_path, "temp_module") + function_callable = get_function_callable(module) + _, _, request_type_name, response_type_name = get_function_signature_types( + function_callable + ) + return request_type_name, response_type_name + except Exception: + return None, None + + +def get_request_type(entrypoint_path: str) -> Optional[Any]: + """Get the request type annotation from a function entrypoint. 
+ + Args: + entrypoint_path: Path to the entrypoint.py file + + Returns: + The request type (Pydantic model class), or None if not found + + Raises: + ImportError: If the module cannot be loaded + AttributeError: If the function is not found + ValueError: If the function signature is invalid + """ + module = load_function_module(entrypoint_path) + function_callable = get_function_callable(module) + + sig = inspect.signature(function_callable) + params = list(sig.parameters.values()) + + if len(params) < 1: + raise ValueError("Function must accept at least one parameter (request)") + + request_type = params[0].annotation + if request_type == inspect.Parameter.empty: + raise ValueError("Function request parameter must have a type annotation") + + return request_type + + +def generate_sample_value(field_type, field_name: str): + """Generate a sample value based on field type. + + Args: + field_type: The type annotation of the field + field_name: The name of the field (used for contextual sample generation) + + Returns: + A sample value appropriate for the field type + """ + origin = typing.get_origin(field_type) + + if origin is list or field_type is list: + args = typing.get_args(field_type) + if args: + return [generate_sample_value(args[0], field_name)] + return [] + elif origin is dict or field_type is dict: + return {} + elif field_type is str or origin is typing.Literal: + if "version" in field_name.lower(): + return "v1" + return f"sample_{field_name}" + elif field_type is int: + if "max" in field_name.lower() or "characters" in field_name.lower(): + return 100 + return 1 + elif field_type is float: + return 1.0 + elif field_type is bool: + return True + elif hasattr(field_type, "model_fields"): + # Nested Pydantic model + nested_data = {} + for nested_field_name, nested_field_info in field_type.model_fields.items(): + if nested_field_info.is_required(): + nested_data[nested_field_name] = generate_sample_value( + nested_field_info.annotation, nested_field_name 
+ ) + return nested_data + else: + return None + + +def generate_test_json(entrypoint_path: str, output_path: str) -> None: + """Generate a sample test.json file for a function. + + Args: + entrypoint_path: Path to the function entrypoint.py + output_path: Output path for test.json + + Raises: + ImportError: If the module cannot be loaded + AttributeError: If the function is not found + ValueError: If the request type is not a Pydantic model + """ + # Get the request type + request_type = get_request_type(entrypoint_path) + + # Check if it's a Pydantic model + if not hasattr(request_type, "model_fields"): + raise ValueError(f"Request parameter type must be a Pydantic model") + + # Generate sample data + sample_data = {} + for field_name, field_info in request_type.model_fields.items(): + if field_info.is_required(): + sample_data[field_name] = generate_sample_value( + field_info.annotation, field_name + ) + elif field_info.default is not None: + sample_data[field_name] = field_info.default + + sample_instance = request_type(**sample_data) + + # Write to file + with open(output_path, "w") as f: + json.dump(sample_instance.model_dump(), f, indent=2) \ No newline at end of file diff --git a/src/datacustomcode/run.py b/src/datacustomcode/run.py index 0e5052a..cd6cc75 100644 --- a/src/datacustomcode/run.py +++ b/src/datacustomcode/run.py @@ -70,6 +70,7 @@ def run_entrypoint( config_file: Union[str, None], dependencies: List[str], profile: str, + test_file: Optional[str] = None, sf_cli_org: Optional[str] = None, ) -> None: """Run the entrypoint for script or function with the given config and dependencies. @@ -79,6 +80,7 @@ def run_entrypoint( config_file: The config file to use. dependencies: The dependencies to import. profile: The credentials profile to use. + test_file: Optional test JSON file for function testing. sf_cli_org: Optional SF CLI org alias or username. If provided, credentials are fetched via `sf org display` instead of from credentials.ini. 
""" @@ -138,7 +140,65 @@ def run_entrypoint( raise exc except (ModuleNotFoundError, AttributeError) as inner_exc: raise inner_exc from exc - runpy.run_path(entrypoint, init_globals=globals(), run_name="__main__") + + # Handle test file for functions + if test_file and package_type == "function": + run_function_with_test(entrypoint, test_file) + else: + runpy.run_path(entrypoint, init_globals=globals(), run_name="__main__") + + +def run_function_with_test(entrypoint: str, test_file: str) -> None: + """Run a function with test data from a JSON file. + + Dependencies are already loaded by this point, so we just import + the entrypoint module and call the function directly. + + Args: + entrypoint: Path to the function entrypoint.py + test_file: Path to test JSON file containing request data + """ + from datacustomcode.function_utils import ( + get_function_callable, + get_request_type, + load_function_module, + ) + + # Import the entrypoint module in the current environment (with all dependencies loaded) + module = load_function_module(entrypoint, "entrypoint_module") + function_callable = get_function_callable(module) + request_type = get_request_type(entrypoint) + + # Load and parse the test JSON + with open(test_file, "r") as f: + test_data = json.load(f) + + # Use Pydantic to parse and validate the request + try: + request = request_type(**test_data) + except Exception as e: + raise ValueError(f"Failed to parse test data as {request_type.__name__}: {e}") from e + + # Import Runtime + from datacustomcode.function import Runtime + + # Call the function with test data + print(f"Running function with test data from {test_file}...") + result = function_callable(request, Runtime()) + + # Pretty print the result + print("\n" + "=" * 80) + print("RESULT:") + print("=" * 80) + if hasattr(result, "model_dump"): + # Pydantic v2 + print(json.dumps(result.model_dump(), indent=2)) + elif hasattr(result, "dict"): + # Pydantic v1 + print(json.dumps(result.dict(), indent=2)) + 
else: + print(result) + print("=" * 80) def add_py_folder(entrypoint: str): diff --git a/src/datacustomcode/template.py b/src/datacustomcode/template.py index 195d4a2..424cfb6 100644 --- a/src/datacustomcode/template.py +++ b/src/datacustomcode/template.py @@ -37,11 +37,21 @@ def copy_script_template(target_dir: str) -> None: shutil.copy2(source, destination) -def copy_function_template(target_dir: str) -> None: +MAPPED_FOLDER = {"SearchIndexChunking": "chunking"} + + +def copy_function_template(target_dir: str, use_in_feature: str) -> None: os.makedirs(target_dir, exist_ok=True) - for item in os.listdir(function_template_dir): - source = os.path.join(function_template_dir, item) + if use_in_feature and use_in_feature in MAPPED_FOLDER: + feature_function_template_dir = os.path.join( + function_template_dir, MAPPED_FOLDER[use_in_feature] + ) + else: + feature_function_template_dir = function_template_dir + + for item in os.listdir(feature_function_template_dir): + source = os.path.join(feature_function_template_dir, item) destination = os.path.join(target_dir, item) if os.path.isdir(source): @@ -50,3 +60,17 @@ def copy_function_template(target_dir: str) -> None: else: logger.debug(f"Copying file {source} to {destination}...") shutil.copy2(source, destination) + + +# Re-export generate_test_json from function_utils for backwards compatibility +def generate_test_json(entrypoint_path: str, output_path: str) -> None: + """Generate a sample test.json file for a function. 
+ + Args: + entrypoint_path: Path to the function entrypoint.py + output_path: Output path for test.json + """ + from datacustomcode.function_utils import generate_test_json as _generate_test_json + + _generate_test_json(entrypoint_path, output_path) + logger.debug(f"Generated test JSON at {output_path}") diff --git a/src/datacustomcode/templates/__init__.py b/src/datacustomcode/templates/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/datacustomcode/templates/function/chunking/payload/config.json b/src/datacustomcode/templates/function/chunking/payload/config.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/src/datacustomcode/templates/function/chunking/payload/config.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/src/datacustomcode/templates/function/chunking/payload/entrypoint.py b/src/datacustomcode/templates/function/chunking/payload/entrypoint.py new file mode 100644 index 0000000..d6be950 --- /dev/null +++ b/src/datacustomcode/templates/function/chunking/payload/entrypoint.py @@ -0,0 +1,74 @@ +import logging + +from datacustomcode.function import Runtime + +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + + +from datacustomcode.function.feature_types.chunking import ( + SearchIndexChunkingV1Request, + SearchIndexChunkingV1Response, + SearchIndexChunkOutput, + SearchIndexStatusResponse +) + + +def function(request: SearchIndexChunkingV1Request, runtime: Runtime) -> SearchIndexChunkingV1Response: + print(f"Received {len(request.input)} documents to chunk") + print(f"Max characters per chunk: {request.max_characters}") + + chunks = [] + chunk_id = 1 + + # Process each document + for doc_idx, doc in enumerate(request.input): + # Access fields - works identically in both Pydantic and betterproto! 
+ text = doc.text + metadata = doc.metadata if hasattr(doc.metadata, '__iter__') else {} + + print(f"📄 Processing document {doc_idx + 1}: {len(text)} characters") + + # Chunk the text + max_chars = request.max_characters + chunk_start = 0 + + while chunk_start < len(text): + chunk_end = min(chunk_start + max_chars, len(text)) + chunk_text = text[chunk_start:chunk_end] + + # Try to break at word boundary if not at end + if chunk_end < len(text) and not text[chunk_end].isspace(): + # Look for last space in chunk + last_space = chunk_text.rfind(' ') + if last_space > max_chars * 0.8: # Only if space is in last 20% + chunk_end = chunk_start + last_space + chunk_text = text[chunk_start:chunk_end] + + + # Create ChunkOutput object + chunk_output = SearchIndexChunkOutput( + chunk_id=f"chunk_{chunk_id:04d}", + chunk_type="text", + text=chunk_text.strip(), + seq_no=chunk_id, + metadata={k: str(v) for k, v in (dict(metadata) if metadata else {}).items()}, + tag_metadata={}, + citations={} + ) + chunks.append(chunk_output) + + print(f" ✂️ Chunk {chunk_id}: {len(chunk_text)} chars") + chunk_id += 1 + chunk_start = chunk_end + + print(f"✅ Generated {len(chunks)} chunks total") + + # Return UdsChunkingV1BatchResponse object + return SearchIndexChunkingV1Response( + output=chunks, + status=SearchIndexStatusResponse( + status_type="success", + status_message=f"Successfully chunked {len(request.input)} documents into {len(chunks)} chunks" + ) + ) From 804d08456278593aec31dd502f22b8949cf72c74 Mon Sep 17 00:00:00 2001 From: Rita Agarwala Date: Wed, 29 Apr 2026 14:24:45 +0530 Subject: [PATCH 02/19] Improving external SDK for function --- src/datacustomcode/cli.py | 3 ++- src/datacustomcode/run.py | 4 ---- src/datacustomcode/template.py | 12 ------------ 3 files changed, 2 insertions(+), 17 deletions(-) diff --git a/src/datacustomcode/cli.py b/src/datacustomcode/cli.py index 33a064c..94bc95a 100644 --- a/src/datacustomcode/cli.py +++ b/src/datacustomcode/cli.py @@ -83,7 +83,7 @@ def 
_generate_function_test_file(entrypoint_path: str) -> Optional[str]: Returns: Path to generated test.json, or None if generation failed """ - from datacustomcode.template import generate_test_json + from datacustomcode.function_utils import generate_test_json tests_dir = os.path.join(os.path.dirname(entrypoint_path), "tests") os.makedirs(tests_dir, exist_ok=True) @@ -91,6 +91,7 @@ def _generate_function_test_file(entrypoint_path: str) -> Optional[str]: try: generate_test_json(entrypoint_path, test_json_path) + logger.debug(f"Generated test JSON at {test_json_path}") return test_json_path except Exception as e: logger.warning(f"Could not generate test.json: {e}") diff --git a/src/datacustomcode/run.py b/src/datacustomcode/run.py index cd6cc75..605bc00 100644 --- a/src/datacustomcode/run.py +++ b/src/datacustomcode/run.py @@ -191,11 +191,7 @@ def run_function_with_test(entrypoint: str, test_file: str) -> None: print("RESULT:") print("=" * 80) if hasattr(result, "model_dump"): - # Pydantic v2 print(json.dumps(result.model_dump(), indent=2)) - elif hasattr(result, "dict"): - # Pydantic v1 - print(json.dumps(result.dict(), indent=2)) else: print(result) print("=" * 80) diff --git a/src/datacustomcode/template.py b/src/datacustomcode/template.py index 424cfb6..15c1e11 100644 --- a/src/datacustomcode/template.py +++ b/src/datacustomcode/template.py @@ -62,15 +62,3 @@ def copy_function_template(target_dir: str, use_in_feature: str) -> None: shutil.copy2(source, destination) -# Re-export generate_test_json from function_utils for backwards compatibility -def generate_test_json(entrypoint_path: str, output_path: str) -> None: - """Generate a sample test.json file for a function. 
- - Args: - entrypoint_path: Path to the function entrypoint.py - output_path: Output path for test.json - """ - from datacustomcode.function_utils import generate_test_json as _generate_test_json - - _generate_test_json(entrypoint_path, output_path) - logger.debug(f"Generated test JSON at {output_path}") From 7be51c3e7d137ff75f7934e396efd5fa604f6ca7 Mon Sep 17 00:00:00 2001 From: Rita Agarwala Date: Wed, 29 Apr 2026 14:39:08 +0530 Subject: [PATCH 03/19] Improving external SDK for function --- src/datacustomcode/function_utils.py | 46 +++++++++++++++++----------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/src/datacustomcode/function_utils.py b/src/datacustomcode/function_utils.py index 18d623c..fe0727e 100644 --- a/src/datacustomcode/function_utils.py +++ b/src/datacustomcode/function_utils.py @@ -165,6 +165,30 @@ def get_request_type(entrypoint_path: str) -> Optional[Any]: return request_type +def _generate_model_sample_data(model_type): + """Generate sample data for all fields in a Pydantic model. + + Args: + model_type: A Pydantic model class + + Returns: + Dictionary with sample data for all fields + """ + from pydantic_core import PydanticUndefined + + sample_data = {} + for field_name, field_info in model_type.model_fields.items(): + # Check if field has a real default value + if field_info.default is not PydanticUndefined: + sample_data[field_name] = field_info.default + else: + # Required field or field without default - generate sample + sample_data[field_name] = generate_sample_value( + field_info.annotation, field_name + ) + return sample_data + + def generate_sample_value(field_type, field_name: str): """Generate a sample value based on field type. 
@@ -197,14 +221,8 @@ def generate_sample_value(field_type, field_name: str): elif field_type is bool: return True elif hasattr(field_type, "model_fields"): - # Nested Pydantic model - nested_data = {} - for nested_field_name, nested_field_info in field_type.model_fields.items(): - if nested_field_info.is_required(): - nested_data[nested_field_name] = generate_sample_value( - nested_field_info.annotation, nested_field_name - ) - return nested_data + # Nested Pydantic model - use shared helper + return _generate_model_sample_data(field_type) else: return None @@ -228,16 +246,8 @@ def generate_test_json(entrypoint_path: str, output_path: str) -> None: if not hasattr(request_type, "model_fields"): raise ValueError(f"Request parameter type must be a Pydantic model") - # Generate sample data - sample_data = {} - for field_name, field_info in request_type.model_fields.items(): - if field_info.is_required(): - sample_data[field_name] = generate_sample_value( - field_info.annotation, field_name - ) - elif field_info.default is not None: - sample_data[field_name] = field_info.default - + # Generate sample data for ALL fields (use defaults where available) + sample_data = _generate_model_sample_data(request_type) sample_instance = request_type(**sample_data) # Write to file From 89ab4bc9b1337253512c1aedc9691835296398bd Mon Sep 17 00:00:00 2001 From: Rita Agarwala Date: Wed, 29 Apr 2026 17:12:38 +0530 Subject: [PATCH 04/19] Improving external SDK for function --- src/datacustomcode/cli.py | 23 ++-- src/datacustomcode/constants.py | 45 +++++++ src/datacustomcode/deploy.py | 21 ++-- src/datacustomcode/function_utils.py | 110 +++++++++++++++++- src/datacustomcode/template.py | 38 ++++-- .../function/chunking/payload/entrypoint.py | 61 +++++----- 6 files changed, 227 insertions(+), 71 deletions(-) create mode 100644 src/datacustomcode/constants.py diff --git a/src/datacustomcode/cli.py b/src/datacustomcode/cli.py index 94bc95a..78eaa74 100644 --- a/src/datacustomcode/cli.py +++ 
b/src/datacustomcode/cli.py @@ -27,6 +27,13 @@ from datacustomcode import AuthType from datacustomcode.auth import configure_oauth_tokens +from datacustomcode.constants import ( + CONFIG_FILE, + ENTRYPOINT_FILE, + PAYLOAD_DIR, + TEST_FILE, + TESTS_DIR, +) from datacustomcode.scan import find_base_directory, get_package_type @@ -85,9 +92,9 @@ def _generate_function_test_file(entrypoint_path: str) -> Optional[str]: """ from datacustomcode.function_utils import generate_test_json - tests_dir = os.path.join(os.path.dirname(entrypoint_path), "tests") + tests_dir = os.path.join(os.path.dirname(entrypoint_path), TESTS_DIR) os.makedirs(tests_dir, exist_ok=True) - test_json_path = os.path.join(tests_dir, "test.json") + test_json_path = os.path.join(tests_dir, TEST_FILE) try: generate_test_json(entrypoint_path, test_json_path) @@ -236,7 +243,7 @@ def deploy( if package_type == "function": # Infer use_in_feature from function signature - entrypoint_path = os.path.join(path, "entrypoint.py") + entrypoint_path = os.path.join(path, ENTRYPOINT_FILE) use_in_feature = infer_use_in_feature(entrypoint_path) if use_in_feature: logger.info(f"Inferred use_in_feature: {use_in_feature}") @@ -288,11 +295,9 @@ def init(directory: str, code_type: str, use_in_feature: Optional[str]): if code_type == "script": copy_script_template(directory) elif code_type == "function": - # Default to SearchIndexChunking if not provided - feature = use_in_feature - copy_function_template(directory, feature) - entrypoint_path = os.path.join(directory, "payload", "entrypoint.py") - config_location = os.path.join(os.path.dirname(entrypoint_path), "config.json") + copy_function_template(directory, use_in_feature) + entrypoint_path = os.path.join(directory, PAYLOAD_DIR, ENTRYPOINT_FILE) + config_location = os.path.join(os.path.dirname(entrypoint_path), CONFIG_FILE) # Write package type to SDK-specific config sdk_config = {"type": code_type} @@ -344,7 +349,7 @@ def init(directory: str, code_type: str, 
use_in_feature: Optional[str]): def scan(filename: str, config: str, dry_run: bool, no_requirements: bool): from datacustomcode.scan import update_config, write_requirements_file - config_location = config or os.path.join(os.path.dirname(filename), "config.json") + config_location = config or os.path.join(os.path.dirname(filename), CONFIG_FILE) click.echo( "Dumping scan results to config file: " + click.style(config_location, fg="blue", bold=True) diff --git a/src/datacustomcode/constants.py b/src/datacustomcode/constants.py new file mode 100644 index 0000000..e0f3b2c --- /dev/null +++ b/src/datacustomcode/constants.py @@ -0,0 +1,45 @@ +# Copyright (c) 2025, Salesforce, Inc. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Constants used throughout the datacustomcode package.""" + +# File and directory names +ENTRYPOINT_FILE = "entrypoint.py" +CONFIG_FILE = "config.json" +PAYLOAD_DIR = "payload" +TESTS_DIR = "tests" +TEST_FILE = "test.json" +REQUIREMENTS_FILE = "requirements.txt" + +# Default values +DEFAULT_PROFILE = "default" +DEFAULT_NETWORK = "default" +DEFAULT_CPU_SIZE = "CPU_2XL" + +# Feature to template folder mapping +FEATURE_TEMPLATE_MAPPING = { + "SearchIndexChunking": "chunking", +} + +# Feature name to Connect API name mapping +USE_IN_FEATURE_MAPPING_FOR_CONNECT_API = { + "SearchIndexChunking": "UnstructuredChunking", +} + +# Pydantic request/response type names to feature names +REQUEST_TYPE_TO_FEATURE = { + "SearchIndexChunkingV1Request": "SearchIndexChunking", + "SearchIndexChunkingV1Response": "SearchIndexChunking", +} \ No newline at end of file diff --git a/src/datacustomcode/deploy.py b/src/datacustomcode/deploy.py index 4249a59..db26e3c 100644 --- a/src/datacustomcode/deploy.py +++ b/src/datacustomcode/deploy.py @@ -35,6 +35,10 @@ import requests from datacustomcode.cmd import cmd_output +from datacustomcode.constants import ( + REQUEST_TYPE_TO_FEATURE, + USE_IN_FEATURE_MAPPING_FOR_CONNECT_API, +) from datacustomcode.scan import find_base_directory, get_package_type DATA_CUSTOM_CODE_PATH = "services/data/v63.0/ssot/data-custom-code" @@ -65,32 +69,23 @@ def _sanitize_api_name(name: str) -> str: return sanitized -# Mapping from user-facing feature names to internal API names -USE_IN_FEATURE_MAPPING_FOR_CONNECT_API = { - "SearchIndexChunking": "UnstructuredChunking", -} - -# Mapping from Pydantic request/response types to feature names -REQUEST_TYPE_TO_FEATURE = { - "SearchIndexChunkingV1Request": "SearchIndexChunking", - "SearchIndexChunkingV1Response": "SearchIndexChunking", -} - def infer_use_in_feature(entrypoint_path: str) -> Union[str, None]: """Infer the use_in_feature from function signature. 
Checks both the request parameter type and return type annotation. Both must map to the same feature for a valid inference. + Uses static AST parsing to avoid importing dependencies. + Args: entrypoint_path: Path to the entrypoint.py file Returns: The feature name if both request and response match, None otherwise """ - from datacustomcode.function_utils import inspect_function_types + from datacustomcode.function_utils import inspect_function_types_static - request_type_name, response_type_name = inspect_function_types(entrypoint_path) + request_type_name, response_type_name = inspect_function_types_static(entrypoint_path) if not request_type_name or not response_type_name: return None diff --git a/src/datacustomcode/function_utils.py b/src/datacustomcode/function_utils.py index fe0727e..1dd57a4 100644 --- a/src/datacustomcode/function_utils.py +++ b/src/datacustomcode/function_utils.py @@ -15,6 +15,7 @@ """Utilities for inspecting and working with function entrypoints.""" +import ast import importlib.util import inspect import json @@ -107,6 +108,93 @@ def get_function_signature_types( return request_type, response_type, request_type_name, response_type_name +def inspect_function_types_static(entrypoint_path: str) -> Tuple[Optional[str], Optional[str]]: + """Inspect function types using static AST parsing (no imports). + + This parses the Python file without executing it, so it doesn't + require dependencies to be installed. 
+ + Args: + entrypoint_path: Path to the entrypoint.py file + + Returns: + Tuple of (request_type_name, response_type_name) + """ + try: + with open(entrypoint_path, 'r') as f: + tree = ast.parse(f.read(), filename=entrypoint_path) + + # Find the 'function' definition + for node in ast.walk(tree): + if isinstance(node, ast.FunctionDef) and node.name == "function": + # Get request type (first parameter annotation) + request_type_name = None + if node.args.args and len(node.args.args) > 0: + first_param = node.args.args[0] + if first_param.annotation: + request_type_name = _get_type_name_from_ast(first_param.annotation) + + # Get response type (return annotation) + response_type_name = None + if node.returns: + response_type_name = _get_type_name_from_ast(node.returns) + + return request_type_name, response_type_name + + return None, None + except Exception: + return None, None + + +def _get_type_name_from_ast(annotation) -> Optional[str]: + """Extract type name from an AST annotation node.""" + if isinstance(annotation, ast.Name): + # Simple type: MyType + return annotation.id + elif isinstance(annotation, ast.Attribute): + # Module.Type - just return the type name + return annotation.attr + elif isinstance(annotation, ast.Subscript): + # Generic type: List[MyType], Optional[MyType] + # Return the base type name + return _get_type_name_from_ast(annotation.value) + return None + + +def _import_pydantic_model(entrypoint_path: str, type_name: str) -> Optional[Any]: + """Import a Pydantic model by finding its import statement. + + Parses the entrypoint to find where the type is imported from, + then imports just that module (not the entrypoint itself). 
+ + Args: + entrypoint_path: Path to entrypoint.py + type_name: Name of the type to import (e.g., "SearchIndexChunkingV1Request") + + Returns: + The Pydantic model class, or None if not found + """ + try: + with open(entrypoint_path, 'r') as f: + tree = ast.parse(f.read(), filename=entrypoint_path) + + # Find where this type is imported from + for node in ast.walk(tree): + if isinstance(node, ast.ImportFrom): + # from module import Type1, Type2 + for alias in node.names: + if alias.name == type_name: + # Found it! Import from the module + module_name = node.module + if module_name: + module = importlib.import_module(module_name) + return getattr(module, type_name, None) + + return None + except Exception: + return None + + def inspect_function_types( entrypoint_path: str, ) -> Tuple[Optional[str], Optional[str]]: @@ -230,17 +318,29 @@ def generate_sample_value(field_type, field_name: str): def generate_test_json(entrypoint_path: str, output_path: str) -> None: """Generate a sample test.json file for a function. + First tries static AST parsing to get type names, then uses those + to import only the Pydantic model classes (not the entrypoint). 
+ Args: entrypoint_path: Path to the function entrypoint.py output_path: Output path for test.json Raises: - ImportError: If the module cannot be loaded - AttributeError: If the function is not found - ValueError: If the request type is not a Pydantic model + ImportError: If the Pydantic model cannot be loaded + ValueError: If the request type is not found or not a Pydantic model """ - # Get the request type - request_type = get_request_type(entrypoint_path) + # First, get the type name using static parsing (no imports) + request_type_name, _ = inspect_function_types_static(entrypoint_path) + + if not request_type_name: + raise ValueError("Could not determine request type from function signature") + + # Now try to import the Pydantic model class + # Look for it in the entrypoint's imports + request_type = _import_pydantic_model(entrypoint_path, request_type_name) + + if not request_type: + raise ValueError(f"Could not import Pydantic model: {request_type_name}") # Check if it's a Pydantic model if not hasattr(request_type, "model_fields"): diff --git a/src/datacustomcode/template.py b/src/datacustomcode/template.py index 15c1e11..6f52624 100644 --- a/src/datacustomcode/template.py +++ b/src/datacustomcode/template.py @@ -17,6 +17,8 @@ from loguru import logger +from datacustomcode.constants import FEATURE_TEMPLATE_MAPPING + script_template_dir = os.path.join(os.path.dirname(__file__), "templates", "script") function_template_dir = os.path.join(os.path.dirname(__file__), "templates", "function") @@ -37,23 +39,18 @@ def copy_script_template(target_dir: str) -> None: shutil.copy2(source, destination) -MAPPED_FOLDER = {"SearchIndexChunking": "chunking"} - - def copy_function_template(target_dir: str, use_in_feature: str) -> None: os.makedirs(target_dir, exist_ok=True) - if use_in_feature and use_in_feature in MAPPED_FOLDER: - feature_function_template_dir = os.path.join( - function_template_dir, MAPPED_FOLDER[use_in_feature] - ) - else: - feature_function_template_dir 
= function_template_dir - - for item in os.listdir(feature_function_template_dir): - source = os.path.join(feature_function_template_dir, item) + # First, copy common files from base function template + for item in os.listdir(function_template_dir): + source = os.path.join(function_template_dir, item) destination = os.path.join(target_dir, item) + # Skip feature-specific subdirectories + if os.path.isdir(source) and item in FEATURE_TEMPLATE_MAPPING.values(): + continue + if os.path.isdir(source): logger.debug(f"Copying directory {source} to {destination}...") shutil.copytree(source, destination, dirs_exist_ok=True) @@ -61,4 +58,21 @@ def copy_function_template(target_dir: str, use_in_feature: str) -> None: logger.debug(f"Copying file {source} to {destination}...") shutil.copy2(source, destination) + # Then, copy feature-specific files (overwriting if needed) + if use_in_feature and use_in_feature in FEATURE_TEMPLATE_MAPPING: + feature_function_template_dir = os.path.join( + function_template_dir, FEATURE_TEMPLATE_MAPPING[use_in_feature] + ) + + for item in os.listdir(feature_function_template_dir): + source = os.path.join(feature_function_template_dir, item) + destination = os.path.join(target_dir, item) + + if os.path.isdir(source): + logger.debug(f"Copying feature-specific directory {source} to {destination}...") + shutil.copytree(source, destination, dirs_exist_ok=True) + else: + logger.debug(f"Copying feature-specific file {source} to {destination}...") + shutil.copy2(source, destination) + diff --git a/src/datacustomcode/templates/function/chunking/payload/entrypoint.py b/src/datacustomcode/templates/function/chunking/payload/entrypoint.py index d6be950..4796ef2 100644 --- a/src/datacustomcode/templates/function/chunking/payload/entrypoint.py +++ b/src/datacustomcode/templates/function/chunking/payload/entrypoint.py @@ -1,74 +1,71 @@ import logging -from datacustomcode.function import Runtime - -logger = logging.getLogger(__name__) 
-logging.basicConfig(level=logging.INFO) - +from langchain_text_splitters import RecursiveCharacterTextSplitter +from datacustomcode.function import Runtime from datacustomcode.function.feature_types.chunking import ( SearchIndexChunkingV1Request, SearchIndexChunkingV1Response, SearchIndexChunkOutput, - SearchIndexStatusResponse + SearchIndexStatusResponse, ) +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + -def function(request: SearchIndexChunkingV1Request, runtime: Runtime) -> SearchIndexChunkingV1Response: +def function( + request: SearchIndexChunkingV1Request, runtime: Runtime +) -> SearchIndexChunkingV1Response: print(f"Received {len(request.input)} documents to chunk") print(f"Max characters per chunk: {request.max_characters}") + # Initialize RecursiveCharacterTextSplitter + # It tries to split on: "\n\n", "\n", " ", "" (in that order) + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=request.max_characters, + chunk_overlap=20, # Small overlap to maintain context + length_function=len, + separators=["\n\n", "\n", " ", ""], + ) + chunks = [] chunk_id = 1 # Process each document for doc_idx, doc in enumerate(request.input): - # Access fields - works identically in both Pydantic and betterproto! 
text = doc.text - metadata = doc.metadata if hasattr(doc.metadata, '__iter__') else {} + metadata = doc.metadata if hasattr(doc.metadata, "__iter__") else {} print(f"📄 Processing document {doc_idx + 1}: {len(text)} characters") - # Chunk the text - max_chars = request.max_characters - chunk_start = 0 - - while chunk_start < len(text): - chunk_end = min(chunk_start + max_chars, len(text)) - chunk_text = text[chunk_start:chunk_end] - - # Try to break at word boundary if not at end - if chunk_end < len(text) and not text[chunk_end].isspace(): - # Look for last space in chunk - last_space = chunk_text.rfind(' ') - if last_space > max_chars * 0.8: # Only if space is in last 20% - chunk_end = chunk_start + last_space - chunk_text = text[chunk_start:chunk_end] - + # Split the text using RecursiveCharacterTextSplitter + text_chunks = text_splitter.split_text(text) - # Create ChunkOutput object + # Create chunk outputs + for chunk_text in text_chunks: chunk_output = SearchIndexChunkOutput( chunk_id=f"chunk_{chunk_id:04d}", chunk_type="text", text=chunk_text.strip(), seq_no=chunk_id, - metadata={k: str(v) for k, v in (dict(metadata) if metadata else {}).items()}, + metadata={ + k: str(v) for k, v in (dict(metadata) if metadata else {}).items() + }, tag_metadata={}, - citations={} + citations={}, ) chunks.append(chunk_output) print(f" ✂️ Chunk {chunk_id}: {len(chunk_text)} chars") chunk_id += 1 - chunk_start = chunk_end print(f"✅ Generated {len(chunks)} chunks total") - # Return UdsChunkingV1BatchResponse object return SearchIndexChunkingV1Response( output=chunks, status=SearchIndexStatusResponse( status_type="success", - status_message=f"Successfully chunked {len(request.input)} documents into {len(chunks)} chunks" - ) + status_message=f"Successfully chunked {len(request.input)} documents into {len(chunks)} chunks", + ), ) From 5cf33a90d64f55302b4ade252fbf87d1074511a1 Mon Sep 17 00:00:00 2001 From: Rita Agarwala Date: Wed, 29 Apr 2026 17:48:14 +0530 Subject: [PATCH 05/19] 
Updating sf_cli_integration.yml --- .github/workflows/sf_cli_integration.yml | 6 +++++- .../templates/function/chunking/requirements.txt | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 src/datacustomcode/templates/function/chunking/requirements.txt diff --git a/.github/workflows/sf_cli_integration.yml b/.github/workflows/sf_cli_integration.yml index b9ff172..6a046f5 100644 --- a/.github/workflows/sf_cli_integration.yml +++ b/.github/workflows/sf_cli_integration.yml @@ -200,6 +200,10 @@ jobs: echo "::error::testFunction/.datacustomcode_proj/sdk_config.json not found after function init." exit 1 } + test -f testFunction/payload/tests/test.json || { + echo "::error::testFunction/payload/tests/test.json not found after function init." + exit 1 + } # ── Function: scan ──────────────────────────────────────────────────────── @@ -251,7 +255,7 @@ jobs: # ── Function: run ───────────────────────────────────────────────────────── - - name: '[function] run — sf data-code-extension function run --entrypoint testFunction/payload/entrypoint.py -o dev1' + - name: '[function] run — sf data-code-extension function run --entrypoint testFunction/payload/entrypoint.py --test_with testFunction/payload/tests/test.json -o dev1' run: | sf data-code-extension function run \ --entrypoint testFunction/payload/entrypoint.py \ diff --git a/src/datacustomcode/templates/function/chunking/requirements.txt b/src/datacustomcode/templates/function/chunking/requirements.txt new file mode 100644 index 0000000..f872675 --- /dev/null +++ b/src/datacustomcode/templates/function/chunking/requirements.txt @@ -0,0 +1,2 @@ +# Packages required for the chunking function +langchain-text-splitters>=0.3.0 \ No newline at end of file From 874f821332d981d2ad127f32a0cd47e824be0eef Mon Sep 17 00:00:00 2001 From: Rita Agarwala Date: Wed, 29 Apr 2026 17:50:14 +0530 Subject: [PATCH 06/19] Updating sf_cli_integration.yml --- .github/workflows/sf_cli_integration.yml | 1 + 1 file changed, 1 
insertion(+) diff --git a/.github/workflows/sf_cli_integration.yml b/.github/workflows/sf_cli_integration.yml index 6a046f5..0d9c17a 100644 --- a/.github/workflows/sf_cli_integration.yml +++ b/.github/workflows/sf_cli_integration.yml @@ -259,6 +259,7 @@ jobs: run: | sf data-code-extension function run \ --entrypoint testFunction/payload/entrypoint.py \ + --test_with testFunction/payload/tests/test.json \ -o dev1 || { echo "::error::sf data-code-extension function run FAILED. Check mock server output above; the --entrypoint flag or SF CLI org auth contract may have changed." exit 1 From f9b0deb9f4f76d9b9c600bbd33cae91293bc139d Mon Sep 17 00:00:00 2001 From: Rita Agarwala Date: Wed, 29 Apr 2026 17:53:30 +0530 Subject: [PATCH 07/19] Make lint --- src/datacustomcode/cli.py | 18 +++++++------ src/datacustomcode/constants.py | 2 +- src/datacustomcode/deploy.py | 9 +++---- .../function/feature_types/chunking.py | 6 +++-- src/datacustomcode/function_utils.py | 25 ++++++++++++------- src/datacustomcode/run.py | 4 ++- src/datacustomcode/template.py | 10 +++++--- .../function/chunking/requirements.txt | 2 +- 8 files changed, 46 insertions(+), 30 deletions(-) diff --git a/src/datacustomcode/cli.py b/src/datacustomcode/cli.py index 78eaa74..381deff 100644 --- a/src/datacustomcode/cli.py +++ b/src/datacustomcode/cli.py @@ -210,8 +210,8 @@ def deploy( ): from datacustomcode.deploy import ( COMPUTE_TYPES, - CodeExtensionMetadata, USE_IN_FEATURE_MAPPING_FOR_CONNECT_API, + CodeExtensionMetadata, deploy_full, infer_use_in_feature, ) @@ -255,7 +255,9 @@ def deploy( raise click.Abort() # Map user-provided feature names to API names - mapped_feature = USE_IN_FEATURE_MAPPING_FOR_CONNECT_API.get(use_in_feature, use_in_feature) + mapped_feature = USE_IN_FEATURE_MAPPING_FOR_CONNECT_API.get( + use_in_feature, use_in_feature + ) metadata.functionInvokeOptions = [mapped_feature] try: @@ -286,10 +288,7 @@ def init(directory: str, code_type: str, use_in_feature: Optional[str]): update_config, 
write_sdk_config, ) - from datacustomcode.template import ( - copy_function_template, - copy_script_template, - ) + from datacustomcode.template import copy_function_template, copy_script_template click.echo("Copying template to " + click.style(directory, fg="blue", bold=True)) if code_type == "script": @@ -397,5 +396,10 @@ def run( from datacustomcode.run import run_entrypoint run_entrypoint( - entrypoint, config_file, dependencies, profile, test_file=test_with, sf_cli_org=sf_cli_org + entrypoint, + config_file, + dependencies, + profile, + test_file=test_with, + sf_cli_org=sf_cli_org, ) diff --git a/src/datacustomcode/constants.py b/src/datacustomcode/constants.py index e0f3b2c..76b6a7c 100644 --- a/src/datacustomcode/constants.py +++ b/src/datacustomcode/constants.py @@ -42,4 +42,4 @@ REQUEST_TYPE_TO_FEATURE = { "SearchIndexChunkingV1Request": "SearchIndexChunking", "SearchIndexChunkingV1Response": "SearchIndexChunking", -} \ No newline at end of file +} diff --git a/src/datacustomcode/deploy.py b/src/datacustomcode/deploy.py index db26e3c..65495e6 100644 --- a/src/datacustomcode/deploy.py +++ b/src/datacustomcode/deploy.py @@ -35,10 +35,7 @@ import requests from datacustomcode.cmd import cmd_output -from datacustomcode.constants import ( - REQUEST_TYPE_TO_FEATURE, - USE_IN_FEATURE_MAPPING_FOR_CONNECT_API, -) +from datacustomcode.constants import REQUEST_TYPE_TO_FEATURE from datacustomcode.scan import find_base_directory, get_package_type DATA_CUSTOM_CODE_PATH = "services/data/v63.0/ssot/data-custom-code" @@ -85,7 +82,9 @@ def infer_use_in_feature(entrypoint_path: str) -> Union[str, None]: """ from datacustomcode.function_utils import inspect_function_types_static - request_type_name, response_type_name = inspect_function_types_static(entrypoint_path) + request_type_name, response_type_name = inspect_function_types_static( + entrypoint_path + ) if not request_type_name or not response_type_name: return None diff --git 
a/src/datacustomcode/function/feature_types/chunking.py b/src/datacustomcode/function/feature_types/chunking.py index 53b9860..1a2f1d7 100644 --- a/src/datacustomcode/function/feature_types/chunking.py +++ b/src/datacustomcode/function/feature_types/chunking.py @@ -22,7 +22,6 @@ Any, Dict, List, - Literal, ) from pydantic import BaseModel, Field @@ -76,7 +75,10 @@ class SearchIndexChunkingV1Request(BaseModel): class SearchIndexChunkingV1Response(BaseModel): """Batch response for UDS chunking""" + output: List[SearchIndexChunkOutput] = Field( default_factory=list, description="Flat list of chunks from all docs" ) - status: SearchIndexStatusResponse = Field(..., description="Overall operation status") + status: SearchIndexStatusResponse = Field( + ..., description="Overall operation status" + ) diff --git a/src/datacustomcode/function_utils.py b/src/datacustomcode/function_utils.py index 1dd57a4..f803b7a 100644 --- a/src/datacustomcode/function_utils.py +++ b/src/datacustomcode/function_utils.py @@ -19,9 +19,12 @@ import importlib.util import inspect import json -import sys import typing -from typing import Any, Optional, Tuple +from typing import ( + Any, + Optional, + Tuple, +) def load_function_module(entrypoint_path: str, module_name: str = "function_module"): @@ -59,7 +62,7 @@ def get_function_callable(module): AttributeError: If module doesn't have a 'function' attribute """ if not hasattr(module, "function"): - raise AttributeError(f"Module does not have a 'function' callable") + raise AttributeError("Module does not have a 'function' callable") return module.function @@ -108,7 +111,9 @@ def get_function_signature_types( return request_type, response_type, request_type_name, response_type_name -def inspect_function_types_static(entrypoint_path: str) -> Tuple[Optional[str], Optional[str]]: +def inspect_function_types_static( + entrypoint_path: str, +) -> Tuple[Optional[str], Optional[str]]: """Inspect function types using static AST parsing (no imports). 
This parses the Python file without executing it, so it doesn't @@ -121,7 +126,7 @@ def inspect_function_types_static(entrypoint_path: str) -> Tuple[Optional[str], Tuple of (request_type_name, response_type_name) """ try: - with open(entrypoint_path, 'r') as f: + with open(entrypoint_path, "r") as f: tree = ast.parse(f.read(), filename=entrypoint_path) # Find the 'function' definition @@ -132,7 +137,9 @@ def inspect_function_types_static(entrypoint_path: str) -> Tuple[Optional[str], if node.args.args and len(node.args.args) > 0: first_param = node.args.args[0] if first_param.annotation: - request_type_name = _get_type_name_from_ast(first_param.annotation) + request_type_name = _get_type_name_from_ast( + first_param.annotation + ) # Get response type (return annotation) response_type_name = None @@ -175,7 +182,7 @@ def _import_pydantic_model(entrypoint_path: str, type_name: str) -> Optional[Any The Pydantic model class, or None if not found """ try: - with open(entrypoint_path, 'r') as f: + with open(entrypoint_path, "r") as f: tree = ast.parse(f.read(), filename=entrypoint_path) # Find where this type is imported from @@ -344,7 +351,7 @@ def generate_test_json(entrypoint_path: str, output_path: str) -> None: # Check if it's a Pydantic model if not hasattr(request_type, "model_fields"): - raise ValueError(f"Request parameter type must be a Pydantic model") + raise ValueError("Request parameter type must be a Pydantic model") # Generate sample data for ALL fields (use defaults where available) sample_data = _generate_model_sample_data(request_type) @@ -352,4 +359,4 @@ def generate_test_json(entrypoint_path: str, output_path: str) -> None: # Write to file with open(output_path, "w") as f: - json.dump(sample_instance.model_dump(), f, indent=2) \ No newline at end of file + json.dump(sample_instance.model_dump(), f, indent=2) diff --git a/src/datacustomcode/run.py b/src/datacustomcode/run.py index 605bc00..004f724 100644 --- a/src/datacustomcode/run.py +++ 
b/src/datacustomcode/run.py @@ -177,7 +177,9 @@ def run_function_with_test(entrypoint: str, test_file: str) -> None: try: request = request_type(**test_data) except Exception as e: - raise ValueError(f"Failed to parse test data as {request_type.__name__}: {e}") from e + raise ValueError( + f"Failed to parse test data as {request_type.__name__}: {e}" + ) from e # Import Runtime from datacustomcode.function import Runtime diff --git a/src/datacustomcode/template.py b/src/datacustomcode/template.py index 6f52624..8fb67ad 100644 --- a/src/datacustomcode/template.py +++ b/src/datacustomcode/template.py @@ -69,10 +69,12 @@ def copy_function_template(target_dir: str, use_in_feature: str) -> None: destination = os.path.join(target_dir, item) if os.path.isdir(source): - logger.debug(f"Copying feature-specific directory {source} to {destination}...") + logger.debug( + f"Copying feature-specific directory {source} to {destination}..." + ) shutil.copytree(source, destination, dirs_exist_ok=True) else: - logger.debug(f"Copying feature-specific file {source} to {destination}...") + logger.debug( + f"Copying feature-specific file {source} to {destination}..." 
+ ) shutil.copy2(source, destination) - - diff --git a/src/datacustomcode/templates/function/chunking/requirements.txt b/src/datacustomcode/templates/function/chunking/requirements.txt index f872675..7f5990c 100644 --- a/src/datacustomcode/templates/function/chunking/requirements.txt +++ b/src/datacustomcode/templates/function/chunking/requirements.txt @@ -1,2 +1,2 @@ # Packages required for the chunking function -langchain-text-splitters>=0.3.0 \ No newline at end of file +langchain-text-splitters>=0.3.0 From 3702b2b0fe74680f2ac56724b786af78bb7ba346 Mon Sep 17 00:00:00 2001 From: Rita Agarwala Date: Wed, 29 Apr 2026 17:57:27 +0530 Subject: [PATCH 08/19] Make lint --- src/datacustomcode/cli.py | 2 +- src/datacustomcode/function_utils.py | 6 +++--- src/datacustomcode/template.py | 3 ++- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/datacustomcode/cli.py b/src/datacustomcode/cli.py index 381deff..c061371 100644 --- a/src/datacustomcode/cli.py +++ b/src/datacustomcode/cli.py @@ -208,9 +208,9 @@ def deploy( network: str, sf_cli_org: Optional[str], ): + from datacustomcode.constants import USE_IN_FEATURE_MAPPING_FOR_CONNECT_API from datacustomcode.deploy import ( COMPUTE_TYPES, - USE_IN_FEATURE_MAPPING_FOR_CONNECT_API, CodeExtensionMetadata, deploy_full, infer_use_in_feature, diff --git a/src/datacustomcode/function_utils.py b/src/datacustomcode/function_utils.py index f803b7a..e1c91aa 100644 --- a/src/datacustomcode/function_utils.py +++ b/src/datacustomcode/function_utils.py @@ -79,7 +79,7 @@ def get_type_name(type_annotation: Any) -> Optional[str]: return None if hasattr(type_annotation, "__name__"): - return type_annotation.__name__ + return str(type_annotation.__name__) return str(type_annotation) @@ -230,14 +230,14 @@ def inspect_function_types( return None, None -def get_request_type(entrypoint_path: str) -> Optional[Any]: +def get_request_type(entrypoint_path: str) -> Any: """Get the request type annotation from a function entrypoint. 
Args: entrypoint_path: Path to the entrypoint.py file Returns: - The request type (Pydantic model class), or None if not found + The request type (Pydantic model class) Raises: ImportError: If the module cannot be loaded diff --git a/src/datacustomcode/template.py b/src/datacustomcode/template.py index 8fb67ad..6807510 100644 --- a/src/datacustomcode/template.py +++ b/src/datacustomcode/template.py @@ -14,6 +14,7 @@ # limitations under the License. import os import shutil +from typing import Optional from loguru import logger @@ -39,7 +40,7 @@ def copy_script_template(target_dir: str) -> None: shutil.copy2(source, destination) -def copy_function_template(target_dir: str, use_in_feature: str) -> None: +def copy_function_template(target_dir: str, use_in_feature: Optional[str]) -> None: os.makedirs(target_dir, exist_ok=True) # First, copy common files from base function template From 6a2b7bdeda74e7924c9aca85813750f407ac5355 Mon Sep 17 00:00:00 2001 From: Rita Agarwala Date: Wed, 29 Apr 2026 22:34:34 +0530 Subject: [PATCH 09/19] changing the argument name --- src/datacustomcode/cli.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/datacustomcode/cli.py b/src/datacustomcode/cli.py index c061371..a3aeda2 100644 --- a/src/datacustomcode/cli.py +++ b/src/datacustomcode/cli.py @@ -280,7 +280,7 @@ def deploy( @click.option( "--use-in-feature", default="SearchIndexChunking", - help="Feature to invoke the function (only applicable for functions). 
If not provided, will be inferred from function signature.", + help="Feature where this function will be used (only applicable for function).", ) def init(directory: str, code_type: str, use_in_feature: Optional[str]): from datacustomcode.scan import ( @@ -331,7 +331,7 @@ def init(directory: str, code_type: str, use_in_feature: Optional[str]): click.echo( "Test your function locally with " + click.style( - f"datacustomcode run {entrypoint_path} --test_with {test_json_path}", + f"datacustomcode run {entrypoint_path} --test-with {test_json_path}", fg="blue", bold=True, ) @@ -375,7 +375,7 @@ def scan(filename: str, config: str, dry_run: bool, no_requirements: bool): @click.option("--dependencies", default=[], multiple=True) @click.option("--profile", default="default") @click.option( - "--test_with", + "--test-with", default=None, type=click.Path(exists=True), help="Path to test JSON file for function testing", From b0608ea8c6082a6af8b789a9480d093831e21be2 Mon Sep 17 00:00:00 2001 From: Rita Agarwala Date: Thu, 30 Apr 2026 13:26:50 +0530 Subject: [PATCH 10/19] Removing function_invoke_option testcases --- tests/test_cli.py | 11 +++++------ tests/test_sf_cli_contract.py | 20 -------------------- 2 files changed, 5 insertions(+), 26 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index e26cbdc..7765560 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -103,16 +103,19 @@ def test_deploy_command_success(self, mock_token_provider, mock_deploy_full): assert call_args[0][2].access_token == "test_token" assert call_args[0][2].instance_url == "https://instance.example.com" + @patch("datacustomcode.deploy.infer_use_in_feature") @patch("datacustomcode.deploy.deploy_full") @patch("datacustomcode.token_provider.CredentialsTokenProvider") def test_deploy_command_function_invoke_options( - self, mock_token_provider, mock_deploy_full + self, mock_token_provider, mock_deploy_full, mock_infer_feature ): """Test deploy command with function invoke options.""" 
mock_provider_instance = mock_token_provider.return_value mock_provider_instance.get_token.return_value = AccessTokenResponse( access_token="test_token", instance_url="https://instance.example.com" ) + # Mock infer_use_in_feature to return a valid feature + mock_infer_feature.return_value = "SearchIndexChunking" runner = CliRunner() with runner.isolated_filesystem(): @@ -122,16 +125,12 @@ def test_deploy_command_function_invoke_options( write_sdk_config(".", sdk_config) result = runner.invoke( deploy, - ["--name", "test-job", "--function-invoke-opt", "option1,option2"], + ["--name", "test-job"], ) assert result.exit_code == 0 mock_deploy_full.assert_called_once() - # Check that deploy_full was called with function invoke options - call_args = mock_deploy_full.call_args - assert call_args[0][1].functionInvokeOptions == ["option1", "option2"] - @patch("datacustomcode.token_provider.CredentialsTokenProvider") def test_deploy_command_credentials_error(self, mock_token_provider): """Test deploy command when credentials are not available.""" diff --git a/tests/test_sf_cli_contract.py b/tests/test_sf_cli_contract.py index b96ab35..412b4c2 100644 --- a/tests/test_sf_cli_contract.py +++ b/tests/test_sf_cli_contract.py @@ -188,26 +188,6 @@ def test_accepts_network_flag( result = runner.invoke(deploy, [*self._BASE_ARGS, "--network", "custom"]) assert result.exit_code != 2, result.output - @patch("datacustomcode.token_provider.SFCLITokenProvider") - @patch("datacustomcode.deploy.deploy_full") - @patch("datacustomcode.cli.find_base_directory") - @patch("datacustomcode.cli.get_package_type") - def test_accepts_function_invoke_opt_flag( - self, mock_pkg_type, mock_find_base, mock_deploy_full, mock_sf_cli_provider - ): - mock_find_base.return_value = "payload" - mock_pkg_type.return_value = "function" - mock_provider_instance = mock_sf_cli_provider.return_value - mock_provider_instance.get_token.return_value = AccessTokenResponse( - access_token="tok", 
instance_url="https://example.com" - ) - runner = CliRunner() - result = runner.invoke( - deploy, [*self._BASE_ARGS, "--function-invoke-opt", "ASYNC"] - ) - assert result.exit_code != 2, result.output - - class TestRunArgContract: """ SF CLI spawn: From 9b3cd220c4c022c2b7bc3326caad9434b676bc8c Mon Sep 17 00:00:00 2001 From: Rita Agarwala Date: Thu, 30 Apr 2026 17:27:16 +0530 Subject: [PATCH 11/19] Adding testcase for function_utils.py --- tests/test_function_utils.py | 247 +++++++++++++++++++++++++++++++++++ 1 file changed, 247 insertions(+) create mode 100644 tests/test_function_utils.py diff --git a/tests/test_function_utils.py b/tests/test_function_utils.py new file mode 100644 index 0000000..f5d7bba --- /dev/null +++ b/tests/test_function_utils.py @@ -0,0 +1,247 @@ +# Copyright (c) 2025, Salesforce, Inc. +# SPDX-License-Identifier: Apache-2 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import json +import os +import shutil +import sys +import tempfile +import textwrap +from typing import List + +import pytest +from pydantic import BaseModel + +from datacustomcode import function_utils + + +class SampleRequest(BaseModel): + message: str + count: int = 5 + tags: List[str] = [] + version: str = "v1" + + +@pytest.fixture +def sample_entrypoint(): + """Create a temporary entrypoint file with a function.""" + with tempfile.NamedTemporaryFile( + mode="w", suffix=".py", delete=False + ) as temp_file: + entrypoint_content = textwrap.dedent( + """ + from typing import List + from pydantic import BaseModel + + class SampleRequest(BaseModel): + message: str + count: int = 5 + tags: List[str] = [] + version: str = "v1" + + class SampleResponse(BaseModel): + result: str + success: bool = True + + def function(request: SampleRequest) -> SampleResponse: + return SampleResponse(result=f"Processed {request.message}") + """ + ) + temp_file.write(entrypoint_content) + temp_file_path = temp_file.name + + yield temp_file_path + + if os.path.exists(temp_file_path): + os.unlink(temp_file_path) + + +@pytest.fixture +def entrypoint_no_annotations(): + """Create an entrypoint with no type annotations.""" + with tempfile.NamedTemporaryFile( + mode="w", suffix=".py", delete=False + ) as temp_file: + entrypoint_content = textwrap.dedent( + """ + def function(request): + return {"result": "no annotations"} + """ + ) + temp_file.write(entrypoint_content) + temp_file_path = temp_file.name + + yield temp_file_path + + if os.path.exists(temp_file_path): + os.unlink(temp_file_path) + + +def test_get_function_signature_types(sample_entrypoint, entrypoint_no_annotations): + """Test extracting request and response types from function signatures.""" + module = function_utils.load_function_module(sample_entrypoint) + func = function_utils.get_function_callable(module) + req_type, resp_type, req_name, resp_name = ( + function_utils.get_function_signature_types(func) + ) + + assert 
req_name == "SampleRequest" + assert resp_name == "SampleResponse" + assert req_type is not None + assert resp_type is not None + + module_no_annot = function_utils.load_function_module(entrypoint_no_annotations) + func_no_annot = function_utils.get_function_callable(module_no_annot) + req_type, resp_type, req_name, resp_name = ( + function_utils.get_function_signature_types(func_no_annot) + ) + + assert req_name is None + assert resp_name is None + + +def test_inspect_function_types_static(sample_entrypoint, entrypoint_no_annotations): + """Test static AST-based inspection of function types.""" + req_name, resp_name = function_utils.inspect_function_types_static( + sample_entrypoint + ) + assert req_name == "SampleRequest" + assert resp_name == "SampleResponse" + + req_name, resp_name = function_utils.inspect_function_types_static( + entrypoint_no_annotations + ) + assert req_name is None + assert resp_name is None + +def test_inspect_function_types(sample_entrypoint): + """Test dynamic inspection of function types.""" + req_name, resp_name = function_utils.inspect_function_types(sample_entrypoint) + assert req_name == "SampleRequest" + assert resp_name == "SampleResponse" + + req_name, resp_name = function_utils.inspect_function_types("/nonexistent/file.py") + assert req_name is None + assert resp_name is None + + +def test_get_request_type(sample_entrypoint, entrypoint_no_annotations): + """Test getting request type from entrypoint.""" + req_type = function_utils.get_request_type(sample_entrypoint) + assert req_type is not None + assert hasattr(req_type, "model_fields") + + with pytest.raises(ValueError, match="must have a type annotation"): + function_utils.get_request_type(entrypoint_no_annotations) + + +def test_generate_test_json(): + """Test generating test.json file from entrypoint with simple and complex nested types.""" + temp_dir = tempfile.mkdtemp() + models_file = os.path.join(temp_dir, "test_models.py") + + try: + # Test 1: Simple request type + 
entrypoint_simple = os.path.join(temp_dir, "entrypoint_simple.py") + output_simple = os.path.join(temp_dir, "test_simple.json") + + with open(models_file, "w") as f: + models_content = textwrap.dedent( + """ + from pydantic import BaseModel + from typing import List + + class SimpleRequest(BaseModel): + message: str + count: int = 5 + tags: List[str] = [] + version: str = "v1" + + class NestedConfig(BaseModel): + host: str + port: int = 8080 + enabled: bool = True + + class ComplexRequest(BaseModel): + name: str + max_items: int = 100 + config: NestedConfig + metadata: dict = {} + """ + ) + f.write(models_content) + + with open(entrypoint_simple, "w") as f: + entrypoint_content = textwrap.dedent( + """ + from test_models import SimpleRequest + + def function(request: SimpleRequest): + return {"result": "ok"} + """ + ) + f.write(entrypoint_content) + + sys.path.insert(0, temp_dir) + + function_utils.generate_test_json(entrypoint_simple, output_simple) + assert os.path.exists(output_simple) + + with open(output_simple, "r") as f: + data = json.load(f) + + assert "message" in data + assert data["count"] == 5 + assert data["version"] == "v1" + assert data["tags"] == [] + + # Test 2: Complex request type with nested models + entrypoint_complex = os.path.join(temp_dir, "entrypoint_complex.py") + output_complex = os.path.join(temp_dir, "test_complex.json") + + with open(entrypoint_complex, "w") as f: + entrypoint_content = textwrap.dedent( + """ + from test_models import ComplexRequest + + def function(request: ComplexRequest): + return {"result": "ok"} + """ + ) + f.write(entrypoint_content) + + function_utils.generate_test_json(entrypoint_complex, output_complex) + assert os.path.exists(output_complex) + + with open(output_complex, "r") as f: + complex_data = json.load(f) + + assert "name" in complex_data + assert "max_items" in complex_data + assert complex_data["max_items"] == 100 + assert "config" in complex_data + assert isinstance(complex_data["config"], dict) + 
assert "host" in complex_data["config"] + assert "port" in complex_data["config"] + assert complex_data["config"]["port"] == 8080 + assert complex_data["config"]["enabled"] is True + assert "metadata" in complex_data + assert complex_data["metadata"] == {} + + finally: + if temp_dir in sys.path: + sys.path.remove(temp_dir) + if os.path.exists(temp_dir): + shutil.rmtree(temp_dir) \ No newline at end of file From 45547c181b1ed52d79290ab67480516394b03edf Mon Sep 17 00:00:00 2001 From: Rita Agarwala Date: Thu, 30 Apr 2026 17:50:23 +0530 Subject: [PATCH 12/19] Adding unit test --- tests/test_function_utils.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/tests/test_function_utils.py b/tests/test_function_utils.py index f5d7bba..081c743 100644 --- a/tests/test_function_utils.py +++ b/tests/test_function_utils.py @@ -19,21 +19,12 @@ import sys import tempfile import textwrap -from typing import List import pytest -from pydantic import BaseModel from datacustomcode import function_utils -class SampleRequest(BaseModel): - message: str - count: int = 5 - tags: List[str] = [] - version: str = "v1" - - @pytest.fixture def sample_entrypoint(): """Create a temporary entrypoint file with a function.""" From 2c17783570f233e2e67ee4bc4e412aedf0ec3366 Mon Sep 17 00:00:00 2001 From: Rita Agarwala Date: Thu, 30 Apr 2026 18:40:18 +0530 Subject: [PATCH 13/19] Correcting the testcase --- .github/workflows/sf_cli_integration.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/sf_cli_integration.yml b/.github/workflows/sf_cli_integration.yml index 0d9c17a..bc8a86b 100644 --- a/.github/workflows/sf_cli_integration.yml +++ b/.github/workflows/sf_cli_integration.yml @@ -255,11 +255,11 @@ jobs: # ── Function: run ───────────────────────────────────────────────────────── - - name: '[function] run — sf data-code-extension function run --entrypoint testFunction/payload/entrypoint.py --test_with testFunction/payload/tests/test.json -o dev1' + - 
name: '[function] run — sf data-code-extension function run --entrypoint testFunction/payload/entrypoint.py --test-with testFunction/payload/tests/test.json -o dev1' run: | sf data-code-extension function run \ --entrypoint testFunction/payload/entrypoint.py \ - --test_with testFunction/payload/tests/test.json \ + --test-with testFunction/payload/tests/test.json \ -o dev1 || { echo "::error::sf data-code-extension function run FAILED. Check mock server output above; the --entrypoint flag or SF CLI org auth contract may have changed." exit 1 From 8a9e8f082cd470b3ba0cb3adde7ec34b8d75a5f0 Mon Sep 17 00:00:00 2001 From: Rita Agarwala Date: Thu, 30 Apr 2026 18:58:29 +0530 Subject: [PATCH 14/19] Fixing lint error --- src/datacustomcode/cli.py | 6 ++++-- src/datacustomcode/function_utils.py | 4 +++- src/datacustomcode/run.py | 3 ++- .../function/chunking/payload/entrypoint.py | 5 ++++- tests/test_function_utils.py | 13 +++++-------- tests/test_sf_cli_contract.py | 1 + 6 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/datacustomcode/cli.py b/src/datacustomcode/cli.py index a3aeda2..84da911 100644 --- a/src/datacustomcode/cli.py +++ b/src/datacustomcode/cli.py @@ -249,7 +249,8 @@ def deploy( logger.info(f"Inferred use_in_feature: {use_in_feature}") else: click.secho( - "Error: Could not infer function invoke options. Please provide --use-in-feature", + "Error: Could not infer function invoke options. 
" + "Please provide --use-in-feature", fg="red", ) raise click.Abort() @@ -331,7 +332,8 @@ def init(directory: str, code_type: str, use_in_feature: Optional[str]): click.echo( "Test your function locally with " + click.style( - f"datacustomcode run {entrypoint_path} --test-with {test_json_path}", + f"datacustomcode run {entrypoint_path} " + f"--test-with {test_json_path}", fg="blue", bold=True, ) diff --git a/src/datacustomcode/function_utils.py b/src/datacustomcode/function_utils.py index e1c91aa..8e6f12e 100644 --- a/src/datacustomcode/function_utils.py +++ b/src/datacustomcode/function_utils.py @@ -215,7 +215,9 @@ def inspect_function_types( Either can be None if not found or on error Example: - >>> request_name, response_name = inspect_function_types("payload/entrypoint.py") + >>> request_name, response_name = inspect_function_types( + ... "payload/entrypoint.py" + ... ) >>> print(request_name) # "SearchIndexChunkingV1Request" >>> print(response_name) # "SearchIndexChunkingV1Response" """ diff --git a/src/datacustomcode/run.py b/src/datacustomcode/run.py index 004f724..6322270 100644 --- a/src/datacustomcode/run.py +++ b/src/datacustomcode/run.py @@ -164,7 +164,8 @@ def run_function_with_test(entrypoint: str, test_file: str) -> None: load_function_module, ) - # Import the entrypoint module in the current environment (with all dependencies loaded) + # Import the entrypoint module in the current environment + # (with all dependencies loaded) module = load_function_module(entrypoint, "entrypoint_module") function_callable = get_function_callable(module) request_type = get_request_type(entrypoint) diff --git a/src/datacustomcode/templates/function/chunking/payload/entrypoint.py b/src/datacustomcode/templates/function/chunking/payload/entrypoint.py index 4796ef2..baa9a31 100644 --- a/src/datacustomcode/templates/function/chunking/payload/entrypoint.py +++ b/src/datacustomcode/templates/function/chunking/payload/entrypoint.py @@ -66,6 +66,9 @@ def function( 
output=chunks, status=SearchIndexStatusResponse( status_type="success", - status_message=f"Successfully chunked {len(request.input)} documents into {len(chunks)} chunks", + status_message=( + f"Successfully chunked {len(request.input)} documents " + f"into {len(chunks)} chunks" + ), ), ) diff --git a/tests/test_function_utils.py b/tests/test_function_utils.py index 081c743..cc0f51d 100644 --- a/tests/test_function_utils.py +++ b/tests/test_function_utils.py @@ -28,9 +28,7 @@ @pytest.fixture def sample_entrypoint(): """Create a temporary entrypoint file with a function.""" - with tempfile.NamedTemporaryFile( - mode="w", suffix=".py", delete=False - ) as temp_file: + with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as temp_file: entrypoint_content = textwrap.dedent( """ from typing import List @@ -62,9 +60,7 @@ def function(request: SampleRequest) -> SampleResponse: @pytest.fixture def entrypoint_no_annotations(): """Create an entrypoint with no type annotations.""" - with tempfile.NamedTemporaryFile( - mode="w", suffix=".py", delete=False - ) as temp_file: + with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as temp_file: entrypoint_content = textwrap.dedent( """ def function(request): @@ -117,6 +113,7 @@ def test_inspect_function_types_static(sample_entrypoint, entrypoint_no_annotati assert req_name is None assert resp_name is None + def test_inspect_function_types(sample_entrypoint): """Test dynamic inspection of function types.""" req_name, resp_name = function_utils.inspect_function_types(sample_entrypoint) @@ -139,7 +136,7 @@ def test_get_request_type(sample_entrypoint, entrypoint_no_annotations): def test_generate_test_json(): - """Test generating test.json file from entrypoint with simple and complex nested types.""" + """Test generating test.json with simple and complex nested types.""" temp_dir = tempfile.mkdtemp() models_file = os.path.join(temp_dir, "test_models.py") @@ -235,4 +232,4 @@ def function(request: 
ComplexRequest): if temp_dir in sys.path: sys.path.remove(temp_dir) if os.path.exists(temp_dir): - shutil.rmtree(temp_dir) \ No newline at end of file + shutil.rmtree(temp_dir) diff --git a/tests/test_sf_cli_contract.py b/tests/test_sf_cli_contract.py index 412b4c2..f53123e 100644 --- a/tests/test_sf_cli_contract.py +++ b/tests/test_sf_cli_contract.py @@ -188,6 +188,7 @@ def test_accepts_network_flag( result = runner.invoke(deploy, [*self._BASE_ARGS, "--network", "custom"]) assert result.exit_code != 2, result.output + class TestRunArgContract: """ SF CLI spawn: From e000a0333f60e9f603fd397f34d6be51a6dfd182 Mon Sep 17 00:00:00 2001 From: Rita Agarwala Date: Fri, 1 May 2026 09:47:27 +0530 Subject: [PATCH 15/19] Removing unnecessary emoji --- .../templates/function/chunking/payload/entrypoint.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/datacustomcode/templates/function/chunking/payload/entrypoint.py b/src/datacustomcode/templates/function/chunking/payload/entrypoint.py index baa9a31..311a51b 100644 --- a/src/datacustomcode/templates/function/chunking/payload/entrypoint.py +++ b/src/datacustomcode/templates/function/chunking/payload/entrypoint.py @@ -57,10 +57,10 @@ def function( ) chunks.append(chunk_output) - print(f" ✂️ Chunk {chunk_id}: {len(chunk_text)} chars") + print(f"Chunk {chunk_id}: {len(chunk_text)} chars") chunk_id += 1 - print(f"✅ Generated {len(chunks)} chunks total") + print(f"Generated {len(chunks)} chunks total") return SearchIndexChunkingV1Response( output=chunks, From 6df64a5b2413c6f43244b6a4e7225a874990bbbd Mon Sep 17 00:00:00 2001 From: Rita Agarwala Date: Fri, 1 May 2026 18:49:23 +0530 Subject: [PATCH 16/19] SearchIndexChunking contract --- .../function/feature_types/chunking.py | 165 +++++++++++++----- src/datacustomcode/function_utils.py | 5 +- 2 files changed, 129 insertions(+), 41 deletions(-) diff --git a/src/datacustomcode/function/feature_types/chunking.py 
b/src/datacustomcode/function/feature_types/chunking.py index 1a2f1d7..ab3108a 100644 --- a/src/datacustomcode/function/feature_types/chunking.py +++ b/src/datacustomcode/function/feature_types/chunking.py @@ -14,71 +14,156 @@ # limitations under the License. """ -Pydantic models for byoc-function-proto (uds_chunking.proto) -Auto-generated - validation rules from buf.validate +Pydantic models for Search Index Chunking V1 """ - from typing import ( - Any, Dict, List, + Union ) -from pydantic import BaseModel, Field +from enum import Enum +from pydantic import BaseModel, Field, ConfigDict -class SearchIndexDocElement(BaseModel): - """Document element to be chunked""" +class DocumentType(str, Enum): + """Document type enumeration""" + TEXT = "Text" - text: str = Field(..., description="Text content to be chunked") - metadata: Dict[str, Any] = Field( - default_factory=dict, description="Source document metadata" - ) +class SearchIndexChunkingV1PrependField(BaseModel): + """Field to prepend to chunk content""" + dmo_name: str = Field( + default="", + description="Data Model Object name", + examples=["udmo_1__dlm"] + ) + field_name: str = Field( + default="", + description="Field name to prepend", + examples=["ResolvedFilePath__c"] + ) + value: str = Field( + default="", + description="Field value to prepend", + examples=["udlo_1__dll:quarterly_report.pdf"] + ) + model_config = ConfigDict(extra='ignore') -class SearchIndexChunkOutput(BaseModel): - """Output chunk from the chunking process""" - chunk_id: str = Field(..., description="UUID for this chunk") - chunk_type: str = Field(..., description="Type: 'text'") - text: str = Field(..., description="Chunk text content") - seq_no: int = Field(..., description="Sequential chunk number (1-based)") - metadata: Dict[str, str] = Field( - default_factory=dict, description="Metadata from source (DMO fields)" +class SearchIndexChunkingV1Metadata(BaseModel): + """Metadata for input documents""" + type: DocumentType = Field( + 
default=DocumentType.TEXT, + description="Document type (Text)", + examples=["Text"] ) - tag_metadata: Dict[str, Any] = Field( - default_factory=dict, description="Additional tags" + page_number: int = Field( + default=0, + description="Page number in the source document (0-based)", + examples=[1] ) - citations: Dict[str, Any] = Field( - default_factory=dict, description="Citation information" + speaker: str = Field( + default="", + description="Speaker name for audio/video transcripts", + examples=["Narrator"] ) + start_timestamp: str = Field( + default="", + description="Start timestamp in ISO8601 format: YYYY-MM-DDTHH:MM:SS.ffffff", + examples=["2026-03-25T02:01:24.918000"] + ) + end_timestamp: str = Field( + default="", + description="End timestamp in ISO8601 format: YYYY-MM-DDTHH:MM:SS.ffffff", + examples=["2026-03-25T02:01:30.500000"] + ) + text_as_html: str = Field( + default="", + description="HTML representation of the document text", + examples=["

<table><tr><td>Online Remittance Instructions</td></tr></table>

"] + ) + source_dmo_fields: Dict[str, Union[str, int]] = Field( + default_factory=dict, + description="Source Data Model Object fields as key-value pairs (values can be string or int)", + examples=[ + { + "FilePath__c": "quarterly_report.pdf", + "Size__c": 1377454, + "ContentType__c": "pdf", + "LastModified__c": "2026-03-25T02:01:24.918000" + } + ] + ) + prepend: List[SearchIndexChunkingV1PrependField] = Field( + default_factory=list, + description="List of fields to prepend to each chunk" + ) + model_config = ConfigDict(extra='ignore') -class SearchIndexStatusResponse(BaseModel): - """Status response for operation""" +class SearchIndexChunkingV1DocElement(BaseModel): + """Document element to be chunked""" + text: str = Field( + default="", + description="Text content to be chunked", + examples=["Online Remittance Instructions\n\nTransfer proceeds from the sale of your ESOP/RSUs easily."] + ) + metadata: SearchIndexChunkingV1Metadata = Field( + default_factory=SearchIndexChunkingV1Metadata, + description="Source document metadata" + ) + model_config = ConfigDict(extra='ignore') - status_type: str = Field(..., description="'success' or 'error'") - status_message: str = Field(..., description="Human-readable status") +class SearchIndexChunkingV1Output(BaseModel): + """Output chunk from the chunking process""" + text: str = Field( + default="", + description="Chunk text content", + examples=["Online Remittance Instructions"] + ) + seq_no: int = Field( + default=0, + description="Sequential chunk number (1-based)", + ge=1, + examples=[1] + ) + chunk_id: str = Field( + default="", + description="Unique identifier for this chunk (UUID format)", + examples=["550e8400-e29b-41d4-a716-446655440000"] + ) + chunk_type: str = Field( + default="", + description="Type of chunk (e.g., 'text')", + examples=["text"] + ) + citations: Dict[str, str] = Field( + default_factory=dict, + description="Citation information as key-value pairs", + examples=[{"source": "quarterly_report.pdf"}] 
+ ) + metadata: str = Field( + default="", + description="JSON string containing metadata about the chunking output", + examples=['{"page": 1}'] + ) + model_config = ConfigDict(extra='ignore') -class SearchIndexChunkingV1Request(BaseModel): - """Batch request for UDS chunking""" - input: List[SearchIndexDocElement] = Field( - ..., min_length=1, description="List of documents (min 1)" - ) - max_characters: int = Field(..., description="Max chars per chunk (default: 100)") - additional_params: Dict[str, Any] = Field( - default_factory=dict, description="Future extension point" +class SearchIndexChunkingV1Request(BaseModel): + """Request for Search Index Chunking""" + input: List[SearchIndexChunkingV1DocElement] = Field( + default_factory=list, + description="List of documents to be chunked" ) + model_config = ConfigDict(extra='ignore') class SearchIndexChunkingV1Response(BaseModel): """Batch response for UDS chunking""" - - output: List[SearchIndexChunkOutput] = Field( + output: List[SearchIndexChunkingV1Output] = Field( default_factory=list, description="Flat list of chunks from all docs" ) - status: SearchIndexStatusResponse = Field( - ..., description="Overall operation status" - ) + model_config = ConfigDict(extra='ignore') diff --git a/src/datacustomcode/function_utils.py b/src/datacustomcode/function_utils.py index 8e6f12e..c499526 100644 --- a/src/datacustomcode/function_utils.py +++ b/src/datacustomcode/function_utils.py @@ -275,8 +275,11 @@ def _generate_model_sample_data(model_type): sample_data = {} for field_name, field_info in model_type.model_fields.items(): + # Use examples if available + if field_info.examples and len(field_info.examples) > 0: + sample_data[field_name] = field_info.examples[0] # Check if field has a real default value - if field_info.default is not PydanticUndefined: + elif field_info.default is not PydanticUndefined: sample_data[field_name] = field_info.default else: # Required field or field without default - generate sample From 
2116e5ea03e69020a8ed6deca8ba825eaa530908 Mon Sep 17 00:00:00 2001 From: Rita Agarwala Date: Sat, 2 May 2026 08:59:52 +0530 Subject: [PATCH 17/19] Removing dependency from chunking example --- .github/workflows/sf_cli_integration.yml | 10 +- .../function/feature_types/chunking.py | 135 +++++++++------- .../function/chunking/payload/entrypoint.py | 151 +++++++++++++----- .../function/chunking/requirements.txt | 1 - 4 files changed, 196 insertions(+), 101 deletions(-) diff --git a/.github/workflows/sf_cli_integration.yml b/.github/workflows/sf_cli_integration.yml index bc8a86b..5a515d1 100644 --- a/.github/workflows/sf_cli_integration.yml +++ b/.github/workflows/sf_cli_integration.yml @@ -259,11 +259,10 @@ jobs: run: | sf data-code-extension function run \ --entrypoint testFunction/payload/entrypoint.py \ - --test-with testFunction/payload/tests/test.json \ - -o dev1 || { - echo "::error::sf data-code-extension function run FAILED. Check mock server output above; the --entrypoint flag or SF CLI org auth contract may have changed." - exit 1 - } + --test-with testFunction/payload/tests/test.json || { + echo "::error::sf data-code-extension function run FAILED. Check mock server output above; the --entrypoint flag or SF CLI org auth contract may have changed." + exit 1 + } # ── Function: deploy ───────────────────────────────────────────────────── @@ -275,7 +274,6 @@ jobs: --description "Test function deploy" \ --package-dir testFunction/payload \ --cpu-size CPU_2XL \ - --function-invoke-opt UnstructuredChunking \ -o dev1 || { echo "::error::sf data-code-extension function deploy FAILED. Check mock server output above for which endpoint failed. The deploy command flags or API contract may have changed." 
exit 1 diff --git a/src/datacustomcode/function/feature_types/chunking.py b/src/datacustomcode/function/feature_types/chunking.py index ab3108a..1c1a28a 100644 --- a/src/datacustomcode/function/feature_types/chunking.py +++ b/src/datacustomcode/function/feature_types/chunking.py @@ -16,154 +16,181 @@ """ Pydantic models for Search Index Chunking V1 """ +from enum import Enum from typing import ( Dict, List, - Union + Union, ) -from enum import Enum -from pydantic import BaseModel, Field, ConfigDict +from pydantic import ( + BaseModel, + ConfigDict, + Field, +) class DocumentType(str, Enum): """Document type enumeration""" + TEXT = "Text" + TITLE = "Title" + TABLE = "Table" + IMAGE = "Image" + LIST_ITEM = "ListItem" + CODE_SNIPPET = "CodeSnippet" + PAGE_METADATA = "PageMetadata" + + +class ChunkType(str, Enum): + TEXT = "text" class SearchIndexChunkingV1PrependField(BaseModel): """Field to prepend to chunk content""" + dmo_name: str = Field( - default="", - description="Data Model Object name", - examples=["udmo_1__dlm"] + default="", description="Data Model Object name", examples=["udmo_1__dlm"] ) field_name: str = Field( default="", description="Field name to prepend", - examples=["ResolvedFilePath__c"] + examples=["ResolvedFilePath__c"], ) value: str = Field( default="", description="Field value to prepend", - examples=["udlo_1__dll:quarterly_report.pdf"] + examples=["udlo_1__dll:quarterly_report.pdf"], ) - model_config = ConfigDict(extra='ignore') + model_config = ConfigDict(extra="ignore") -class SearchIndexChunkingV1Metadata(BaseModel): - """Metadata for input documents""" - type: DocumentType = Field( - default=DocumentType.TEXT, - description="Document type (Text)", - examples=["Text"] - ) - page_number: int = Field( - default=0, - description="Page number in the source document (0-based)", - examples=[1] - ) +class SearchIndexChunkingV1TranscriptField(BaseModel): + """Field to prepend to chunk content""" + speaker: str = Field( default="", 
description="Speaker name for audio/video transcripts", - examples=["Narrator"] + examples=["Agent"], ) start_timestamp: str = Field( default="", description="Start timestamp in ISO8601 format: YYYY-MM-DDTHH:MM:SS.ffffff", - examples=["2026-03-25T02:01:24.918000"] + examples=["2026-03-25T02:01:24.918000"], ) end_timestamp: str = Field( default="", description="End timestamp in ISO8601 format: YYYY-MM-DDTHH:MM:SS.ffffff", - examples=["2026-03-25T02:01:30.500000"] + examples=["2026-03-25T02:01:30.500000"], + ) + model_config = ConfigDict(extra="ignore") + + +class SearchIndexChunkingV1Metadata(BaseModel): + """Metadata for input documents""" + + type: DocumentType = Field( + default=DocumentType.TEXT, description="Document type (Text)", examples=["Text"] + ) + transcript_fields: SearchIndexChunkingV1TranscriptField = Field( + default_factory=SearchIndexChunkingV1TranscriptField, + description=( + "Transcript information. Will only be there in case of audio-video files" + ), + ) + page_number: int = Field( + default=0, + description="Page number in the source document (0-based)", + examples=[1], ) text_as_html: str = Field( default="", description="HTML representation of the document text", - examples=["

Online Remittance Instructions

"] + examples=["

Online Remittance Instructions

"], ) source_dmo_fields: Dict[str, Union[str, int]] = Field( default_factory=dict, - description="Source Data Model Object fields as key-value pairs (values can be string or int)", + description=( + "Source Data Model Object fields as key-value pairs " + "(values can be string or int)" + ), examples=[ { "FilePath__c": "quarterly_report.pdf", "Size__c": 1377454, "ContentType__c": "pdf", - "LastModified__c": "2026-03-25T02:01:24.918000" + "LastModified__c": "2026-03-25T02:01:24.918000", } - ] + ], ) prepend: List[SearchIndexChunkingV1PrependField] = Field( - default_factory=list, - description="List of fields to prepend to each chunk" + default_factory=list, description="List of fields to prepend to each chunk" ) - model_config = ConfigDict(extra='ignore') + model_config = ConfigDict(extra="ignore") class SearchIndexChunkingV1DocElement(BaseModel): """Document element to be chunked""" + text: str = Field( default="", description="Text content to be chunked", - examples=["Online Remittance Instructions\n\nTransfer proceeds from the sale of your ESOP/RSUs easily."] + examples=[ + ( + "Online Remittance Instructions\n\n" + "Transfer proceeds from the sale of your ESOP/RSUs easily." 
+ ) + ], ) metadata: SearchIndexChunkingV1Metadata = Field( default_factory=SearchIndexChunkingV1Metadata, - description="Source document metadata" + description="Source document metadata", ) - model_config = ConfigDict(extra='ignore') + model_config = ConfigDict(extra="ignore") class SearchIndexChunkingV1Output(BaseModel): """Output chunk from the chunking process""" + text: str = Field( default="", description="Chunk text content", - examples=["Online Remittance Instructions"] + examples=["Online Remittance Instructions"], ) seq_no: int = Field( - default=0, - description="Sequential chunk number (1-based)", - ge=1, - examples=[1] + default=0, description="Sequential chunk number (1-based)", ge=1, examples=[1] ) chunk_id: str = Field( default="", description="Unique identifier for this chunk (UUID format)", - examples=["550e8400-e29b-41d4-a716-446655440000"] + examples=["550e8400-e29b-41d4-a716-446655440000"], ) - chunk_type: str = Field( - default="", + chunk_type: ChunkType = Field( + default=ChunkType.TEXT, description="Type of chunk (e.g., 'text')", - examples=["text"] + examples=["text"], ) citations: Dict[str, str] = Field( default_factory=dict, description="Citation information as key-value pairs", - examples=[{"source": "quarterly_report.pdf"}] + examples=[{"source": "quarterly_report.pdf"}], ) - metadata: str = Field( - default="", - description="JSON string containing metadata about the chunking output", - examples=['{"page": 1}'] - ) - model_config = ConfigDict(extra='ignore') + model_config = ConfigDict(extra="ignore") class SearchIndexChunkingV1Request(BaseModel): """Request for Search Index Chunking""" + input: List[SearchIndexChunkingV1DocElement] = Field( - default_factory=list, - description="List of documents to be chunked" + default_factory=list, description="List of documents to be chunked" ) - model_config = ConfigDict(extra='ignore') + model_config = ConfigDict(extra="ignore") class SearchIndexChunkingV1Response(BaseModel): """Batch response 
for UDS chunking""" + output: List[SearchIndexChunkingV1Output] = Field( default_factory=list, description="Flat list of chunks from all docs" ) - model_config = ConfigDict(extra='ignore') + model_config = ConfigDict(extra="ignore") diff --git a/src/datacustomcode/templates/function/chunking/payload/entrypoint.py b/src/datacustomcode/templates/function/chunking/payload/entrypoint.py index 311a51b..dd199a7 100644 --- a/src/datacustomcode/templates/function/chunking/payload/entrypoint.py +++ b/src/datacustomcode/templates/function/chunking/payload/entrypoint.py @@ -1,74 +1,145 @@ import logging - -from langchain_text_splitters import RecursiveCharacterTextSplitter +import uuid from datacustomcode.function import Runtime from datacustomcode.function.feature_types.chunking import ( + ChunkType, + SearchIndexChunkingV1Output, SearchIndexChunkingV1Request, SearchIndexChunkingV1Response, - SearchIndexChunkOutput, - SearchIndexStatusResponse, ) logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) +# Default max chunk size (can be overridden if contract adds max_characters field) +DEFAULT_MAX_CHUNK_SIZE = 50 + + +def split_text_into_chunks(text: str, max_size: int, overlap: int = 20): + """Split text into chunks with overlap, trying to break at natural boundaries. + + Tries to break at natural boundaries in order of preference: + 1. Paragraph boundaries (\\n\\n) + 2. Line boundaries (\\n) + 3. Sentence boundaries (. ! ?) + 4. Word boundaries (space) + 5. 
Hard cut if no good boundary found + + Args: + text: Text to split + max_size: Maximum characters per chunk + overlap: Number of characters to overlap between chunks + + Returns: + List of text chunks + """ + if len(text) <= max_size: + return [text] + + chunks = [] + start = 0 + + while start < len(text): + # Determine end position for this chunk + end = start + max_size + + if end >= len(text): + # Last chunk + chunks.append(text[start:]) + break + + # Try to find a good breaking point (in order of preference) + chunk_text = text[start:end] + break_point = None + + # Try to break at paragraph boundary (\n\n) + last_paragraph = chunk_text.rfind("\n\n") + if last_paragraph > max_size * 0.5: # Only if it's past halfway + break_point = start + last_paragraph + 2 # +2 to skip the \n\n + + # Try to break at line boundary (\n) + if break_point is None: + last_newline = chunk_text.rfind("\n") + if last_newline > max_size * 0.5: + break_point = start + last_newline + 1 + + # Try to break at sentence boundary (. ! ?) + if break_point is None: + for punct in [". ", "! ", "? "]: + last_sentence = chunk_text.rfind(punct) + if last_sentence > max_size * 0.5: + break_point = start + last_sentence + len(punct) + break + + # Try to break at word boundary (space) + if break_point is None: + last_space = chunk_text.rfind(" ") + if last_space > max_size * 0.5: + break_point = start + last_space + 1 + + # If no good breaking point, just hard cut + if break_point is None: + break_point = end + + chunks.append(text[start:break_point].strip()) + + # Move start position with overlap + start = max(break_point - overlap, start + 1) + + return chunks + def function( request: SearchIndexChunkingV1Request, runtime: Runtime ) -> SearchIndexChunkingV1Response: - print(f"Received {len(request.input)} documents to chunk") - print(f"Max characters per chunk: {request.max_characters}") + """Chunk documents into smaller pieces for search indexing. 
+ + Args: + request: SearchIndexChunkingV1Request with input documents + runtime: Runtime context (unused but required by contract) - # Initialize RecursiveCharacterTextSplitter - # It tries to split on: "\n\n", "\n", " ", "" (in that order) - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=request.max_characters, - chunk_overlap=20, # Small overlap to maintain context - length_function=len, - separators=["\n\n", "\n", " ", ""], - ) + Returns: + SearchIndexChunkingV1Response with chunked output + """ + print(f"Received {len(request.input)} documents to chunk") chunks = [] - chunk_id = 1 + seq_no = 1 + + # Use default max chunk size + max_chunk_size = DEFAULT_MAX_CHUNK_SIZE # Process each document for doc_idx, doc in enumerate(request.input): text = doc.text - metadata = doc.metadata if hasattr(doc.metadata, "__iter__") else {} + metadata = doc.metadata - print(f"📄 Processing document {doc_idx + 1}: {len(text)} characters") + print(f"Processing document {doc_idx + 1}: {len(text)} characters") - # Split the text using RecursiveCharacterTextSplitter - text_chunks = text_splitter.split_text(text) + # Split the text using our simple chunking algorithm + text_chunks = split_text_into_chunks(text, max_chunk_size, overlap=20) # Create chunk outputs for chunk_text in text_chunks: - chunk_output = SearchIndexChunkOutput( - chunk_id=f"chunk_{chunk_id:04d}", - chunk_type="text", + # Create citations from source_dmo_fields if available + citations = {} + if metadata.source_dmo_fields: + for key, value in metadata.source_dmo_fields.items(): + citations[key] = str(value) + + chunk_output = SearchIndexChunkingV1Output( + chunk_id=str(uuid.uuid4()), + chunk_type=ChunkType.TEXT, text=chunk_text.strip(), - seq_no=chunk_id, - metadata={ - k: str(v) for k, v in (dict(metadata) if metadata else {}).items() - }, - tag_metadata={}, - citations={}, + seq_no=seq_no, + citations=citations, ) chunks.append(chunk_output) - print(f"Chunk {chunk_id}: {len(chunk_text)} chars") - 
chunk_id += 1 + print(f"Chunk {seq_no}: {len(chunk_text)} chars") + seq_no += 1 print(f"Generated {len(chunks)} chunks total") - return SearchIndexChunkingV1Response( - output=chunks, - status=SearchIndexStatusResponse( - status_type="success", - status_message=( - f"Successfully chunked {len(request.input)} documents " - f"into {len(chunks)} chunks" - ), - ), - ) + return SearchIndexChunkingV1Response(output=chunks) diff --git a/src/datacustomcode/templates/function/chunking/requirements.txt b/src/datacustomcode/templates/function/chunking/requirements.txt index 7f5990c..219536a 100644 --- a/src/datacustomcode/templates/function/chunking/requirements.txt +++ b/src/datacustomcode/templates/function/chunking/requirements.txt @@ -1,2 +1 @@ # Packages required for the chunking function -langchain-text-splitters>=0.3.0 From cb102376835a6bc8637a250cdde7d7e6b93baff6 Mon Sep 17 00:00:00 2001 From: Rita Agarwala Date: Sat, 2 May 2026 11:45:20 +0530 Subject: [PATCH 18/19] Updating DocumentType value --- .../function/feature_types/chunking.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/datacustomcode/function/feature_types/chunking.py b/src/datacustomcode/function/feature_types/chunking.py index 1c1a28a..fe0a239 100644 --- a/src/datacustomcode/function/feature_types/chunking.py +++ b/src/datacustomcode/function/feature_types/chunking.py @@ -33,13 +33,13 @@ class DocumentType(str, Enum): """Document type enumeration""" - TEXT = "Text" - TITLE = "Title" - TABLE = "Table" - IMAGE = "Image" - LIST_ITEM = "ListItem" - CODE_SNIPPET = "CodeSnippet" - PAGE_METADATA = "PageMetadata" + TEXT = "text" + TITLE = "title" + TABLE = "table" + IMAGE = "image" + LIST_ITEM = "list_item" + CODE_SNIPPET = "code_snippet" + PAGE_METADATA = "page_metadata" class ChunkType(str, Enum): @@ -90,7 +90,7 @@ class SearchIndexChunkingV1Metadata(BaseModel): """Metadata for input documents""" type: DocumentType = Field( - default=DocumentType.TEXT, 
description="Document type (Text)", examples=["Text"] + default=DocumentType.TEXT, description="Document type (text)", examples=["text"] ) transcript_fields: SearchIndexChunkingV1TranscriptField = Field( default_factory=SearchIndexChunkingV1TranscriptField, From 2c8f040c56ad14e0d615aee5066dad1c09c2df09 Mon Sep 17 00:00:00 2001 From: Rita Agarwala Date: Mon, 4 May 2026 13:15:38 +0530 Subject: [PATCH 19/19] Making text_as_html optional as it can be null --- src/datacustomcode/function/feature_types/chunking.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/datacustomcode/function/feature_types/chunking.py b/src/datacustomcode/function/feature_types/chunking.py index fe0a239..1425921 100644 --- a/src/datacustomcode/function/feature_types/chunking.py +++ b/src/datacustomcode/function/feature_types/chunking.py @@ -20,6 +20,7 @@ from typing import ( Dict, List, + Optional, Union, ) @@ -103,8 +104,8 @@ class SearchIndexChunkingV1Metadata(BaseModel): description="Page number in the source document (0-based)", examples=[1], ) - text_as_html: str = Field( - default="", + text_as_html: Optional[str] = Field( + default=None, description="HTML representation of the document text", examples=["

Online Remittance Instructions

"], )