Azure / static-web-apps

Azure Static Web Apps. For bugs and feature requests, please create an issue in this repo. For community discussions, latest updates, kindly refer to the Discussions Tab. To know what's new in Static Web Apps, visit https://aka.ms/swa/ThisMonth

Home Page:https://aka.ms/swa

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Python 3.10 SWA managed functions with response streaming intermittently fail to load on cold start.

rowanhoy opened this issue · comments

commented

Describe the bug

Python 3.10 SWA managed functions that implement streaming, intermittently fail to load on cold start. This issue does not occur with identical code/host.json in a self-managed consumption function app deployment, only when deployed as part of a SWA.

Due to startup failures clients frequently return 404 errors.

image

Application Insights shows that the functions are not always loaded

image

App insights logs from failed startup:

timestamp [UTC]	message
5/24/2024, 9:20:02.469 AM	Initializing Warmup Extension.
5/24/2024, 9:20:02.520 AM	Initializing Host. OperationId: 'ceff11f0-71a4-4cec-92b2-8815f2aac725'.
5/24/2024, 9:20:02.521 AM	Host initialization: ConsecutiveErrors=0, StartupCount=3, OperationId=ceff11f0-71a4-4cec-92b2-8815f2aac725
5/24/2024, 9:20:02.531 AM	"ApplicationInsightsLoggerOptions
{
  ""SamplingSettings"": {
    ""EvaluationInterval"": ""00:00:15"",
    ""InitialSamplingPercentage"": 100.0,
    ""MaxSamplingPercentage"": 100.0,
    ""MaxTelemetryItemsPerSecond"": 20.0,
    ""MinSamplingPercentage"": 0.1,
    ""MovingAverageRatio"": 0.25,
    ""SamplingPercentageDecreaseTimeout"": ""00:02:00"",
    ""SamplingPercentageIncreaseTimeout"": ""00:15:00""
  },
  ""SamplingExcludedTypes"": null,
  ""SamplingIncludedTypes"": null,
  ""SnapshotConfiguration"": null,
  ""EnablePerformanceCountersCollection"": true,
  ""HttpAutoCollectionOptions"": {
    ""EnableHttpTriggerExtendedInfoCollection"": true,
    ""EnableW3CDistributedTracing"": true,
    ""EnableResponseHeaderInjection"": true
  },
  ""LiveMetricsInitializationDelay"": ""00:00:15"",
  ""EnableLiveMetrics"": true,
  ""EnableLiveMetricsFilters"": false,
  ""EnableQueryStringTracing"": false,
  ""EnableDependencyTracking"": true,
  ""DependencyTrackingOptions"": null,
  ""TokenCredentialOptions"": null,
  ""DiagnosticsEventListenerLogLevel"": null,
  ""EnableAutocollectedMetricsExtractor"": false,
  ""EnableMetricsCustomDimensionOptimization"": false
}"
5/24/2024, 9:20:02.532 AM	"LoggerFilterOptions
{
  ""MinLevel"": ""None"",
  ""Rules"": [
    {
      ""ProviderName"": null,
      ""CategoryName"": null,
      ""LogLevel"": null,
      ""Filter"": ""<AddFilter>b__0""
    },
    {
      ""ProviderName"": ""Microsoft.Azure.WebJobs.Script.WebHost.Diagnostics.SystemLoggerProvider"",
      ""CategoryName"": null,
      ""LogLevel"": ""None"",
      ""Filter"": null
    },
    {
      ""ProviderName"": ""Microsoft.Azure.WebJobs.Script.WebHost.Diagnostics.SystemLoggerProvider"",
      ""CategoryName"": null,
      ""LogLevel"": null,
      ""Filter"": ""<AddFilter>b__0""
    },
    {
      ""ProviderName"": ""Microsoft.Azure.WebJobs.Logging.ApplicationInsights.ApplicationInsightsLoggerProvider"",
      ""CategoryName"": null,
      ""LogLevel"": ""Trace"",
      ""Filter"": null
    }
  ]
}"
5/24/2024, 9:20:02.533 AM	"LanguageWorkerOptions
{
  ""WorkerConfigs"": [
    {
      ""Description"": {
        ""Language"": ""python"",
        ""DefaultRuntimeName"": null,
        ""DefaultRuntimeVersion"": ""3.10"",
        ""SupportedArchitectures"": [
          ""X64"",
          ""X86"",
          ""Arm64""
        ],
        ""SupportedOperatingSystems"": [
          ""LINUX"",
          ""OSX"",
          ""WINDOWS""
        ],
        ""SupportedRuntimeVersions"": [
          ""3.7"",
          ""3.8"",
          ""3.9"",
          ""3.10"",
          ""3.11""
        ],
        ""SanitizeRuntimeVersionRegex"": null,
        ""WorkerIndexing"": ""true"",
        ""Extensions"": [
          "".py""
        ],
        ""UseStdErrorStreamForErrorsOnly"": false,
        ""DefaultExecutablePath"": ""python"",
        ""DefaultWorkerPath"": ""/azure-functions-host/workers/python/3.10/LINUX/X64/worker.py"",
        ""WorkerDirectory"": ""/azure-functions-host/workers/python"",
        ""Arguments"": [],
        ""WorkerArguments"": null
      },
      ""Arguments"": {
        ""ExecutablePath"": ""python"",
        ""ExecutableArguments"": [],
        ""WorkerPath"": ""/azure-functions-host/workers/python/3.10/LINUX/X64/worker.py"",
        ""WorkerArguments"": []
      },
      ""CountOptions"": {
        ""SetProcessCountToNumberOfCpuCores"": false,
        ""ProcessCount"": 1,
        ""MaxProcessCount"": 10,
        ""ProcessStartupInterval"": ""00:00:10"",
        ""ProcessStartupTimeout"": ""00:01:00"",
        ""InitializationTimeout"": ""00:00:10"",
        ""EnvironmentReloadTimeout"": ""00:00:30"",
        ""ProcessRestartInterval"": ""00:00:10"",
        ""ProcessShutdownTimeout"": ""00:00:10""
      }
    }
  ]
}"
5/24/2024, 9:20:02.533 AM	"LoggerFilterOptions
{
  ""MinLevel"": ""None"",
  ""Rules"": [
    {
      ""ProviderName"": null,
      ""CategoryName"": null,
      ""LogLevel"": null,
      ""Filter"": ""<AddFilter>b__0""
    },
    {
      ""ProviderName"": ""Microsoft.Azure.WebJobs.Script.WebHost.Diagnostics.SystemLoggerProvider"",
      ""CategoryName"": null,
      ""LogLevel"": ""None"",
      ""Filter"": null
    },
    {
      ""ProviderName"": ""Microsoft.Azure.WebJobs.Script.WebHost.Diagnostics.SystemLoggerProvider"",
      ""CategoryName"": null,
      ""LogLevel"": null,
      ""Filter"": ""<AddFilter>b__0""
    },
    {
      ""ProviderName"": ""Microsoft.Azure.WebJobs.Logging.ApplicationInsights.ApplicationInsightsLoggerProvider"",
      ""CategoryName"": null,
      ""LogLevel"": ""Trace"",
      ""Filter"": null
    }
  ]
}"
5/24/2024, 9:20:02.534 AM	"FunctionResultAggregatorOptions
{
  ""BatchSize"": 1000,
  ""FlushTimeout"": ""00:00:30"",
  ""IsEnabled"": true
}"
5/24/2024, 9:20:02.534 AM	"ConcurrencyOptions
{
  ""DynamicConcurrencyEnabled"": false,
  ""MaximumFunctionConcurrency"": 500,
  ""CPUThreshold"": 0.8,
  ""SnapshotPersistenceEnabled"": true
}"
5/24/2024, 9:20:02.535 AM	"SingletonOptions
{
  ""LockPeriod"": ""00:00:15"",
  ""ListenerLockPeriod"": ""00:01:00"",
  ""LockAcquisitionTimeout"": ""10675199.02:48:05.4775807"",
  ""LockAcquisitionPollingInterval"": ""00:00:05"",
  ""ListenerLockRecoveryPollingInterval"": ""00:01:00""
}"
5/24/2024, 9:20:02.550 AM	"ScaleOptions
{
  ""ScaleMetricsMaxAge"": ""00:02:00"",
  ""ScaleMetricsSampleInterval"": ""00:00:10"",
  ""MetricsPurgeEnabled"": true,
  ""IsTargetScalingEnabled"": true,
  ""IsRuntimeScalingEnabled"": false
}"
5/24/2024, 9:20:02.551 AM	Starting JobHost
5/24/2024, 9:20:02.552 AM	Loading functions metadata
5/24/2024, 9:20:02.552 AM	Starting Host (HostId=049749cd-d7f8-4817-a36f-6d3713a6, InstanceId=99e0e332-9338-414b-9ec7-37cb3a2be210, Version=4.34.1.1, ProcessId=60, AppDomainId=1, InDebugMode=False, InDiagnosticMode=False, FunctionsExtensionVersion=~4)
5/24/2024, 9:20:02.553 AM	1 functions found (Custom)
5/24/2024, 9:20:02.553 AM	Reading functions metadata (Custom)
5/24/2024, 9:20:02.554 AM	Generating 0 job function(s)
5/24/2024, 9:20:02.554 AM	0 functions loaded
5/24/2024, 9:20:02.557 AM	No job functions found. Try making your job classes and methods public. If you're using binding extensions (e.g. Azure Storage, ServiceBus, Timers, etc.) make sure you've called the registration method for the extension(s) in your startup code (e.g. builder.AddAzureStorage(), builder.AddServiceBus(), builder.AddTimers(), etc.).
5/24/2024, 9:20:02.558 AM	"HttpOptions
{
  ""DynamicThrottlesEnabled"": false,
  ""EnableChunkedRequestBinding"": false,
  ""MaxConcurrentRequests"": -1,
  ""MaxOutstandingRequests"": -1,
  ""RoutePrefix"": ""api""
}"
5/24/2024, 9:20:02.559 AM	Host initialized (7ms)
5/24/2024, 9:20:02.559 AM	"Initializing function HTTP routes
No HTTP routes mapped"
5/24/2024, 9:20:02.560 AM	Job host started
5/24/2024, 9:20:02.560 AM	Host started (7ms)
5/24/2024, 9:20:07.531 AM	Host lock lease acquired by instance ID '000000000000000000000000ED651336'.

To Reproduce
Steps to reproduce the behavior:

  1. Deploy a SWA with a Python 3.10 streaming function. I used the below snippets (lightly modified from the MS announcement post) to reproduce the issue.

function_app.py

import time
import json
import azure.functions as func
from azurefunctions.extensions.http.fastapi import Request, StreamingResponse

app = func.FunctionApp(http_auth_level=func.AuthLevel.ANONYMOUS)

def generate_count():
    """Generate a stream of chronological numbers."""
    count = 0
    while count < 100:
        yield f'data: {json.dumps({"chat": count })}\n\n'
        count += 1
    yield f'data: {json.dumps({"type": "stream-end"})}\n\n'

@app.route(route="stream-chat-query", methods=[func.HttpMethod.GET])
async def stream_chat_query(req: Request) -> StreamingResponse:
    """Endpoint to stream of chronological numbers."""
    return StreamingResponse(generate_count(), media_type="text/event-stream")

requirements.txt

azure-functions
azurefunctions-extensions-http-fastapi

host.json

{
  "version": "2.0",
  "logging": {
    "applicationInsights": {
      "samplingSettings": {
        "isEnabled": true
      }
    }
  },
  "extensionBundle": {
    "id": "Microsoft.Azure.Functions.ExtensionBundle",
    "version": "[4.0.0, 5.0.0)"
  }
}

staticwebapp.config.json

{
  "routes": [
    {
      "route": "/login",
      "rewrite": "/.auth/login/aad"
    },
    {
      "route": "/.auth/login/aad",
      "allowedRoles": [
        "anonymous"
      ]
    },
    {
      "route": "/*",
      "allowedRoles": [
        "authenticated"
      ]
    }
  ],
  "responseOverrides": {
    "401": {
      "statusCode": 302,
      "redirect": "/login"
    }
  },
  "navigationFallback": {
    "rewrite": "index.html",
    "exclude": [
      "*.{png,jpg,gif,svg}",
      "*.css"
    ]
  },
  "auth": {
    "identityProviders": {
      "azureActiveDirectory": {
        "registration": {
          "openIdIssuer": "https://login.microsoftonline.com/######################/v2.0",
          "clientIdSettingName": "ENTRA_CLIENT_ID",
          "clientSecretSettingName": "ENTRA_CLIENT_SECRET"
        }
      }
    }
  },
  "platform": {
    "apiRuntime": "python:3.10"
  }
}

environment variables

[
  {
    "name": "PYTHON_ENABLE_INIT_INDEXING",
    "value": "1"
  },
  {
    "name": "PYTHON_ISOLATE_WORKER_DEPENDENCIES",
    "value": "1"
  }
]
  1. Trigger the function via front end, if successful wait for the functions to de-allocate and attempt again.

Expected behavior
Managed python functions with steaming should load consistently, similar to self-managed consumption functions.

Additional context

  • Issue was experienced on a standard SWA plan deployed to East Asia with distributed functions disabled.
  • Occasionally during a successful startup http 500 errors were returned to the client until the functions had fully initialized.
commented

No longer able to reproduce this issue - no code changes.