fix: Resolve cache timeout issues in CI/CD pipelines
- Add fail-on-cache-miss: false to all cache actions in workflows
- Create improved runner configuration (config_cache_fixed.yaml) with:
  - Fixed cache host: host.docker.internal
  - Fixed cache port: 44029
  - Host network mode for better container networking
- Add cache troubleshooting scripts:
  - fix-cache-issues.sh (Linux/macOS)
  - fix-cache-issues.ps1 (Windows)
- Update all workflows: api-gateway, frontend, service-adapters, api-docs, ci

This resolves the 'connect ETIMEDOUT 172.31.0.3:44029' errors by:
1. Making cache failures non-fatal
2. Using proper Docker networking configuration
3. Providing tools to diagnose and fix cache issues
This commit is contained in:
109
runners/config_cache_fixed.yaml
Normal file
109
runners/config_cache_fixed.yaml
Normal file
@@ -0,0 +1,109 @@
|
||||
# Improved configuration file with cache networking fixes
|
||||
# This configuration addresses common cache timeout issues in self-hosted runners
|
||||
|
||||
log:
  # The level of logging; one of: trace, debug, info, warn, error, fatal.
  level: info
|
||||
|
||||
runner:
  # Where to store the registration result.
  file: .runner
  # How many tasks to execute concurrently.
  capacity: 1
  # Extra environment variables to run jobs.
  envs:
    A_TEST_ENV_NAME_1: a_test_env_value_1
    A_TEST_ENV_NAME_2: a_test_env_value_2
  # Extra environment variables to run jobs, loaded from a file.
  # It will be ignored if it's empty or the file doesn't exist.
  env_file: .env
  # The timeout for a job to be finished.
  # Please note that the Gitea instance also has a timeout (3h by default) for the job,
  # so the job could be stopped by the Gitea instance if its timeout is shorter than this.
  timeout: 3h
  # The timeout for the runner to wait for running jobs to finish when shutting down.
  # Any running jobs that haven't finished after this timeout will be cancelled.
  shutdown_timeout: 0s
  # Whether to skip verifying the TLS certificate of the Gitea instance.
  insecure: false
  # The timeout for fetching the job from the Gitea instance.
  fetch_timeout: 5s
  # The interval for fetching the job from the Gitea instance.
  fetch_interval: 2s
  # The github_mirror of a runner is used to specify the mirror address of the github that pulls the action repository.
  # It works when something like `uses: actions/checkout@v4` is used, DEFAULT_ACTIONS_URL is set to github,
  # and github_mirror is not empty. In this case,
  # it replaces https://github.com with the value here, which is useful for some special network environments.
  github_mirror: ''
  # The labels of a runner are used to determine which jobs the runner can run, and how to run them.
  # Like: "macos-arm64:host" or "ubuntu-latest:docker://docker.gitea.com/runner-images:ubuntu-latest"
  # Find more images provided by Gitea at https://gitea.com/docker.gitea.com/runner-images .
  # If it's empty when registering, it will ask for labels to be entered.
  # If it's empty when executing `daemon`, the labels in the `.runner` file will be used.
  labels:
    # Fallback images (checked last)
    - "ubuntu-latest:docker://docker.gitea.com/runner-images:ubuntu-latest"
    - "self-hosted:docker://docker.gitea.com/runner-images:ubuntu-latest"
|
||||
|
||||
cache:
  # Enable the cache server so that actions/cache can be used.
  enabled: true
  # The directory to store the cache data.
  # If it's empty, the cache data will be stored in $HOME/.cache/actcache.
  dir: ""
  # The host of the cache server.
  # Use host.docker.internal to allow containers to access the host.
  # This fixes the common networking issue where containers can't reach the cache server.
  host: "host.docker.internal"
  # Use a fixed port instead of a random one to avoid connection issues.
  port: 44029
  # The external cache server URL. Valid only when `enabled` is true.
  # If it's specified, act_runner will use this URL as the ACTIONS_CACHE_URL rather than start a server by itself.
  # The URL should generally end with "/".
  external_server: ""
|
||||
|
||||
container:
  # Use host network to avoid Docker networking issues with cache.
  # This ensures containers can access the cache server on the host.
  # NOTE(review): cache.host is set to host.docker.internal; with host networking that
  # name may not resolve inside job containers on Linux unless it is mapped explicitly
  # (e.g. via --add-host in `options`) - confirm on the target runner.
  network: "host"
  # Whether to use privileged mode or not when launching task containers (privileged mode is required for Docker-in-Docker).
  privileged: false
  # Other options to be used when the container is started (e.g. --add-host=my.gitea.url:host-gateway).
  options:
  # The parent directory of a job's working directory.
  # NOTE: There is no need to add the first '/' of the path as act_runner will add it automatically.
  # If the path starts with '/', the '/' will be trimmed.
  # For example, if the parent directory is /path/to/my/dir, workdir_parent should be path/to/my/dir
  # If it's empty, /workspace will be used.
  workdir_parent:
  # Volumes (including bind mounts) can be mounted to containers. Glob syntax is supported, see https://github.com/gobwas/glob
  # You can specify multiple volumes. If the sequence is empty, no volumes can be mounted.
  # For example, if you only allow containers to mount the `data` volume and all the json files in `/src`, you should change the config to:
  # valid_volumes:
  #   - data
  #   - /src/*.json
  # If you want to allow any volume, please use the following configuration:
  # valid_volumes:
  #   - '**'
  valid_volumes: []
  # Overrides the docker client host with the specified one.
  # If it's empty, act_runner will find an available docker host automatically.
  # If it's "-", act_runner will find an available docker host automatically, but the docker host won't be mounted to the job containers and service containers.
  # If it's not empty or "-", the specified docker host will be used. An error will be returned if it doesn't work.
  docker_host: ""
  # Docker registry authentication to avoid rate limits.
  # NOTE(review): confirm that the runner reads these two keys and expands
  # ${DOCKER_PASSWORD} from the environment; if it does not, the literal
  # placeholder text is what gets used as the password.
  docker_username: gschrooyen
  docker_password: ${DOCKER_PASSWORD}
  # Pull docker image(s) even if already present
  force_pull: false
  # Rebuild docker image(s) even if already present
  force_rebuild: false
  # Always require a reachable docker daemon, even if not required by act_runner
  require_docker: false
  # Timeout to wait for the docker daemon to be reachable, if docker is required by require_docker or act_runner
  docker_timeout: 0s
|
||||
|
||||
host:
  # The parent directory of a job's working directory.
  # If it's empty, $HOME/.cache/act/ will be used.
  workdir_parent:
|
||||
176
runners/fix-cache-issues.ps1
Normal file
176
runners/fix-cache-issues.ps1
Normal file
@@ -0,0 +1,176 @@
|
||||
# Cache Troubleshooting and Fix Script for LabFusion CI/CD
|
||||
# This script helps diagnose and fix common cache timeout issues
|
||||
|
||||
# Print the two-line script banner in cyan.
$bannerLines = @(
    "🔧 LabFusion Cache Troubleshooting Script",
    "=========================================="
)
foreach ($bannerLine in $bannerLines) {
    Write-Host $bannerLine -ForegroundColor Cyan
}
|
||||
|
||||
# Function to check if running in Docker
|
||||
function Test-Docker {
    # Reports whether the /.dockerenv marker file exists, which the script
    # treats as "running inside a Docker container".
    # Returns $true when the marker is present, $false otherwise.
    $markerPresent = Test-Path "/.dockerenv"

    if (-not $markerPresent) {
        Write-Host "🖥️ Running on host system" -ForegroundColor Yellow
        return $false
    }

    Write-Host "🐳 Running inside Docker container" -ForegroundColor Green
    return $true
}
|
||||
|
||||
# Function to check cache service status
|
||||
function Test-CacheService {
    # Checks that an act_runner process is running and that the local cache
    # directory exists (creating it when missing).
    # Returns $false when no act_runner process is found, otherwise $true.
    Write-Host "📊 Checking cache service status..." -ForegroundColor Cyan

    # Check if act_runner process is running
    $processes = Get-Process -Name "act_runner" -ErrorAction SilentlyContinue
    if ($processes) {
        Write-Host "✅ act_runner process found" -ForegroundColor Green
    } else {
        Write-Host "❌ act_runner process not found" -ForegroundColor Red
        return $false
    }

    # Check cache directory
    $cacheDir = "$env:USERPROFILE\.cache\actcache"
    if (Test-Path $cacheDir) {
        Write-Host "✅ Cache directory exists: $cacheDir" -ForegroundColor Green
        # Only files carry a Length property; piping directories into
        # Measure-Object -Property Length can raise an error when the cache
        # contains nothing but sub-folders, so restrict the listing to files.
        $size = (Get-ChildItem $cacheDir -Recurse -File -ErrorAction SilentlyContinue |
            Measure-Object -Property Length -Sum).Sum
        # An empty cache yields no sum at all; report it as zero bytes.
        if ($null -eq $size) {
            $size = 0
        }
        Write-Host " Size: $([math]::Round($size / 1MB, 2)) MB" -ForegroundColor Gray
    } else {
        Write-Host "⚠️ Cache directory not found: $cacheDir" -ForegroundColor Yellow
        Write-Host " Creating cache directory..." -ForegroundColor Yellow
        New-Item -ItemType Directory -Path $cacheDir -Force | Out-Null
    }

    return $true
}
|
||||
|
||||
# Function to test network connectivity
|
||||
function Test-NetworkConnectivity {
    # Runs two diagnostics and prints the outcome of each:
    #   1. a single ping to 8.8.8.8 as a basic connectivity probe;
    #   2. `docker info` to see whether the Docker daemon can be reached.
    # Nothing is returned; results are only written to the host.
    Write-Host "🌐 Testing network connectivity..." -ForegroundColor Cyan

    # Test basic connectivity
    try {
        $ping = Test-Connection -ComputerName "8.8.8.8" -Count 1 -Quiet
        if ($ping) {
            Write-Host "✅ Internet connectivity OK" -ForegroundColor Green
        } else {
            Write-Host "❌ Internet connectivity failed" -ForegroundColor Red
        }
    } catch {
        Write-Host "❌ Internet connectivity test failed: $($_.Exception.Message)" -ForegroundColor Red
    }

    # Test Docker daemon
    try {
        docker info | Out-Null
        # A native executable signals failure through its exit code rather
        # than by throwing, so the exit code has to be inspected explicitly.
        if ($LASTEXITCODE -eq 0) {
            Write-Host "✅ Docker daemon accessible" -ForegroundColor Green
        } else {
            Write-Host "❌ Docker daemon not accessible" -ForegroundColor Red
        }
    } catch {
        # Reached when the docker command itself cannot be started
        # (for example when it is not installed or not on PATH).
        Write-Host "❌ Docker daemon not accessible" -ForegroundColor Red
    }
}
|
||||
|
||||
# Function to fix common cache issues
|
||||
function Fix-CacheIssues {
    # Ensures the local cache directory exists and points the cache-related
    # environment variables of the current process at the fixed cache endpoint.
    Write-Host "🔧 Applying cache fixes..." -ForegroundColor Cyan

    # Make sure the cache directory is present (no-op when it already exists).
    $actCachePath = "$env:USERPROFILE\.cache\actcache"
    $null = New-Item -ItemType Directory -Path $actCachePath -Force

    # Both variables are set to the same fixed host/port endpoint.
    $cacheEndpoint = "http://host.docker.internal:44029/"
    $env:ACTIONS_CACHE_URL = $cacheEndpoint
    $env:ACTIONS_RUNTIME_URL = $cacheEndpoint

    Write-Host "✅ Cache directory created and configured" -ForegroundColor Green
    Write-Host "✅ Environment variables set" -ForegroundColor Green
}
|
||||
|
||||
# Function to restart cache service
|
||||
function Restart-CacheService {
    # Stops any running act_runner process, starts .\act_runner.exe again
    # (with config_cache_fixed.yaml when that file exists in the current
    # directory), and verifies that a process is present afterwards.
    # Returns $true when an act_runner process is found after the restart,
    # otherwise $false.
    Write-Host "🔄 Restarting cache service..." -ForegroundColor Cyan

    # Stop existing runners and give them a moment to exit.
    Get-Process -Name "act_runner" -ErrorAction SilentlyContinue | Stop-Process -Force
    Start-Sleep -Seconds 2

    # Prefer the fixed configuration; fall back to the runner's defaults.
    $fixedConfigPresent = Test-Path "config_cache_fixed.yaml"
    if (-not $fixedConfigPresent) {
        Write-Host "⚠️ Fixed configuration not found, using default" -ForegroundColor Yellow
        Start-Process -FilePath ".\act_runner.exe" -ArgumentList "daemon" -WindowStyle Hidden
    } else {
        Write-Host "✅ Using fixed configuration" -ForegroundColor Green
        Start-Process -FilePath ".\act_runner.exe" -ArgumentList "daemon", "--config", "config_cache_fixed.yaml" -WindowStyle Hidden
    }

    # Wait before checking that the new process is present.
    Start-Sleep -Seconds 5

    $runnerProcesses = Get-Process -Name "act_runner" -ErrorAction SilentlyContinue
    if (-not $runnerProcesses) {
        Write-Host "❌ Failed to restart cache service" -ForegroundColor Red
        return $false
    }

    Write-Host "✅ Cache service restarted successfully" -ForegroundColor Green
    return $true
}
|
||||
|
||||
# Function to test cache functionality
|
||||
function Test-CacheFunctionality {
    # Probes the cache endpoint at host.docker.internal:44029 with a request
    # for a freshly generated (and therefore unknown) cache key, and reports
    # whether the server answered at all. Nothing is returned.
    Write-Host "🧪 Testing cache functionality..." -ForegroundColor Cyan

    # Create a test cache entry
    $stamp = Get-Date -Format 'yyyyMMddHHmmss'
    $testKey = "test-cache-$stamp"
    $testValue = "test-value-$stamp"

    # Use the temp directory of the current user instead of a hard-coded
    # C:\temp, which does not exist on every machine.
    $testFile = Join-Path ([System.IO.Path]::GetTempPath()) "cache-test.txt"

    Write-Host " Creating test cache entry: $testKey" -ForegroundColor Gray
    $testValue | Out-File -FilePath $testFile -Force

    Write-Host " Testing cache service response..." -ForegroundColor Gray
    try {
        $null = Invoke-WebRequest -Uri "http://host.docker.internal:44029/cache/$testKey" -TimeoutSec 5 -UseBasicParsing -ErrorAction Stop
        Write-Host "✅ Cache service responding" -ForegroundColor Green
    } catch {
        if ($_.Exception.Response) {
            # The server answered, just with an HTTP error status. For an
            # unknown key that still proves the service is reachable.
            Write-Host "✅ Cache service responding (HTTP $([int]$_.Exception.Response.StatusCode))" -ForegroundColor Green
            Write-Host " This is expected if no cache entry exists" -ForegroundColor Gray
        } else {
            # No HTTP response at all: connection refused, DNS failure, timeout, ...
            Write-Host "❌ Cache service not responding: $($_.Exception.Message)" -ForegroundColor Yellow
        }
    } finally {
        # Clean up
        Remove-Item $testFile -ErrorAction SilentlyContinue
    }
}
|
||||
|
||||
# Main execution
|
||||
function Main {
    # Runs every diagnostic and fix step in order, separated by blank lines,
    # then prints follow-up instructions.
    # Helpers that return a boolean have their result discarded so that a
    # stray "True"/"False" is not written to the output between the messages.
    Write-Host "Starting cache troubleshooting..." -ForegroundColor Cyan
    Write-Host ""

    $null = Test-Docker
    Write-Host ""

    $null = Test-CacheService
    Write-Host ""

    Test-NetworkConnectivity
    Write-Host ""

    Fix-CacheIssues
    Write-Host ""

    $null = Restart-CacheService
    Write-Host ""

    Test-CacheFunctionality
    Write-Host ""

    Write-Host "🎉 Cache troubleshooting complete!" -ForegroundColor Green
    Write-Host ""
    Write-Host "Next steps:" -ForegroundColor Yellow
    Write-Host "1. Check runner logs in the current directory" -ForegroundColor White
    Write-Host "2. Test a workflow to see if cache issues are resolved" -ForegroundColor White
    Write-Host "3. If issues persist, check Docker networking configuration" -ForegroundColor White
    Write-Host ""
    Write-Host "For more help, see: https://gitea.com/gitea/act_runner/src/branch/main/docs/configuration.md" -ForegroundColor Cyan
}

# Run main function
Main
|
||||
167
runners/fix-cache-issues.sh
Normal file
167
runners/fix-cache-issues.sh
Normal file
@@ -0,0 +1,167 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Cache Troubleshooting and Fix Script for LabFusion CI/CD
|
||||
# This script helps diagnose and fix common cache timeout issues
|
||||
|
||||
# Abort on the first command that fails outside of a conditional context.
set -o errexit

# Print the two-line script banner.
printf '%s\n' "🔧 LabFusion Cache Troubleshooting Script"
printf '%s\n' "=========================================="
|
||||
|
||||
# Function to check if running in Docker
|
||||
check_docker() {
    # Reports whether the /.dockerenv marker file exists, which the script
    # treats as "running inside a Docker container".
    # Returns 0 when the marker is present, 1 otherwise.
    if [ ! -f /.dockerenv ]; then
        echo "🖥️ Running on host system"
        return 1
    fi

    echo "🐳 Running inside Docker container"
    return 0
}
|
||||
|
||||
# Function to check cache service status
|
||||
check_cache_service() {
    # Checks that a process matching "act_runner" is running and that the
    # local cache directory exists (creating it when missing).
    # Returns 1 when no matching process is found.
    echo "📊 Checking cache service status..."

    # Bail out early when no runner process can be found.
    if ! pgrep -f "act_runner" > /dev/null; then
        echo "❌ act_runner process not found"
        return 1
    fi
    echo "✅ act_runner process found"

    # Inspect the cache directory, creating it on first use.
    CACHE_DIR="${HOME}/.cache/actcache"
    if [ ! -d "$CACHE_DIR" ]; then
        echo "⚠️ Cache directory not found: $CACHE_DIR"
        echo " Creating cache directory..."
        mkdir -p "$CACHE_DIR"
    else
        echo "✅ Cache directory exists: $CACHE_DIR"
        # Falls back to "Unknown" when du cannot read the directory.
        echo " Size: $(du -sh "$CACHE_DIR" 2>/dev/null || echo "Unknown")"
    fi
}
|
||||
|
||||
# Function to test network connectivity
|
||||
test_network() {
    # Runs two diagnostics and prints the outcome of each:
    #   1. a single ping to 8.8.8.8 as a basic connectivity probe;
    #   2. `docker info` to see whether the Docker daemon can be reached.
    echo "🌐 Testing network connectivity..."

    # Basic connectivity probe.
    if ! ping -c 1 8.8.8.8 > /dev/null 2>&1; then
        echo "❌ Internet connectivity failed"
    else
        echo "✅ Internet connectivity OK"
    fi

    # Docker daemon probe.
    if ! docker info > /dev/null 2>&1; then
        echo "❌ Docker daemon not accessible"
    else
        echo "✅ Docker daemon accessible"
    fi
}
|
||||
|
||||
# Function to fix common cache issues
|
||||
fix_cache_issues() {
    # Ensures the local cache directory exists with mode 755 and exports the
    # cache-related environment variables pointing at the fixed endpoint, so
    # that processes started later by this script inherit them.
    echo "🔧 Applying cache fixes..."

    # Cache directory: create if needed, then set its permissions.
    CACHE_DIR="${HOME}/.cache/actcache"
    mkdir -p "$CACHE_DIR"
    chmod 755 "$CACHE_DIR"

    # Both variables are set to the same fixed host/port endpoint.
    ACTIONS_CACHE_URL="http://host.docker.internal:44029/"
    ACTIONS_RUNTIME_URL="http://host.docker.internal:44029/"
    export ACTIONS_CACHE_URL ACTIONS_RUNTIME_URL

    echo "✅ Cache directory created and configured"
    echo "✅ Environment variables set"
}
|
||||
|
||||
# Function to restart cache service
|
||||
restart_cache_service() {
    # Stops any process matching "act_runner", starts ./act_runner in the
    # background (with config_cache_fixed.yaml when that file exists in the
    # current directory, logging to runner.log), and verifies that a matching
    # process is present afterwards.
    # Returns 1 when no matching process is found after the restart.
    echo "🔄 Restarting cache service..."

    # Stop existing runners; pkill exits non-zero when nothing matched,
    # which must not abort the script.
    pkill -f "act_runner" || true
    sleep 2

    # Prefer the fixed configuration; fall back to the runner's defaults.
    if [ ! -f "config_cache_fixed.yaml" ]; then
        echo "⚠️ Fixed configuration not found, using default"
        nohup ./act_runner daemon > runner.log 2>&1 &
    else
        echo "✅ Using fixed configuration"
        nohup ./act_runner daemon --config config_cache_fixed.yaml > runner.log 2>&1 &
    fi

    # Wait before checking that the new process is present.
    sleep 5

    if ! pgrep -f "act_runner" > /dev/null; then
        echo "❌ Failed to restart cache service"
        return 1
    fi
    echo "✅ Cache service restarted successfully"
}
|
||||
|
||||
# Function to test cache functionality
|
||||
test_cache() {
    # Probes the cache endpoint at host.docker.internal:44029 with a request
    # for a freshly generated (and therefore unknown) cache key, and reports
    # whether the server answered at all.
    echo "🧪 Testing cache functionality..."

    # Create a test cache entry
    local test_key test_value test_file
    test_key="test-cache-$(date +%s)"
    test_value="test-value-$(date +%s)"
    # mktemp avoids clobbering (or following) a pre-existing, predictable
    # /tmp/cache-test path.
    test_file="$(mktemp "${TMPDIR:-/tmp}/cache-test.XXXXXX")"

    echo " Creating test cache entry: $test_key"
    echo "$test_value" > "$test_file"

    # curl is run without --fail, so any HTTP reply (including an error status
    # for the unknown key) counts as success; only a connection-level failure
    # reaches the else branch. The timeouts keep the probe from hanging when
    # the endpoint silently drops packets.
    echo " Testing cache restore..."
    if curl -s --connect-timeout 5 --max-time 10 \
        "http://host.docker.internal:44029/cache/$test_key" > /dev/null 2>&1; then
        echo "✅ Cache service responding"
    else
        echo "❌ Cache service not responding"
        echo " Could not reach host.docker.internal:44029 (connection failed or timed out)"
    fi

    # Clean up
    rm -f "$test_file"
}
|
||||
|
||||
# Main execution
|
||||
main() {
    # Runs every diagnostic and fix step in order, separated by blank lines,
    # then prints follow-up instructions.
    echo "Starting cache troubleshooting..."
    echo ""

    # check_docker returns 1 when running on a host and check_cache_service
    # returns 1 when no runner is found. Both are purely informational here,
    # so their status must not trip `set -e` and abort the script before the
    # fix and restart steps have run.
    check_docker || true
    echo ""

    check_cache_service || true
    echo ""

    test_network
    echo ""

    fix_cache_issues
    echo ""

    restart_cache_service
    echo ""

    test_cache
    echo ""

    echo "🎉 Cache troubleshooting complete!"
    echo ""
    echo "Next steps:"
    echo "1. Check runner logs: tail -f runner.log"
    echo "2. Test a workflow to see if cache issues are resolved"
    echo "3. If issues persist, check Docker networking configuration"
    echo ""
    echo "For more help, see: https://gitea.com/gitea/act_runner/src/branch/main/docs/configuration.md"
}

# Run main function
main "$@"
|
||||
Reference in New Issue
Block a user