fix: Resolve cache timeout issues in CI/CD pipelines
- Add fail-on-cache-miss: false to all cache actions in workflows
- Create improved runner configuration (config_cache_fixed.yaml) with:
  - Fixed cache host: host.docker.internal
  - Fixed cache port: 44029
  - Host network mode for better container networking
- Add cache troubleshooting scripts:
  - fix-cache-issues.sh (Linux/macOS)
  - fix-cache-issues.ps1 (Windows)
- Update all workflows: api-gateway, frontend, service-adapters, api-docs, ci

This resolves the 'connect ETIMEDOUT 172.31.0.3:44029' errors by:
1. Making cache failures non-fatal
2. Using proper Docker networking configuration
3. Providing tools to diagnose and fix cache issues
This commit is contained in:
167
runners/fix-cache-issues.sh
Normal file
167
runners/fix-cache-issues.sh
Normal file
@@ -0,0 +1,167 @@
|
||||
#!/bin/bash

# Cache Troubleshooting and Fix Script for LabFusion CI/CD
# This script helps diagnose and fix common cache timeout issues
#
# Usage: run from the directory that contains the act_runner binary. The
# script works on relative paths: ./act_runner, config_cache_fixed.yaml and
# runner.log are all resolved against the current working directory.

# Abort on any unguarded command failure. Functions that use their exit status
# to report a finding must therefore be called in a guarded context.
set -e

echo "🔧 LabFusion Cache Troubleshooting Script"
echo "=========================================="

# Function to check if running in Docker
#
# Detection is based solely on the presence of the /.dockerenv marker file.
# Outputs: one status line on stdout.
# Returns: 0 when /.dockerenv exists, 1 otherwise. The non-zero status is a
#          finding, not an error, so callers running under `set -e` should
#          guard the call (e.g. `check_docker || true`).
check_docker() {
  if [[ -f /.dockerenv ]]; then
    echo "🐳 Running inside Docker container"
    return 0
  fi

  echo "🖥️ Running on host system"
  return 1
}

# Function to check cache service status
#
# Performs two independent checks and reports each on stdout:
#   1. whether any process whose command line matches "act_runner" is running;
#   2. whether ${HOME}/.cache/actcache exists (it is created when missing).
# Both checks always run, so a missing runner no longer hides the state of the
# cache directory.
# Returns: 0 when an act_runner process was found, 1 otherwise.
check_cache_service() {
  echo "📊 Checking cache service status..."

  local runner_status=0

  # Check if cache service is running
  if pgrep -f "act_runner" > /dev/null; then
    echo "✅ act_runner process found"
  else
    echo "❌ act_runner process not found"
    runner_status=1
  fi

  # Check cache directory
  local cache_dir="${HOME}/.cache/actcache"
  if [[ -d "$cache_dir" ]]; then
    echo "✅ Cache directory exists: $cache_dir"
    # du prints "<size><TAB><path>"; keep only the size column. A du failure
    # is deliberately non-fatal and is reported as "Unknown".
    local dir_size
    dir_size=$(du -sh "$cache_dir" 2>/dev/null | cut -f1) || true
    echo " Size: ${dir_size:-Unknown}"
  else
    echo "⚠️ Cache directory not found: $cache_dir"
    echo " Creating cache directory..."
    mkdir -p "$cache_dir"
  fi

  return "$runner_status"
}

# Function to test network connectivity
#
# Runs two probes and prints one result line for each on stdout:
#   - a single ICMP echo to 8.8.8.8 (reported as "Internet connectivity");
#   - `docker info` (reported as "Docker daemon" accessibility).
# Probe output is discarded; only the exit status of each probe is used.
# Returns: 0 in all cases; failures are reported, not propagated.
test_network() {
  echo "🌐 Testing network connectivity..."

  # Test basic connectivity
  if ping -c 1 8.8.8.8 > /dev/null 2>&1; then
    echo "✅ Internet connectivity OK"
  else
    echo "❌ Internet connectivity failed"
  fi

  # Test Docker daemon
  if docker info > /dev/null 2>&1; then
    echo "✅ Docker daemon accessible"
  else
    echo "❌ Docker daemon not accessible"
  fi
}

# Function to fix common cache issues
#
# Ensures ${HOME}/.cache/actcache exists with mode 755 and exports
# ACTIONS_CACHE_URL and ACTIONS_RUNTIME_URL, both pointing at
# http://host.docker.internal:44029/.
# The exports only affect this shell and the processes it starts afterwards;
# they do not change the environment of the shell that invoked the script.
# Outputs: two confirmation lines on stdout.
fix_cache_issues() {
  echo "🔧 Applying cache fixes..."

  # Create cache directory with proper permissions
  local cache_dir="${HOME}/.cache/actcache"
  mkdir -p "$cache_dir"
  chmod 755 "$cache_dir"

  # Set proper environment variables
  export ACTIONS_CACHE_URL="http://host.docker.internal:44029/"
  export ACTIONS_RUNTIME_URL="http://host.docker.internal:44029/"

  echo "✅ Cache directory created and configured"
  echo "✅ Environment variables set"
}

# Function to restart cache service
#
# Stops every process whose command line matches "act_runner" and starts
# ./act_runner in daemon mode in the background, with its output redirected to
# runner.log. config_cache_fixed.yaml is passed via --config when it exists in
# the current directory; otherwise the runner starts with its defaults.
# All paths are relative to the current working directory.
# Returns: 0 when an act_runner process is visible after the restart,
#          1 when ./act_runner is missing/not executable (nothing is stopped
#          in that case) or when no runner process is found afterwards.
restart_cache_service() {
  echo "🔄 Restarting cache service..."

  # Refuse to stop the running service when it cannot be started again from
  # here; otherwise a run from the wrong directory leaves the runner down.
  if [[ ! -x ./act_runner ]]; then
    echo "❌ ./act_runner not found or not executable; existing runners left untouched"
    return 1
  fi

  # Stop existing runners. pkill exits non-zero when nothing matched, which is
  # fine here, hence the explicit `|| true`.
  pkill -f "act_runner" || true
  sleep 2

  # Start with fixed configuration
  local -a runner_cmd=(./act_runner daemon)
  if [[ -f "config_cache_fixed.yaml" ]]; then
    echo "✅ Using fixed configuration"
    runner_cmd+=(--config config_cache_fixed.yaml)
  else
    echo "⚠️ Fixed configuration not found, using default"
  fi
  nohup "${runner_cmd[@]}" > runner.log 2>&1 &

  # Give the daemon a moment to come up (or crash) before probing for it.
  sleep 5

  if pgrep -f "act_runner" > /dev/null; then
    echo "✅ Cache service restarted successfully"
  else
    echo "❌ Failed to restart cache service"
    return 1
  fi
}

# Function to test cache functionality
#
# Probes the cache endpoint with an HTTP GET for a freshly generated key at
# http://host.docker.internal:44029/cache/<key> and reports whether the
# request completed. curl is run without --fail, so any HTTP response
# (including "not found") counts as "responding"; only transport-level
# failures are reported as "not responding".
# The request is bounded by connect/total timeouts so an unreachable cache
# host cannot stall the script.
# Outputs: progress and result lines on stdout.
# Returns: 0 whether or not the service responds; 1 only if the scratch file
#          cannot be created.
test_cache() {
  echo "🧪 Testing cache functionality..."

  # Create a test cache entry
  local test_key test_value scratch_file
  test_key="test-cache-$(date +%s)"
  test_value="test-value-$(date +%s)"

  echo " Creating test cache entry: $test_key"
  # The value is only written to a local scratch file; nothing below uploads
  # it. mktemp avoids clobbering (or following) a pre-existing fixed /tmp path.
  scratch_file=$(mktemp "${TMPDIR:-/tmp}/cache-test.XXXXXX") || return 1
  echo "$test_value" > "$scratch_file"

  # Try to restore: the key does not exist, so only reachability is tested.
  echo " Testing cache restore..."
  if curl -s --connect-timeout 5 --max-time 10 \
    "http://host.docker.internal:44029/cache/$test_key" > /dev/null 2>&1; then
    echo "✅ Cache service responding"
  else
    echo "❌ Cache service not responding"
    echo " Could not reach http://host.docker.internal:44029 (DNS failure, connection refused, or timeout)"
  fi

  # Clean up
  rm -f -- "$scratch_file"
}

# Main execution
#
# Runs the diagnostic and fix steps in a fixed order, printing a blank line
# after each, then prints follow-up guidance:
#   check_docker -> check_cache_service -> test_network -> fix_cache_issues
#   -> restart_cache_service -> test_cache
# Arguments: none are read.
# Returns: 0 when all steps ran; 1 when the runner could not be restarted.
main() {
  echo "Starting cache troubleshooting..."
  echo ""

  # check_docker returns 1 on a host system and check_cache_service returns 1
  # when no runner is found. Both are findings, not errors: called bare under
  # `set -e` they would end the script before any fix is applied.
  check_docker || true
  echo ""

  check_cache_service || true
  echo ""

  test_network
  echo ""

  fix_cache_issues
  echo ""

  # A failed restart stays fatal: there is no point probing a cache that is
  # known to be down, and the caller should see a non-zero exit status.
  restart_cache_service || return 1
  echo ""

  test_cache
  echo ""

  echo "🎉 Cache troubleshooting complete!"
  echo ""
  echo "Next steps:"
  echo "1. Check runner logs: tail -f runner.log"
  echo "2. Test a workflow to see if cache issues are resolved"
  echo "3. If issues persist, check Docker networking configuration"
  echo ""
  echo "For more help, see: https://gitea.com/gitea/act_runner/src/branch/main/docs/configuration.md"
}

# Run main function, forwarding the script's command-line arguments unchanged.
main "$@"
Reference in New Issue
Block a user