Prechádzať zdrojové kódy

feat: add data freshness and database backup disaster recovery scripts

lanfr144 1 týždeň pred
rodič
commit
d1297aa1ab
2 zmenil súbory, kde vykonal 83 pridanie a 0 odobranie
  1. 30 0
      backup_db.sh
  2. 53 0
      data_sync.sh

+ 30 - 0
backup_db.sh

@@ -0,0 +1,30 @@
+#!/bin/bash
+# backup_db.sh - Automated Disaster Recovery Backup Script
+
+BACKUP_DIR="./backups"
+DB_CONTAINER="food_project-mysql-1"
+DB_NAME="food_db"
+RETENTION_DAYS=7
+DATE=$(date +"%Y%m%d_%H%M%S")
+BACKUP_FILE="$BACKUP_DIR/food_db_$DATE.sql.gz"
+
+echo "Starting Database Backup for $DB_NAME..."
+
+# Ensure backup directory exists
+mkdir -p "$BACKUP_DIR"
+
+# Execute mysqldump inside the container and pipe to gzip
+sudo docker exec $DB_CONTAINER mysqldump -u root -proot_pass $DB_NAME | gzip > "$BACKUP_FILE"
+
+if [ $? -eq 0 ]; then
+    echo "Backup successfully saved to $BACKUP_FILE"
+else
+    echo "Error: Database backup failed!"
+    exit 1
+fi
+
+# Apply retention policy
+echo "Applying retention policy: keeping backups for $RETENTION_DAYS days..."
+find "$BACKUP_DIR" -name "food_db_*.sql.gz" -type f -mtime +$RETENTION_DAYS -exec rm {} \;
+
+echo "Backup process completed."

+ 53 - 0
data_sync.sh

@@ -0,0 +1,53 @@
+#!/bin/bash
+# data_sync.sh - Automated Data Freshness Pipeline
+
+ONLINE_MODE=0
+DATA_DIR="./data"
+INGEST_FILE="en.openfoodfacts.org.products.csv"
+URL="https://static.openfoodfacts.org/data/en.openfoodfacts.org.products.csv"
+
+# Parse arguments
+while [[ "$#" -gt 0 ]]; do
+    case $1 in
+        --online) ONLINE_MODE=1 ;;
+        *) echo "Unknown parameter: $1"; exit 1 ;;
+    esac
+    shift
+done
+
+echo "Starting Data Freshness Sync..."
+mkdir -p "$DATA_DIR"
+
+if [ "$ONLINE_MODE" -eq 1 ]; then
+    echo "Online mode enabled. Checking for latest OpenFoodFacts database..."
+    if ping -c 1 google.com &> /dev/null; then
+        echo "Internet connection verified. Downloading latest dataset..."
+        # Use -N to only download if newer than local file
+        wget -N -P "$DATA_DIR" "$URL"
+        if [ $? -eq 0 ]; then
+            echo "Download complete."
+        else
+            echo "Failed to download dataset."
+        fi
+    else
+        echo "No internet access detected. Falling back to offline mode."
+    fi
+else
+    echo "Offline mode. Checking $DATA_DIR for manually dropped files..."
+fi
+
+# Check if file exists to trigger ingestion
+if [ -f "$DATA_DIR/$INGEST_FILE" ]; then
+    # We should only ingest if the file is new or modified. 
+    # For simplicity, we just trigger ingestion if the file exists.
+    # Ingest script handles DROP TABLE if needed, but wait: ingest_csv appends by default or we can modify it.
+    echo "Found dataset: $DATA_DIR/$INGEST_FILE"
+    echo "Triggering ingestion pipeline via Docker Compose..."
+    sudo docker-compose run --rm ingest ./ingest_csv.py "data/$INGEST_FILE"
+    
+    # After successful ingestion, move or rename to prevent infinite loops on offline cron
+    mv "$DATA_DIR/$INGEST_FILE" "$DATA_DIR/$INGEST_FILE.processed"
+    echo "Ingestion complete and file archived."
+else
+    echo "No dataset found in $DATA_DIR. Nothing to ingest."
+fi