
TG-2: Restructure schema for all CSV columns, async ingestion, and mail forwarding

lanfr144 3 weeks ago
parent
commit ab7e3b1d3a
6 changed files with 126 additions and 36 deletions
  1. app.py  (+1, -1)
  2. check_projects.py  (+15, -0)
  3. ingest_csv.py  (+46, -22)
  4. setup_db.py  (+8, -13)
  5. setup_mail_forwarding.sh  (+26, -0)
  6. start_batch_ingest.sh  (+30, -0)

+ 1 - 1
app.py

@@ -128,7 +128,7 @@ with tab_chat:
 
         with st.spinner("Analyzing locally..."):
             try:
-                response = ollama.chat(model='llama3', messages=[
+                response = ollama.chat(model='mistral', messages=[
                     {'role': 'system', 'content': sys_prompt},
                     {'role': 'user', 'content': prompt}
                 ])

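The change above swaps the local model by editing app.py directly. A hedged follow-up sketch (not part of this commit): read the model name from an environment variable so future swaps stay out of the code. The OLLAMA_MODEL variable name and the helper function below are assumptions, not existing project code.

    # Hypothetical refactor: model name comes from the environment,
    # so llama3 -> mistral style swaps need no code change.
    import os
    import ollama

    OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "mistral")  # assumed variable name

    def ask_local_model(sys_prompt: str, prompt: str) -> str:
        # Same ollama.chat call pattern as app.py, with the model made configurable.
        response = ollama.chat(model=OLLAMA_MODEL, messages=[
            {'role': 'system', 'content': sys_prompt},
            {'role': 'user', 'content': prompt},
        ])
        return response['message']['content']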
+ 15 - 0
check_projects.py

@@ -0,0 +1,15 @@
+import requests
+import urllib3
+urllib3.disable_warnings()
+
+auth = requests.post(
+    'https://192.168.130.161/taiga/api/v1/auth', 
+    json={'type': 'normal', 'username': 'FrancoisLange', 'password': 'BTSai123'}, 
+    verify=False
+).json()
+
+headers = {'Authorization': f'Bearer {auth["auth_token"]}'}
+projs = requests.get('https://192.168.130.161/taiga/api/v1/projects', headers=headers, verify=False).json()
+print("Projects:")
+for p in projs:
+    print(f"ID: {p['id']}, Name: {p['name']}, Slug: {p['slug']}")

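check_projects.py stores the Taiga password in plain text and disables TLS verification. A minimal sketch of the same authentication call with credentials pulled from the environment is shown below; the TAIGA_URL, TAIGA_USER and TAIGA_PASSWORD variable names are assumptions, not part of the repository.

    # Illustrative only: same Taiga auth request, credentials from the environment.
    import os
    import requests
    import urllib3

    urllib3.disable_warnings()

    TAIGA_URL = os.environ.get("TAIGA_URL", "https://192.168.130.161/taiga")
    auth = requests.post(
        f"{TAIGA_URL}/api/v1/auth",
        json={
            "type": "normal",
            "username": os.environ["TAIGA_USER"],
            "password": os.environ["TAIGA_PASSWORD"],
        },
        verify=False,  # self-signed lab certificate, as in the committed script
    ).json()
    headers = {"Authorization": f"Bearer {auth['auth_token']}"}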
+ 46 - 22
ingest_csv.py

@@ -25,42 +25,62 @@ def ingest_file(filename, engine):
         print(f"File {filename} not found locally.")
         print(f"File {filename} not found locally.")
         return False
         return False
         
         
-    print(f"\n🚀 Found {filename}! Starting ingestion via SQLAlchemy pipeline...")
-    
-    expected_columns = [
-        "code", "url", "creator", "created_t", "created_datetime", "last_modified_t", 
-        "last_modified_datetime", "product_name", "generic_name", "quantity", "packaging", 
-        "brands", "categories", "origins", "labels", "stores", "countries", "ingredients_text", 
-        "allergens", "traces"
-    ]
+    print(f"\n🚀 Found {filename}! Starting extreme batch ingestion...")
     
     
     chunk_size = 5000 
     chunk_size = 5000 
     total_processed = 0
     total_processed = 0
 
 
-    for chunk in pd.read_csv(filename, sep='\t', dtype=str, chunksize=chunk_size, on_bad_lines='skip'):
-        # Filter explicitly to schema
-        available_cols = [col for col in expected_columns if col in chunk.columns]
-        df = chunk[available_cols]
-        
-        # Pandas to_sql safely transforms NaNs to SQL NULLs internally
+    # Read dynamically without filtering. Setting low_memory=False to let pandas parse column types flexibly
+    for chunk in pd.read_csv(filename, sep='\t', dtype=str, chunksize=chunk_size, on_bad_lines='skip', low_memory=False):
         try:
-            # We use 'append' because the products table already exists with primary keys
-            # To handle duplicate 'code' primary keys effortlessly, we drop duplicates from the dataframe before insert
-            # Or depend on PyMySQL. But pandas natively crashes on dupes unless managed. 
-            df = df.drop_duplicates(subset=['code'])
+            # Drop duplicates by code natively
+            if 'code' in chunk.columns:
+                df = chunk.drop_duplicates(subset=['code'])
+            else:
+                df = chunk
+                
             df.to_sql('products', con=engine, if_exists='append', index=False)
             total_processed += len(df)
-            print(f"   Successfully appended {total_processed} rows...")
+            print(f"   Successfully appended {total_processed} rows (Dynamic schema)...", end="\r")
         except BaseException as e:
-            # If a strict primary key duplicate existed in DB already from a previous chunk, ignore row crashes
             if "Duplicate entry" in str(e):
             if "Duplicate entry" in str(e):
                 pass
                 pass
             else:
             else:
-                 print(f"   [Warning] Chunk skipped due to internal structural error: {e}")
+                 print(f"\n   [Warning] Chunk skipped due to internal structural error: {e}")
         
         
-    print(f"✅ Finished importing {filename}.")
+    print(f"\n✅ Finished importing {filename}.")
     return True
 
+def create_indexes(engine):
+    print("\n🛠️ Creating performance indexes on newly generated table...")
+    # B-TREE and FULLTEXT INDEXES created post-ingestion for extreme speed
+    try:
+        with engine.begin() as connection:
+            print("  Building Primary Key on `code`...")
+            # We must make `code` the primary key if pandas just made it a TEXT field
+            # But MySQL cannot have a TEXT field as PRIMARY KEY without a length constraint.
+            # Convert code to VARCHAR(50) first.
+            connection.execute(urllib.parse.unquote("ALTER TABLE products MODIFY code VARCHAR(50);"))
+            connection.execute(urllib.parse.unquote("ALTER TABLE products ADD PRIMARY KEY (code);"))
+
+            print("  Building Fulltext Indexes...")
+            connection.execute(urllib.parse.unquote("CREATE FULLTEXT INDEX ft_idx_search ON products(product_name, ingredients_text, brands);"))
+            
+            print("  Building B-TREE Indexes on core macros...")
+            # We attempt to index key macros if they exist
+            macro_cols = ['energy-kcal_100g', 'fat_100g', 'carbohydrates_100g', 'proteins_100g']
+            for col in macro_cols:
+                # Convert TEXT to DOUBLE for numerical indexing and querying
+                # We catch errors if the column doesn't exist to be safe
+                try:
+                    connection.execute(urllib.parse.unquote(f"ALTER TABLE products MODIFY `{col}` DOUBLE;"))
+                    connection.execute(urllib.parse.unquote(f"CREATE INDEX idx_{col.replace('-', '_')} ON products(`{col}`);"))
+                except:
+                    pass
+        print("✅ Indexing Complete!")
+    except Exception as e:
+        print(f"❌ Indexing encountered an issue: {e}")
+
 if __name__ == "__main__":
     print("Initiating OpenFoodFacts CSV Ingestion Process...")
     engine = get_loader_engine()
@@ -71,3 +91,7 @@ if __name__ == "__main__":
     if not processed_en and not processed_fr:
         print("\n❌ Could not find either 'en.openfoodfacts.org.products.csv' or 'fr.openfoodfacts.org.products.csv'.")
         print("Please download them directly into the root folder and run this script again.")
+    else:
+        # Build indexes now that all data is appended!
+        create_indexes(engine)
+        print("\n🎉 Full database reload and indexing complete! Ready for AI RAG.")

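For reference, a minimal sketch of the same post-ingestion DDL issued through sqlalchemy.text(), which SQLAlchemy 2.x requires for raw SQL strings. This is an illustrative rewrite of create_indexes under that assumption, not the committed code.

    # Illustrative sketch: identical index-creation steps via sqlalchemy.text().
    from sqlalchemy import text

    def create_indexes_sketch(engine):
        with engine.begin() as connection:
            # `code` must be a bounded type before MySQL accepts it as a PRIMARY KEY.
            connection.execute(text("ALTER TABLE products MODIFY code VARCHAR(50);"))
            connection.execute(text("ALTER TABLE products ADD PRIMARY KEY (code);"))
            connection.execute(text(
                "CREATE FULLTEXT INDEX ft_idx_search "
                "ON products(product_name, ingredients_text, brands);"
            ))
            for col in ('energy-kcal_100g', 'fat_100g', 'carbohydrates_100g', 'proteins_100g'):
                try:
                    connection.execute(text(f"ALTER TABLE products MODIFY `{col}` DOUBLE;"))
                    connection.execute(text(
                        f"CREATE INDEX idx_{col.replace('-', '_')} ON products(`{col}`);"
                    ))
                except Exception:
                    pass  # column absent in this CSV snapshot; skip its index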
+ 8 - 13
setup_db.py

@@ -70,22 +70,17 @@ def run_db_setup():
         created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
     ) ENGINE=InnoDB;
     """)
-    # 2. Products Table
-    cursor.execute("""
-    CREATE TABLE IF NOT EXISTS food_db.products (
-        code VARCHAR(50) PRIMARY KEY, url TEXT, creator VARCHAR(255), created_t VARCHAR(50), 
-        created_datetime VARCHAR(50), last_modified_t VARCHAR(50), last_modified_datetime VARCHAR(50), 
-        product_name TEXT, generic_name TEXT, quantity VARCHAR(255), packaging TEXT, brands TEXT, 
-        categories TEXT, origins TEXT, labels TEXT, stores TEXT, countries TEXT, ingredients_text TEXT, 
-        allergens TEXT, traces TEXT, 
-        FULLTEXT INDEX ft_idx_search (product_name, ingredients_text)
-    ) ENGINE=InnoDB;
-    """)
+    # 2. Products Table (Dynamic Drop)
+    # We drop the strict schema completely. `ingest_csv.py` will use pandas to automatically 
+    # generate the table with 100% of the CSV columns dynamically defined as TEXT fields.
+    cursor.execute("DROP TABLE IF EXISTS food_db.products;")
     
     
     # Table Context Grants (SoD)
     # Table Context Grants (SoD)
     cursor.execute("GRANT SELECT, INSERT, UPDATE ON food_db.users TO 'db_app_auth'@'%';")
     cursor.execute("GRANT SELECT, INSERT, UPDATE ON food_db.users TO 'db_app_auth'@'%';")
-    cursor.execute("GRANT SELECT ON food_db.products TO 'db_reader'@'%';")
-    cursor.execute("GRANT SELECT, INSERT, UPDATE, DELETE, DROP, CREATE ON food_db.products TO 'db_loader'@'%';")
+    # Note: Reader/Loader grants on products table will be handled or applied at the database level
+    # since the table won't exist until pandas creates it. Granting at db-level for these specific users.
+    cursor.execute("GRANT SELECT ON food_db.* TO 'db_reader'@'%';")
+    cursor.execute("GRANT SELECT, INSERT, UPDATE, DELETE, DROP, CREATE, ALTER, INDEX ON food_db.* TO 'db_loader'@'%';")
     cursor.execute("FLUSH PRIVILEGES;")
     cursor.execute("FLUSH PRIVILEGES;")
 
 
     print("\n✅ Database, Users, and Tables created successfully!")
     print("\n✅ Database, Users, and Tables created successfully!")

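Because setup_db.py now drops food_db.products and leaves table creation to pandas, a quick way to confirm the generated schema is SQLAlchemy's inspector. The snippet below is illustrative only and assumes the same engine object that ingest_csv.py obtains from get_loader_engine().

    # Illustrative check: list the columns pandas generated for products.
    from sqlalchemy import inspect

    def describe_products(engine):
        inspector = inspect(engine)
        cols = inspector.get_columns('products')   # every CSV column, mostly TEXT
        print(f"products has {len(cols)} columns")
        for c in cols[:10]:
            print(f"  {c['name']}: {c['type']}")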
+ 26 - 0
setup_mail_forwarding.sh

@@ -0,0 +1,26 @@
+#!/bin/bash
+# run this as root/sudo on the Ubuntu VM
+
+echo "Setting up centralized mail forwarding to lanfr144@gmail.com..."
+
+# 1. Update the skeleton directory so all NEW users created automatically forward mail
+echo "lanfr144@gmail.com" | sudo tee /etc/skel/.forward
+sudo chmod 644 /etc/skel/.forward
+
+# 2. Add forwarding to all dynamically created home directories
+for user_dir in /home/*; do
+  if [ -d "$user_dir" ]; then
+    user_name=$(basename "$user_dir")
+    echo "lanfr144@gmail.com" | sudo tee "$user_dir/.forward"
+    sudo chown "$user_name":"$user_name" "$user_dir/.forward"
+    sudo chmod 644 "$user_dir/.forward"
+    echo "Configured for user: $user_name"
+  fi
+done
+
+# 3. Add forwarding for root manually
+echo "lanfr144@gmail.com" | sudo tee /root/.forward
+sudo chmod 644 /root/.forward
+echo "Configured for root."
+
+echo "✅ All system mail will now forward to lanfr144@gmail.com"

+ 30 - 0
start_batch_ingest.sh

@@ -0,0 +1,30 @@
+#!/bin/bash
+# Local Food AI - Disconnected Ingestion Wrapper
+# This script uses nohup to run the python ingestion script in the background.
+# You can exit your SSH session safely after starting this script.
+
+echo "========================================================="
+echo "🍔 Local Food AI: Extreme Batch Ingestion"
+echo "========================================================="
+
+if [ ! -f "en.openfoodfacts.org.products.csv" ] && [ ! -f "fr.openfoodfacts.org.products.csv" ]; then
+    echo "❌ Error: CSV files not found in the current directory."
+    echo "Please download the massive CSVs before running this batch."
+    exit 1
+fi
+
+echo "🚀 Starting database wipe and reset..."
+# Automatically run the new DB setup to drop the rigid table
+python3 setup_db.py
+
+echo "🚀 Triggering background ingestion process via nohup..."
+echo "All outputs will be saved to ingestion_process.log"
+
+# Run securely in background
+nohup python3 -u ingest_csv.py > ingestion_process.log 2>&1 &
+BG_PID=$!
+
+echo "✅ Process started in the background (PID: $BG_PID)"
+echo "You can now safely close your terminal or turn off your computer."
+echo "To monitor progress from the server later, run:"
+echo "   tail -f ingestion_process.log"