
TG-2: Restructure schema for all CSV columns, async ingestion, and mail forwarding

lanfr144 committed 3 weeks ago
parent
commit
ab7e3b1d3a
6 files changed with 126 additions and 36 deletions
  1. app.py (+1 -1)
  2. check_projects.py (+15 -0)
  3. ingest_csv.py (+46 -22)
  4. setup_db.py (+8 -13)
  5. setup_mail_forwarding.sh (+26 -0)
  6. start_batch_ingest.sh (+30 -0)

+ 1 - 1
app.py

@@ -128,7 +128,7 @@ with tab_chat:
         
         with st.spinner("Analyzing locally..."):
             try:
-                response = ollama.chat(model='llama3', messages=[
+                response = ollama.chat(model='mistral', messages=[
                     {'role': 'system', 'content': sys_prompt},
                     {'role': 'user', 'content': prompt}
                 ])
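The model swap above assumes the mistral weights are already present on the Ollama host; if not, the chat call fails at runtime. A minimal preflight sketch using the same ollama Python client (not part of this commit):

    import ollama

    # Check whether the model is available locally; pull it from the
    # registry on a miss. Both calls come from the ollama client library.
    try:
        ollama.show('mistral')
    except ollama.ResponseError:
        ollama.pull('mistral')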

+ 15 - 0
check_projects.py

@@ -0,0 +1,15 @@
+import requests
+import urllib3
+urllib3.disable_warnings()
+
+auth = requests.post(
+    'https://192.168.130.161/taiga/api/v1/auth', 
+    json={'type': 'normal', 'username': 'FrancoisLange', 'password': 'BTSai123'}, 
+    verify=False
+).json()
+
+headers = {'Authorization': f'Bearer {auth["auth_token"]}'}
+projs = requests.get('https://192.168.130.161/taiga/api/v1/projects', headers=headers, verify=False).json()
+print("Projects:")
+for p in projs:
+    print(f"ID: {p['id']}, Name: {p['name']}, Slug: {p['slug']}")

+ 46 - 22
ingest_csv.py

@@ -25,42 +25,62 @@ def ingest_file(filename, engine):
         print(f"File {filename} not found locally.")
         return False
         
-    print(f"\n🚀 Found {filename}! Starting ingestion via SQLAlchemy pipeline...")
-    
-    expected_columns = [
-        "code", "url", "creator", "created_t", "created_datetime", "last_modified_t", 
-        "last_modified_datetime", "product_name", "generic_name", "quantity", "packaging", 
-        "brands", "categories", "origins", "labels", "stores", "countries", "ingredients_text", 
-        "allergens", "traces"
-    ]
+    print(f"\n🚀 Found {filename}! Starting extreme batch ingestion...")
     
     chunk_size = 5000 
     total_processed = 0
 
-    for chunk in pd.read_csv(filename, sep='\t', dtype=str, chunksize=chunk_size, on_bad_lines='skip'):
-        # Filter explicitly to schema
-        available_cols = [col for col in expected_columns if col in chunk.columns]
-        df = chunk[available_cols]
-        
-        # Pandas to_sql safely transforms NaNs to SQL NULLs internally
+    # Read all columns dynamically, with no schema filter; dtype=str loads every value as text so pandas creates the table with the full CSV column set
+    for chunk in pd.read_csv(filename, sep='\t', dtype=str, chunksize=chunk_size, on_bad_lines='skip', low_memory=False):
         try:
-            # We use 'append' because the products table already exists with primary keys
-            # To handle duplicate 'code' primary keys effortlessly, we drop duplicates from the dataframe before insert
-            # Or depend on PyMySQL. But pandas natively crashes on dupes unless managed. 
-            df = df.drop_duplicates(subset=['code'])
+            # Drop duplicates by code natively
+            if 'code' in chunk.columns:
+                df = chunk.drop_duplicates(subset=['code'])
+            else:
+                df = chunk
+                
             df.to_sql('products', con=engine, if_exists='append', index=False)
             total_processed += len(df)
-            print(f"   Successfully appended {total_processed} rows...")
+            print(f"   Successfully appended {total_processed} rows (Dynamic schema)...", end="\r")
         except BaseException as e:
-            # If a strict primary key duplicate existed in DB already from a previous chunk, ignore row crashes
             if "Duplicate entry" in str(e):
                 pass
             else:
-                 print(f"   [Warning] Chunk skipped due to internal structural error: {e}")
+                 print(f"\n   [Warning] Chunk skipped due to internal structural error: {e}")
         
-    print(f"✅ Finished importing {filename}.")
+    print(f"\n✅ Finished importing {filename}.")
     return True
 
+def create_indexes(engine):
+    print("\n🛠️ Creating performance indexes on newly generated table...")
+    # B-TREE and FULLTEXT indexes are built post-ingestion so bulk inserts stay fast
+    from sqlalchemy import text  # wraps raw SQL strings for SQLAlchemy's execute()
+    try:
+        with engine.begin() as connection:
+            print("  Building Primary Key on `code`...")
+            # Pandas created `code` as TEXT, but MySQL cannot use TEXT as a PRIMARY KEY
+            # without a length prefix, so convert it to VARCHAR(50) first.
+            connection.execute(text("ALTER TABLE products MODIFY code VARCHAR(50);"))
+            connection.execute(text("ALTER TABLE products ADD PRIMARY KEY (code);"))
+
+            print("  Building Fulltext Indexes...")
+            connection.execute(text("CREATE FULLTEXT INDEX ft_idx_search ON products(product_name, ingredients_text, brands);"))
+
+            print("  Building B-TREE Indexes on core macros...")
+            # Index the key macro columns if they exist in this dump
+            macro_cols = ['energy-kcal_100g', 'fat_100g', 'carbohydrates_100g', 'proteins_100g']
+            for col in macro_cols:
+                # Convert TEXT to DOUBLE for numerical indexing and querying;
+                # skip silently if the column is missing from this CSV dump
+                try:
+                    connection.execute(text(f"ALTER TABLE products MODIFY `{col}` DOUBLE;"))
+                    connection.execute(text(f"CREATE INDEX idx_{col.replace('-', '_')} ON products(`{col}`);"))
+                except Exception:
+                    pass
+        print("✅ Indexing Complete!")
+    except Exception as e:
+        print(f"❌ Indexing encountered an issue: {e}")
+
 if __name__ == "__main__":
     print("Initiating OpenFoodFacts CSV Ingestion Process...")
     engine = get_loader_engine()
@@ -71,3 +91,7 @@ if __name__ == "__main__":
     if not processed_en and not processed_fr:
         print("\n❌ Could not find either 'en.openfoodfacts.org.products.csv' or 'fr.openfoodfacts.org.products.csv'.")
         print("Please download them directly into the root folder and run this script again.")
+    else:
+        # Build indexes now that all data is appended!
+        create_indexes(engine)
+        print("\n🎉 Full database reload and indexing complete! Ready for AI RAG.")

+ 8 - 13
setup_db.py

@@ -70,22 +70,17 @@ def run_db_setup():
         created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
     ) ENGINE=InnoDB;
     """)
-    # 2. Products Table
-    cursor.execute("""
-    CREATE TABLE IF NOT EXISTS food_db.products (
-        code VARCHAR(50) PRIMARY KEY, url TEXT, creator VARCHAR(255), created_t VARCHAR(50), 
-        created_datetime VARCHAR(50), last_modified_t VARCHAR(50), last_modified_datetime VARCHAR(50), 
-        product_name TEXT, generic_name TEXT, quantity VARCHAR(255), packaging TEXT, brands TEXT, 
-        categories TEXT, origins TEXT, labels TEXT, stores TEXT, countries TEXT, ingredients_text TEXT, 
-        allergens TEXT, traces TEXT, 
-        FULLTEXT INDEX ft_idx_search (product_name, ingredients_text)
-    ) ENGINE=InnoDB;
-    """)
+    # 2. Products Table (Dynamic Drop)
+    # We drop the strict schema completely. `ingest_csv.py` will use pandas to automatically 
+    # generate the table with 100% of the CSV columns dynamically defined as TEXT fields.
+    cursor.execute("DROP TABLE IF EXISTS food_db.products;")
     
     # Table Context Grants (SoD)
     cursor.execute("GRANT SELECT, INSERT, UPDATE ON food_db.users TO 'db_app_auth'@'%';")
-    cursor.execute("GRANT SELECT ON food_db.products TO 'db_reader'@'%';")
-    cursor.execute("GRANT SELECT, INSERT, UPDATE, DELETE, DROP, CREATE ON food_db.products TO 'db_loader'@'%';")
+    # Note: grants for the reader/loader users move to the database level, since the
+    # products table won't exist until pandas creates it during ingestion.
+    cursor.execute("GRANT SELECT ON food_db.* TO 'db_reader'@'%';")
+    cursor.execute("GRANT SELECT, INSERT, UPDATE, DELETE, DROP, CREATE, ALTER, INDEX ON food_db.* TO 'db_loader'@'%';")
     cursor.execute("FLUSH PRIVILEGES;")
 
     print("\n✅ Database, Users, and Tables created successfully!")

+ 26 - 0
setup_mail_forwarding.sh

@@ -0,0 +1,26 @@
+#!/bin/bash
+# run this as root/sudo on the Ubuntu VM
+
+echo "Setting up centralized mail forwarding to lanfr144@gmail.com..."
+
+# 1. Update the skeleton directory so all NEW users created automatically forward mail
+echo "lanfr144@gmail.com" | sudo tee /etc/skel/.forward
+sudo chmod 644 /etc/skel/.forward
+
+# 2. Add forwarding for every existing user home directory
+for user_dir in /home/*; do
+  if [ -d "$user_dir" ]; then
+    user_name=$(basename "$user_dir")
+    echo "lanfr144@gmail.com" | sudo tee "$user_dir/.forward"
+    sudo chown "$user_name":"$user_name" "$user_dir/.forward"
+    sudo chmod 644 "$user_dir/.forward"
+    echo "Configured for user: $user_name"
+  fi
+done
+
+# 3. Add forwarding for root manually
+echo "lanfr144@gmail.com" | sudo tee /root/.forward
+sudo chmod 644 /root/.forward
+echo "Configured for root."
+
+echo "✅ All system mail will now forward to lanfr144@gmail.com"

+ 30 - 0
start_batch_ingest.sh

@@ -0,0 +1,30 @@
+#!/bin/bash
+# Local Food AI - Disconnected Ingestion Wrapper
+# This script uses nohup to run the python ingestion script in the background.
+# You can exit your SSH session safely after starting this script.
+
+echo "========================================================="
+echo "🍔 Local Food AI: Extreme Batch Ingestion"
+echo "========================================================="
+
+if [ ! -f "en.openfoodfacts.org.products.csv" ] && [ ! -f "fr.openfoodfacts.org.products.csv" ]; then
+    echo "❌ Error: CSV files not found in the current directory."
+    echo "Please download the massive CSVs before running this batch."
+    exit 1
+fi
+
+echo "🚀 Starting database wipe and reset..."
+# Automatically run the new DB setup to drop the rigid table
+python3 setup_db.py
+
+echo "🚀 Triggering background ingestion process via nohup..."
+echo "All outputs will be saved to ingestion_process.log"
+
+# Run detached in the background; nohup keeps the process alive after logout
+nohup python3 -u ingest_csv.py > ingestion_process.log 2>&1 &
+BG_PID=$!
+
+echo "✅ Process started in the background (PID: $BG_PID)"
+echo "You can now safely close your terminal or turn off your computer."
+echo "To monitor progress from the server later, run:"
+echo "   tail -f ingestion_process.log"