seed_food_db.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112
  1. import ssl
  2. import sqlite3
  3. import urllib.request
  4. import os
  5. import csv
  6. import sys
  7. # Ensure the app directory is on the path so we can import database.py
  8. sys.path.insert(0, os.path.dirname(__file__))
  9. from database import create_tables
  10. DB_PATH = os.path.join(os.path.dirname(__file__), "localfood.db")
  11. def get_db_connection():
  12. conn = sqlite3.connect(DB_PATH, timeout=20.0, check_same_thread=False)
  13. conn.execute('pragma journal_mode=wal')
  14. return conn
  15. def fetch_and_seed():
  16. # Always ensure the schema exists before inserting
  17. create_tables()
  18. print("Loading fundamental internal CSV dataset...")
  19. csv_path = os.path.join(os.path.dirname(__file__), "nutrition.csv")
  20. try:
  21. with open(csv_path, 'r', encoding='utf-8') as f:
  22. lines = f.readlines()
  23. except Exception as e:
  24. print(f"Failed to read local dataset: {e}")
  25. return
  26. print(f"Dataset downloaded. Mapping data into localfood.db...")
  27. conn = get_db_connection()
  28. cursor = conn.cursor()
  29. reader = csv.reader(lines)
  30. try:
  31. headers = next(reader)
  32. except StopIteration:
  33. print("Empty dataset.")
  34. return
  35. def get_val(row, target_names, default="0"):
  36. for t_name in target_names:
  37. for i, h in enumerate(headers):
  38. if t_name.lower() in h.lower():
  39. # Remove non-numeric characters like "g" or "mg" but keep decimals
  40. val = ''.join(c for c in row[i] if c.isdigit() or c == '.')
  41. return val if val else default
  42. return default
  43. count = 0
  44. for row in reader:
  45. if len(row) < 3: continue
  46. # In this dataset, index 0 is always the food name
  47. name = row[0]
  48. try:
  49. calories = float(get_val(row, ['calorie', 'energy']))
  50. protein = float(get_val(row, ['protein']))
  51. fat = float(get_val(row, ['total_fat', 'fat_g', 'fat']))
  52. carbs = float(get_val(row, ['carbohydrate', 'carb']))
  53. fiber = float(get_val(row, ['fiber']))
  54. sugar = float(get_val(row, ['sugar']))
  55. sodium_mg = float(get_val(row, ['sodium']))
  56. vitamin_a = float(get_val(row, ['vitamin_a', 'vita']))
  57. vitamin_c = float(get_val(row, ['vitamin_c', 'vitc']))
  58. calcium = float(get_val(row, ['calcium']))
  59. iron = float(get_val(row, ['iron']))
  60. potassium = float(get_val(row, ['potassium']))
  61. cholesterol = float(get_val(row, ['cholesterol']))
  62. except ValueError:
  63. continue
  64. cursor.execute("SELECT id FROM foods WHERE name = ?", (name[:100],))
  65. if cursor.fetchone():
  66. continue
  67. cursor.execute('''
  68. INSERT INTO foods (name, category, calories, protein_g, fat_g, carbs_g, fiber_g, sugar_g, sodium_mg, vitamin_a_iu, vitamin_c_mg, calcium_mg, iron_mg, potassium_mg, cholesterol_mg, source)
  69. VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  70. ''', (
  71. name[:100],
  72. "Sourced Ingredient",
  73. calories,
  74. protein,
  75. fat,
  76. carbs,
  77. fiber,
  78. sugar,
  79. sodium_mg,
  80. vitamin_a,
  81. vitamin_c,
  82. calcium,
  83. iron,
  84. potassium,
  85. cholesterol,
  86. 'Sourced_CSV'
  87. ))
  88. count += 1
  89. if count >= 8000:
  90. break
  91. conn.commit()
  92. conn.close()
  93. print(f"Successfully seeded {count} foundational macro tracking items into localfood.db!")
  94. if __name__ == '__main__':
  95. fetch_and_seed()