seed_food_db.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. import ssl
  2. import sqlite3
  3. import urllib.request
  4. import os
  5. import csv
  6. DB_PATH = os.path.join(os.path.dirname(__file__), "localfood.db")
  7. def get_db_connection():
  8. conn = sqlite3.connect(DB_PATH, timeout=20.0, check_same_thread=False)
  9. conn.execute('pragma journal_mode=wal')
  10. return conn
  11. def fetch_and_seed():
  12. ctx = ssl.create_default_context()
  13. ctx.check_hostname = False
  14. ctx.verify_mode = ssl.CERT_NONE
  15. print("Loading fundamental internal CSV dataset...")
  16. csv_path = os.path.join(os.path.dirname(__file__), "nutrition.csv")
  17. try:
  18. with open(csv_path, 'r', encoding='utf-8') as f:
  19. lines = f.readlines()
  20. except Exception as e:
  21. print(f"Failed to read local dataset: {e}")
  22. return
  23. print(f"Dataset downloaded. Mapping data into localfood.db...")
  24. conn = get_db_connection()
  25. cursor = conn.cursor()
  26. reader = csv.reader(lines)
  27. try:
  28. headers = next(reader)
  29. except StopIteration:
  30. print("Empty dataset.")
  31. return
  32. def get_val(row, target_names, default="0"):
  33. for t_name in target_names:
  34. for i, h in enumerate(headers):
  35. if t_name.lower() in h.lower():
  36. # Remove non-numeric characters like "g" or "mg" but keep decimals
  37. val = ''.join(c for c in row[i] if c.isdigit() or c == '.')
  38. return val if val else default
  39. return default
  40. count = 0
  41. for row in reader:
  42. if len(row) < 3: continue
  43. # In this dataset, index 0 is always the food name
  44. name = row[0]
  45. try:
  46. calories = float(get_val(row, ['calorie', 'energy']))
  47. protein = float(get_val(row, ['protein']))
  48. fat = float(get_val(row, ['total_fat', 'fat_g', 'fat']))
  49. carbs = float(get_val(row, ['carbohydrate', 'carb']))
  50. fiber = float(get_val(row, ['fiber']))
  51. sugar = float(get_val(row, ['sugar']))
  52. sodium_mg = float(get_val(row, ['sodium']))
  53. vitamin_a = float(get_val(row, ['vitamin_a', 'vita']))
  54. vitamin_c = float(get_val(row, ['vitamin_c', 'vitc']))
  55. calcium = float(get_val(row, ['calcium']))
  56. iron = float(get_val(row, ['iron']))
  57. potassium = float(get_val(row, ['potassium']))
  58. cholesterol = float(get_val(row, ['cholesterol']))
  59. except ValueError:
  60. continue
  61. cursor.execute("SELECT id FROM foods WHERE name = ?", (name[:100],))
  62. if cursor.fetchone():
  63. continue
  64. cursor.execute('''
  65. INSERT INTO foods (name, category, calories, protein_g, fat_g, carbs_g, fiber_g, sugar_g, sodium_mg, vitamin_a_iu, vitamin_c_mg, calcium_mg, iron_mg, potassium_mg, cholesterol_mg, source)
  66. VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  67. ''', (
  68. name[:100],
  69. "Sourced Ingredient",
  70. calories,
  71. protein,
  72. fat,
  73. carbs,
  74. fiber,
  75. sugar,
  76. sodium_mg,
  77. vitamin_a,
  78. vitamin_c,
  79. calcium,
  80. iron,
  81. potassium,
  82. cholesterol,
  83. 'Sourced_CSV'
  84. ))
  85. count += 1
  86. if count >= 8000:
  87. break
  88. conn.commit()
  89. conn.close()
  90. print(f"Successfully seeded {count} foundational macro tracking items into localfood.db!")
  91. if __name__ == '__main__':
  92. fetch_and_seed()