1
0

convert_datatypes.py 2.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576
  1. import pymysql
  2. import pandas as pd
  3. import getpass
  4. def detect_and_convert_types():
  5. print("Welcome to the Data Types Optimizer.")
  6. print("WARNING: This modifies your database schemas. You must authenticate as the database `db_owner`.\n")
  7. owner_pass = getpass.getpass("Enter the MySQL 'db_owner' password: ")
  8. try:
  9. conn = pymysql.connect(
  10. host='127.0.0.1',
  11. user='db_owner',
  12. password=owner_pass,
  13. database='food_db'
  14. )
  15. cursor = conn.cursor()
  16. except Exception as e:
  17. print(f"❌ Connection failed: {e}")
  18. return
  19. # Assuming we check common known numerical columns to shrink the DB footprint
  20. columns_to_inspect = ["quantity", "created_t", "last_modified_t"]
  21. for col in columns_to_inspect:
  22. print(f"\nAnalyzing column: `{col}`")
  23. try:
  24. # Check if column exists by picking 5000 non nulls
  25. query = f"SELECT `{col}` FROM products WHERE `{col}` IS NOT NULL AND `{col}` != '' LIMIT 5000"
  26. df = pd.read_sql(query, conn)
  27. except Exception as e:
  28. print(f" ⚠️ Could not read column `{col}`: {e}")
  29. continue
  30. if df.empty:
  31. print(f" ⏭️ Column `{col}` is entirely null/empty. Keeping as TEXT.")
  32. continue
  33. series = df[col].astype(str).str.strip()
  34. # INTEGER CHECK
  35. if series.str.match(r'^-?\d+$').all():
  36. print(f" ⚙️ Status: ALL INTS matched. Converting `{col}` to BIGINT.")
  37. try:
  38. cursor.execute(f"UPDATE products SET `{col}` = NULL WHERE `{col}` = '';")
  39. cursor.execute(f"ALTER TABLE products MODIFY COLUMN `{col}` BIGINT;")
  40. conn.commit()
  41. print(" ✅ Success")
  42. except Exception as e:
  43. print(f" ❌ Failed to alter table: {e}")
  44. continue
  45. # FLOAT CHECK
  46. test_float = series.str.replace(',', '.')
  47. if test_float.str.match(r'^-?\d*\.\d+$').any() and test_float.str.match(r'^-?\d*\.?\d+$').all():
  48. print(f" ⚙️ Status: FLOATS detected. Standardizing and converting `{col}` to DOUBLE...")
  49. try:
  50. cursor.execute(f"UPDATE products SET `{col}` = NULL WHERE `{col}` = '';")
  51. cursor.execute(f"UPDATE products SET `{col}` = REPLACE(`{col}`, ',', '.') WHERE `{col}` LIKE '%,%';")
  52. cursor.execute(f"ALTER TABLE products MODIFY COLUMN `{col}` DOUBLE;")
  53. conn.commit()
  54. print(" ✅ Success")
  55. except Exception as e:
  56. print(f" ❌ Failed to alter table: {e}")
  57. continue
  58. print(f" ⏭️ Keeping `{col}` as TEXT.")
  59. cursor.close()
  60. conn.close()
  61. print("\n🎉 Datatype conversion complete!")
  62. if __name__ == '__main__':
  63. detect_and_convert_types()