read_pdfs.py 648 B

1234567891011121314151617
  1. import os
  2. from pypdf import PdfReader
  3. def extract_pdf(pdf_path, txt_path):
  4. try:
  5. reader = PdfReader(pdf_path)
  6. text = ""
  7. for page in reader.pages:
  8. text += page.extract_text() + "\n"
  9. with open(txt_path, "w", encoding="utf-8") as f:
  10. f.write(text)
  11. print(f"Extracted {pdf_path} to {txt_path}")
  12. except Exception as e:
  13. print(f"Failed to read {pdf_path}: {e}")
  14. extract_pdf(r"c:\Users\lanfr144\Documents\DOPRO1\Antigravity\Food\Project.pdf", "project_text.txt")
  15. extract_pdf(r"c:\Users\lanfr144\Documents\DOPRO1\Antigravity\Food\Retro Planning.pdf", "retro_planning_text.txt")