# Extract the text of every non-blank page of a PDF into one UTF-8 text file.
# Each extracted page is preceded by a "=== PAGE n ===" marker line (1-based).
import fitz

# Input PDF and output text file (hard-coded for this one-off extraction).
PDF_PATH = r'C:\git\spark-lesson\reference\sources\liu-discharge-transitions-thesis.pdf'
TXT_PATH = r'C:\git\spark-lesson\reference\sources\liu-discharge-transitions-thesis.txt'

output = []

# Open the document in a context manager so it is closed even if
# extraction fails part-way through.
with fitz.open(PDF_PATH) as doc:
    print(f'Total pages: {len(doc)}')
    for i, page in enumerate(doc):
        text = page.get_text()
        # Skip pages whose extracted text is empty or whitespace-only.
        if text.strip():
            output.append(f'=== PAGE {i+1} ===')
            output.append(text)

full_text = '\n'.join(output)

with open(TXT_PATH, 'w', encoding='utf-8') as f:
    f.write(full_text)

# Every extracted page contributes two entries (marker + text), hence // 2.
print(f'Extracted {len(output)//2} pages')
print(f'Total characters: {len(full_text)}')
# Counts newline characters in the combined text.
print(f'Total lines: {full_text.count(chr(10))}')