"""PDF helper snippets.

1. Save each page of a single PDF as its own one-page PDF file.
2. Extract the sentences of a PDF that contain a given keyword.

Run as a script, it performs both steps on ``file.pdf`` in the current
working directory.
"""
import re
from pathlib import Path

# Sentence terminators: ASCII ". ! ?" plus the CJK full stop and the
# full-width exclamation/question marks used in Chinese text.
_SENTENCE_END = re.compile(r'[.!?。！？]')


# --- Save each page of a single PDF as a separate file ----------------------

def split_pdf_pages(pdf_path, output_dir='.'):
    """Write every page of *pdf_path* to its own ``page_<n>.pdf`` file.

    Args:
        pdf_path: Path of the PDF to split.
        output_dir: Existing directory that receives the one-page files.
            Defaults to the current working directory.

    Returns:
        The paths of the files written, in page order (numbering is 1-based).
    """
    # Third-party dependency, imported lazily so the text-extraction helpers
    # in this file remain usable when PyPDF2 is not installed.
    import PyPDF2

    written = []
    # The source file must stay open while pages are copied, so the whole
    # loop lives inside the ``with`` block.
    with open(pdf_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        for page_number, page in enumerate(pdf_reader.pages, start=1):
            # A fresh writer per page so each output holds exactly one page.
            pdf_writer = PyPDF2.PdfWriter()
            pdf_writer.add_page(page)
            output_path = Path(output_dir) / f'page_{page_number}.pdf'
            with open(output_path, 'wb') as output_file:
                pdf_writer.write(output_file)
            written.append(str(output_path))
    return written


# --- Extract the sentences of a PDF that contain a keyword ------------------

def find_sentences_with_keyword(text, keyword):
    """Return the sentences of *text* that contain *keyword*.

    Line breaks are flattened first (newline -> space, carriage return
    removed), the text is split on sentence-ending punctuation, and each
    sentence is stripped of surrounding whitespace. The terminating
    punctuation itself is not part of the returned sentences.
    """
    flattened = text.replace('\n', ' ').replace('\r', '')
    candidates = (part.strip() for part in _SENTENCE_END.split(flattened))
    return [sentence for sentence in candidates if keyword in sentence]


def extract_sentences_with_keyword(pdf_path, txt_path, keyword):
    """Write every sentence of a PDF that contains *keyword* to a text file.

    Args:
        pdf_path: Path of the PDF to read.
        txt_path: Path of the UTF-8 text file to (over)write; one matching
            sentence per line.
        keyword: Substring a sentence must contain to be kept.
    """
    # Third-party dependency, imported lazily (see split_pdf_pages).
    import pdfplumber

    with pdfplumber.open(pdf_path) as pdf, \
            open(txt_path, 'w', encoding='utf-8') as txt_file:
        for page in pdf.pages:
            text = page.extract_text()
            if not text:
                # Pages without extractable text contribute nothing.
                continue
            txt_file.writelines(
                f'{sentence}\n'
                for sentence in find_sentences_with_keyword(text, keyword)
            )


if __name__ == '__main__':
    split_pdf_pages('./file.pdf')

    pdf_file = 'file.pdf'
    txt_file = 'file.txt'
    keyword = '海洋热浪'
    extract_sentences_with_keyword(pdf_file, txt_file, keyword)