本文实例讲述了Python实现批量读取word中表格信息的方法。分享给大家供大家参考。具体如下:
单位收集了很多Word格式的调查表,领导需要汇总表单里的信息。我就把所有调查表放在同一个文件夹里,写了个Python小程序,把所需的信息打印出来。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
|
#coding:utf-8 import os import win32com from win32com.client import Dispatch, constants from docx import Document def parse_doc(f): """读取doc,返回姓名和行业 """ doc = w.Documents. Open ( FileName = f ) t = doc.Tables[ 0 ] # 根据文件中的图表选择信息 name = t.Rows[ 0 ].Cells[ 1 ]. Range .Text situation = t.Rows[ 0 ].Cells[ 5 ]. Range .Text people = t.Rows[ 1 ].Cells[ 1 ]. Range .Text title = t.Rows[ 1 ].Cells[ 3 ]. Range .Text print name, situation, people,title doc.Close() def parse_docx(f): """读取docx,返回姓名和行业 """ d = Document(f) t = d.tables[ 0 ] name = t.cell( 0 , 1 ).text situation = t.cell( 0 , 8 ).text people = t.cell( 1 , 2 ).text title = t.cell( 1 , 8 ).text print name, situation, people,title if __name__ = = "__main__" : w = win32com.client.Dispatch( 'Word.Application' ) # 遍历文件 PATH = "H:\work\\aaa" # windows文件路径 doc_files = os.listdir(PATH) for doc in doc_files: if os.path.splitext(doc)[ 1 ] = = '.docx' : try : parse_docx(PATH + '\\' + doc) except Exception as e: print e elif os.path.splitext(doc)[ 1 ] = = '.doc' : try : parse_doc(PATH + '\\' + doc) except Exception as e: print e |
希望本文所述对大家的Python程序设计有所帮助。