python读取word文档,插入mysql数据库的示例代码_Python

表格内容如下：

python读取word文档,插入mysql数据库的示例代码

1、实现批量导入word文档，取文档标题中的数字作为编号

2、除取上面打钩的内容需要匹配出来入库入库，其他内容全部直接入库mysql

python" id="highlighter_612591">

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

									# wuyanfeng

									# -*- coding:utf-8 -*-

									# 读取docx中的文本代码示例

									import docx

									import pymysql

									import re

									import os

									# 创建数据库链接

									conn = pymysql.connect(

									 host='rm-bp1vu5d84dg12c6d59o.mysql.rds.aliyuncs.com',

									 port=3306,

									 user='root',

									 passwd='wyf092415*',

									 db='pays',

									 charset='utf8',

									)

									# 创建游标

									cursor = conn.cursor()

									#切片函数

									def section(info,key,len11):

									 a = len(info)

									 print(a, type(a))

									 d = []

									 e = 0

									 g = -1

									 i = 0

									 task_class=[]

									 while i < len(info):

									  # for i in range(len(info)):

									  # i+=1

									  print("i::::", i)

									  try:

									   #c = info.index("a", e)

									   #print("c:::::", c)

									   c = info.index(key, e)

									   #print("c:::::", c)

									   print("c类型判断",type(c))

									  except valueerror:

									   print(valueerror)

									  try:

									   if (c != '') & (g < int(c)):

									    d.append(c)

									    g = c

									    i = c + 1

									    print("illlldddd:", i)

									    e = c + 1

									    continue

									   elif (c == ''):

									     break

									  except unboundlocalerror:

									   print(unboundlocalerror)

									   return task_class

									  break

									 print("d", d, type(d))

									 print(d[0], type(d[0]))

									 print("d的长度：",len(d))

									 #开始切片

									 if len(d) != 0:

									  for j in range(len(d)):

									   print("info11:::", info, type(info))

									   info = ''.join(info)

									   print("info222:::",info,type(info))

									   print("d[%d]"%j,d[j])

									   #print("d[j]:5"%j,info[d[j]:5])

									   llll = info[d[j]+1:d[j]+5]

									   print("d[%d]:5" % j, llll)

									   task_class.append(llll)

									   print("task_class::11", task_class)

									 task_class=",".join(task_class)

									 print("str1112222",task_class)

									 return task_class

									def insettable(file):

									 print("file：：：:::::::::::::::::", file)

									 print("type：：：:::::::::::::::::", type(file))

									 # file1 = file

									 # file1 = str(file1)

									 ddd = re.findall("知识库\\\(\d+)", file)

									 print("ddd：::::::::：：", ddd)

									 print("ddd[0]:::", ddd[0])

									 ddd = int(ddd[0])

									 print("ddd::::", type(ddd))

									 file = docx.document(file)

									 # 读取表格：

									 t = file.tables[0]

									 print(t)

									 print("1:", t.cell(0, 0).text) # 1

									 cell1 = t.cell(0, 0).text

									 print("tyep::::", type(t.cell(0, 0).text))

									 print("2:", t.cell(0, 1).text) # 2

									 cell2 = t.cell(0, 1).text

									 print("2:", t.cell(0, 2).text) # 2

									 cell3 = t.cell(0, 2).text

									 print("2:", t.cell(0, 3).text) # 2

									 cell4 = t.cell(0, 3).text

									 print("cell4:::::::::", cell4)

									 print("3:", t.cell(1, 0).text) # 3

									 cell5 = t.cell(1, 0).text

									 print("4:", t.cell(1, 2).text) # 4

									 cell6 = t.cell(1, 2).text

									 print("5:", t.cell(1, 3).text) # 5

									 task_type = t.cell(1, 3).text

									 # task_type = re.findall('.*[☑√](.*)$', cell7)

									 # task_type = ''.join(cell7)

									 print("task_type111111:", task_type)

									 # task_class = task_class[0:4]

									 '''低级处理方式

									 a = int(task_type.count("☑"))

									 print("a|||||||", a, type(a))

									 b = int(task_type.count("√"))

									 print("b|||||||", b, type(a))

									 if (a == 1) | (b == 1):

									  print("111111111111111111")

									  # task_type = re.findall('.*[☑√](.*)$', task_type)

									  task_type = re.findall('.*[☑√](.*)$', task_type)

									  print("task_type1", task_type)

									  task_type = ''.join(task_type)

									  print("task_type2", task_type)

									  task_type = task_type[0:4]

									  print("task_type3:d:%s，b=%d" % (a, b), task_type)

									 elif (a == 0) & (b == 0):

									  print("2222222222222222222")

									  task_type = '法定职责'

									  print("a:%s，b=%s" % (a, b), task_type)

									 elif (a == 2) | (b == 2):

									  print("333333333333333333333")

									  task_type = '法定职责,工作职责 '

									  print("a:%s，b=%s" % (a, b), task_type)

									 '''

									 #调用切片函数

									 task_type1 = section(task_type, "√", 4)

									 task_type2 = section(task_type, "☑", 4)

									 task_type1 = "".join(task_type1)

									 task_type2 = "".join(task_type2)

									 print("task_type1:::", task_type1,type(task_type1))

									 print("task_type2:::", task_type2,type(task_type2))

									 if task_type1.strip()!="":

									  task_type = task_type1

									  print("task_type111:::", task_type1)

									 elif task_type2.strip()!="":

									  task_type = task_type2

									  print("task_type222:::", task_type2)

									 print("6:", t.cell(1, 4).text) # 6

									 cell8 = t.cell(1, 4).text

									 print("7:", t.cell(2, 1).text) # 7

									 cell9 = t.cell(2, 1).text

									 # 获取文档对象

									 # file = docx.document("d:\\配置库\\公案app\\1.2 系统规格\\知识库\\14人员死亡先期处置.docx")

									 print("段落数:" + str(len(file.paragraphs))) # 段落数为13，每个回车隔离一段

									 lenn = len(file.paragraphs)

									 print("len:", lenn)

									 # 输出每一段的内容

									 for para in file.paragraphs:

									  print(para.text)

									 # 输出段落编号及段落内容

									 for i in range(len(file.paragraphs)):

									  print("第" + str(i) + "段的内容是：" + file.paragraphs[i].text)

									 list6 = []

									 for i in range(len(file.paragraphs)):

									  if 0 == i:

									   print("i:", i)

									   lis0 = file.paragraphs[i].text

									   print("list0:", lis0)

									   print(type(lis0))

									  elif 1 == i:

									   print("i:", i)

									   task_class = file.paragraphs[i].text

									   print("lis1", task_class,type(task_class))

									   '''低级处理方式

									   print("task_class111111:", task_class)

									   c = int(task_class.count("☑"))

									   task_class = ''.join(task_class)

									   #print(task_class.index('☑'))

									   print("c|||||||", c, type(c))

									   d = int(task_class.count("√"))

									   print(task_class.index('√'))

									   print("d|||||||", d, type(d))

									   task_class = re.findall(r'[☑√](?:.*)', task_class)

									   task_class = ''.join(task_class)

									   task_class = task_class[1:5]

									   print("task_class", task_class)

									  '''

									   #调用切片函数

									   task_class1 = section(task_class, "√", 4)

									   task_class2 = section(task_class, "☑", 4)

									   task_class1 = "".join(task_class1)

									   task_class2 = "".join(task_class2)

									   print("task_class1:::", task_class1,type(task_class1))

									   print("task_class2:::", task_class2,type(task_class2))

									   if task_class1.strip()!="":

									    task_class = task_class1

									    print("task_class11:::", task_class1)

									   elif task_class2.strip()!="":

									    task_class = task_class2

									    print("task_class22:::", task_class2)

									  if 2 == i:

									   print("i:", i)

									   lis2 = file.paragraphs[i].text

									   print("lis2", lis2)

									   print(type(lis2))

									   preparer = re.findall('填表单位：(.*?)$', lis2)

									   preparer = ''.join(preparer)

									   print("preparer:%s" % preparer)

									  # elif 3 == i:

									  #  print("i:", i)

									  #  lis3 = file.paragraphs[i].text

									  elif 3 == i:

									   print("i:", i)

									   lis4 = file.paragraphs[i].text

									   print("lis4", lis4)

									   print(type(lis4))

									  elif 3 < i < lenn - 1:

									   print("i:", i)

									   print(file.paragraphs[i].text)

									   print(type(file.paragraphs[i].text))

									   # list6[i-5] = list6.append(file.paragraphs[i].text)

									   list6.append(str(file.paragraphs[i].text).strip('\xa0'))

									   # list6.append("%s\n" % str(file.paragraphs[i].text).strip('\xa0'))

									   print(list6)

									 key_steps = "\n".join(list6)

									 # print("key_steps:\n",key_steps.strip('\n'))

									 cursor.execute(

									  "insert into `t_knowledge_base` (`no`, `preparer`, `task_class`, `task_name`, `task_specification`, `task_type`, `task_desc`, `task_basis`, `key_steps`) values ('%d','%s','%s','%s','%s','%s','%s',null,'%s')" % (

									   ddd, preparer, task_class, cell2, cell4, task_type, cell9, key_steps))

									 conn.commit()

									def traverse(f):

									 fs = os.listdir(f)

									 for f1 in fs:

									  tmp_path = os.path.join(f, f1)

									  if not os.path.isdir(tmp_path):

									   print('文件: %s' % tmp_path)

									   insettable(tmp_path)

									  else:

									   print('文件夹：%s' % tmp_path)

									   traverse(tmp_path)

									path = 'd:\\配置库\公案app\\1.2 系统规格\\知识库'

									traverse(path)

									#单文件调测

									# path = 'd:\\配置库\\公案app\\1.2 系统规格\\知识库\\14人员死亡先期处置.docx'

									# insettable(path)

									# 关闭游标

									cursor.close()

									# 关闭连接

									conn.close()