1. 前言
由于近期有任务需要,要写一个能够处理Excel的脚本,实现的功能是,在A表格上其中一列,对字符串进行分组和排序,然后根据排序好的A表格以固定格式自动填写到B表格上。
开始写脚本之前查了很多资料,最开始采用了openpyxl这个模块,用起来很顺手,使用这个对A表格其中一列进行了重新填写,但是后来发现,需要用到删除和插入空白行的操作,使用openpyxl比较困难,这个模块仅支持在表格的最后一行继续添加新行,不支持在中间插入和删除行。
在查找的过程中发现,网上流传了一些使用openpyxl进行插入删除行的操作,现整理一下。
2. 使用openpyxl
一种思路是将sheet数据转换成list,然后在list进行操作,这种方法可行,但是实际测试之后发现运行起来速度太慢了,数据1000多条,时间就已经等不起了。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
# Creat insert row function group---------------------------------------------- def blankRowInsert(sheet, row_num, add_num): myList = Sheet2List(sheet) insertLine(myList, row_num, add_num, sheet.max_column) List2Sheet(sheet,myList) def Sheet2List(sheet): # 把一个表格中的数据全部导出到一个列表 listResult = [] for i in range ( 1 ,sheet.max_row + 1 ): lineData = [] for j in range ( 1 ,sheet.max_column + 1 ): cell = sheet.cell(row = i, column = j) lineData.append(cell.value) listResult.append(lineData) return listResult def insertLine(aList, row_num , add_num, maxColumn): # 对列表进行添加操作操作 for _ in range ( 1 ,add_num + 1 ): # ['']*N是创建一个个数为N的空格列表,插入列表aList aList.insert(row_num, [''] * maxColumn) def List2Sheet(sheet, list ): # 把数据写回sheet for i in range ( 1 , len ( list ) + 1 ): for j in range ( 1 , len ( list [ 0 ]) + 1 ): cell = sheet.cell(row = i, column = j) cell.value = list [i - 1 ][j - 1 ] # End of insert row function group--------------------------------------------- |
另外一种思路是直接自己给openpyxl这个轮子补胎,添加一个新的方法,笔者没有试验,下面的代码是StackOverflow相关问题上面贴的,如果各位有兴趣可以自己尝试。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
|
def insert_rows( self , row_idx, cnt, above = False , copy_style = True , fill_formulae = True ): """Inserts new (empty) rows into worksheet at specified row index. :param row_idx: Row index specifying where to insert new rows. :param cnt: Number of rows to insert. :param above: Set True to insert rows above specified row index. :param copy_style: Set True if new rows should copy style of immediately above row. :param fill_formulae: Set True if new rows should take on formula from immediately above row, filled with references new to rows. Usage: * insert_rows(2, 10, above=True, copy_style=False) """ CELL_RE = re. compile ( "(?P<col>\$?[A-Z]+)(?P<row>\$?\d+)" ) row_idx = row_idx - 1 if above else row_idx def replace(m): row = m.group( 'row' ) prefix = "$" if row.find( "$" ) ! = - 1 else "" row = int (row.replace( "$" ,"")) row + = cnt if row > row_idx else 0 return m.group( 'col' ) + prefix + str (row) # First, we shift all cells down cnt rows... old_cells = set () old_fas = set () new_cells = dict () new_fas = dict () for c in self ._cells.values(): old_coor = c.coordinate # Shift all references to anything below row_idx if c.data_type = = Cell.TYPE_FORMULA: c.value = CELL_RE.sub( replace, c.value ) # Here, we need to properly update the formula references to reflect new row indices if old_coor in self .formula_attributes and 'ref' in self .formula_attributes[old_coor]: self .formula_attributes[old_coor][ 'ref' ] = CELL_RE.sub( replace, self .formula_attributes[old_coor][ 'ref' ] ) # Do the magic to set up our actual shift if c.row > row_idx: old_coor = c.coordinate old_cells.add((c.row,c.col_idx)) c.row + = cnt new_cells[(c.row,c.col_idx)] = c if old_coor in self .formula_attributes: old_fas.add(old_coor) fa = self .formula_attributes[old_coor].copy() new_fas[c.coordinate] = fa for coor in old_cells: del self ._cells[coor] self ._cells.update(new_cells) for fa in old_fas: del self .formula_attributes[fa] self .formula_attributes.update(new_fas) # Next, we need to shift all the Row Dimensions below our new rows down by cnt... for row in range ( len ( self .row_dimensions) - 1 + cnt,row_idx + cnt, - 1 ): new_rd = copy.copy( self .row_dimensions[row - cnt]) new_rd.index = row self .row_dimensions[row] = new_rd del self .row_dimensions[row - cnt] # Now, create our new rows, with all the pretty cells row_idx + = 1 for row in range (row_idx,row_idx + cnt): # Create a Row Dimension for our new row new_rd = copy.copy( self .row_dimensions[row - 1 ]) new_rd.index = row self .row_dimensions[row] = new_rd for col in range ( 1 , self .max_column): col = get_column_letter(col) cell = self .cell( '%s%d' % (col,row)) cell.value = None source = self .cell( '%s%d' % (col,row - 1 )) if copy_style: cell.number_format = source.number_format cell.font = source.font.copy() cell.alignment = source.alignment.copy() cell.border = source.border.copy() cell.fill = source.fill.copy() if fill_formulae and source.data_type = = Cell.TYPE_FORMULA: s_coor = source.coordinate if s_coor in self .formula_attributes and 'ref' not in self .formula_attributes[s_coor]: fa = self .formula_attributes[s_coor].copy() self .formula_attributes[cell.coordinate] = fa # print("Copying formula from cell %s%d to %s%d"%(col,row-1,col,row)) cell.value = re.sub( "(\$?[A-Z]{1,3}\$?)%d" % (row - 1 ), lambda m: m.group( 1 ) + str (row), source.value ) cell.data_type = Cell.TYPE_FORMULA # Check for Merged Cell Ranges that need to be expanded to contain new cells for cr_idx, cr in enumerate ( self .merged_cell_ranges): self .merged_cell_ranges[cr_idx] = CELL_RE.sub( replace, cr ) # Use way: # Worksheet.insert_rows = insert_rows |
3. 使用xlwings
进行一些列尝试和折腾之后,笔者放弃了使用openpyxl操作Excel插入和删除行了,到网上寻觅,发现了xlwings这个轮子,说明里写有api能够调用VBA的函数,这就很炫酷了,然后翻了翻文档,决定使用这个轮子操作,现贴出来笔者写的几段代码作为使用方法示范。
3.1. 删除行: range.api.EntireRow.Delete()
1
2
3
4
5
6
7
8
9
10
11
|
# Delete origin row temp_del = 0 if len (delete_list) > 0 : for delete_row in delete_list: # Report schedule print ( "Have alerady done: " + \ str ((temp_del * 100 ) / / delete_num) + "%" ) # Delete one row wb_sheet. range ( 'A' + str (delete_row - temp_del)).api.EntireRow.Delete() temp_del = temp_del + 1 wb.save() |
上面这段代码使用了一些小技巧,delete_list储存的是原表格中,需要删除的行号,在删除过程中由于总行数也在跟着减少,所以需要把绝对行号转成相对行号进行标记删除,这个转换就是temp_del变量的使用目的。
3.2. 插入行: sheet.api.Rows(row_number).Insert()
1
2
3
|
if key_word = = sheet. range ( 'A' + str (i_row + 1 )).value: # Insert new line sheet.api.Rows(i_row + 2 ).Insert() |
需要注意的是,这个VBA函数是向上插入空行,并且xlwings这个轮子只能在windows和macos的系统下使用,暂时不支持Linux。不过xlwings运行速度要远超过openpyxl,而且还能直接调用VBA的函数,对于WPS和Excel都能兼容,综合来看,还是选择xlwings比较好一些。
以上这篇Python操作Excel插入删除行的方法就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持服务器之家。
原文链接:https://blog.csdn.net/j353838430/article/details/80329423