python实现excel公式格式化
小小明:
之前跟一些小伙伴有个讨论:
可以看到很多跟数据打交道的人,都面对过很复杂的excel公式嵌套层数特别多,肉眼观看很容易蒙圈。在这样的需求背景,小小明就为了解决大家的痛点,就做了一个牛逼的excel公式格式化的工具。
效果体验
先看看效果吧:
=IF(C11>100%*C4,IF(C11<=200%*C4,C11*50%-C4*15%,C11*60%-C4*35%),IF(C11<=C4*50%,C11*30%,C11*40%-C4*5%))
- 1
的格式化结果是:
=IF(
C11>100%*C4,
IF(
C11<=200%*C4,
C11*50%-C4*15%,
C11*60%-C4*35%
),
IF(
C11<=C4*50%,
C11*30%,
C11*40%-C4*5%
)
)
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
(SMA(MAX(CLOSE-DELAY(CLOSE,1),0),12,1)/SMA(ABS(CLOSE-DELAY(CLOSE,1)),12,1)*100-MIN(SMA(MAX(CLOSE-DELAY(
CLOSE,1),0),12,1)/SMA(ABS(CLOSE-DELAY(CLOSE,1)),12,1)*100,12))/(MAX(SMA(MAX(CLOSE-DELAY(CLOSE,1),0),12,
1)/SMA(ABS(CLOSE-DELAY(CLOSE,1)),12,1)*100,12)-MIN(SMA(MAX(CLOSE-DELAY(CLOSE,1),0),12,1)/SMA(ABS(
CLOSE-DELAY(CLOSE,1)),12,1)*100,12))
- 1
- 2
- 3
- 4
的格式化结果为:
(
SMA(MAX(CLOSE-DELAY(CLOSE,1),0),12,1)
/
SMA(ABS(CLOSE-DELAY(CLOSE,1)),12,1)
*
100-MIN(
SMA(MAX(CLOSE-DELAY(CLOSE,1),0),12,1)
/
SMA(ABS(CLOSE-DELAY(CLOSE,1)),12,1)*100,
12
)
)
/
(
MAX(
SMA(MAX(CLOSE-DELAY(CLOSE,1),0),12,1)
/
SMA(ABS(CLOSE-DELAY(CLOSE,1)),12,1)*100,
12
)
-
MIN(
SMA(MAX(CLOSE-DELAY(CLOSE,1),0),12,1)
/
SMA(ABS(CLOSE-DELAY(CLOSE,1)),12,1)*100,
12
)
)
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
=IF(ROW()>COLUMN(),"",IF(ROW()=COLUMN(),$B15,ROUNDDOWN($B15*INDIRECT(SUBSTITUTE(ADDRESS(1,3+COLUMN()-ROW(),
4),1,"")&56),0)))
- 1
- 2
的格式化结果为:
=IF(
ROW()>COLUMN(),
"",
IF(
ROW()=COLUMN(),
$B15,
ROUNDDOWN(
$B15*INDIRECT(
SUBSTITUTE(ADDRESS(1,3+COLUMN()-ROW(), 4),1,"")
&
56
),
0
)
)
)
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
(文末有体验网址)
接下来,将公布这套格式化程序的完整代码和开发思想,有技术能力的小伙伴可以考虑改进该代码。
完整代码
__author__ = 'xiaoxiaoming'
from collections import deque
import re
class Node:
def __init__(self, parent=None, tab_size=0):
self.parent = parent
self.tab_size = tab_size
self.data = []
def is_single_node(self):
for e in self.data:
if not isinstance(e, str):
return False
return True
def get_single_text(self):
return "".join(self.data)
def split_text_blocks(excel_func_text):
"""
将excel公式字符串,按照一定的规则切割成数组
:param excel_func_text: 被切割的excel公式字符串
:return: 切割后的结果
"""
excel_func_text = excel_func_text.replace('\n', '').replace('\r', '')
excel_func_text = re.sub(" +", " ", excel_func_text)
lines = []
i, j = 0, 0
while j < len(excel_func_text):
c = excel_func_text[j]
if (c == '(' and excel_func_text[j + 1] != ')') or c == ',':
lines.append(excel_func_text[i:j + 1])
i = j = j + 1
elif c == ')' and excel_func_text[j - 1] != '(':
if i < j:
lines.append(excel_func_text[i:j])
i = j # 起始文件块置于)处
# 以下代码查找,如果中间不包含(或),则将)和,之间的文本块加入到划分结果
k = excel_func_text.find(",", j + 1)
l = excel_func_text.find("(", j + 1, k)
m = excel_func_text.find(")", j + 1, k)
if k != -1 and l == -1 and m == -1:
lines.append(excel_func_text[i:k + 1])
i = j = k + 1
elif j + 1 < len(excel_func_text) and excel_func_text[j + 1] != ')':
lines.append(")")
lines.append(excel_func_text[j + 1])
i = j = j + 2
else:
lines.append(")")
i = j = j + 1
elif c == '"':
j = excel_func_text.find('"', j + 1) + 1
else:
j += 1
return lines
blank_char_count = 2
def combine_node(root, text_max_length=60, max_combine_layer=3):
"""
合并最内层的只有纯文本子节点的节点为单个文本节点
:param root: 被合并的节点
:param text_max_length: 合并后的文本长度不超过该参数,则应用该合并替换原节点
:param max_combine_layer: 最大合并层数
:return:
"""
for _ in range(max_combine_layer):
no_change = True
stack = deque([root])
while stack:
node = stack.pop()
tmp = {}
for i, e in enumerate(node.data):
if isinstance(e, Node):
if e.is_single_node():
single_text = e.get_single_text()
if len(single_text) < text_max_length:
tmp[i] = single_text
else:
stack.append(e)
for i, e in tmp.items():
node.data[i] = e
if len(tmp) != 0:
no_change = False
if no_change:
break
def node_next_line(node):
for i, e in enumerate(node.data):
if isinstance(e, str):
if i == 0 or i == len(node.data) - 1:
tab = node.tab_size - 1
else:
tab = node.tab_size
yield f"{' ' * blank_char_count * tab}{e}"
else:
yield from node_next_line(e)
def excel_func_format(excel_func_text, blank_count=2, combine_single_node=True, text_max_length=60,
max_combine_layer=3):
"""
将excel公式格式化成比较容易阅读的格式
:param excel_func_text: 被格式化的excel公式字符串
:param blank_count: 最终显示的格式化字符串的1个tab用几个空格表示
:param combine_single_node: 是否合并纯文本节点,该参数设置为True后面的参数才生效
:param text_max_length: 合并后的文本长度不超过该参数,则应用该合并替换原节点
:param max_combine_layer: 最大合并层数
:return: 格式化后的字符串
"""
global blank_char_count
blank_char_count = blank_count
blocks = split_text_blocks(excel_func_text)
# print("\n".join(blocks))
# print('-----------拆分结果-----------')
tab_size = 0
node = root = Node()
for block in blocks:
if block.endswith("("):
tab_size += 1
child_node = Node(node, tab_size)
node.data.append(child_node)
node = child_node
node.data.append(block)
elif block.startswith(")"):
tab_size -= 1
node.data.append(block)
node = node.parent
else:
node.data.append(block)
if combine_single_node:
combine_node(root, text_max_length, max_combine_layer)
result = [line for line in node_next_line(root)]
return "\n".join(result)
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
- 79
- 80
- 81
- 82
- 83
- 84
- 85
- 86
- 87
- 88
- 89
- 90
- 91
- 92
- 93
- 94
- 95
- 96
- 97
- 98
- 99
- 100
- 101
- 102
- 103
- 104
- 105
- 106
- 107
- 108
- 109
- 110
- 111
- 112
- 113
- 114
- 115
- 116
- 117
- 118
- 119
- 120
- 121
- 122
- 123
- 124
- 125
- 126
- 127
- 128
- 129
- 130
- 131
- 132
- 133
- 134
- 135
- 136
- 137
- 138
- 139
- 140
- 141
- 142
处理流程浅析
下面都以如下公式作为示例:
=IF(ROW()>COLUMN(),"",IF(ROW()=COLUMN(),$B15,ROUNDDOWN($B15*INDIRECT(SUBSTITUTE(ADDRESS(1,3+COLUMN()-ROW(),
4),1,"")&56),0)))
- 1
- 2
文本分块切分
def split_text_blocks(excel_func_text):
"""
将excel公式字符串,按照一定的规则切割成数组
:param excel_func_text: 被切割的excel公式字符串
:return: 切割后的结果
"""
excel_func_text = excel_func_text.replace('\n', '').replace('\r', '')
excel_func_text = re.sub(" +", " ", excel_func_text)
lines = []
i, j = 0, 0
while j < len(excel_func_text):
c = excel_func_text[j]
if (c == '(' and excel_func_text[j + 1] != ')') or c == ',':
lines.append(excel_func_text[i:j + 1])
i = j = j + 1
elif c == ')' and excel_func_text[j - 1] != '(':
if i < j:
lines.append(excel_func_text[i:j])
i = j # 起始文件块置于)处
# 以下代码查找,如果中间不包含(或),则将)和,之间的文本块加入到划分结果
k = excel_func_text.find(",", j + 1)
l = excel_func_text.find("(", j + 1, k)
m = excel_func_text.find(")", j + 1, k)
if k != -1 and l == -1 and m == -1:
lines.append(excel_func_text[i:k + 1])
i = j = k + 1
elif j + 1 < len(excel_func_text) and excel_func_text[j + 1] != ')':
lines.append(")")
lines.append(excel_func_text[j + 1])
i = j = j + 2
else:
lines.append(")")
i = j = j + 1
elif c == '"':
j = excel_func_text.find('"', j + 1) + 1
else:
j += 1
return lines
s = """=IF(ROW()>COLUMN(),"",IF(ROW()=COLUMN(),$B15,ROUNDDOWN($B15*INDIRECT(SUBSTITUTE(ADDRESS(1,3+COLUMN()-ROW(),
4),1,"")&56),0))) """
blocks = split_text_blocks(s)
for block in blocks:
print(block)
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
的运行结果为:
=IF(
ROW()>COLUMN(),
"",
IF(
ROW()=COLUMN(),
$B15,
ROUNDDOWN(
$B15*INDIRECT(
SUBSTITUTE(
ADDRESS(
1,
3+COLUMN()-ROW(),
4
),
1,
""
)
&
56
),
0
)
)
)
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
这端代码首先替换掉所有的换行符,将多个空格替换为单个空格,然后将左右括号和逗号作为切分点进行切分。
但存在一些特殊情况,例如ROW()和COLUMN()括号内部没有任何内容,所有这种括号应该作为普通字符处理,另外被""包含的字符串可能包含括号,也应该作为普通字符。
构建多叉树层次结构
设计数据结构:
class Node:
def __init__(self, parent=None, tab_size=0):
self.parent = parent
self.tab_size = tab_size
self.data = []
- 1
- 2
- 3
- 4
- 5
parent存储父节点的指针,tab_size存储当前节点的层级,data存储当前节点的所有数据。
构建代码:
tab_size = 0
node = root = Node()
for block in blocks:
if block.endswith("("):
tab_size += 1
child_node = Node(node, tab_size)
node.data.append(child_node)
node = child_node
node.data.append(block)
elif block.startswith(")"):
tab_size -= 1
node.data.append(block)
node = node.parent
else:
node.data.append(block)
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
构建完毕后,这段数据在内存中的结构(仅展示data)如下:
遍历打印这颗多叉树
def node_next_line(node):
for i, e in enumerate(node.data):
if isinstance(e, str):
if i == 0 or i == len(node.data) - 1:
tab = node.tab_size - 1
else:
tab = node.tab_size
yield f"{' ' * 2 * tab}{e}"
else:
yield from node_next_line(e)
result = [line for line in node_next_line(root)]
print("\n".join(result))
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
结果:
=IF(
ROW()>COLUMN(),
"",
IF(
ROW()=COLUMN(),
$B15,
ROUNDDOWN(
$B15*INDIRECT(
SUBSTITUTE(
ADDRESS(
1,
3+COLUMN()-ROW(),
4
),
1,
""
)
&
56
),
0
)
)
)
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
合并最内层的节点
显然将最内层的node5节点合并一下阅读性更好:
首先给数据结构增加判断是否为纯文本节点的方法:
class Node:
def __init__(self, parent=None, tab_size=0):
self.parent = parent
self.tab_size = tab_size
self.data = []
def is_single_node(self):
for e in self.data:
if not isinstance(e, str):
return False
return True
def get_single_text(self):
return "".join(self.data)
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
下面是合并纯文本节点的实现,max_combine_layer决定了合并的最大次数,如果合并后长度超过text_max_length参数,则不应用这次合并:
from collections import deque
def combine_node(root, text_max_length=60, max_combine_layer=3):
"""
合并最内层的只有纯文本子节点的节点为单个文本节点
:param root: 被合并的节点
:param text_max_length: 合并后的文本长度不超过该参数,则应用该合并替换原节点
:param max_combine_layer: 最大合并层数
:return:
"""
for _ in range(max_combine_layer):
no_change = True
stack = deque([root])
while stack:
node = stack.pop()
tmp = {}
for i, e in enumerate(node.data):
if isinstance(e, Node):
if e.is_single_node():
single_text = e.get_single_text()
if len(single_text) < text_max_length:
tmp[i] = single_text
else:
stack.append(e)
for i, e in tmp.items():
node.data[i] = e
if len(tmp) != 0:
no_change = False
if no_change:
break
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
合并一次:
combine_node(root, 100, 1)
result = [line for line in node_next_line(root)]
print("\n".join(result))
- 1
- 2
- 3
结果:
=IF(
ROW()>COLUMN(),
"",
IF(
ROW()=COLUMN(),
$B15,
ROUNDDOWN(
$B15*INDIRECT(
SUBSTITUTE(
ADDRESS(1,3+COLUMN()-ROW(), 4),
1,
""
)
&
56
),
0
)
)
)
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
合并二次:
combine_node(root, 100, 2)
result = [line for line in node_next_line(root)]
print("\n".join(result))
- 1
- 2
- 3
结果:
=IF(
ROW()>COLUMN(),
"",
IF(
ROW()=COLUMN(),
$B15,
ROUNDDOWN(
$B15*INDIRECT(
SUBSTITUTE(ADDRESS(1,3+COLUMN()-ROW(), 4),1,"")
&
56
),
0
)
)
)
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
合并三次:
combine_node(root, 100, 3)
result = [line for line in node_next_line(root)]
print("\n".join(result))
- 1
- 2
- 3
结果:
=IF(
ROW()>COLUMN(),
"",
IF(
ROW()=COLUMN(),
$B15,
ROUNDDOWN(
$B15*INDIRECT(SUBSTITUTE(ADDRESS(1,3+COLUMN()-ROW(), 4),1,"")&56),
0
)
)
)
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
合并三次后的内存情况:
体验网址
http://xiaoxiaoming.xyz:8088/excel
不保证永久有效。
GUI图形化工具开发
下面使用PySimpleGUI开发一个图形化工具,完整代码:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# 创建时间:2020/12/25 12:03
__author__ = 'xiaoxiaoming'
import PySimpleGUI as sg
import pyperclip
from excel_func_format import excel_func_format
sg.change_look_and_feel("GreenMono")
layout = [
[
sg.Text("代格式化的excel公式:")
], [
sg.Multiline(size=(80, 4), enter_submits=True, key='text'),
], [
sg.Checkbox('是否合并内层函数', default=True, key="is_combine", enable_events=True),
sg.Text("合并后的文件长度不超过:"), sg.In(size=(4, 1), key="length", default_text="60"),
sg.Text("最大合并层数:"), sg.In(size=(4, 1), key="layer", default_text="3"),
], [
sg.Text("每个tab用多少个空格表示:"),
sg.In(size=(4, 1), key="tab_size", default_text="2"),
], [
sg.Button('格式化', enable_events=True, key="start"),
sg.Button('清空已输入的公式', enable_events=True, key="clear"),
sg.Button('格式化剪切板的内容', enable_events=True, key="paste"),
sg.Button('复制结果到剪切板', enable_events=True, key="copy"),
], [
sg.Text("格式化的结果:")
], [
sg.Output(size=(80, 28), key="out")
]
]
window = sg.Window('Excel公式格式化/美化器', layout)
while True:
event, values = window.read()
if event in (None,):
break # 相当于关闭界面
elif event == "start":
window['out'].Update("")
result = excel_func_format(values['text'], int(values["tab_size"]), values["is_combine"],
int(values["length"]),
int(values["layer"]))
print(result, end="")
elif event == "clear":
window['text'].Update("")
elif event == "paste":
text = pyperclip.paste()
window['text'].Update(text)
window['out'].Update("")
result = excel_func_format(text, int(values["tab_size"]), values["is_combine"],
int(values["length"]),
int(values["layer"]))
print(result, end="")
elif event == "copy":
pyperclip.copy(window['out'].Get().strip())
elif event == "is_combine":
if values["is_combine"]:
window["length"].Update(disabled=False)
window["layer"].Update(disabled=False)
else:
window["length"].Update(disabled=True)
window["layer"].Update(disabled=True)
window.close()
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
打包exe:
pyinstaller -wF --icon=excel.ico excel_func_format_GUI.py
- 1
运行效果:
文章来源: xxmdmst.blog.csdn.net,作者:小小明-代码实体,版权归原作者所有,如需转载,请联系作者。
原文链接:xxmdmst.blog.csdn.net/article/details/111504489
- 点赞
- 收藏
- 关注作者
评论(0)