-
Notifications
You must be signed in to change notification settings - Fork 1
/
888触发菜单版本.py
1176 lines (739 loc) · 37.3 KB
/
888触发菜单版本.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# References:
#   http://www.wb86.com/post/330.html
#   https://tkdocs.com/tutorial/text.html
# 2022-01-28 22:58
# The main technical difficulty is now solved.
# How it works: a label is applied by changing the background colour of the
# selected text span (click one of the label buttons at the bottom of the
# window). To clear a label, click the first button ("annotate to None").
# Spans are exported as B-<label>, I-<label>, E-<label>, ... tags.
# Once the BIO file exists, the user can replace the labels with whatever
# names they need, e.g. B-PRODUCT.
# Why the background colour is used to tell labels apart: sometimes a space
# has to be labelled too, e.g. in a name written with a space between its two
# parts, where the space must become I-PERSON. A foreground colour cannot
# colour a space, so the background colour is used instead.
# Version 3: re-open a previously written BIO file and restore its labels.
# All user configuration lives at the top of the file.
# Current approach: labels found in the BIO file get their colours
# automatically from the colour table (about 10 labels supported); keeping a
# stored colour mapping was considered too much trouble.
# ---- configuration ----
# Each entry is [colour, label]; the colour is used as the Tk tag name and as
# the tag's background colour.
color_and_biaoqian=[
    # Colour lookup site: http://color.liminba.com/c/ffb6c1/
    ['#FF6c82','per'], # Originally commented as "standard red"; a colour name (red) or a hex code (#FF0000) works as well.
    ['yellow','loc'],
    ['Blue','time'],
    ['Cyan','org'],
    ['orange','law'],
    ['PeachPuff', 'veh'],
    ['Gray', 'stg'],
    # ['Brown', 'time4'],
    # ['Tan', 'time5'],
    # ['Beige', 'time6'],
    # ['Beige', 'time6'],
    # ['Beige', 'time6'],
    # ['Beige', 'time6'],
]
# Full colour palette: when a loaded file contains more labels than color_and_biaoqian defines, the extra labels are coloured automatically from clis.
clis= ['white', 'red' ,'yellow','Blue','Cyan','orange','PeachPuff','Gray', 'Brown', 'Tan','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige',]
# Two tagging schemes are supported: 'bioes' or 'bio'.
# NOTE(review): save() derives the scheme from the combobox and assigns a
# local variable of the same name, so this module-level value is not read there.
tool_type='bioes'
# tool_type='bio'
yihangduoshaoniu=10 # Button layout: number of label buttons per row.
# 环境是win10 python3.6
from tkinter import *
#=============第一层是text
import tkinter
import tkinter as tk
# Main application window.
root = Tk(className=' 信息抽取标注工具(一键生成bioes格式)')
# frame = Frame (root, relief=RAISED, borderwidth=20)
# Font shared by the text area and the line-number gutter.
fon1=('宋体',15)
root.resizable(True,True)
# 2023-02-09 10:01: line numbers added.
class TextLineNumbers(tk.Canvas):
    """Canvas that draws a line number beside each visible line of a text widget.

    Bind a text widget with :meth:`attach`, then call :meth:`redraw` whenever
    the text content or the scroll position changes.
    """

    def __init__(self, *args, **kwargs):
        tk.Canvas.__init__(self, *args, **kwargs)
        self.textwidget = None  # set by attach()

    def attach(self, text_widget):
        """Remember the text widget whose lines are to be numbered."""
        self.textwidget = text_widget

    def redraw(self, *args):
        """Redraw the numbers of the lines currently visible in the text widget.

        Extra positional arguments are accepted and ignored so the method can
        be used directly as an event callback. Does nothing when no text
        widget has been attached yet.
        """
        if self.textwidget is None:
            return
        self.delete("all")
        # "@0,0" is the character closest to the top-left corner of the view.
        # Index syntax reference: https://tkdocs.com/shipman/text-index.html
        index = self.textwidget.index("@0,0")
        while True:
            # dlineinfo() gives (x, y, width, height, baseline) for the
            # visible part of the line, or None once the line is off-screen.
            dline = self.textwidget.dlineinfo(index)
            if dline is None:
                break
            line, column = index.split('.')
            if column == '0':
                # Only an index at column 0 gets a number; the first index can
                # be mid-line (e.g. "1.4") after scrolling a wrapped line.
                self.create_text(1, dline[1], anchor="nw",
                                 text="{0:>4}".format(line), font=fon1)
            # Jump to the start of the next logical line.
            index = self.textwidget.index("{}.0".format(int(line) + 1))
class CustomText(tk.Text):
    """Text widget that fires a ``<<Change>>`` virtual event on edits and scrolls.

    The widget's underlying Tcl command is renamed and replaced by
    :meth:`_proxy`, so every widget command passes through Python first.
    """

    # View commands after which the visible lines may have changed.
    _VIEW_COMMANDS = (
        ("xview", "moveto"),
        ("xview", "scroll"),
        ("yview", "moveto"),
        ("yview", "scroll"),
    )

    def __init__(self, *args, **kwargs):
        tk.Text.__init__(self, *args, **kwargs)
        # Create a proxy for the underlying widget command.
        self._orig = self._w + "_orig"
        self.tk.call("rename", self._w, self._orig)
        self.tk.createcommand(self._w, self._proxy)

    def _proxy(self, *args):
        """Forward a widget command to the real widget and signal changes.

        Returns whatever the real widget returned. If Tk reports an error
        the error is suppressed and ``1`` is returned instead, which is the
        fallback value callers of this widget have always received.
        """
        result = 1
        try:
            # Let the actual widget perform the requested action.
            result = self.tk.call((self._orig,) + args)
            # Generate an event if something was added or deleted, the
            # insert cursor moved, or the view was scrolled.
            # event generate manual: http://tcl.tk/man/tcl8.5/TkCmd/event.htm#M34
            if (args[0] in ("insert", "replace", "delete")
                    or args[0:3] == ("mark", "set", "insert")
                    or args[0:2] in self._VIEW_COMMANDS):
                self.event_generate("<<Change>>", when="tail")
        except tk.TclError:
            # Best effort: a failing Tk command must not break the proxy.
            pass
        # Return what the actual widget returned.
        return result
class Example(tk.Frame):
    """Frame bundling the text area, its vertical scrollbar and the line-number gutter."""

    def __init__(self, *args, **kwargs):
        tk.Frame.__init__(self, *args, **kwargs)
        self.text = CustomText(self, width=30, height=20, font=fon1,
                               wrap='char', background='#C7EDCC')
        # Vertical scrollbar.
        self.vsb = tk.Scrollbar(self, orient="vertical", command=self.text.yview)
        self.vsb.pack(side="right", fill="y")
        self.text.configure(yscrollcommand=self.vsb.set)
        # Line-number gutter, bound to the text widget.
        self.linenumbers = TextLineNumbers(self, width=50)
        self.linenumbers.attach(self.text)
        self.linenumbers.pack(side="left", fill="y")
        self.text.pack(side="right", fill="both", expand=True)
        # Redraw the line numbers whenever the text changes or is resized.
        self.text.bind("<<Change>>", self._on_change)
        self.text.bind("<Configure>", self._on_change)
        self.text.insert("end", "one\ntwo\nthree\n")
        self.text.insert("end", "five\n")

    def _on_change(self, event):
        """Event handler: refresh the line-number gutter."""
        self.linenumbers.redraw()

    def changgelinenum(self):
        """Widen the line-number gutter to 150 pixels.

        The existing canvas is resized in place. Creating and packing a
        second canvas here would leave it without space, because it would be
        packed after the text widget, which already claims the remaining
        room with ``expand=True``.
        """
        self.linenumbers.configure(width=150)
# ---- end of the line-numbered text widget classes ----
# Example(root).pack(side="top", fill="both", expand=True)
aaa2=Example(root)
# Reset the line-number widget (disabled).
if 0:
    aaa2.changgelinenum()# Author's note: rebuilding the gutter dynamically had no effect; once line numbers reach the ten-thousands they are no longer visible, and this is unsolved so far.
aaa2.pack(side="top", fill="both", expand=True)
text = aaa2.text# The text widget inside Example (it has the eye-friendly background colour).
# (disabled) a sample text used to be inserted here at index '1.0', i.e. line 1, column 0.
colorlist=[i[0] for i in color_and_biaoqian]
labellist=[i[1] for i in color_and_biaoqian]
# NOTE(review): the source lost its indentation; the extent of this disabled
# block (both scrollbars) is assumed — confirm against the original file.
if 0:
    # Vertical scrollbar:
    scroll = tkinter.Scrollbar()
    # Placed on the right side of the window, filling the vertical (Y) direction.
    scroll.pack(side=tkinter.RIGHT,fill=tkinter.Y)
    scroll.config(command=text.yview)
    text.config(yscrollcommand=scroll.set)
    # Horizontal scrollbar.
    s2 = Scrollbar(root, orient = HORIZONTAL)
    s2.pack(side = BOTTOM, fill = X)
    s2.config(command = text.xview)
    text.config(xscrollcommand=s2.set)
# print( text.get('1.0', 'end'))
text.pack()
# frame.pack()
# The second layer is a frame that holds the buttons.
frame = Frame (root, relief=GROOVE, borderwidth=1)
frame.pack (side=TOP, fill=BOTH, ipadx=5, ipady=5, expand=1)
for i in colorlist:
    text.tag_config(i, background=i) # Configure one tag per colour (comparable to a class attribute on an HTML div).
def helloCallBack(color):
    """Label the current selection with *color*, replacing any existing label.

    ``'white'`` means "no label": existing colour tags are removed from the
    selection and nothing is added (white is never applied as a background).
    Tk errors, for example when nothing is selected, are ignored.
    """
    try:
        start = text.index("sel.first")
        end = text.index("sel.last")
        # Remove every other label from the span before applying the new one.
        for tag in colorlist:
            text.tag_remove(tag, start, end)
        if color != 'white':
            text.tag_add(color, start, end)
    except tkinter.TclError:
        pass
def helloCallBack_read_bio(i,j,color):
    """Colour the text between indices *i* and *j* with *color*.

    Used while restoring labels from a BIO file. The tag is (re)configured
    first because a colour introduced by the loaded file may not have a
    configured tag yet. ``'white'`` only clears existing labels.
    Tk errors are ignored.
    """
    try:
        text.tag_config(color, background=color)
        # Remove every other label from the span before applying the new one.
        for tag in colorlist:
            text.tag_remove(tag, i, j)
        if color != 'white':
            text.tag_add(color, i, j)
    except tkinter.TclError:
        pass
import kmp_for_array
def helloCallBack_quanbiaozhu(color):
    """Label every occurrence of the selected text, in the whole document, with *color*.

    The document is searched line by line with ``kmp_for_array.kmp``.
    NOTE(review): ``kmp`` presumably returns -1 when there is no match and
    otherwise the start columns of all matches — confirm in that module.
    Overlapping matches are skipped: in "aaa" with "aa" selected, only the
    first "aa" is labelled.
    """
    needle = text.get(text.index("sel.first"), text.index("sel.last"))
    width = len(needle)
    spans = []
    for row, line in enumerate(text.get('1.0', END).split('\n'), start=1):
        hits = kmp_for_array.kmp(line, needle, return_all=True)
        if hits == -1:
            continue
        next_free = 0  # first column not covered by an accepted match
        for col in hits:
            if col < next_free:
                continue
            spans.append([f'{row}.{col}', f'{row}.{col + width}'])
            next_free = col + width
    for first, last in spans:
        # Remove every other label from the span before applying the new one.
        for tag in colorlist:
            text.tag_remove(tag, first, last)
        if color != 'white':  # white means "no label"
            text.tag_add(color, first, last)
# import re
from keyword2 import KeywordProcessor#从https://github.com/Kashian/flashtext 做修改.因为他居然不支持中文!!!!!!!!
from tool_for_pipei import DFAFilter
import time
import re
def zhengzehelloCallBack_quanbiaozhu(color):
    """Label every regex match in the document with *color*.

    The pattern is read from the entry box ``e``. Each line is scanned with
    ``findall``; the distinct matched strings are loaded into a ``DFAFilter``
    and every line is then searched for those strings.
    NOTE(review): ``pipei_shortest`` presumably yields (start, end) column
    pairs with an exclusive end — they are used directly as Tk indices here;
    confirm in tool_for_pipei.

    A match that overlaps an already accepted match on the same line is
    skipped; a match that starts exactly where the previous one ended is kept.
    """
    pattern = re.compile(e.get())
    lines = text.get('1.0', END).split('\n')
    matched = set()
    for line in lines:
        matched.update(pattern.findall(line))
    gfw = DFAFilter()
    for keyword in matched:
        gfw.add(keyword)
    spans = []
    for row, line in enumerate(lines, start=1):
        next_free = 0  # first column not covered by an accepted match
        for hit in gfw.pipei_shortest(line):
            if hit[0] >= next_free:
                spans.append((f'{row}.{hit[0]}', f'{row}.{hit[1]}'))
                # The next match may start right at the end of this one.
                next_free = hit[1]
    for first, last in spans:
        # Remove every other label from the span before applying the new one.
        for tag in colorlist:
            text.tag_remove(tag, first, last)
        if color != 'white':  # white means "no label"
            text.tag_add(color, first, last)
# Colour table: https://blog.csdn.net/wjciayf/article/details/79261005
# c0='black'
# c1='red'
# c2='yellow'
# c3='Blue'
# c4='Cyan'
# c5='Lime'
# The button commands cannot be created in a plain for loop; they are
# generated per button instead, which avoids sharing the loop variable.
# Used for placing the buttons.
aa=[0,1,2,3,4,5,6,7,8,9,10]
save_all_button=[]
paddd=12 # horizontal padding passed as padx when the label buttons are gridded
def setup_button():
    """Create the "annotate to None" button and one button per configured label."""
    # The clear button always comes first.
    fun4('white')
    # One button for each [colour, label] entry, addressed by its position.
    for position, _entry in enumerate(color_and_biaoqian):
        fun(position)
def fun4(i):
    """Create the "annotate to None" button for colour *i*, with its right-click menu.

    Left click applies *i* to the current selection. The right-click menu
    offers the same action for every occurrence of the selected text and for
    every regex match.
    """
    menu = tkinter.Menu(frame, tearoff=0)

    def label_all_occurrences():
        return helloCallBack_quanbiaozhu(i)

    def label_regex_matches():
        return zhengzehelloCallBack_quanbiaozhu(i)

    menu.add_command(label='全标', command=label_all_occurrences)
    menu.add_command(label='正则', command=label_regex_matches)

    def show_menu(event):
        # Screen coordinates (x_root/y_root), not widget-relative ones.
        menu.post(event.x_root, event.y_root)

    button = tkinter.Button(frame, bg=i, font=('宋体', 10, 'bold'),
                            text='annotate to None',
                            command=lambda: helloCallBack(i))
    button.bind("<Button-3>", show_menu)
    button.grid(row=0, column=1, padx=paddd)
def fun(i):
    """Create the button for label number *i*, with its right-click menu.

    Left click labels the current selection. The right-click menu offers
    "全标" (label every occurrence of the selected text) and "正则" (label
    every regex match). The colour is looked up in ``colorlist`` when the
    action runs; the button's own colour and caption are read once, here.
    Buttons are laid out ``yihangduoshaoniu`` per row, starting at column 2.
    """
    menu = tkinter.Menu(frame, tearoff=0)
    menu.add_command(
        label='全标',
        command=lambda: helloCallBack_quanbiaozhu(colorlist[i]))
    menu.add_command(
        label='正则',
        command=lambda: zhengzehelloCallBack_quanbiaozhu(colorlist[i]))

    def show_menu(event):
        # Screen coordinates (x_root/y_root), not widget-relative ones.
        menu.post(event.x_root, event.y_root)

    button = tkinter.Button(frame, bg=colorlist[i], font=('宋体', 10, 'bold'),
                            text=labellist[i],
                            command=lambda: helloCallBack(colorlist[i]))
    button.bind("<Button-3>", show_menu)
    button.grid(row=i // yihangduoshaoniu,
                column=i % yihangduoshaoniu + 2, padx=paddd)
# b=tkinter.Button(frame, text ="test4", )
# b.bind("<Button-3>", lambda event,a=3:pop(event,a))# https://blog.csdn.net/qq_34633194/article/details/120631709 传参方法.
# b.grid(row=3,column=0,padx=10)
def save():
    """Write the text and its labels to output.txt, output.bio and output.ner.

    * ``output.txt`` - the content of the text widget (UTF-8).
    * ``output.bio`` - one line per text line holding one space-separated tag
      per character: ``O``, or ``B-``/``I-``/``E-``/``S-`` plus the label in
      BIOES mode, or ``B-``/``I-`` plus the label in BIO mode. The mode is
      taken from the combobox ``cmb``.
    * ``output.ner`` - one line for every text line that has at least one
      entity: the 0-based row, then each entity as text, label, start column
      and inclusive end column joined by backslashes, then ``######`` and the
      text line itself.

    Labelled ranges that span more than one line are skipped. The outcome is
    reported through the ``text666`` status label; when output.txt cannot be
    written nothing else is attempted.
    """
    from collections import defaultdict

    try:
        scheme = 'bioes' if 'es' in cmb.get() else 'bio'
        result = text.get("1.0", "end")
        with open('output.txt', 'w', encoding='utf-8') as f:
            f.write(result)
    except (OSError, UnicodeError, tkinter.TclError):
        text666.set('output.txt写入失败')
        return

    yuanwen = result.replace('\r\n', '\n').split('\n')
    jieguo = [['O'] * len(line) for line in yuanwen]
    entities = defaultdict(list)  # 0-based row -> entity descriptions

    for color, label in zip(colorlist, labellist):
        label = str(label)
        # tag_ranges() is a flat sequence: start, end, start, end, ...
        bounds = text.tag_ranges(color)
        for first, last in zip(bounds[0::2], bounds[1::2]):
            row, col = (int(part) for part in str(first).split('.'))
            end_row, end_col = (int(part) for part in str(last).split('.'))
            if row != end_row:
                # A range crossing a line break cannot be expressed per line.
                continue
            entities[row - 1].append('\\'.join(
                (yuanwen[row - 1][col:end_col], label,
                 str(col), str(end_col - 1))))
            length = end_col - col
            if scheme == 'bio':
                tags = ['B-' + label] + ['I-' + label] * (length - 1)
            elif length == 1:
                tags = ['S-' + label]
            else:
                tags = (['B-' + label] + ['I-' + label] * (length - 2)
                        + ['E-' + label])
            jieguo[row - 1][col:end_col] = tags

    # Platform default encoding, matching how the loader opens this file.
    with open('output.bio', 'w') as f:
        f.writelines(' '.join(tags) + '\n' for tags in jieguo)

    # Entity summary, meant to make later relation labelling easier.
    with open('output.ner', 'w', encoding='utf-8') as f:
        for row in sorted(entities):
            f.write(str(row) + ' ' + ' '.join(entities[row])
                    + '######' + yuanwen[row] + '\n')

    text666.set('bio和txt都写入成功')
# Build the initial set of label buttons.
setup_button()
def chognzhi():
    """Reload output.txt and output.bio and restore the labels they describe.

    Both files are read before the text widget is touched, so a missing or
    unreadable file leaves the current document intact. Labels found in the
    BIO file that are not configured yet are appended to
    ``color_and_biaoqian`` with the first palette colour from ``clis`` that no
    other label uses, and get their own button. The outcome is reported
    through the ``text666`` status label.
    """
    global color_and_biaoqian
    global colorlist
    global labellist
    try:
        with open('output.txt', encoding='utf-8') as f:
            content = f.read()
        # Platform default encoding, matching how save() writes this file.
        with open('output.bio') as f:
            rows = [line.replace('\n', '').split(' ') for line in f]
    except (OSError, UnicodeError):
        text666.set('读取失败')
        return

    try:
        text.delete('1.0', 'end')
        text.insert('1.0', content)

        # Register labels that appear in the file but are not configured.
        known = set(labellist)
        # Tk colour names are case-insensitive; white is reserved for "no label".
        taken = {c.lower() for c in colorlist}
        taken.add('white')
        first_new = len(color_and_biaoqian)
        for row in rows:
            for token in row:
                if '-' not in token:
                    continue
                label = token[2:]
                if label in known:
                    continue
                # First unused palette colour; once the palette is exhausted
                # the last entry is reused, so such labels share a colour.
                color = next((c for c in clis if c.lower() not in taken),
                             clis[-1])
                taken.add(color.lower())
                known.add(label)
                color_and_biaoqian.append([color, label])
        colorlist = [entry[0] for entry in color_and_biaoqian]
        labellist = [entry[1] for entry in color_and_biaoqian]
        # Only the new labels need a button; the others already have one.
        for position in range(first_new, len(color_and_biaoqian)):
            fun(position)

        # Colour the text character by character according to the BIO tags.
        color_of = {entry[1]: entry[0] for entry in color_and_biaoqian}
        for line_no, row in enumerate(rows, start=1):
            for col, token in enumerate(row):
                if '-' in token:
                    helloCallBack_read_bio(f'{line_no}.{col}',
                                           f'{line_no}.{col + 1}',
                                           color_of[token[2:]])
        text666.set('读取成功')
    except tkinter.TclError:
        text666.set('读取失败')
# Version 3: loading a BIO file. To keep things simple there is no file
# dialog; output.bio is read directly.
b=tkinter.Button(frame, text ="load bio和txt", command = chognzhi)
b.grid(row=0,column=0,padx=10)
# entryExample = Entry(root)
# entryExample.place(x = -10,
# y =-10,
# width=2000,
# height=100)
import tkinter
from tkinter import ttk # ttk is imported because the combobox widget lives there
# Create the combobox.
cmb = ttk.Combobox(frame,state="readonly")
cmb.grid(row=1, column=(1), padx=20)
# Values offered by the combobox (tagging mode BIOES / tagging mode BIO).
cmb['value'] = ('标注模式bioes', '标注模式bio')
# Select the default entry.
cmb.current(0)
# The default is given as an index, starting at 0.
import tkinter as tk
# Status line: a yellow label bound to the text666 variable.
text666 = tk.StringVar()
aaa=tkinter.Label(frame,textvariable=text666, bg='yellow')
aaa.grid(row=2,column=0,padx=10)
text666.set("type regex to right")
b=tkinter.Button(frame, text ="save to BIO和txt", command = save)
b.grid(row=1,column=0,padx=10)
# b=tkinter.Button(root, text ="标注2", command = helloCallBack(c2))
# b.pack()
# b=tkinter.Button(root, text ="标注3", command = helloCallBack(c3))
# b.pack()
# b=tkinter.Button(root, text ="标注4", command = helloCallBack(c4))
# b.pack()
# b=tkinter.Button(root, text ="标注5", command = helloCallBack(c5))
# text.tag_add('highlightline', '5.0', '6.0')
# Entry box for the regular expression, pre-filled with a sample pattern.
E1 = Entry(frame, bd =5,)
E1.insert('0','.{1}某')
E1.grid(row=2, column=(1), padx=20)
e=E1
# (disabled) button that asked the user to type a regex on the right and click to label:
# b = tkinter.Button(frame, text='...', command=lambda x=1:reg())
# b.grid(row=2, column=( 0), padx=10)
#按扭调用的函数,
def reg():
    """Read the regex entry box; the value is fetched but not used."""
    e.get()
# import base64
# Marker placed at the front of copied content and on the clipboard so that
# the tool can recognise its own data.
uuid='[[sep]]'
# import flask
# import json
# Last styled selection captured by the copy handler.
global_fuwenben=[]
# import klembord # needs to be installed with pip
# NOTE(review): the copy handler below calls json and klembord, whose imports
# are commented out here — confirm they are imported elsewhere in the file.
waijie=0
# Reference: https://www.coder.work/article/7769372
# 2023-02-06 12:36: style-preserving copy added.
def fun2(event):
#========
a= text.index(tk.INSERT)
try:
txt = text.get('sel.first', 'sel.last')
content = text.dump('sel.first', 'sel.last', tag=True, text=True)
global global_fuwenben
# content编码.
global_fuwenben=content
html_text = []
tmp=eval(str(content))
#我们往content里面加一个自己的id做校验. 因为如果有其他json会发生混淆.
content=[uuid]+content
tmp=json.dumps(content,ensure_ascii=True)
# tmp=tmp.encode('utf-8')
tttttttt=json.loads(tmp)
# print(tmp,9999999999999999999999999999999999999999999999999999999999999999999999999999)
# print(type(tmp))
tmp=str(tmp)
klembord.set_text(uuid)#===========剪贴板里面编解码有问题!!!!!!!!!!所以这里面用一个技巧只需要传递信号即可, 只需要对面校验uuid即可.
#==========2023-02-06,20点24 目前方案还是用自编码, 不用json, 即使他能用, 也会慢,因为非编辑器数据如果用jsonfy会卡.不如自己做一个头来判断速度快.#但是目前还是jsonfy好实现.先用着.
# [('tagon', 'sel', '1.0'), ('text', '贴入你要处', '1.0'), ('tagon', 'red', '1.5'), ('text', '理的sdfsadf张某,李某,sdfsdf', '1.5'), ('tagoff', 'red', '1.26'), ('text', 'asf', '1.26'), ('tagon', 'red', '1.29'), ('text', 'asd', '1.29'), ('tagoff', 'red', '1.32'), ('text', '王某他们杀人了fas]', '1.32')]
#=整理一下 #编码是 sep color1 sep text1 sep color2 sep text2
# tmp2=[]
#
# for i in range(len(content)):
# if content[i][0] =='text' and content[i-1][0]!='tagon':
# tmp2.append(sep_for_clipborad)
# tmp2.append('white')
# tmp2.append(sep_for_clipborad)
# tmp2.append(content[i][1])
# if content[i][1]=='tagon':
# tmp2.append(sep_for_clipborad)
# tmp2.append(content[i][1])
# if content[i][0] =='text' and content[i-1][0]=='tagon':
# tmp2.append(sep_for_clipborad)
# tmp2.append(content[i][1])
# print(tmp2,12341230947239047238947238947329847234234)
# klembord.set_text(''.join(tmp2))
# print(content)