#资料: # http://www.wb86.com/post/330.html # https://tkdocs.com/tutorial/text.html #2022-01-28,22点58 # 现在技术难点已经完全攻破. #现在解释如下. 道理就是把文字片段的背景色进行修改.(点击界面下面的标注1---5),消除标签点击第一个标注为空按钮 #======这里面我们就标注为B-tag1,I-tag1,E-tag1......B-tag5,....B-tag5即可 #得到BIO文件之后,用户只需要根据自己定好的替换自己需要的标签即可.比如B-PRODUCT... #解释一下为什么用背景色来区分各个标签.因为我们有时候要标注空格比如尼古拉斯 凯奇. # 这个名字之间带个空格,那么我们空格也要标注为I-PERSON才行.用前景色无法把空格染色!所以我们用背景色技巧! #====================第三版. 打开原来的bio文件进行读取之前bio结果. #==================自定义配置都写最上面:#如何动态修改tkinter按钮. #=========现在的方案是 按照bio里面的直接按照配色表生成.现在支持10个.#==========现在还是自动配色吧. 搞一个配色表存着太麻烦感觉. # =======================配置!!!!!!!!!!!!!!!!!!! color_and_biaoqian=[ #============这个网站能看查询所有颜色!!!!!!!http://color.liminba.com/c/ffb6c1/ ['#FF6c82','per'], # 这个就是标准红色. 写red 和#FF0000都行. ['yellow','loc'], ['Blue','time'], ['Cyan','org'], ['orange','law'], ['PeachPuff', 'veh'], ['Gray', 'stg'], # ['Brown', 'time4'], # ['Tan', 'time5'], # ['Beige', 'time6'], # ['Beige', 'time6'], # ['Beige', 'time6'], # ['Beige', 'time6'], ] #最全的颜色表,如果标签大于上面那个color_and_biaoqian 就会用clis里面的自动.填充. clis= ['white', 'red' ,'yellow','Blue','Cyan','orange','PeachPuff','Gray', 'Brown', 'Tan','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige','Beige',] #=============支持2种模式, 可以先写bioes或者bio tool_type='bioes' # tool_type='bio' yihangduoshaoniu=10 #====================按钮排版,一行多少个. # 环境是win10 python3.6 from tkinter import * #=============第一层是text import tkinter import tkinter as tk root = Tk(className=' 信息抽取标注工具(一键生成bioes格式)') # frame = Frame (root, relief=RAISED, borderwidth=20) fon1=('宋体',15) root.resizable(True,True) #==============2023-02-09,10点01我们实现行号的添加! class TextLineNumbers(tk.Canvas): def __init__(self, *args, **kwargs): tk.Canvas.__init__(self, *args, **kwargs) self.textwidget = None def attach(self, text_widget): self.textwidget = text_widget def redraw(self, *args): '''redraw line numbers''' self.delete("all") #====================self.textwidget.index("@0,0") @0,0表示最接近左上角的行信息. i = self.textwidget.index("@0,0")#找到0行0列的信息. ref: 参考文档: https://tkdocs.com/shipman/text-index.html hang,lie=i.split('.') while True: dline = self.textwidget.dlineinfo(i) hang,lie=i.split('.')#计算上面i行的信息. self.textwidget.get(i) debug222=self.textwidget.get(i) #这里面显示滚动之后的左上角第一个字.#这时候我们读入的是1.4 if dline is None: break # Return tuple (x,y,width,height,baseline) giving the bounding box and baseline position of the visible part of the line containing the character at INDEX. if lie=='0':#===首列才画行号. y = dline[1] linenum = hang #行信息 self.create_text(1, y, anchor="nw", text="{0:>4}".format(linenum),font=fon1) #创建行号. 2是x索引. # i = self.textwidget.index("%s+1line" % i) #然后计算下一行. i = self.textwidget.index(str(int(hang)+1)+'.'+'0') #然后计算下一行. class CustomText(tk.Text): def __init__(self, *args, **kwargs): tk.Text.__init__(self, *args, **kwargs) # create a proxy for the underlying widget self._orig = self._w + "_orig" self.tk.call("rename", self._w, self._orig) self.tk.createcommand(self._w, self._proxy) def _proxy(self, *args): #发送自定义信号change # let the actual widget perform the requested action result=1 try: cmd = (self._orig,) + args result = self.tk.call(cmd) # generate an event if something was added or deleted, # or the cursor position changed if (args[0] in ("insert", "replace", "delete") or args[0:3] == ("mark", "set", "insert") or args[0:2] == ("xview", "moveto") or args[0:2] == ("xview", "scroll") or args[0:2] == ("yview", "moveto") or args[0:2] == ("yview", "scroll") ): # //tkinter event_generate手册:http://tcl.tk/man/tcl8.5/TkCmd/event.htm#M34 self.event_generate("<>", when="tail")#触发change信号. except: pass # return what the actual widget returned return result class Example(tk.Frame):#=-==============最终我们把text类包装成了ecample这个类. def __init__(self, *args, **kwargs): tk.Frame.__init__(self, *args, **kwargs) self.text = CustomText(self, width=30,height=20,font=fon1,wrap = 'char',background='#C7EDCC') self.vsb = tk.Scrollbar(self, orient="vertical", command=self.text.yview)#竖直方向的滑动杆. self.vsb.pack(side="right", fill="y") self.text.configure(yscrollcommand=self.vsb.set) self.linenumbers = TextLineNumbers(self, width=50)#创建行号工具#最好让这个宽度跟整个页面长度配套. self.linenumbers.attach(self.text) # 绑定行号工具到text空间. self.linenumbers.pack(side="left", fill="y") self.text.pack(side="right", fill="both", expand=True) self.text.bind("<>", self._on_change)#绑定修改到重新绘制linenumber self.text.bind("", self._on_change) self.text.insert("end", "one\ntwo\nthree\n") self.text.insert("end", "five\n") def _on_change(self, event): self.linenumbers.redraw() def changgelinenum(self): print(111111) self.linenumbers=TextLineNumbers(self, width=150) self.linenumbers.attach(self.text) # 绑定行号工具到text空间. self.linenumbers.pack(side="left", fill="y") #==========================================================自定义编号结束. # Example(root).pack(side="top", fill="both", expand=True) aaa2=Example(root) #=======reset 行号widget if 0: aaa2.changgelinenum()#=====================这种动态生成无效???????????????????????当行号上W会发生看不见的情况.目前无法解决. aaa2.pack(side="top", fill="both", expand=True) text = aaa2.text#=======添加了护眼色. # text.insert('1.0', '贴入你要处理的sdfsadf张某,李某,sdfsdfasfasd王某他们杀人了fas]\n sdfsadf张某,李某,sdfsdfasfasd王某他们杀人了文字 中文 English 都行\n贴入你要处理的文字')# 1.0 第一行0列. colorlist=[i[0] for i in color_and_biaoqian] labellist=[i[1] for i in color_and_biaoqian] if 0: # ====滚动条竖直: scroll = tkinter.Scrollbar() # 放到窗口的右侧, 填充Y竖直方向 scroll.pack(side=tkinter.RIGHT,fill=tkinter.Y) scroll.config(command=text.yview) text.config(yscrollcommand=scroll.set) #滚动条横向 s2 = Scrollbar(root, orient = HORIZONTAL) s2.pack(side = BOTTOM, fill = X) s2.config(command = text.xview) text.config(xscrollcommand=s2.set) # print( text.get('1.0', 'end')) text.pack() # frame.pack() #第二层是frame 来放按钮的. frame = Frame (root, relief=GROOVE, borderwidth=1) frame.pack (side=TOP, fill=BOTH, ipadx=5, ipady=5, expand=1) for i in colorlist: text.tag_config(i, background=i) # 再为标签进行设置==类似html里面的div 里面class属性. def helloCallBack(color): # print(1111111111111) try: # print(SEL_FIRST,SEL_LAST) # print(text.index("sel.first"),text.index("sel.last")) #===============注意要先删除其他的标签. if 1: for i in colorlist: text.tag_remove( i,text.index("sel.first"), text.index("sel.last")) # =======变色 if color !='white':#======white实际上是不进行背景色标注!这样效果最好!!!!!!a trick text.tag_add(color, text.index("sel.first"),text.index("sel.last")) #=======变色 if 0: print('=================debug') for i in colorlist: print(i,text.tag_ranges(i)) # print(text.tag_ranges(color)) # print(11111111111) except: pass def helloCallBack_read_bio(i,j,color): a=i b=j try: #这时候clor重置了,所以要重新跑config. text.tag_config(color, background=color) # 再为标签进行设置==类似html里面的div 里面class属性. #===============注意要先删除其他的标签. for i in colorlist: text.tag_remove( i,a, b) # =======变色 if color !='white':#======white实际上是不进行背景色标注!这样效果最好!!!!!!a trick text.tag_add(color, a,b) #=======变色 except: pass import kmp_for_array def helloCallBack_quanbiaozhu(color): if 1: # text.tag_config(color, background=color) # 再为标签进行设置 wenben=text.get(text.index("sel.first"),text.index("sel.last")) if 1: #=============调用python的搜索 all_text=text.get('1.0',END) tmp= all_text.split('\n') out2=[] for i in range(len(tmp)):#=========这里面需要字符串的kmp算法 kkk=kmp_for_array.kmp(tmp[i],wenben,return_all=True) if kkk!=-1: #================一个bug, 比如 aaa 然后我要把aa标注为红色.那么就会图2次.所以这里面我们强制让他只图最前面的aa,后的aa忽略#==============3.0版本修复了这个bug last_tail=0 for j in kkk: if j>=last_tail: out2.append([f'{i+1}.{j}',f'{i+1}.{j+len(wenben)}' ]) last_tail=j+len(wenben) for weizhi in out2: #===============注意要先删除其他的标签. for i in colorlist: text.tag_remove( i,weizhi[0], weizhi[1]) # =======变色 if color !='white':#======white实际上是不进行背景色标注!这样效果最好!!!!!!a trick text.tag_add(color,weizhi[0], weizhi[1]) #=======变色 # print(text.tag_ranges(color)) # import re from keyword2 import KeywordProcessor#从https://github.com/Kashian/flashtext 做修改.因为他居然不支持中文!!!!!!!! from tool_for_pipei import DFAFilter import time import re def zhengzehelloCallBack_quanbiaozhu(color): #======================2023-02-08,19点57出尝试用编辑器自己的search函数来看看速度.现在看没屁用.他不支持正则. a=e.get() zhengze=re.compile(a) aaa=text.get('1.0',"end").split('\n') # tmp=re.compile(a).findall(aaa) tmp=[] for i in aaa: tmp+=zhengze.findall(i) # tmp=re.findall(a,aaa) ########==========正则代码. tmp=list(set(tmp)) if 1: # for i1 in tmp: #找到每一个索引: # a=time.time() # text.tag_config(color, background=color) # 再为标签进行设置 # wenben =i1 #===============改用https://github.com/Kashian/flashtext 这个库包. 比kmp快朵了!!!!!!!!!!!! # keyword_processor = KeywordProcessor() # # keyword_processor.add_keyword(, ) # print(tmp,'被查找的东西!!!!!!!') # keyword_processor.add_keywords_from_list(tmp) gfw = DFAFilter() for jjj in tmp: gfw.add(jjj) all_text = text.get('1.0', END) tmp2 = all_text.split('\n') out2 = [] for i in range(len(tmp2)): keywords_found = gfw.pipei_shortest(tmp2[i]) last_tail = 0 for j in keywords_found: if j[0] >= last_tail: out2.append([f'{i + 1}.{j[0]}', f'{i + 1}.{j[1] }']) last_tail = j[1] + (j[1]-j[0]) print(1) if 0: # =============调用python的搜索 all_text = text.get('1.0', END) tmp = all_text.split('\n') out2 = [] for i in range(len(tmp)): # =========这里面需要字符串的kmp算法 kkk = kmp_for_array.kmp(tmp[i], wenben, return_all=True) if kkk != -1 and kkk!=0: # ================一个bug, 比如 aaa 然后我要把aa标注为红色.那么就会图2次.所以这里面我们强制让他只图最前面的aa,后的aa忽略#==============3.0版本修复了这个bug last_tail = 0 for j in kkk: if j >= last_tail: out2.append([f'{i + 1}.{j}', f'{i + 1}.{j + len(wenben)}']) last_tail = j + len(wenben) # print('kmp:',time.time()-a) # a = time.time() for weizhi in out2: # ===============注意要先删除其他的标签. for i in colorlist: text.tag_remove(i, weizhi[0], weizhi[1]) # =======变色 if color != 'white': # ======white实际上是不进行背景色标注!这样效果最好!!!!!!a trick text.tag_add(color, weizhi[0], weizhi[1]) # =======变色 # print('tuse:', time.time() - a) # print(11111111111) #https://blog.csdn.net/wjciayf/article/details/79261005 颜色表 # c0='black' # c1='red' # c2='yellow' # c3='Blue' # c4='Cyan' # c5='Lime' #=================command 这个函数不能用for实现.改用动态生成.可以避免共享变量. #放按钮使用. aa=[0,1,2,3,4,5,6,7,8,9,10] save_all_button=[] paddd=12 def setup_button(): # b=tkinter.Button(frame,bg= 'white',text ="标注为空", command = lambda :helloCallBack('white')) fun4('white') # b.grid(row=0,column=1,padx=10) # button_grid_info = b.grid_info() # b.grid_forget() if 0: #########2023-02-01,18点46这个地方可以删除国企按钮,其实没啥必要删除.因为你扩充时候可能还需要.扩充时候可以自己改代码19行. 添加自己需要的类别. for i in save_all_button:#================这一步用来每次删除过期的按钮. button_grid_info = i.grid_info() i.grid_forget() # row column可以自己修改按钮放的位置!!!!! for i in range(len(color_and_biaoqian)): #============callback函数里面不允许写变量.......... #参考https://www.cnpython.com/qa/68533 if 1: # lambda表达式.里面要写x=i, 然后再把x传入:右边的函数.这样可以脱离之前的变量控制. # fffff=('宋体',10,'bold') # b=tkinter.Button(frame, bg=colorlist[i],font=fffff,text =labellist[i], command = lambda x=i: helloCallBack(colorlist[x])) # b.grid(row=0,column=(i+1),padx=paddd) # save_all_button.append(b)#===============把变量存在全局变量里面,变量就不会跟着函数销毁了. #================下面一排是全标注===全文有这个词的直接全标注上. # b=tkinter.Button(frame, bg=colorlist[i], font=fffff,text =labellist[i]+'全标', command = lambda x=i:helloCallBack_quanbiaozhu(colorlist[x])) # b.grid(row=1,column=(i+1),padx=paddd) # save_all_button.append(b) #================================================== # b = tkinter.Button(frame, bg=colorlist[i],font=fffff,text=labellist[i] + '正则', # command=lambda x=i: zhengzehelloCallBack_quanbiaozhu(colorlist[x])) # b.grid(row=2, column=(i + 1), padx=paddd) # save_all_button.append(b) #=====================第四排综合前3排. fun(i) # xxx=i # menubar.add_command(label=xxx, command=lambda x=xxx:helloCallBack_quanbiaozhu(colorlist[x])) def fun4(i): menubar = tkinter.Menu(frame,tearoff=0) def helper1(x=i): x = i return helloCallBack_quanbiaozhu(x) def helper2(x=i): x = i return zhengzehelloCallBack_quanbiaozhu(x) xxx = '全标' menubar.add_command(label=xxx, command=helper1) xxx = '正则' menubar.add_command(label=xxx, command=helper2) # 事件处理函数一定要至少有一个参数,且第一个参数表示的是系统事件 def pop(event,a): # 注意使用 event.x 和 event.x_root 的区别 # menubar.post(event.x, event.y) # print(a) menubar.post(event.x_root, event.y_root) fffff=('宋体',10,'bold') b=tkinter.Button(frame, bg=i,font=fffff,text ='annotate to None', command = lambda x=i: helloCallBack(i)) b.bind("", lambda event, a=3: pop(event, a)) b.grid(row=0,column=1,padx=paddd) def fun(i): menubar = tkinter.Menu(frame,tearoff=0) def helper1(x=i): x = i return helloCallBack_quanbiaozhu(colorlist[x]) def helper2(x=i): x = i return zhengzehelloCallBack_quanbiaozhu(colorlist[x]) xxx = '全标' menubar.add_command(label=xxx, command=helper1) xxx = '正则' menubar.add_command(label=xxx, command=helper2) # 事件处理函数一定要至少有一个参数,且第一个参数表示的是系统事件 def pop(event,a): # 注意使用 event.x 和 event.x_root 的区别 # menubar.post(event.x, event.y) # print(a) menubar.post(event.x_root, event.y_root) fffff=('宋体',10,'bold') b=tkinter.Button(frame, bg=colorlist[i],font=fffff,text =labellist[i], command = lambda x=i: helloCallBack(colorlist[x])) b.bind("", lambda event, a=3: pop(event, a)) def fun(i): menubar = tkinter.Menu(frame,tearoff=0) def helper1(x=i): x = i return helloCallBack_quanbiaozhu(colorlist[x]) def helper2(x=i): x = i return zhengzehelloCallBack_quanbiaozhu(colorlist[x]) xxx = '全标' menubar.add_command(label=xxx, command=helper1) xxx = '正则' menubar.add_command(label=xxx, command=helper2) # 事件处理函数一定要至少有一个参数,且第一个参数表示的是系统事件 def pop(event,a): # 注意使用 event.x 和 event.x_root 的区别 # menubar.post(event.x, event.y) # print(a) menubar.post(event.x_root, event.y_root) fffff=('宋体',10,'bold') b=tkinter.Button(frame, bg=colorlist[i],font=fffff,text =labellist[i], command = lambda x=i: helloCallBack(colorlist[x])) b.bind("", lambda event, a=3: pop(event, a)) b.grid(row=i//yihangduoshaoniu,column=(i%yihangduoshaoniu+2),padx=paddd) # save_all_button.append(b) # b=tkinter.Button(frame, text ="test4", ) # b.bind("", lambda event,a=3:pop(event,a))# https://blog.csdn.net/qq_34633194/article/details/120631709 传参方法. # b.grid(row=3,column=0,padx=10) def save(): global text666 result='' zhengchagn=0 savefor_rel={} save_triple=[] try: moshi=cmb.get() if 'es' in moshi: tool_type='bioes' else: tool_type = 'bio' result = text.get("1.0", "end") # 获取文本输入框的内容 with open('output.txt','w',encoding='utf-8') as f: f.writelines(result) result=result.replace('\r\n','\n') # result=result.split('\n') except: text666.set('output.txt写入失败') if 1: # for i in colorlist: # aaa=text.tag_ranges(i)###=得到的aaa标里面每2个表示开头结尾索引. #=======下面都是简单的字符串处理而已 yuanwen=result.split('\n') jieguo=[list('O'*len(i)) for i in yuanwen] #=====根据颜色标注即可: for dex,i in enumerate(colorlist): aaa = text.tag_ranges(i) ###=得到的aaa标里面每2个表示开头结尾索引. for j in range(len(aaa)//2): a11= int(aaa[2*j].string.split('.')[0])#首航 a12= int(aaa[2*j].string.split('.')[1])#首列 a21= int(aaa[2*j+1].string.split('.')[0])#尾行 a22= int(aaa[2*j+1].string.split('.')[1])# 尾列 if 1: # ===============这里来处理每个颜色保存到rel里面. # (, ) =aaa save_triple.append([yuanwen[a11-1][a12:a22],a11-1,str(labellist[dex]),a12,a22-1]) pass if tool_type!='bio': if a11!=a21: pass else: if a22-a12==1:#标注S! jieguo[a11-1][a12]="S-"+str(labellist[dex]) else: jieguo[a11-1][a12:a22]=["B-"+str(labellist[dex])]+["I-"+str(labellist[dex])]*(a22-a12-2)+["E-"+str(labellist[dex])] if tool_type=='bio': if a11 != a21: pass else: jieguo[a11 - 1][a12:a22] = ["B-" + str(labellist[dex])] + ["I-" + str(labellist[dex])] * ( a22 - a12 - 1) jieguo=[' '.join(i)+'\n' for i in jieguo] with open('output.bio','w') as f: f.writelines(jieguo) zhengchagn=1 #=====================加一个ner整体输出. #=========这个为了后续标注关系时候方便. #输出样式: ner1_text ner1_label ner1_locationhead ner1_locationtail ner2_text ner2_label ner2_locationhead ner2tail.....########正文的该行. #==================处理: #=============非锁紧版本 if 0: from collections import defaultdict outfor_rel=defaultdict(list) for i in save_triple: outfor_rel[i[1]].append([i[0],i[2],str(i[3]),str(i[4])]) for i in outfor_rel: outfor_rel[i].append(yuanwen[i]) outlist=['']*(max(outfor_rel.keys())+1) sepp=' ' for i in sorted(outfor_rel.keys()): for jj in range(len(outfor_rel[i])-1): outlist[i]+=sepp+sepp.join(outfor_rel[i][jj]) outlist[i]+='######'+outfor_rel[i][-1] outlist[i]=str(i)+sepp+outlist[i][len(sepp):]+'\n' with open('output.ner','w',encoding='utf-8') as f: f.writelines(outlist) #============我感觉太乱,所以下面改成缩进版本.空格符用/了. if 1: from collections import defaultdict outfor_rel = defaultdict(list) for i in save_triple: outfor_rel[i[1]].append([i[0], i[2], str(i[3]), str(i[4])]) for i in outfor_rel: outfor_rel[i].append(yuanwen[i]) outlist=[] if outfor_rel: outlist = [''] * (max(outfor_rel.keys()) + 1) sepp = ' ' suojin='\\' for i in sorted(outfor_rel.keys()): for jj in range(len(outfor_rel[i]) - 1): outlist[i] += sepp + suojin.join(outfor_rel[i][jj]) outlist[i] += '######' + outfor_rel[i][-1] outlist[i] = str(i) + sepp + outlist[i][len(sepp):] + '\n' with open('output.ner', 'w', encoding='utf-8') as f: f.writelines(outlist) # zhengchagn=1 # except: # text666.set('output.bio写入失败') if 1: pass # for i in range(len(result)): # # savefor_rel[i+1]=result[i] if zhengchagn: text666.set('bio和txt都写入成功') setup_button() def chognzhi(): global text666 try: #============填入文本. with open('output.txt' ,encoding='utf-8') as f: tmp=f.readlines() text.delete('1.0','end') text.insert('1.0',''.join(tmp)) with open('output.bio' ) as f: tmp=f.readlines() tmp3=tmp tmp=' '.join(tmp).replace('\n',' ').split(' ') tmp=[i[2:] for i in tmp if '-' in i] tmp2=[] for i in tmp: if i not in tmp2: tmp2.append(i) tmp=tmp2 #=======进行配色. #我们的默认标签, 只有当新的跟这里面的不一样时候才做修改. global color_and_biaoqian global colorlist global labellist # color_and_biaoqian = [ # ['white', '标注为空'], # ['red','person'], # ['yellow','address'], # ['Blue','org'], # ['Cyan','telephone'], # ['orange','time'], # ['PeachPuff', 'nation'], # # ['Gray', 'time3'], # # ['Brown', 'time4'], # # ['Tan', 'time5'], # # ['Beige', 'time6'] # # ] for i in range(len(tmp)): if tmp[i] not in labellist: color_and_biaoqian.append([clis[i+1],tmp[i]]) #替换就的按钮. colorlist = [i[0] for i in color_and_biaoqian] labellist = [i[1] for i in color_and_biaoqian] setup_button() #=============下面我们根据bio进行涂色. tmp3=[i1.replace('\n','').split(' ') for i1 in tmp3] for i in range(len(tmp3)): for j in range(len(tmp3[i])): if '-' in tmp3[i][j]: aaa=tmp3[i][j][2:] for jjj in color_and_biaoqian: if jjj[1]==aaa: color=jjj[0] helloCallBack_read_bio(str(i+1)+'.'+str(j),str(i+1)+'.'+str(j+1),color) text666.set('读取成功') except: text666.set('读取失败') #=============第三版我们来实现读取bio文件的功能.为了方便就不加对话框了.直接读取output.bio b=tkinter.Button(frame, text ="load bio和txt", command = chognzhi) b.grid(row=0,column=0,padx=10) # entryExample = Entry(root) # entryExample.place(x = -10, # y =-10, # width=2000, # height=100) import tkinter from tkinter import ttk # 导入ttk模块,因为下拉菜单控件在ttk中 # 创建下拉菜单 cmb = ttk.Combobox(frame,state="readonly") cmb.grid(row=1, column=(1), padx=20) # 设置下拉菜单中的值 cmb['value'] = ('标注模式bioes', '标注模式bio') # 设置默认值,即默认下拉框中的内容 cmb.current(0) # 默认值中的内容为索引,从0开始 import tkinter as tk text666 = tk.StringVar() aaa=tkinter.Label(frame,textvariable=text666, bg='yellow') aaa.grid(row=2,column=0,padx=10) text666.set("type regex to right") b=tkinter.Button(frame, text ="save to BIO和txt", command = save) b.grid(row=1,column=0,padx=10) # b=tkinter.Button(root, text ="标注2", command = helloCallBack(c2)) # b.pack() # b=tkinter.Button(root, text ="标注3", command = helloCallBack(c3)) # b.pack() # b=tkinter.Button(root, text ="标注4", command = helloCallBack(c4)) # b.pack() # b=tkinter.Button(root, text ="标注5", command = helloCallBack(c5)) # text.tag_add('highlightline', '5.0', '6.0') E1 = Entry(frame, bd =5,) E1.insert('0','.{1}某') E1.grid(row=2, column=(1), padx=20) e=E1 # b = tkinter.Button(frame, text='右边输入你要的正则再点我进行标注', command=lambda x=1:reg()) # b.grid(row=2, column=( 0), padx=10) #按扭调用的函数, def reg(): a = e.get() # import base64 uuid='[[sep]]' # import flask # import json global_fuwenben=[] # import klembord #pip 一下. waijie=0 # 参考:https://www.coder.work/article/7769372 # 2023-02-06,12点36 //加入样式复制功能. def fun2(event): #======== a= text.index(tk.INSERT) try: txt = text.get('sel.first', 'sel.last') content = text.dump('sel.first', 'sel.last', tag=True, text=True) global global_fuwenben # content编码. global_fuwenben=content html_text = [] tmp=eval(str(content)) #我们往content里面加一个自己的id做校验. 因为如果有其他json会发生混淆. content=[uuid]+content tmp=json.dumps(content,ensure_ascii=True) # tmp=tmp.encode('utf-8') tttttttt=json.loads(tmp) # print(tmp,9999999999999999999999999999999999999999999999999999999999999999999999999999) # print(type(tmp)) tmp=str(tmp) klembord.set_text(uuid)#===========剪贴板里面编解码有问题!!!!!!!!!!所以这里面用一个技巧只需要传递信号即可, 只需要对面校验uuid即可. #==========2023-02-06,20点24 目前方案还是用自编码, 不用json, 即使他能用, 也会慢,因为非编辑器数据如果用jsonfy会卡.不如自己做一个头来判断速度快.#但是目前还是jsonfy好实现.先用着. # [('tagon', 'sel', '1.0'), ('text', '贴入你要处', '1.0'), ('tagon', 'red', '1.5'), ('text', '理的sdfsadf张某,李某,sdfsdf', '1.5'), ('tagoff', 'red', '1.26'), ('text', 'asf', '1.26'), ('tagon', 'red', '1.29'), ('text', 'asd', '1.29'), ('tagoff', 'red', '1.32'), ('text', '王某他们杀人了fas]', '1.32')] #=整理一下 #编码是 sep color1 sep text1 sep color2 sep text2 # tmp2=[] # # for i in range(len(content)): # if content[i][0] =='text' and content[i-1][0]!='tagon': # tmp2.append(sep_for_clipborad) # tmp2.append('white') # tmp2.append(sep_for_clipborad) # tmp2.append(content[i][1]) # if content[i][1]=='tagon': # tmp2.append(sep_for_clipborad) # tmp2.append(content[i][1]) # if content[i][0] =='text' and content[i-1][0]=='tagon': # tmp2.append(sep_for_clipborad) # tmp2.append(content[i][1]) # print(tmp2,12341230947239047238947238947329847234234) # klembord.set_text(''.join(tmp2)) # print(content) # 下面我们获取sel.first到sel.last的样式. # print(txt) # print(event) # print(11111111111111) except: #这是说明从外面来的数据. global waijie waijie=1 pass return 'break' # 这种写法可以阻塞ctrlc 的原始使用. # import copy def fun3(event): neirong=klembord.get_text() print(neirong,'neirong999999999999')#= global global_fuwenben #=========判断一下当前是否有选中. try: text.delete('sel.first', 'sel.last') except: pass a= text.index(tk.INSERT) # neirong=klembord.get_with_rich_text() # tmp=json.loads(neirong) # print(tmp,'tttttttttttttttmmmmmmmmmmmmmmppppppppppppppppp') #============原来我发现粘贴板贴过来的内容会多一个字符........还是空白字符................神奇??????????????????? neirong2=neirong[:len(uuid)] # print(uuid,'uuid') # print(neirong2.replace(' ','')==uuid,'校验!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!') # neirong=str(neirong).encode('utf-8') try: if neirong2!=uuid: raise if neirong2==uuid: global_fuwenben2 = copy.deepcopy(global_fuwenben) for i in range(len(global_fuwenben)): if global_fuwenben[i][0] == 'tagon' and global_fuwenben[i][1] in colorlist: global_fuwenben2[i], global_fuwenben2[i + 1] = global_fuwenben2[i + 1], global_fuwenben2[i] global_fuwenben = global_fuwenben2 a2 = a #=============================这个涂色逻辑是错误的!!!!!!!!!! for i in global_fuwenben: if i[0] == 'text': a = a2 # print(a,'mmmmmmmmmmmmmmmmmmmm') text.insert(a, i[1]) # print(len(i[1].replace('\n', '')), '文本偏移量!!!!!!!!') hang, lie = [int(i) for i in (a.split('.'))] hang += i[1].count('\n') if '\n' in i[1]: lie = 0 else: lie += len(i[1].replace('\n', '')) a2 = str(hang) + '.' + str(lie) if 1: if i[1] in colorlist and i[0] == 'tagon' or i[0]=='tagoff': # tuse ==========================注意content标签里面的设置情况, 有的时候你的选择部分on 和off 会不全, 所以我们要2种情况都进行涂色. 虽然会重复凸,但是会保证结果正确!!!!!!!!! color = i[1] text.tag_config(color, background=color) # 再为标签进行设置==类似html里面的div 里面class属性. # ===============注意要先删除其他的标签. for i in colorlist: text.tag_remove(i, a, a2) # =======变色 if color != 'white': # ======white实际上是不进行背景色标注!这样效果最好!!!!!!a trick text.tag_add(color, a, a2) # =======变色 except: # print('...................',content) text.insert(a, neirong) return 'break' # 这种写法可以阻塞ctrlv 的原始使用. # pass #如果解析失败说明他是其他数据不是编辑器里面的json. # global global_fuwenben # print(waijie,'外接吗??????????????') # print(global_fuwenben) # print(a,'??????????????????')#光标位置. #======== 直接插入就行.因为光标已经有了. #=========根据global_fuwenben这个进行插入和涂色. # print('接收到的文本是',global_fuwenben) # [('tagon', 'sel', '1.0'), ('text', '贴入你要处', '1.0'), ('tagon', 'red', '1.5'), ('text', '理的sdfsadf张某,李某,sdfsdf', '1.5'), ('tagoff', 'red', '1.26'), ('text', 'asf', '1.26'), ('tagon', 'red', '1.29'), ('text', 'asd', '1.29'), ('tagoff', 'red', '1.32'), ('text', '王某他们杀人了fas]', '1.32')] #=========颜色和text顺序需要交换. # colorlist # text.insert(a,'ffffffffffffff') #==========根据global_fuwenben来在光标位置插入内容即可. # print(22222222222222) #==========下面我们实现我们的ctrl v #我已经知道怎么做了, 这种带格式的复制,但是效率不够.所以先不写了.!!!!!!!!!!!!!!!!!!!!!!!!!!!! if 0:#============带格式的复制还没做好.先不开了.===========明白了还是应该往剪贴板里面写.才能跟原始功能通用. text.bind('', fun2) text.bind('', fun3) # 拦截ctrlv把 sep_for_clipborad='================[[[sep]]]=====================' #创建一个复杂字符,一般数据会有这个东西.来区分普通数据和编辑器的数据. #ceshi if 0: klembord.set_with_rich_text('plain text', 'plain text') if 1: def backout(): text.edit_undo() def regain(): text.edit_redo() # 定义撤销和恢复按钮 Button(frame,text = '文本撤销',command = backout).grid(row=3, column=0, sticky="w", padx=10, pady=7) Button(frame,text = '文本恢复',command = regain).grid(row=3, column=0, sticky="e", padx=10, pady=7) #===============================2023-02-15,10点59现在我们改成工具启动就开始重置读取bio和txt chognzhi() root.mainloop()