-
Notifications
You must be signed in to change notification settings - Fork 0
/
tag.py
69 lines (50 loc) · 1.83 KB
/
tag.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from importer import *
import tools
from featureBuilder import Model, write
from document import processDoc
from format import formatDocument
import copy
def main():
    """Command-line entry point: parse the arguments and tag the matched files.

    Reads ``--txt``, ``--out``, ``--model`` and ``--format`` from ``sys.argv``.
    ``--txt`` is expanded with ``glob.glob``, ``--out`` is handed to
    ``tools.mkpath`` and the matched files are passed to ``tag`` together with
    the model path and the output directory.

    Raises:
        SystemExit: With status 1 when ``--txt``, ``--out`` or ``--model`` is
            missing, or when the model path does not exist.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--txt",
        dest="txt",
        help="glob pattern matching the input files "
             "(quote it so the shell does not expand it)",
    )
    parser.add_argument(
        "--out",
        dest="out",
        help="directory that receives the output files",
    )
    parser.add_argument(
        "--model",
        dest="model",
        help="path to the pickled model",
    )
    # NOTE(review): --format is accepted but never read in this function; it
    # is kept so existing command lines that pass it keep working.
    parser.add_argument("--format", dest="format")
    args = parser.parse_args()

    missing_argument = not args.txt or not args.out or not args.model
    if missing_argument or not os.path.exists(args.model):
        parser.print_help(sys.stderr)
        sys.stderr.write('\n\tOne or more files/model not provided\n\n')
        sys.stderr.write('\n')
        # sys.exit rather than the site-provided exit(): the latter is an
        # interactive-shell helper and is not guaranteed to exist.
        sys.exit(1)

    input_files = glob.glob(args.txt)
    # presumably creates the output directory if needed -- confirm in tools
    tools.mkpath(args.out)
    tag(input_files, args.model, args.out)
def tag(files, modelPath, outDir):
    """Label every input file with the pickled model and write the results.

    The files are handled in sorted order. For each one the document is built
    with ``processDoc``, labels come from the model's
    ``predictClassesFromDocument``, the text returned by the document's
    ``write(labels)`` is saved to ``<outDir>/<basename>.con``, and the pair
    (input path, output path) is then handed to ``formatDocument``.

    Args:
        files: Paths of the files to tag.
        modelPath: Path of the pickled model object.
        outDir: Directory for the ``.con`` files; it is not created here.

    Raises:
        SystemExit: With status 1 when ``files`` is empty.
    """
    # Check the cheap precondition before unpickling the model, and signal the
    # failure with a non-zero status so calling scripts can detect it.
    if not files:
        sys.stderr.write("\n\tInput files not provided\n\n")
        sys.exit(1)

    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file; only load models that come from a trusted source.
    with open(modelPath, 'rb') as modelFile:
        model = pickle.load(modelFile)

    total = len(files)
    for position, text in enumerate(sorted(files), start=1):
        textInst = processDoc(text)

        # Output file: same base name as the input, with a ".con" extension.
        baseName = os.path.splitext(os.path.basename(text))[0]
        outPath = os.path.join(outDir, baseName + '.con')

        sys.stdout.write('%s\n' % ('-' * 30))
        sys.stdout.write('\n\t%d of %d\n' % (position, total))
        sys.stdout.write('\t%s\n\n' % text)

        labels = model.predictClassesFromDocument(textInst)
        out = textInst.write(labels)

        sys.stdout.write('\n\nwriting to: %s\n' % outPath)
        with open(outPath, 'w') as outFile:
            write(outFile, '%s\n' % out)
        sys.stdout.write('\n')

        # NOTE(review): done per file, after the output file is closed,
        # because formatDocument receives this file's input and output paths.
        doc_inst = formatDocument(text, outPath)
        concept = doc_inst.extract()
        # Echo the first three items returned by extract().
        print(concept[0])
        print(concept[1])
        print(concept[2])
        doc_inst.format()
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()