-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathtests.py
More file actions
131 lines (112 loc) · 6.65 KB
/
tests.py
File metadata and controls
131 lines (112 loc) · 6.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# -*- coding: utf-8 -*-
import os
import unittest
from nltk_opennlp.chunkers import OpenNLPChunker, OpenNERChunker, OpenNERChunkerMulti
from nltk_opennlp.taggers import OpenNLPTagger
# Path to the Apache OpenNLP installation; the tests join 'bin' onto it.
opennlp_dir = 'apache-opennlp'
# Path to the directory that holds the OpenNLP model (*.bin) files.
models_dir = 'opennlp_models'
class OpenNLPTest(unittest.TestCase):
    """Exercise the nltk_opennlp tagger and chunker wrappers.

    All paths are built from the module-level ``opennlp_dir`` (OpenNLP
    installation) and ``models_dir`` (model files) settings.
    """

    @staticmethod
    def _bin_dir():
        """Return the ``bin`` directory inside ``opennlp_dir``."""
        return os.path.join(opennlp_dir, 'bin')

    @staticmethod
    def _model(filename):
        """Return the path of ``filename`` inside ``models_dir``."""
        return os.path.join(models_dir, filename)

    def _make_tagger(self, language):
        """Build an ``OpenNLPTagger`` using the ``<language>-pos-maxent.bin`` model."""
        return OpenNLPTagger(
            language=language,
            path_to_bin=self._bin_dir(),
            path_to_model=self._model('{}-pos-maxent.bin'.format(language)))

    def _make_ner_chunker(self, language, **extra):
        """Build an ``OpenNERChunker`` with the person NER model for ``language``.

        Any ``extra`` keyword arguments are forwarded unchanged to the
        ``OpenNERChunker`` constructor.
        """
        return OpenNERChunker(
            path_to_bin=self._bin_dir(),
            path_to_chunker=self._model('{}-chunker.bin'.format(language)),
            path_to_ner_model=self._model('{}-ner-person.bin'.format(language)),
            **extra)

    def test_opennlp_tagger(self):
        """Tag a whitespace-tokenised string and check the first (word, tag) pair."""
        tt = self._make_tagger('en')
        phrase = 'Pierre Vinken , 61 years old , will join the board as a nonexecutive director Nov. 29 .'
        en_tags = tt.tag(phrase)
        print(en_tags)
        # assertEqual is used instead of a bare assert so the checks survive
        # ``python -O`` and report both values on failure.
        self.assertEqual(en_tags[0][0], 'Pierre')
        self.assertEqual(en_tags[0][1], 'NNP')

    def test_opennlp_tagger_list(self):
        """Tag a pre-tokenised list and check the first (word, tag) pair."""
        tt = self._make_tagger('en')
        # Every token needs its own comma: adjacent string literals are
        # concatenated, which previously fused 'Vinken', ',' and '61' into
        # the single token 'Vinken,61'.
        phrase = ['Pierre', 'Vinken', ',', '61', 'years', 'old', ',', 'will', 'join',
                  'the', 'board', 'as', 'a', 'nonexecutive', 'director', 'Nov.', '29', '.']
        en_tags = tt.tag(phrase)
        print(en_tags)
        self.assertEqual(en_tags[0][0], 'Pierre')
        self.assertEqual(en_tags[0][1], 'NNP')

    def test_opennlp_chunker(self):
        """Tag an English sentence and run it through ``OpenNLPChunker``."""
        tt = self._make_tagger('en')
        phrase = 'Pierre Vinken , 61 years old , will join the board as a nonexecutive director Nov. 29 .'
        sentence = tt.tag(phrase)
        cp = OpenNLPChunker(path_to_bin=self._bin_dir(),
                            path_to_chunker=self._model('en-chunker.bin'))
        print(cp.parse(sentence))

    def test_opennlp_chunker_de(self):
        """Tag a German sentence; chunking it is expected to fail off Windows."""
        tt = self._make_tagger('de')
        phrase = 'Das Haus hat einen großen hübschen Garten.'
        sentence = tt.tag(phrase)
        print(sentence)
        # There should not be OpenNLP chunker for German language, thus OSError is thrown in Linux
        if os.name != 'nt':
            with self.assertRaises(OSError):
                cp = OpenNLPChunker(path_to_bin=self._bin_dir(),
                                    path_to_chunker=self._model('de-chunker.bin'))
                print(cp.parse(sentence))

    def test_opennlp_ner_chunker(self):
        """Run the person NER chunker over a tagged English sentence."""
        language = 'en'
        tt = self._make_tagger(language)
        phrase = 'Pierre Vinken , 61 years old , will join Martin Vinken as a nonexecutive director Nov. 29 .'
        sentence = tt.tag(phrase)
        cp = self._make_ner_chunker(language)
        print(cp.parse(sentence))

    def test_opennlp_ner_chunker_bracketed(self):
        """Run the person NER chunker over a sentence containing parentheses."""
        language = 'en'
        tt = self._make_tagger(language)
        phrase = 'Pierre Vinken , ( 61 years old ) , will join Martin Vinken as a nonexecutive director Nov. 29 .'
        sentence = tt.tag(phrase)
        cp = self._make_ner_chunker(language)
        print(cp.parse(sentence))

    def test_opennlp_ner_chunker_with_punc(self):
        """Run the person NER chunker with ``use_punc_tag=True``."""
        language = 'en'
        tt = self._make_tagger(language)
        phrase = 'Pierre Vinken , 61 years old , will join Martin Vinken as a nonexecutive director Nov. 29 .'
        sentence = tt.tag(phrase)
        cp = self._make_ner_chunker(language, use_punc_tag=True)
        print(cp.parse(sentence))

    def test_opennlp_ner_multichunker(self):
        """Run ``OpenNERChunkerMulti`` with five NER models over one sentence."""
        language = 'en'
        tt = self._make_tagger(language)
        phrase = 'John Haddock , 32 years old male , travelled to Cambridge , USA in October 20 while paying 6.50 dollars for the ticket'
        sentence = tt.tag(phrase)
        # The models are passed in the same order as before: person, date,
        # location, time, money.
        ner_models = [
            self._model('{}-ner-{}.bin'.format(language, entity))
            for entity in ('person', 'date', 'location', 'time', 'money')
        ]
        cp = OpenNERChunkerMulti(
            path_to_bin=self._bin_dir(),
            path_to_chunker=self._model('{}-chunker.bin'.format(language)),
            ner_models=ner_models)
        print(cp.parse(sentence))
if __name__ == '__main__':
    # Executed as a script: hand control to the unittest runner.
    unittest.main()