-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfeature_coding_accuracy2.py
More file actions
88 lines (73 loc) · 3.94 KB
/
feature_coding_accuracy2.py
File metadata and controls
88 lines (73 loc) · 3.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from collections import defaultdict
import Reddit_MDA
import os
import torch
import flair
# Select the compute device once: the first CUDA GPU when one is available,
# otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Hand the *selected* device to flair. The previous code always assigned
# 'cuda:0' here, which ignored the CPU fallback chosen above and pointed
# flair at a GPU that may not exist.
flair.device = device
print(flair.device)
# Sentence lookup table: first tab-separated column of each line -> second
# column (with newline characters stripped from its ends). Undecodable bytes
# in the input file are dropped rather than raising.
sents = {}
with open("sample_sentences.txt", errors = "ignore") as sentence_file:
    for raw_line in sentence_file:
        columns = raw_line.split("\t")
        sent_id = columns[0]
        sents[sent_id] = columns[1].strip("\n")
# Names of every feature to evaluate, in the order they are processed and
# reported. Each name is passed verbatim to the tagger and is also embedded
# in the manual coding file name for that feature.
feats = [
    "vpast_001", "vpresperfect_002a", "vpastperfect_002b", "vpresent_003",
    "advplace_004", "advtime_position_005a", "advtime_durfreq_005b",
    "profirpers_006", "prosecpers_007", "prothirdper_008", "proit_009",
    "prodemons_010", "proindef_011", "pverbdo_012", "whquest_013",
    "nominalis_014", "gerund_015", "nouns_016", "passagentl_017",
    "passby_018", "mainvbe_019", "exthere_020", "thatvcom_021",
    "thatacom_022", "whclause_023", "vinfinitive_024", "vpresentpart_025",
    "vpastpart_026", "vpastwhiz_027", "vpresentwhiz_028", "thatresub_029",
    "thatreobj_030", "whresub_031", "whreobj_032", "whrepied_033",
    "sentencere_034", "advsubcause_035", "advsubconc_036", "advsubcond_037",
    "advsubother_038", "prepositions_039", "adjattr_040a", "adjpred_041a",
    "adjattr_040b", "adjpred_041b", "adverbs_042", "ttratio_043",
    "wordlength_044", "conjuncts_045", "downtoners_046", "hedges_047",
    "amplifiers_048", "discpart_050", "demonstr_051", "modalsposs_052",
    "modalsness_053", "modalspred_054", "vpublic_055", "vprivate_056",
    "vsuasive_057", "vseemappear_058", "contractions_059", "thatdel_060",
    "strandprep_061", "vsplitinf_062", "vsplitaux_063", "negsyn_066",
    "negana_067", "hashtag_201", "link_202", "interlink_203", "caps_204",
    "vimperative_205", "lengthening_206", "emoticons_207", "question_208",
    "exclamation_209", "lenchar_210", "lenword_211", "comparatives_syn_212",
    "superlatives_syn_213", "comparatives_ana_214", "superlatives_ana_215",
    "reddit_vocab_216", "vprogressive_217", "emojis_218", "coordAnd_219",
    "coordBut_220", "coordOr_221",
]
# 043, 044, 210, 211 and 218 have placeholder manual coding files
# Compare the automatic tagger with the manual coding, feature by feature.
#
# Two tab-separated reports are written:
#   * Feature_coding_discrepancies_flair2.tsv - one row per sentence whose
#     manual and automatic counts differ.
#   * Feature_coding_accuracies_flair2.tsv - one row per feature with the
#     exact-match accuracy, the presence/absence confusion counts and the
#     precision, recall and F-score derived from them.
#
# Both files are managed by a single `with` so they are flushed and closed
# even if processing a sentence raises.
with open("Feature_coding_discrepancies_flair2.tsv", "w") as discrepancies_out, \
        open("Feature_coding_accuracies_flair2.tsv", "w") as accuracies_out:
    discrepancies_out.write("Feature\tsentence\tManualCount\tAutoCount\n")
    accuracies_out.write("Feature\tAccuracy\tTrue_positives\tFalse_positives\tTrue_negatives\tFalse_negatives\tPrecision\tRecall\tFScore\n")

    for feat in feats:
        sent_count = 0
        error_count = 0
        true_pos = 0
        false_pos = 0
        true_neg = 0
        false_neg = 0

        # Prefer the "_corrected" manual coding file when one exists.
        # os.path.join replaces the hard-coded "\" separator, which only
        # worked on Windows and relied on the invalid escape sequence "\m".
        coding_path = os.path.join("Manual_coding_files", "manual_coding_" + feat + "_corrected.txt")
        if not os.path.isfile(coding_path):
            coding_path = os.path.join("Manual_coding_files", "manual_coding_" + feat + ".txt")

        with open(coding_path) as coding_file:
            for line in coding_file:
                # Blank lines (e.g. a trailing newline at end of file) carry
                # no coding and would otherwise crash the column lookup.
                if not line.strip():
                    continue
                fields = line.strip("\n").split("\t")
                numid = fields[0]
                manual = int(fields[1])
                sentence = sents[numid]
                auto = Reddit_MDA.process_sent(sentence, feat)

                # Accuracy is strict: the two counts must match exactly.
                if manual != auto:
                    error_count += 1
                    discrepancies_out.write("\t".join([feat, sentence.strip("'").strip('"'), str(manual), str(auto)]) + "\n")

                # The confusion counts only look at presence vs. absence of
                # the feature, so differing non-zero counts are still a
                # true positive.
                if manual > 0 and auto > 0:
                    true_pos += 1
                elif manual == 0 and auto > 0:
                    false_pos += 1
                elif manual == 0 and auto == 0:
                    true_neg += 1
                else:
                    false_neg += 1

        # "xxx" marks a statistic that is undefined: accuracy when the manual
        # coding file had no rows, and precision/recall/F-score when there
        # were no true positives.
        if sent_count_is_zero := False:
            pass
        for _ in ():
            pass
        if error_count + true_pos + false_pos + true_neg + false_neg >= 0:
            sent_count = true_pos + false_pos + true_neg + false_neg
        if sent_count > 0:
            accuracy = str((sent_count - error_count) / sent_count)
        else:
            accuracy = "xxx"

        if true_pos > 0:
            precision = true_pos / (true_pos + false_pos)
            recall = true_pos / (true_pos + false_neg)
            Fscore = 2 * ((precision * recall) / (precision + recall))
            scores = [str(precision), str(recall), str(Fscore)]
        else:
            scores = ["xxx", "xxx", "xxx"]

        counts = [str(true_pos), str(false_pos), str(true_neg), str(false_neg)]
        accuracies_out.write("\t".join([feat, accuracy] + counts + scores) + "\n")