質問編集履歴
3
修正
test
CHANGED
File without changes
|
test
CHANGED
File without changes
|
2
修正
test
CHANGED
File without changes
|
test
CHANGED
@@ -9,3 +9,105 @@
|
|
9
9
|
何か知識を持っている方教えてください。
|
10
10
|
|
11
11
|
よろしくお願いします。
|
12
|
+
|
13
|
+
|
14
|
+
|
15
|
+
これはtf-idf値を計算するソースコードです。
|
16
|
+
|
17
|
+
今後は、単語の重要度を基にして各文の重要度を決定し、重要な文を選択して要約を作成できるようにしたいです。
|
18
|
+
|
19
|
+
#!/usr/bin/perl
|
20
|
+
|
21
|
+
#
|
22
|
+
|
23
|
+
|
24
|
+
|
25
|
+
# Script entry point: load the term table named by $ARGV[0] (using the
# document count in $ARGV[1]), then write the re-weighted table next to it.
# Called with explicit empty argument lists; the old `&name;` form silently
# forwards the caller's @_ to the subroutine.
load_file();
print_dat();
|
28
|
+
|
29
|
+
|
30
|
+
|
31
|
+
# load_file()
#
# Reads the tab-separated term table named by $ARGV[0] and fills the
# package-level parallel arrays @word, @pos, @tf, @df and @tf_idf with one
# entry per row whose first column is non-empty.
#
# Columns read from each row: word, pos, tf, df. A fifth column, if present,
# is ignored; the weight is always recomputed as
#     tf * (log(N / (df + 1)) + 1)
# where N is the document count given in $ARGV[1], and stored formatted with
# two decimals.
#
# Also sets the package variables $fname and $document from @ARGV.
#
# Dies when an argument is missing, when N is not a positive number, when
# the file cannot be opened, or when a row's df makes the logarithm
# undefined (df + 1 <= 0).
sub load_file {
    our ($fname, $document, @word, @pos, @tf, @df, @tf_idf);

    # Disabled invocation logging, kept for reference:
    # open(DFLOG, ">> /mkyozai/jk1sum/log/calc_df.log");
    # `date >> /mkyozai/jk1sum/log/calc_df.log`;
    # `whoami >> /mkyozai/jk1sum/log/calc_df.log`;
    # print DFLOG "perl calc_df.pl $ARGV[0] $ARGV[1] $ARGV[2] $ARGV[3]\n---\n";
    # close(DFLOG);

    #---load tf file to count df values---#
    $fname    = $ARGV[0];
    $document = $ARGV[1];

    die "usage: $0 <tf-file.df> <number-of-documents>\n"
        unless defined $fname && defined $document;

    # log() is only defined for positive values, so reject a bad document
    # count up front instead of failing on the first data row.
    die "number of documents must be a positive number: $document\n"
        unless $document > 0;

    # Reset every parallel array, not just @word, so they stay index-aligned
    # even if stale data is left over from an earlier call.
    @word   = ();
    @pos    = ();
    @tf     = ();
    @df     = ();
    @tf_idf = ();

    # Three-argument open: the filename is never interpreted as a mode/pipe.
    open(my $in, '<', $fname) or die "cannot open $fname: $!\n";
    while (defined(my $line = <$in>)) {
        chomp($line);
        my ($term, $part_of_speech, $term_freq, $doc_freq) = split(/\t/, $line);

        # Blank lines and rows with an empty first column carry no term.
        next unless defined $term && $term ne "";

        my $smoothed_df = $doc_freq + 1;
        die "$fname line $.: df value '$doc_freq' makes log() undefined\n"
            if $smoothed_df <= 0;

        push(@word, $term);
        push(@pos,  $part_of_speech);
        push(@tf,   $term_freq);
        push(@df,   $doc_freq);
        push(@tf_idf, sprintf("%.2f", $term_freq * (log($document / $smoothed_df) + 1)));
    }
    close($in);
}
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
|
86
|
+
|
87
|
+
|
88
|
+
|
89
|
+
# print_dat()
#
# Writes the table held in the package-level parallel arrays @word, @pos,
# @tf, @df and @tf_idf as tab-separated lines
#     word \t pos \t tf \t df \t tf_idf
# skipping entries whose word is the empty string.
#
# The output path is $ARGV[0] with a trailing ".df" replaced by ".idf"
# (".idf" is simply appended when there is no such suffix). The path is also
# stored in the package variable $fname.
#
# Dies if the output file cannot be opened or cannot be flushed on close.
sub print_dat {
    our ($fname, @word, @pos, @tf, @df, @tf_idf);

    # Strip only a literal ".df" at the very end of the name. The previous
    # split(/.df/) matched any character followed by "df" anywhere in the
    # path, so e.g. "words_df.df" was truncated to "words".
    ($fname = $ARGV[0]) =~ s/\.df\z//;
    $fname = $fname . ".idf";
    # print "output file=$fname\n";

    # Three-argument open keeps the path from being parsed as a mode.
    open(my $out, '>', $fname) or die("cannot open $fname: $!\n");
    for my $i (0 .. $#word) {
        next if $word[$i] eq "";
        print {$out} "$word[$i]\t$pos[$i]\t$tf[$i]\t$df[$i]\t$tf_idf[$i]\n";
    }
    # Buffered write errors (e.g. disk full) only surface at close time.
    close($out) or die("cannot close $fname: $!\n");
}
|
1
タグの追加
test
CHANGED
File without changes
|
test
CHANGED
File without changes
|