#!/usr/local/bin/perl
#-----------------------------------------------
#
#
#  Learning and text generation with a Markov chain
#
#  (The original author describes this as a third-order Markov chain.
#  As implemented, the table is keyed on a single token and each entry
#  stores the two tokens that followed that key.)
#
#-----------------------------------------------
#
# Usage:
#   $ perl l1.pl < text
#
# Every input line is tokenised with ChaSen and learned as one sentence.
# Afterwards a fixed number of random sentences is printed, one per line,
# with the tokens joined by "/".

use strict;
use warnings;

# Transition table: token => array of successor entries.  An entry is either
# "next/next2" (the two tokens that followed the key) or the bare end marker
# "_TAIL".
my %Textfiber = ();

# Maximum number of table lookups performed for one generated sentence.
my $Maxhop = 10;

# Number of sentences printed by main() (the original loop ran over 0..50).
my $Num_sentences = 51;

# Run the program only when executed as a script, so that the subroutines
# below can be loaded (e.g. by a test) without reading input.
main() unless caller;

# ----------------------
#    M A I N
# ----------------------

# Learn every line of the input (files named in @ARGV, or STDIN), then print
# the randomly generated sentences.
sub main {
    initialize();
    srand;

    # Learn one sentence (input line) at a time.
    while (my $line = <>) {
        learn3(analyze($line));
    }

    # Dump of the learned table (debugging aid).
    # dump3();

    # Random sentence generation.
    for (1 .. $Num_sentences) {
        my $sentence = generate3('_HEAD');
        print "$sentence\n";
    }

    return;
}

# ------------------------------------------------------------------------
#
#    L E A R N I N G
#
#    Forward Markov chain
#
# ------------------------------------------------------------------------

# learn3(@tokens)
#
# Records one sentence in %Textfiber.  The sentence is framed by the markers
# "_HEAD"/"_HEAD2" in front and "_TAIL" behind.  For every token (and the
# trailing "_TAIL") the entry "previous/current" is appended under the key
# that precedes "previous".  Finally the last real token gets a bare "_TAIL"
# entry so that generation can stop there.
sub learn3 {
    my @tokens = (@_, '_TAIL');
    my ($prefix1, $prefix2) = ('_HEAD', '_HEAD2');

    for my $suffix (@tokens) {
        push @{ $Textfiber{$prefix1} }, "$prefix2/$suffix";
        ($prefix1, $prefix2) = ($prefix2, $suffix);
    }

    # After the loop $prefix1 holds the last real token of the sentence
    # (or "_HEAD2" when the sentence was empty).
    push @{ $Textfiber{$prefix1} }, '_TAIL';

    return;
}

# dump3()
#
# Prints every key of %Textfiber together with its successor entries.
sub dump3 {
    print "dumping %Textfiber\n";
    for my $key (keys %Textfiber) {
        print "{" . $key . "}-> " . join(" , ", @{ $Textfiber{$key} }) . "\n";
    }

    return;
}

# generate3($prefix)
#
# Generates one sentence by walking %Textfiber, starting at key $prefix
# (main() starts at "_HEAD").  Performs at most $Maxhop lookups.  On each
# lookup a random entry of the current key is chosen and its tokens are
# appended to the output.  "_HEAD2" is never emitted and "_TAIL" ends the
# sentence.  Returns the tokens joined by "/".  Returns an empty string when
# $prefix has no entries, e.g. when nothing has been learned yet.
sub generate3 {
    my ($prefix) = @_;
    my @output;    # the start marker itself is never pushed

    HOP:
    for (1 .. $Maxhop) {
        # Guard the lookup: dereferencing a missing key in rvalue context
        # would die under "use strict".
        my $entries = $Textfiber{$prefix};
        last HOP unless $entries && @{$entries};

        my $entry = $entries->[ rand @{$entries} ];

        for my $token (split m{/}, $entry) {
            last HOP if $token eq '_TAIL';
            push @output, $token unless $token eq '_HEAD2';
            $prefix = $token;    # the last token of the entry is the next key
        }
    }

    return join '/', @output;
}

# ------------------------------------------------------------------------
#
#
#    A N A L Y S I S
#
#
# ------------------------------------------------------------------------

# analyze($input)
#
# Splits one line of text into a list of tokens.  The characters % , ) ( and /
# are first replaced by "%" plus their two-digit hex code, so a literal "/"
# in the text cannot be confused with the "/" separator used below.
# The line is then passed to Text::ChaSen::sparse_tostr and the result is
# split on "/".
# NOTE(review): presumably ChaSen returns each morpheme followed by "/", as
# requested by the '%m/' format in initialize() -- confirm against ChaSen.
sub analyze {
    my ($input) = @_;

    # Escape the special characters.
    $input =~ s{([%,)(/])}{'%' . unpack('H2', $1)}eg;

    # Word segmentation.
    $input = Text::ChaSen::sparse_tostr($input);
    chomp $input;

    my @tokens = split m{/}, $input;
    return @tokens;
}

# initialize()
#
# Loads Text::ChaSen and configures it with the output format '%m/'.
# The module is loaded here, at run time, so that the learning and generation
# subroutines can be used without ChaSen being installed.
sub initialize {
    require Text::ChaSen;
    Text::ChaSen::getopt_argv('chasen', '-F', '%m/');

    return;
}