#!/usr/local/bin/perl -w
#-----------------------------------------------
#
#  Learning and text generation with a Markov chain
#
#  The original header describes this as a "second-order" Markov chain.
#  NOTE(review): as implemented, the transition table is keyed on a single
#  preceding token (token -> list of observed followers).
#
#-----------------------------------------------
#
# Usage
#   $ perl l2.pl < text

use strict;
use warnings;
use Text::ChaSen;

# Transition table: token => array ref of every token that was seen
# immediately after it.  Followers are stored with repetition, so a uniform
# random pick from the list is weighted by observed frequency.
my %Textfiber = ();

# Maximum number of transitions followed while generating one sentence.
my $Maxhop = 20;

initialize();

# ----------------------
#        M A I N
# ----------------------
srand;

# Learn from the input one line (sentence) at a time.
while (my $line = <>) {
    my @tokens = analyze($line);
    learn2(@tokens);
}

# Dump the learned table (debugging aid).
#dump2();

# Without any learned input there is no "_HEAD" entry to start from.
die "no input text was learned; nothing to generate\n"
    unless %Textfiber;

# Generate random sentences.
for (0 .. 50) {
    print generate2("_HEAD") . "\n";
}

# ------------------------------------------------------------------------
#
#        L E A R N I N G
#
#        Forward-direction Markov chain
#
# ------------------------------------------------------------------------

# learn2(@tokens)
#   Records every adjacent token pair of one sentence in %Textfiber.
#   The sentinel "_HEAD" is used as the predecessor of the first token and
#   the sentinel "_TAIL" is recorded as the follower of the last token.
sub learn2 {
    my @input = @_;

    my $prefix = "_HEAD";
    for my $suffix (@input) {
        push @{ $Textfiber{$prefix} }, $suffix;
        $prefix = $suffix;
    }
    push @{ $Textfiber{$prefix} }, "_TAIL";
    return;
}

# dump2()
#   Prints every key of %Textfiber together with its comma-joined follower
#   list, one key per line, to STDOUT.
sub dump2 {
    print "dumping %Textfiber\n";
    for my $key (keys %Textfiber) {
        print "{" . $key . "}->" . join(",", @{ $Textfiber{$key} }) . "\n";
    }
    return;
}

# generate2($word)
#   Walks the transition table starting from $word, picking a random
#   follower at each step.  Stops when "_TAIL" is drawn, when the current
#   token has no recorded followers, or after $Maxhop steps.
#   Returns the visited tokens joined with "/" (the start word itself is
#   not included); returns "" when nothing could be generated.
sub generate2 {
    my $word = shift;
    my @output;

    for (1 .. $Maxhop) {
        # Look the entry up first: dereferencing a missing entry as an
        # rvalue is fatal under "use strict".
        my $followers = $Textfiber{$word};
        last unless $followers && @{$followers};

        $word = $followers->[ rand @{$followers} ];
        last if $word eq "_TAIL";
        push @output, $word;
    }
    return join("/", @output);
}

# ------------------------------------------------------------------------
#
#
#        A N A L Y S I S
#
#
# ------------------------------------------------------------------------

# analyze($input)
#   Splits one line of text into tokens using ChaSen.
#   Returns the list of tokens.
sub analyze {
    my $input = shift;
    my @tokens;

    # Escape the characters % , ) ( / as "%" followed by their two-digit
    # hex code.  "/" is the separator used when splitting ChaSen's output
    # below; presumably the others are escaped because they are special to
    # ChaSen -- TODO confirm.
    $input =~ s/([%,)(\/])/'%'.unpack("H2",$1)/eg;

    # Word segmentation.
    $input = Text::ChaSen::sparse_tostr($input);
    chomp $input;
    @tokens = split /\//, $input;
    return @tokens;
}

# initialize()
#   Configures ChaSen with the output format '%m/', which analyze() relies
#   on when it splits the result on "/".
sub initialize {
    Text::ChaSen::getopt_argv('chasen', '-F', '%m/');
    return;
}