#!/usr/bin/ruby -Ke # #cidbushu2.txt: #1505,乾 L,常用 v,lr,十日十,乞 m,ur,十日十人,乙 ##≒十日十人 #↑v1参照 ↑v1での'l' # #会意文字の中に人が2個並んで入っていた時、それが从なのかどうかは解釈の問題 #中心となる分解データは意味向きから字形合成向きまで両方入るように # #意味向き #部品が字だったらその場で分解しないように 必要なら篆書にもリンク張る #合成オペレータの正確性は求めない # #字形合成向き #勲や哀を一発で合成できるスペックは求めない 積極的にべた書き # #意味:m 意味分解困難:m! 字形:v 自動合成困難:v! #パーツ範囲制限:l 異体字ルート:i m?:解字未調査 パーツ保留:v? #mv: 普通に考えて正解 m!: パーツ提示しない m: 字形用としては不適、異体字との区別は考えない #v: 意味用としては不適 mv!, v!: パーツは揃えたが自動合成困難 #哀 mv!,oi,衣,口 v,ud,亠,ud,口,衣の下 # #bushu.rev用優先順位 #i l mv mv! v v! m # #CLWFK,CHISE用優先順位 #mv v mv! v! m # #1175,尉 L,常用 v.23,lr,oi,尸,示,寸 ##会意 # ↑bushu.rev用出力時には"尉示(2番目)寸(3番目)"となる # #lr,ud,oi,lu,ur,ld,dr,ps,+-,mm,xx # #ku,mo,ka,ha,ta,tu,ny これらは内部ではoiとして扱われる #F4,F5,F6,F7,F8,F9,FA(U+2F) # ##旧フォーマット(v1)説明 #cidbushu.txt: #1505,乾 l 常用 b l十日十 r乞 a u十日十人 r乙 #↑ #第1フィールド: 合成後どうなるか。コンマ区切りでいくらでも。 # すぺて同一の字形を指し示すための表現である # 4桁以上の生数字: Adobe-Japan1(-6)におけるCID # U+〜〜: Unicode@MSゴシック字形小塚明朝字形 # その他文字コード表現: (略) # 生漢字1字{@90JIS}: その生漢字のバイト列が指すコードポイントの # JIS X 0208:1990での例示字形 # 生漢字1字@78JIS: 前略JIS X 0208:1978後略 # 生漢字1字@04JIS: 前略JIS X 0213:2004後略 # その他(3桁以下の半角数字なども含む): JIS X 0208:1990に入って # いないグリフに適宜つけた通称 # #それより後ろ: まずスペース区切り #l: 次のフィールドと合わせて法律的情報 #a: 分けかたは合ってる&&字形をある程度表現できてる(括弧→字形を表現できてない主要因) #b: 分けかたは間違ってる&&字形をある程度表現できてる #c: 正しい分けかた&&意味記述(字形が表現できてない)または異体字情報 #!c: 分割不能な象形文字等 フィールドを消費しない #その他半角英字や'?'など1文字: 作業中である。とりあえずbと同等に扱うこと # #cがなくてaがあるのは #1. aが分けかた字形意味全てにおいて完璧 #2. aで括弧ついてないのは意味も合ってる(それで括弧つきのほうは一言じゃ説明できない) # (括弧つきのが正しい意味ならcに置けばいい) #aと!c両方あるのは分割不能かどうか微妙 #c =?はその先が本字 たどっていくと意味が合ってる解字が出てくるはず #c /?は自分が借字となってその字の意味を取り込んでしまったパターン #c ?はその字の俗字だったものが別の意味を獲得(のっとられ側の字がない借字?) #c |?はどっちがえらいとも言い難い同音同義別解字("異体""別体"もここに含むべき) #("通じる"は/?か|?か微妙) # #a-cの後ろに任意長さの分解定義が続く #a (位置を表す英字1文字)(グリフを指し示す表現) ()() ... b ()() ... #作業中なので位置指定子が記述されていない場合がある #l&r: 偏と旁 u&d: 冠と脚 o&i: 構や垂や繞、あとむりやり間に挟める #p&s: 品口3の口がp(arts)で3がs(tructure) +&-: 引き算 #lまたはrとuまたはdの組み合わせは直線で切り分けられるのにしてはいけないパターン #http://www.itscj.ipsj.or.jp/ipsj-ts/02-02/ips_charid/toc.htm #↑のP4〜P7に近い感じ # # # # #http://www2.odn.ne.jp/alt-quinon/files/ptex/x0213/jx2004tbl.pdf #http://pc5.2ch.net/test/read.cgi/unix/1082032043/183 #http://www.taishukan.co.jp/kanji/archive/jinmei_minaoshi.html def usage $stderr.print "ruby #{$0} (--format|--sort|--to-rev{-tc}|--to-thunder|--to-clwfk|--to-chise|--for-tex) cidbushu.txt\n" $stderr.print "\n" $stderr.print "convert from old(-200603) cidbushu.txt\n" $stderr.print "ruby #{$0} --from-v1-path1 cidbushu.txt | ruby #{$0} --from-v1-path2 > cidbushu2.txt\n" end def treeize(pn) st = [] pn.reverse.each {|pe| if pe =~ /^[a-z\+\-]+$/ raise RuntimeError, 'few parts' if st.length < 2 l = st.pop r = st.pop st.push([pe, l, r]) else st.push(pe) end } raise RuntimeError, 'too many parts' if st.length != 1 return st[0] end def to_oi(op) case op when 'ku','mo','ka','ha','ta','tu','ny' return 'oi' else return op end end opt = ARGV.shift if opt == '--from-v1-path1' while gets chop! if $_[0..0] == "#" puts $_ next end $_ << ' ' r = $_.split(/\s+/) if r.length > 2 && r[1] == 'l' print r[0], ' L,', r[2], ' ' * ([21-r[0].length-r[2].length, 1].max-1) r.shift r.shift else print r[0], ' ' * ([24 - r[0].length,1].max-1) end r.shift while r.length > 0 && r[0] !~ /^\#/ if r[0] =~ /^([a-z\?]|\Sc)$/ case r[0] when 'a' r[0] = 'mv' when 'b' r[0] = 'v' when 'c' r[0] = 'm' when 'i' r[0] = 'i' when 'd' r[0] = 'HOGEd' when '?' r[0] = 'HOGE?' when '!c' r[0] = 'm!' else r[0] = 'HOGE' + r[0] #p r #exit 0; end print ' ', r[0] r.shift else #r[0] = ' ' + r[0] if r[0] !~ /^[a-z\+\-\=\/\<\>\|]/ print ',', r[0] r.shift end end print ' ', r.join(' ') if r.length > 0 print "\n" end elsif opt == '--from-v1-path2' while gets chop! if $_[0..0] == "#" puts $_ next end $_ << ' ' r = $_.split(/\s+/) if r.length > 1 && r[1][0..0] == 'L' print r[0], ' ', r[1], ' ' * ([23-r[0].length-r[1].length, 1].max-1) r.shift else print r[0], ' ' * ([24 - r[0].length,1].max-1) end r.shift while r.length > 0 && r[0] !~ /^\#/ s = r[0].split(/,/) if s.length == 1 print ' ', r[0] r.shift next end if s[1..-1].find {|i| i =~ /\(/ } s[0] += '?' if s[0] !~ /\?/ end s1 = s[1..-1].collect {|i| i[0..0] }.join if s1.length == 1 print ' ', s[0], ',', s1, ',', s[1][1..-1] r.shift next end h12 = {'u' => 'd', 'l' => 'r', 'o' => 'i'} h21 = {'d' => 'u', 'r' => 'l', 'i' => 'o'} case s1 when 'lr', 'ud', 'oi', 'lu', 'ld', 'ur', 'dr', 'ps', '+-' #OK when 'ou','od','di','so' #OK when 'xx', 'mm' #OK when 'rl', 'du', 'io', 'ul', 'dl', 'ru', 'rd', 'sp','uo' s[0] += '.21' #if s[0] !~ s[1], s[2] = s[2], s[1] when 'dmm' s[0] += '.31' s[1], s[2], s[3] = s[2], s[3], s[1] else if s1.length == 3 if h12[s1[0..0]] == s1[1..1] if h12[s1[2..2]] #ex. 'udl'=>'lud' s[0] += '.23' s[1], s[2], s[3] = s[3], s[1], s[2] else #ex. 'udr'=>'udr' #OK end elsif h21[s1[0..0]] == s1[1..1] if h12[s1[2..2]] #ex. 'dul'=>'lud' s[0] += '.32' s[1], s[2], s[3] = s[3], s[2], s[1] else #ex. 'dur'=>'udr' s[0] += '.21' s[1], s[2], s[3] = s[2], s[1], s[3] end elsif h12[s1[0..0]] == s1[2..2] if h12[s1[1..1]] #ex. 'uld'=>'lud' s[0] += '.21' s[1], s[2], s[3] = s[2], s[1], s[3] else #ex. 'urd'=>'udr' s[0] += '.13' s[1], s[2], s[3] = s[1], s[3], s[2] end elsif h21[s1[0..0]] == s1[2..2] if h12[s1[1..1]] #ex. 'dlu'=>'lud' s[0] += '.31' s[1], s[2], s[3] = s[2], s[3], s[1] else #ex. 'dru'=>'udr' s[0] += '.23' s[1], s[2], s[3] = s[3], s[1], s[2] end elsif h12[s1[1..1]] == s1[2..2] if h12[s1[0..0]] #ex. 'lud'=>'lud' #OK else #ex. 'rud'=>'udr' s[0] += '.31' s[1], s[2], s[3] = s[2], s[3], s[1] end elsif h21[s1[1..1]] == s1[2..2] if h12[s1[0..0]] #ex. 'ldu'=>'lud' s[0] += '.13' s[1], s[2], s[3] = s[1], s[3], s[2] else #ex. 'rdu'=>'udr' s[0] += '.32' s[1], s[2], s[3] = s[3], s[2], s[1] end end end end s1 = s[1..-1].collect {|i| i[0..0] }.join case s1 when 'lr', 'ud', 'oi', 'lu', 'ld', 'ur', 'dr', 'ps', '+-' #OK when 'ou','od','di','so' #OK when 'xx', 'mm' s[0].sub!(/v/, 'v!') if s[0] !~ /v\!/ when 'mmd' s[0].sub!(/v/, 'v!') if s[0] !~ /v\!/ s[1], s[2] = ['umm', s[1][1..-1], s[2][1..-1]].join(','), s[3] s.pop s1 = s[1..-1].collect {|i| i[0..0] }.join when /^lr[di]$/, /^ud[ri]$/, /^oi[rd]$/ ss = h21[s1[2..2]] + s1[0..1] s[1], s[2] = [ss, s[1][1..-1], s[2][1..-1]].join(','), s[3] s.pop s1 = s[1..-1].collect {|i| i[0..0] }.join when /^[uo]lr$/, /^[lo]ud$/, /^[lu]oi$/ ss = h12[s1[0..0]] + s1[1..-1] s[1], s[2] = s[1], [ss, s[2][1..-1], s[3][1..-1]].join(',') s.pop s1 = s[1..-1].collect {|i| i[0..0] }.join else p s exit end l = ([s1] + s[1..-1].collect {|i| i[1..-1] }) l = treeize(l) # def eval_oi(nd) return if nd.class == String if nd[0] == 'oi' && nd[1].class == String case nd[1] when /^(广|厂|やまいだれ|尸|在の外|虍|广廿|厂林|厂イ|雁たれ)$/ nd[0].replace 'ta' when /^(囗四)$/ nd[0].replace 'ku' when /^(門|冂|岡ひく山|風|微かまえ|戊|戌)$/ nd[0].replace 'mo' when /^(勹|裁かまえ|武ひく止|)$/ nd[0].replace 'tu' when /^(匚|匸)$/ nd[0].replace 'ha' when /^(凵)$/ nd[0].replace 'ka' when /^(之|え|廴|走|夂|九)$/ nd[0].replace 'ny' when /^(行|衣)$/ else end end eval_oi(nd[1]) eval_oi(nd[2]) end # eval_oi(l) l = l.flatten print ' ', s[0], ',', l.join(',') r.shift end print ' ', r.join(' ') if r.length > 0 print "\n" end elsif opt == '--format' while gets chop! if $_[0..0] == "#" puts $_ next end $_ << ' ' r = $_.split(/\s+/) if r.length > 1 && r[1][0..0] == 'L' print r[0], ' ', r[1], ' ' * ([23-r[0].length-r[1].length, 1].max-1) r.shift else print r[0], ' ' * ([24 - r[0].length,1].max-1) end r.shift while r.length > 1 && r[0] !~ /^\#/ print ' ', r[0] r.shift end print ' ', r.join(' ') if r.length > 0 print "\n" end elsif opt == '--sort' && nil #v1 ali = Hash.new def ali.[](key) return key if ! self.has_key?(key) super end fp = open('cidbushu.alias') while fp.gets chop! next if $_ =~ /^\s*\#/ gsub!(/\#.*$/, '') gsub!(/\s*$/, '') next if $_ =~ /^(delete|overwrite|protect)/ r = $_.split(/\s+/) next if r.length <= 1 r.shift if r[0] =~ /^(order-(strong|weak)|weak)/ d = r.shift r.each {|i| ali[i] = d if i.length > 2 } end fp.close d = Array.new while gets chop! r = $_.split(/\s+/) if r.length < 1 || r[0] =~ /^\#/ puts $_ next end r = r[0] r = r.split(/\,/) r.delete_if {|i| i =~ /^\d{4,}$/ } r = r[0] #p r d.push([r, $_]) if $_ =~ /c \=(\S+)/ && $1.length > 2 ali[$1] = r #p r, $1 end end d.each {|i| i.unshift(ali[ali[ali[i[0]]]].gsub(/^(常用|旧|たて|よこ|たれ|にょう)(.)/) { $2 }) } d.sort! {|i, j| [i[0][0..1], i[1][0..2], j[1][2..-1]] <=> [j[0][0..1], j[1][0..2], i[1][2..-1]] } d.each {|i| #p i[0..1] puts i[2] } elsif opt == '--to-expand' elsif opt =~ /^--to-rev/ || opt == '--to-thunder' PREFER_REGULAR = false if opt == '--to-thunder' prefer = < [[pos, part], ...] order_weak = [] weak = {} ali = {} delp = [] dels = [] def ali.[](*key) if key.length == 1 key = key[0] return key if ! self.has_key?(key) super elsif key.length == 2 pos = key[1] key = key[0] if pos =~ /^[亜-腕]$/ && self.has_key?(key+'(inJIS1)') return super(key+'(inJIS1)') elsif pos =~ /^[弌-熙]$/ && self.has_key?(key+'(inJIS2)') return super(key+'(inJIS2)') end return key if ! self.has_key?(key) super(key) end end while gets chop! next if $_ =~ /^\s*\#/ gsub!(/\#.*$/, '') gsub!(/\s*$/, '') r = $_.split(/\s+/) l = r[0] l = l.split(/\,/) l.delete_if {|i| i =~ /^\d{4,}$/ } l.each {|i| ali[i] = l[0].gsub(/\@90JIS$/, '') } composed = l[0].gsub(/\@90JIS$/, '') r.shift r.each_index {|i| parts = [] if r[i][0..0] == 'L' next end l = r[i].split(/\,/) c = l[0] next if l.length <= 1 if l[1] =~ /^[\=\/\<\>\|]$/ next end l.shift l = l.collect {|j| j.gsub(/\((.*)\)$/) { $1 } } #remove parenthesis # def subparts(pn) #p pn return [[pn]] if pn.class == String pn0 = to_oi(pn[0]) return subparts(pn[1]).collect {|br| [pn0[0..0]] + br } \ + subparts(pn[2]).collect {|br| [pn0[1..1]] + br } end # l = treeize(l) m = subparts(l) #p m m.each {|br| br = br[-2..-1] parts << br } if c =~ /\.\d\d/ i1 = $&[1..1].to_i-1 i2 = $&[2..2].to_i-1 parts2 = [parts[i1], parts[i2]] (i1, i2) = [i1, i2].sort parts.delete_at(i2) parts.delete_at(i1) parts = parts2 + parts c.gsub!(/\.\d\d/, '') end c.gsub!(/^HOGE/, '') c.gsub!(/(.)\?/) { $1 } c = {'mv' => 'a', 'v' => 'b', 'm' => 'c', 'mv!' => 'e', 'v!' => 'f'}[c] || c if c.length != 1 $stderr.puts [composed, c].inspect exit 1 end if rev.has_key?([composed, c]) $stderr.print "repeated composition definition:\n" $stderr.print "#{[composed, c].inspect} => #{rev[[composed, c]].inspect}\n#{$_}\n" next end rev[[composed, c]] = parts #p [composed, c, parts] } end fp = open('cidbushu.alias') prot = [] while fp.gets chop! $stderr.print "#{$_}\n" next if $_[0..0] == '#' gsub!(/\#.*$/, '') gsub!(/\s*$/, '') r = $_.split(/\s+/) case r[0] when 'delete' for i in 1...r.length for j in 'abcdefijklmn?'.split(//) next unless rev.has_key?([r[i], j]) parts = rev.delete([r[i], j]) end end when 'overwrite' for j in 'abcdefijklmn?'.split(//) next unless rev.has_key?([r[1], j]) parts = rev.delete([r[1], j]) end parts = r[2..-1] parts.each_index {|i| l = parts[i] l = ' ' + l if l !~ /^[a-z\+\-]/ l = [l[0..0], ali[ali[ali[ali[l[1..-1], r[1]], r[1]], r[1]], r[1]]] parts[i] = l } rev[[r[1], 'b']] = parts when 'protect' prot = r[1..-1] # when 'order-swap' # for i in 'abcdefijklmn?'.split(//) # next unless rev.has_key?([r[1], i]) # parts = rev[[r[1], i]] # parts[0..1] = [parts[1], parts[0]] # rev[[r[1], i]] = parts # end # rev.each {|cc, parts| # next unless parts[0..1].collect {|i| i[1] }.include?(r[1]) # parts[0..1] = [parts[1], parts[0]] # rev[cc] = parts # } ### # when 'try-making-malanalysis' ### when 'entry-prefix-will-be-deleted' delp += r[1..-1] when 'entry-suffix-will-be-deleted' dels += r[1..-1] else frec = nil if r[0] == 'order-strong' r.shift frec = :ostrong elsif r[0] == 'order-weak' r.shift frec = :oweak elsif r[0] == 'weak' r.shift frec = :weak end for j in 1...r.length #後からoverwriteする場合の救済措置 ali[r[j]] = r[0] end rev.each {|cc, parts| #next unless parts.collect {|i| i[1] }.include?(r[1]) fcr = false for j in 1...r.length parts.each {|k| rj = r[j].dup fquit = false if r[j] =~ /\(inJIS1\)$/ rj.gsub!(/\(inJIS1\)$/, '') #$stderr.puts cc.inspect next unless cc[0] =~ /^[亜-腕]$/ elsif r[j] =~ /\(inJIS2\)$/ rj.gsub!(/\(inJIS2\)$/, '') #$stderr.puts cc.inspect next unless cc[0] =~ /^[弌-熙]$/ end if k[1] == rj weak[cc[0]] = [cc.collect {|l| l.dup}, parts.collect {|l| l.dup}] if frec == :weak #後からaliasされるのには未対応 k[1] = r[0] order_weak |= [cc[0]] if frec == :oweak fcr = true if r[0] == cc[0] elsif k[1] == r[0] order_weak |= [cc[0]] if frec == :ostrong end } end if fcr #$stderr.print "deleted a crunched definition: #{cc.inspect} => #{parts.inspect}\n" rev.delete(cc) end } for i in 'abcdefijklmn?'.split(//) for j in 1...r.length next if prot.include?(r[j]) next unless rev.has_key?([r[j], i]) parts = rev.delete([r[j], i]) fcr = false parts.each {|k| fcr = true if k[1] == r[0] } if fcr $stderr.print "deleted a crunched definition: #{[r[0], i].inspect} => #{parts.inspect}\n" else if rev.has_key?([r[0], i]) $stderr.print "overwriting a collided definition by: #{[r[0], i].inspect} => #{parts.inspect}\n" end rev[[r[0], i]] = parts end end end prot = [] end end fp.close delp = Regexp.new('^(' + delp.collect {|i| Regexp.escape(i) }.join('|') + ')(.)$') dels = Regexp.new('^(.)(' + dels.collect {|i| Regexp.escape(i) }.join('|') + ')$') reva = rev.dup rev.each {|cc, parts| parts = parts.collect {|i| i[1] = i[1].gsub(delp) { $2 }; i } parts = parts.collect {|i| i[1] = i[1].gsub(dels) { $1 }; i } if parts.collect {|i| i[1] }.include?(cc[0]) $stderr.print "deleted a crunched definition: #{cc.inspect} => #{parts.inspect}\n" rev.delete(cc) next end rev[cc] = parts if cc[0].length > 2 || parts.collect {|i| i[1].length }.max > 2 $stderr.puts [cc, parts].inspect unless cc[0] =~ /@\d\dJIS$/ rev.delete(cc) next end #parts.each {|i| # if i[1] =~ /\?/ # rev.delete(cc) # break # end #} } rev2 = {} rev.each {|cc, parts| if rev2.has_key?(cc[0]) p = (rev2[cc[0]]=~/^[a-c]$/ ? rev2[cc[0]] : 'b'+rev2[cc[0]]) #p cc[1] n = (cc[1]=~/^[a-c]$/ ? cc[1] : 'b'+cc[1]) if PREFER_REGULAR else p = p.gsub(/^([ab])/) { $1 == 'a' ? 'b' : 'a' } n = n.gsub(/^([ab])/) { $1 == 'a' ? 'b' : 'a' } end if p > n rev2[cc[0]] = cc[1] end else rev2[cc[0]] = cc[1] end } rev.each {|cc, parts| next if rev2[cc[0]] == cc[1] rev.delete(cc) } fow = {} rev2 = {} rev.each {|cc, parts| parts0 = parts parts = parts.collect {|i| i[1] } #delete pos data if fow.has_key?(parts) # $stderr.puts [cc, parts].inspect, (order_weak.include?(fow[parts][0]) ? 2 : 0)+(order_weak.include?(cc[0]) ? 1 : 0) if cc[0] =~ /[壌壤]/ case (weak.has_key?(fow[parts][0]) ? 2 : 0)+(weak.has_key?(cc[0]) ? 1 : 0) when 0 #$stderr.print "undefined priority: #{fow[parts][0]}, #{cc[0]}\n" when 3 #$stderr.print "can't determine priority: #{fow[parts][0]}, #{cc[0]}\n" when 1 $stderr.print "canceling a replace: #{cc[0]}, #{parts}, #{weak[cc[0]][1]}\n" parts0 = weak[cc[0]][1].dup parts = parts0.collect {|i| i[1] } when 2 cc2 = fow[parts] fow[parts] = cc cc = cc2 parts02 = rev2.delete(cc) rev2[fow[parts]] = parts0 parts0 = parts02 $stderr.print "canceling a replace: #{cc[0]}, #{parts}, #{weak[cc[0]][1]}\n" parts0 = weak[cc[0]][1].dup parts = parts0.collect {|i| i[1] } end end if fow.has_key?(parts) # $stderr.puts [cc, parts].inspect, (order_weak.include?(fow[parts][0]) ? 2 : 0)+(order_weak.include?(cc[0]) ? 1 : 0) if cc[0] =~ /[壌壤]/ case (order_weak.include?(fow[parts][0]) ? 2 : 0)+(order_weak.include?(cc[0]) ? 1 : 0) when 0 $stderr.print "undefined order-priority: #{fow[parts][0]}, #{cc[0]}\n" when 3 $stderr.print "can't determine order-priority: #{fow[parts][0]}, #{cc[0]}\n" when 1 parts0 = parts0.dup parts0[0..1] = [parts0[1], parts0[0]] parts = parts0.collect {|i| i[1] } when 2 cc2 = fow[parts] fow[parts] = cc cc = cc2 parts02 = rev2.delete(cc) rev2[fow[parts]] = parts0 parts0 = parts02 parts0 = parts0.dup parts0[0..1] = [parts0[1], parts0[0]] parts = parts0.collect {|i| i[1] } end end if fow.has_key?(parts) $stderr.print "too crowded: #{fow[parts][0]}, #{cc[0]}\n" end fow[parts] = cc rev2[cc] = parts0 } rev2 = rev2.collect {|cc, parts| [cc[0]] + parts.collect {|i| i[1] } + [cc[1]] + parts.collect {|i| i[0] } } rev2.sort! {|i, j| i[0] <=> j[0] } rev2.each {|i| if opt =~ /^--to-rev/ puts i[0...i.length/2].to_s elsif opt == '--to-thunder' next if i[0] == '裁' puts i[0, 3].to_s + i[i.length/2, 3].to_s end } elsif opt == '--to-clwfk' || opt == '--to-chise' $renh0 = {} $renh1 = {} fp = open('cidbushu.alias.by_pos') while fp.gets chop! next if $_[0..0] == '#' r = $_.split(/\s+/) break if r.length < 3 case r[0] when '.' $renh0[r[1]] = r[2] when /\$$/ $renh1[[r[0][0..-2], r[1]]] = r[2] else end end fp.close while gets chop! next if $_ =~ /^\s*\#/ gsub!(/\#.*$/, '') gsub!(/\s*$/, '') r = $_.split(/\s+/) l = r[0] l = l.split(/\,/) l.delete_if {|i| i =~ /^\d{4,}$/ } composed = l[0].gsub(/\@90JIS$/, '') composed = $renh0[composed] if $renh0.has_key?(composed) r.shift r.each_index {|i| parts = [] if r[i][0..0] == 'L' next end l = r[i].split(/\,/) c = l[0] next if l.length <= 1 if l[1] =~ /^[\=\/\<\>\|]$/ next end l.shift l = l.collect {|j| j.gsub(/\((.*)\)$/) { $1 } } #remove parenthesis # def renparts(nd, pt = []) if nd.class == String pt = pt.join if $renh1.has_key?([pt[-1..-1], nd]) nd.replace($renh1[[pt[-1..-1], nd]]) elsif $renh0.has_key?(nd) nd.replace($renh0[nd]) end return end nd0 = to_oi(nd[0]) renparts(nd[1], pt + [nd0[0..0]]) renparts(nd[2], pt + [nd0[1..1]]) end # l = treeize(l) m = renparts(l) c.gsub!(/\.\d\d/, '') if opt == '--to-clwfk' def to_paren(nd) return nd if nd.class == String oh = {'lr' => 'yoko', 'ud' => 'tate', 'oi' => 'kamae', 'ku'=>'kamae', 'mo'=>'kamae', 'ka'=>'kamae', 'ha'=>'kamae', 'ta'=>'tare', 'tu'=>'kamae', 'ny'=>'nyou'} return "(#{(oh[nd[0]] || nd[0])} #{to_paren(nd[1])} #{to_paren(nd[2])})" end print "(setq #{composed} '#{to_paren(l)})\n" end } end elsif opt == '--to-chise' && nil #v1 while gets chop! next if $_ =~ /^\s*\#/ gsub!(/\#.*$/, '') gsub!(/\s*$/, '') r = $_.split(/\s+/) l = r[0] l = l.split(/\,/) l.delete_if {|i| i =~ /^\d{4,}$/ } l[1..-1].each {|i| ali[i] = l[0].gsub(/\@90JIS$/, '') } composed = l[0].gsub(/\@90JIS$/, '') r.shift i = 0 parts = [] fparen = false while i < r.length if r[i] =~ /^([a-z\?]|\Sc)$/ if r[i] == 'l' i += 2 next elsif r[i] == '!c' || r[i] =~ /^\Sc$/ i += 1 next end c = r[i] else if r[i] =~ /^[\=\/\<\>\|]/ i += 1 next end l = r[i] fparen = true if l =~ /\((.*)\)$/ l = ' ' + l if l !~ /^[a-z\+\-]/ l = [l[0..0], l[1..-1]] parts << l if (i == r.length-1 || r[i+1] =~ /^([a-z\?]|\Sc)$/) && parts.length > 0 if c !~ /^[ab]$/ || fparen parts = [] fparen = false i += 1 next end if rev.has_key?([composed, c]) $stderr.print "repeated composition definition:\n" $stderr.print "#{[composed, c].inspect} => #{rev[[composed, c]].inspect}\n#{$_}\n" i += 1 next end rev[[composed, c]] = parts parts = [] fparen = false end end i += 1 end end def xx(c1,c2); "U+2FFB(#{c1},#{c2})"; end def lr(c1,c2); "U+2FF0(#{c1},#{c2})"; end def ud(c1,c2); "U+2FF1(#{c1},#{c2})"; end def oi(c1,c2,c0) if c0 =~ /^()$/ elsif c0 =~ /^(广|厂|やまいだれ|尸|在の外|虍|广廿|厂林|厂イ|雁たれ)$/ "U+2FF8(#{c1},#{c2})" elsif c0 =~ /^(囗四)$/ "U+2FF4(#{c1},#{c2})" elsif c0 =~ /^(門|冂|岡ひく山|風|微かまえ|戊|戌)$/ "U+2FF5(#{c1},#{c2})" elsif c0 =~ /^(勹|裁かまえ|武ひく止|)$/ "U+2FF9(#{c1},#{c2})" elsif c0 =~ /^(匚|匸)$/ "U+2FF7(#{c1},#{c2})" elsif c0 =~ /^(凵)$/ "U+2FF6(#{c1},#{c2})" elsif c0 =~ /^(之|え|廴|走|夂)$/ "U+2FFA(#{c1},#{c2})" elsif c0 =~ /^(行|衣)$/ #U+2FF3 #o->u+d, cf.'udi' "U+2FFB(#{c1},#{c2})" end end rev2 = [] rev.each {|cc, parts| parts.sort! {|i, j| %w(l r u d o i p s m x + -).index(i[0]) <=> %w(l r u d o i p s m x + -).index(j[0]) } case parts.collect {|i| i[0]}.to_s when '+-' next when 'xx' rev2 << [cc, xx(parts[0][1],parts[1][1])] when 'mm' rev2 << [cc, xx(parts[0][1],parts[1][1])] when 'lr' rev2 << [cc, lr(parts[0][1],parts[1][1])] when 'ud' rev2 << [cc, ud(parts[0][1],parts[1][1])] when 'oi' rev2 << [cc, oi(parts[0][1],parts[1][1], cc[0])] when 'lu' rev2 << [cc, "U+2FF9(#{parts[1][1]},#{parts[0][1]})"] #u->l2+r, lr(ud(l2,l),r) or #u->u2+d, ud(u2,lr(l,d)) when 'ld' # when 'ru' rev2 << [cc, "U+2FF8(#{parts[1][1]},#{parts[0][1]})"] when 'rd' rev2 << [cc, "U+2FFA(#{parts[1][1]},#{parts[0][1]})"] when 'lri' rev2 << [cc, "U+2FF2(#{parts[0][1]},#{parts[2][1]},#{parts[1][1]})"] when 'udi' rev2 << [cc, "U+2FF3(#{parts[0][1]},#{parts[2][1]},#{parts[1][1]})"] when 'lru' rev2 << [cc, ud(parts[2][1], lr(parts[0][1], parts[1][1]))] when 'lrd' rev2 << [cc, ud(lr(parts[0][1], parts[1][1]), parts[2][1])] when 'udo' rev2 << [cc, oi(parts[2][1], ud(parts[0][1], parts[1][1]))] when 'uo' #o->o2+i, oi(o2,ud(u,i)) raise notImplementedError, "#{parts.collect {|i| i[0]}.to_s}" when 'do' raise notImplementedError, "#{parts.collect {|i| i[0]}.to_s}" else raise notImplementedError, "#{parts.collect {|i| i[0]}.to_s}" end } elsif opt == '--for-tex' #http://psitau.at.infoseek.co.jp/otf.html #http://oku.edu.mie-u.ac.jp/~okumura/texwiki/?OTF puts '\documentclass{jarticle}' puts '\usepackage{utf}' puts '\begin{document}' while gets chop! gsub!(/\s\#$/, '') gsub!(/ /, ' ') gsub!(/[\#$%&_{}^~<>\\|]/) {|i| i =~ /[<>\\]/ ? '\verb|'+i+'|' : i == '|' ? '\textbar ' : '\\'+i } gsub!(/^\d{4,}/) { '{\small '+$&+'}\CID{'+$&+'}' } gsub!(/U\+([0-9A-F]{4})/) { $&+'\UTF{'+$1+'}' } puts $_ puts end puts '\end{document}' elsif false itaiji = {} fp = open('./emacs/tcode/itaiji.maz') while fp.gets chop! i = split(/\s/) itaiji[i[0]] = i[1] end fp.close while gets chop! if $_ =~ /^\s*\#/ puts $_ next end r = $_.split(/\s+/) l = r[0] l = l.split(/\,/) l.delete_if {|i| i =~ /^\d{4,}$/ } c = l[0].gsub(/\@90JIS$/, '') if itaiji[c] $_ << " #c =#{itaiji[$1]} " end puts $_ end elsif false #ftp://ftp.oreilly.com/pub/examples/nutshell/cjkv/adobe/ while gets chop! next if $_ =~ /^\#/ next if $_ =~ /^CID/ codes = $_.split(/\t/) cid = codes[0].to_i c90jis = codes[1].hex c78jis = codes[13].hex next if c78jis == 0 && c90jis == 0 w = ' ' w[0] = (c90jis>>8)+0x80 w[1] = (c90jis&0xff)+0x80 c90jis = w w = ' ' w[0] = (c78jis>>8)+0x80 w[1] = (c78jis&0xff)+0x80 c78jis = w if c90jis == c78jis printf "%-8d\#%s\n", cid, c90jis else l = [] l << c90jis+"@90JIS" if c90jis.length > 0 l << c78jis+"@78JIS" if c78jis.length > 0 printf "%-8d#%s\n", cid, l.join(",") end end else usage() end ####