#!/usr/bin/ruby -Ke
#
#cidbushu2.txt:
#1505,乾 L,常用 v,lr,十日十,乞 m,ur,十日十人,乙 ##≒十日十人
#↑v1参照 ↑v1での'l'
#
#会意文字の中に人が2個並んで入っていた時、それが从なのかどうかは解釈の問題
#中心となる分解データは意味向きから字形合成向きまで両方入るように
#
#意味向き
#部品が字だったらその場で分解しないように 必要なら篆書にもリンク張る
#合成オペレータの正確性は求めない
#
#字形合成向き
#勲や哀を一発で合成できるスペックは求めない 積極的にべた書き
#
#意味:m 意味分解困難:m! 字形:v 自動合成困難:v!
#パーツ範囲制限:l 異体字ルート:i m?:解字未調査 パーツ保留:v?
#mv: 普通に考えて正解 m!: パーツ提示しない m: 字形用としては不適、異体字との区別は考えない
#v: 意味用としては不適 mv!, v!: パーツは揃えたが自動合成困難
#哀 mv!,oi,衣,口 v,ud,亠,ud,口,衣の下
#
#bushu.rev用優先順位
#i l mv mv! v v! m
#
#CLWFK,CHISE用優先順位
#mv v mv! v! m
#
#1175,尉 L,常用 v.23,lr,oi,尸,示,寸 ##会意
# ↑bushu.rev用出力時には"尉示(2番目)寸(3番目)"となる
#
#lr,ud,oi,lu,ur,ld,dr,ps,+-,mm,xx
#
#ku,mo,ka,ha,ta,tu,ny これらは内部ではoiとして扱われる
#F4,F5,F6,F7,F8,F9,FA(U+2F)
#
##旧フォーマット(v1)説明
#cidbushu.txt:
#1505,乾 l 常用 b l十日十 r乞 a u十日十人 r乙
#↑
#第1フィールド: 合成後どうなるか。コンマ区切りでいくらでも。
# すべて同一の字形を指し示すための表現である
# 4桁以上の生数字: Adobe-Japan1(-6)におけるCID
# U+〜〜: Unicode@MSゴシック字形小塚明朝字形
# その他文字コード表現: (略)
# 生漢字1字{@90JIS}: その生漢字のバイト列が指すコードポイントの
# JIS X 0208:1990での例示字形
# 生漢字1字@78JIS: 前略JIS X 0208:1978後略
# 生漢字1字@04JIS: 前略JIS X 0213:2004後略
# その他(3桁以下の半角数字なども含む): JIS X 0208:1990に入って
# いないグリフに適宜つけた通称
#
#それより後ろ: まずスペース区切り
#l: 次のフィールドと合わせて法律的情報
#a: 分けかたは合ってる&&字形をある程度表現できてる(括弧→字形を表現できてない主要因)
#b: 分けかたは間違ってる&&字形をある程度表現できてる
#c: 正しい分けかた&&意味記述(字形が表現できてない)または異体字情報
#!c: 分割不能な象形文字等 フィールドを消費しない
#その他半角英字や'?'など1文字: 作業中である。とりあえずbと同等に扱うこと
#
#cがなくてaがあるのは
#1. aが分けかた字形意味全てにおいて完璧
#2. aで括弧ついてないのは意味も合ってる(それで括弧つきのほうは一言じゃ説明できない)
# (括弧つきのが正しい意味ならcに置けばいい)
#aと!c両方あるのは分割不能かどうか微妙
#c =?はその先が本字 たどっていくと意味が合ってる解字が出てくるはず
#c /?は自分が借字となってその字の意味を取り込んでしまったパターン
#c ?はその字の俗字だったものが別の意味を獲得(のっとられ側の字がない借字?)
#c |?はどっちがえらいとも言い難い同音同義別解字("異体""別体"もここに含むべき)
#("通じる"は/?か|?か微妙)
#
#a-cの後ろに任意長さの分解定義が続く
#a (位置を表す英字1文字)(グリフを指し示す表現) ()() ... b ()() ...
#作業中なので位置指定子が記述されていない場合がある
#l&r: 偏と旁 u&d: 冠と脚 o&i: 構や垂や繞、あとむりやり間に挟める
#p&s: 品口3の口がp(arts)で3がs(tructure) +&-: 引き算
#lまたはrとuまたはdの組み合わせは直線で切り分けられるのにしてはいけないパターン
#http://www.itscj.ipsj.or.jp/ipsj-ts/02-02/ips_charid/toc.htm
#↑のP4〜P7に近い感じ
#
#
#
#
#http://www2.odn.ne.jp/alt-quinon/files/ptex/x0213/jx2004tbl.pdf
#http://pc5.2ch.net/test/read.cgi/unix/1082032043/183
#http://www.taishukan.co.jp/kanji/archive/jinmei_minaoshi.html
# Print the command-line synopsis to standard error.
# The first line lists the sub-commands, the rest shows the two-pass pipeline
# used to convert an old-format cidbushu.txt.
def usage
  me = $0
  synopsis = "ruby #{me} (--format|--sort|--to-rev{-tc}|--to-thunder|--to-clwfk|--to-chise|--for-tex) cidbushu.txt\n"
  blank = "\n"
  heading = "convert from old(-200603) cidbushu.txt\n"
  pipeline = "ruby #{me} --from-v1-path1 cidbushu.txt | ruby #{me} --from-v1-path2 > cidbushu2.txt\n"
  $stderr.print [synopsis, blank, heading, pipeline].join
end
# Build a binary tree from a token list written in prefix notation.
#
# pn:: Array of Strings. A token made up only of lowercase ASCII letters,
#      '+' and '-' is an operator taking the two following sub-expressions;
#      any other token is a leaf.
#
# Returns the single leaf String, or a nested Array [operator, left, right].
# Raises RuntimeError 'few parts' when an operator lacks operands or the list
# is empty, and 'too many parts' when more than one expression remains.
def treeize(pn)
  st = []
  # Scanning from the end lets every operator find its operands already on
  # the stack; the most recently pushed item is the left operand.
  pn.reverse_each {|pe|
    if pe =~ /^[a-z\+\-]+$/
      raise RuntimeError, 'few parts' if st.length < 2
      l = st.pop
      r = st.pop
      st.push([pe, l, r])
    else
      st.push(pe)
    end
  }
  # An empty input leaves nothing on the stack; report it as too few parts
  # rather than falling through to the 'too many parts' check.
  raise RuntimeError, 'few parts' if st.empty?
  raise RuntimeError, 'too many parts' if st.length != 1
  st[0]
end
# Collapse the specific enclosure operators (ku, mo, ka, ha, ta, tu, ny) into
# the generic 'oi' operator; every other value is returned unchanged.
def to_oi(op)
  enclosure_ops = %w(ku mo ka ha ta tu ny)
  enclosure_ops.include?(op) ? 'oi' : op
end
# Sub-command selector: the first command-line argument picks one branch of
# the if/elsif chain that makes up the rest of this script.
opt = ARGV.shift
# --from-v1-path1: first pass of the old-format (v1) conversion.
# Reads lines via gets and rewrites the space-separated v1 fields: the legal
# marker 'l' becomes " L,<value>", one-letter kind markers are renamed, and
# the tokens following a marker are attached to it with commas.
if opt == '--from-v1-path1'
while gets
chop!
# Comment lines are passed through unchanged.
if $_[0..0] == "#"
puts $_
next
end
$_ << ' '
r = $_.split(/\s+/)
# When the second field is 'l', it and the following field are emitted as
# " L,<value>" right after the first field, padded with spaces.
if r.length > 2 && r[1] == 'l'
print r[0], ' L,', r[2], ' ' * ([21-r[0].length-r[2].length, 1].max-1)
r.shift
r.shift
else
print r[0], ' ' * ([24 - r[0].length,1].max-1)
end
r.shift
# Consume fields until the list is empty or a field starting with '#' is met.
while r.length > 0 && r[0] !~ /^\#/
# A single lowercase letter, '?' or "<non-space>c" is a kind marker and starts
# a new space-separated group; anything else is appended with a comma.
if r[0] =~ /^([a-z\?]|\Sc)$/
case r[0]
when 'a'
r[0] = 'mv'
when 'b'
r[0] = 'v'
when 'c'
r[0] = 'm'
when 'i'
r[0] = 'i'
when 'd'
r[0] = 'HOGEd'
when '?'
r[0] = 'HOGE?'
when '!c'
r[0] = 'm!'
else
# Unrecognised markers are kept but tagged with a 'HOGE' prefix.
r[0] = 'HOGE' + r[0]
#p r
#exit 0;
end
print ' ', r[0]
r.shift
else
#r[0] = ' ' + r[0] if r[0] !~ /^[a-z\+\-\=\/\<\>\|]/
print ',', r[0]
r.shift
end
end
# Remaining fields (from the first '#'-prefixed one onwards) are printed as is.
print ' ', r.join(' ') if r.length > 0
print "\n"
end
# --from-v1-path2: second pass of the v1 conversion (reads the output of the
# first pass). Each comma-separated group "kind,<pos><part>,<pos><part>..." is
# turned into "kind,<operator>,<part>,<part>...": the one-letter position
# prefixes are gathered into an operator string and the parts are reordered to
# match it.
elsif opt == '--from-v1-path2'
while gets
chop!
# Comment lines are passed through unchanged.
if $_[0..0] == "#"
puts $_
next
end
$_ << ' '
r = $_.split(/\s+/)
# Print the first field (and the field starting with 'L', if present) padded.
if r.length > 1 && r[1][0..0] == 'L'
print r[0], ' ', r[1], ' ' * ([23-r[0].length-r[1].length, 1].max-1)
r.shift
else
print r[0], ' ' * ([24 - r[0].length,1].max-1)
end
r.shift
while r.length > 0 && r[0] !~ /^\#/
s = r[0].split(/,/)
# A group without any comma has no parts; print it untouched.
if s.length == 1
print ' ', r[0]
r.shift
next
end
# If any part contains '(' the kind gets a '?' appended (once).
if s[1..-1].find {|i| i =~ /\(/ }
s[0] += '?' if s[0] !~ /\?/
end
# s1 is the string formed by the first character of every part.
s1 = s[1..-1].collect {|i| i[0..0] }.join
# A single part: emit kind, position letter and the part without its prefix.
if s1.length == 1
print ' ', s[0], ',', s1, ',', s[1][1..-1]
r.shift
next
end
# h12 maps the first letter of a position pair to its partner, h21 the reverse.
h12 = {'u' => 'd', 'l' => 'r', 'o' => 'i'}
h21 = {'d' => 'u', 'r' => 'l', 'i' => 'o'}
# First pass over s1: reorder the parts into a canonical order. Whenever parts
# are permuted, a ".NM" suffix is appended to the kind in s[0].
case s1
when 'lr', 'ud', 'oi', 'lu', 'ld', 'ur', 'dr', 'ps', '+-'
#OK
when 'ou','od','di','so'
#OK
when 'xx', 'mm'
#OK
when 'rl', 'du', 'io', 'ul', 'dl', 'ru', 'rd', 'sp','uo'
s[0] += '.21' #if s[0] !~
s[1], s[2] = s[2], s[1]
when 'dmm'
s[0] += '.31'
s[1], s[2], s[3] = s[2], s[3], s[1]
else
# Three position letters: find which two of them form a pair (per h12/h21)
# and permute so that the pair is adjacent and in forward order.
if s1.length == 3
if h12[s1[0..0]] == s1[1..1]
if h12[s1[2..2]] #ex. 'udl'=>'lud'
s[0] += '.23'
s[1], s[2], s[3] = s[3], s[1], s[2]
else #ex. 'udr'=>'udr'
#OK
end
elsif h21[s1[0..0]] == s1[1..1]
if h12[s1[2..2]] #ex. 'dul'=>'lud'
s[0] += '.32'
s[1], s[2], s[3] = s[3], s[2], s[1]
else #ex. 'dur'=>'udr'
s[0] += '.21'
s[1], s[2], s[3] = s[2], s[1], s[3]
end
elsif h12[s1[0..0]] == s1[2..2]
if h12[s1[1..1]] #ex. 'uld'=>'lud'
s[0] += '.21'
s[1], s[2], s[3] = s[2], s[1], s[3]
else #ex. 'urd'=>'udr'
s[0] += '.13'
s[1], s[2], s[3] = s[1], s[3], s[2]
end
elsif h21[s1[0..0]] == s1[2..2]
if h12[s1[1..1]] #ex. 'dlu'=>'lud'
s[0] += '.31'
s[1], s[2], s[3] = s[2], s[3], s[1]
else #ex. 'dru'=>'udr'
s[0] += '.23'
s[1], s[2], s[3] = s[3], s[1], s[2]
end
elsif h12[s1[1..1]] == s1[2..2]
if h12[s1[0..0]] #ex. 'lud'=>'lud'
#OK
else #ex. 'rud'=>'udr'
s[0] += '.31'
s[1], s[2], s[3] = s[2], s[3], s[1]
end
elsif h21[s1[1..1]] == s1[2..2]
if h12[s1[0..0]] #ex. 'ldu'=>'lud'
s[0] += '.13'
s[1], s[2], s[3] = s[1], s[3], s[2]
else #ex. 'rdu'=>'udr'
s[0] += '.32'
s[1], s[2], s[3] = s[3], s[2], s[1]
end
end
end
end
# Second pass: recompute s1 after the reordering. Three-part shapes are folded
# into two parts by merging the paired parts into one nested
# "<pos><op>,<part>,<part>" string.
s1 = s[1..-1].collect {|i| i[0..0] }.join
case s1
when 'lr', 'ud', 'oi', 'lu', 'ld', 'ur', 'dr', 'ps', '+-'
#OK
when 'ou','od','di','so'
#OK
when 'xx', 'mm'
# These shapes get the kind's 'v' turned into 'v!' (unless already 'v!').
s[0].sub!(/v/, 'v!') if s[0] !~ /v\!/
when 'mmd'
s[0].sub!(/v/, 'v!') if s[0] !~ /v\!/
s[1], s[2] = ['umm', s[1][1..-1], s[2][1..-1]].join(','), s[3]
s.pop
s1 = s[1..-1].collect {|i| i[0..0] }.join
when /^lr[di]$/, /^ud[ri]$/, /^oi[rd]$/
# Leading pair + single: the pair becomes the first operand, prefixed with
# the partner letter of the trailing position.
ss = h21[s1[2..2]] + s1[0..1]
s[1], s[2] = [ss, s[1][1..-1], s[2][1..-1]].join(','), s[3]
s.pop
s1 = s[1..-1].collect {|i| i[0..0] }.join
when /^[uo]lr$/, /^[lo]ud$/, /^[lu]oi$/
# Single + trailing pair: the pair becomes the second operand.
ss = h12[s1[0..0]] + s1[1..-1]
s[1], s[2] = s[1], [ss, s[2][1..-1], s[3][1..-1]].join(',')
s.pop
s1 = s[1..-1].collect {|i| i[0..0] }.join
else
# Unknown shape: dump the group and stop the whole conversion.
p s
exit
end
# Prefix-notation list: operator string followed by the parts with their
# position letters removed; treeize turns it into a nested array.
l = ([s1] + s[1..-1].collect {|i| i[1..-1] })
l = treeize(l)
#
# Walk a tree produced by treeize and specialise generic 'oi' operators.
# When a node's operator is 'oi' and its first operand is a leaf String, the
# operator string is rewritten in place according to the name of that operand
# (ta, ku, mo, tu, ha, ka or ny); the names 行 and 衣 are recognised but leave
# the operator as 'oi'. Both operands are then visited recursively.
# NOTE(review): the 'tu' pattern ends with an empty alternative, so an empty
# part name also maps to 'tu' - possibly a character lost from the source.
def eval_oi(nd)
  return if nd.class == String
  outer = nd[1]
  if nd[0] == 'oi' && outer.class == String
    rules = [
      [/^(广|厂|やまいだれ|尸|在の外|虍|广廿|厂林|厂イ|雁たれ)$/, 'ta'],
      [/^(囗四)$/, 'ku'],
      [/^(門|冂|岡ひく山|風|微かまえ|戊|戌)$/, 'mo'],
      [/^(勹|裁かまえ|武ひく止|)$/, 'tu'],
      [/^(匚|匸)$/, 'ha'],
      [/^(凵)$/, 'ka'],
      [/^(之|え|廴|走|夂|九)$/, 'ny'],
      [/^(行|衣)$/, nil]
    ]
    # The first matching rule wins; a rule without a replacement is a no-op.
    hit = rules.find {|rule| rule[0] =~ outer }
    nd[0].replace(hit[1]) if hit && hit[1]
  end
  eval_oi(outer)
  eval_oi(nd[2])
end
#
# Specialise the 'oi' operators of the tree in place, flatten the tree back to
# a list and print it as one comma-separated group after the kind.
eval_oi(l)
l = l.flatten
print ' ', s[0], ',', l.join(',')
r.shift
end
# Remaining fields (from the first '#'-prefixed one onwards) are printed as is.
print ' ', r.join(' ') if r.length > 0
print "\n"
end
# --format: re-emit every data line with the first field (plus the field
# starting with 'L', if present) padded with spaces and the remaining fields
# separated by single spaces. Comment lines are copied unchanged.
elsif opt == '--format'
while gets
chop!
if $_[0..0] == "#"
puts $_
next
end
$_ << ' '
r = $_.split(/\s+/)
if r.length > 1 && r[1][0..0] == 'L'
print r[0], ' ', r[1], ' ' * ([23-r[0].length-r[1].length, 1].max-1)
r.shift
else
print r[0], ' ' * ([24 - r[0].length,1].max-1)
end
r.shift
# Print fields one by one while more than one is left and the current one
# does not start with '#'; the rest is joined with spaces below.
while r.length > 1 && r[0] !~ /^\#/
print ' ', r[0]
r.shift
end
print ' ', r.join(' ') if r.length > 0
print "\n"
end
# --sort for the v1 format. The condition ends in "&& nil", so this branch is
# never taken; it is kept as disabled code.
elsif opt == '--sort' && nil #v1
ali = Hash.new
# Lookup that falls back to the key itself when no alias is registered.
def ali.[](key)
return key if ! self.has_key?(key)
super
end
# Load the alias table: after dropping comments, directive lines
# (delete/overwrite/protect) and an optional order-strong/order-weak/weak
# prefix, the first field is the target and every later field longer than
# two (see i.length) is registered as an alias of it.
fp = open('cidbushu.alias')
while fp.gets
chop!
next if $_ =~ /^\s*\#/
gsub!(/\#.*$/, '')
gsub!(/\s*$/, '')
next if $_ =~ /^(delete|overwrite|protect)/
r = $_.split(/\s+/)
next if r.length <= 1
r.shift if r[0] =~ /^(order-(strong|weak)|weak)/
d = r.shift
r.each {|i|
ali[i] = d if i.length > 2
}
end
fp.close
# Collect [name, line] pairs from the input; empty and comment lines are
# printed immediately.
d = Array.new
while gets
chop!
r = $_.split(/\s+/)
if r.length < 1 || r[0] =~ /^\#/
puts $_
next
end
# The name is the first comma-separated item of field 1 that is not a number
# of four or more digits.
r = r[0]
r = r.split(/\,/)
r.delete_if {|i| i =~ /^\d{4,}$/ }
r = r[0]
#p r
d.push([r, $_])
# A "c =<name>" entry on the line registers <name> as an alias of this name.
if $_ =~ /c \=(\S+)/ && $1.length > 2
ali[$1] = r
#p r, $1
end
end
# Prepend the sort key: the alias resolved up to three times, with one of the
# listed prefixes stripped when it is followed by at least one character.
d.each {|i| i.unshift(ali[ali[ali[i[0]]]].gsub(/^(常用|旧|たて|よこ|たれ|にょう)(.)/) { $2 }) }
d.sort! {|i, j|
[i[0][0..1], i[1][0..2], j[1][2..-1]] <=> [j[0][0..1], j[1][0..2], i[1][2..-1]]
}
d.each {|i|
#p i[0..1]
puts i[2]
}
# --to-expand: accepted but does nothing.
elsif opt == '--to-expand'
# --to-rev* / --to-thunder: build a reverse composition table from the data.
elsif opt =~ /^--to-rev/ || opt == '--to-thunder'
# When false, the ranking of kinds 'a' and 'b' is swapped while choosing
# which definition of a character to keep (see the use of PREFER_REGULAR
# further down in this branch).
PREFER_REGULAR = false
if opt == '--to-thunder'
  # NOTE(review): the right-hand side of this assignment is truncated in the
  # stored source (the line read "prefer = < [[pos, part], ...]"), which is a
  # syntax error. The lost text cannot be reconstructed; `prefer` is not
  # referenced anywhere else in this branch, so nil is used as a placeholder.
  prefer = nil
end
# Composition table: [composed, kind] => [[pos, part], ...]
# (the table is filled and queried as a Hash throughout this branch).
rev = {}
order_weak = []  # composed names collected from order-weak/order-strong alias lines
weak = {}        # composed name => saved [key, parts] copy for 'weak' alias lines
ali = {}         # alias name => canonical name
delp = []        # strings from 'entry-prefix-will-be-deleted' alias lines
dels = []        # strings from 'entry-suffix-will-be-deleted' alias lines
# Alias lookup with one or two arguments.
# One argument: returns the stored alias, or the key itself when none exists.
# Two arguments (key, pos): when pos matches [亜-腕] and key+'(inJIS1)' is
# registered, or pos matches [弌-熙] and key+'(inJIS2)' is registered, that
# entry is returned; otherwise it behaves like the one-argument form.
# NOTE(review): the two character ranges presumably correspond to JIS level 1
# and level 2 kanji under the EUC-JP ordering implied by -Ke - confirm.
def ali.[](*key)
if key.length == 1
key = key[0]
return key if ! self.has_key?(key)
super
elsif key.length == 2
pos = key[1]
key = key[0]
if pos =~ /^[亜-腕]$/ && self.has_key?(key+'(inJIS1)')
return super(key+'(inJIS1)')
elsif pos =~ /^[弌-熙]$/ && self.has_key?(key+'(inJIS2)')
return super(key+'(inJIS2)')
end
return key if ! self.has_key?(key)
super(key)
end
end
# Read the data file: comment lines are skipped and trailing comments and
# whitespace are removed before splitting into fields.
while gets
chop!
next if $_ =~ /^\s*\#/
gsub!(/\#.*$/, '')
gsub!(/\s*$/, '')
r = $_.split(/\s+/)
# Field 1 lists the names of the glyph; numbers of four or more digits are
# dropped and every remaining name becomes an alias of the first one (with a
# trailing "@90JIS" removed).
l = r[0]
l = l.split(/\,/)
l.delete_if {|i| i =~ /^\d{4,}$/ }
l.each {|i|
ali[i] = l[0].gsub(/\@90JIS$/, '')
}
composed = l[0].gsub(/\@90JIS$/, '')
r.shift
# Every remaining field is one "kind,operator,part,..." definition.
r.each_index {|i|
parts = []
# Fields starting with 'L' are skipped.
if r[i][0..0] == 'L'
next
end
l = r[i].split(/\,/)
c = l[0]
next if l.length <= 1
# Definitions whose first item after the kind is one of = / < > | are skipped.
if l[1] =~ /^[\=\/\<\>\|]$/
next
end
l.shift
l = l.collect {|j| j.gsub(/\((.*)\)$/) { $1 } } #remove parenthesis
#
# List every leaf of a tree built by treeize together with the path to it.
# Returns an Array with one entry per leaf, in left-to-right order; each entry
# holds the position letters on the way down (first letter of the operator
# for the left operand, second letter for the right operand, after to_oi has
# normalised the operator) followed by the leaf String itself.
def subparts(pn)
  #p pn
  if pn.class == String
    [[pn]]
  else
    op = to_oi(pn[0])
    paths = []
    subparts(pn[1]).each {|tail| paths << ([op[0..0]] + tail) }
    subparts(pn[2]).each {|tail| paths << ([op[1..1]] + tail) }
    paths
  end
end
#
# Build the tree for this definition and list its leaves; only the last
# position letter and the part name of each leaf are kept as [pos, part].
l = treeize(l)
m = subparts(l)
#p m
m.each {|br|
br = br[-2..-1]
parts << br
}
# A ".NM" suffix on the kind names two leaves (1-based) that are moved to the
# front of the part list, in that order; the suffix is then removed.
if c =~ /\.\d\d/
i1 = $&[1..1].to_i-1
i2 = $&[2..2].to_i-1
parts2 = [parts[i1], parts[i2]]
(i1, i2) = [i1, i2].sort
parts.delete_at(i2)
parts.delete_at(i1)
parts = parts2 + parts
c.gsub!(/\.\d\d/, '')
end
# Normalise the kind to a single letter: drop a 'HOGE' prefix and '?' marks,
# then map the v2 kind names to the letters a, b, c, e and f.
c.gsub!(/^HOGE/, '')
c.gsub!(/(.)\?/) { $1 }
c = {'mv' => 'a', 'v' => 'b', 'm' => 'c', 'mv!' => 'e', 'v!' => 'f'}[c] || c
# Anything that is still not one character long aborts the run.
if c.length != 1
$stderr.puts [composed, c].inspect
exit 1
end
# Only the first definition for a given [composed, kind] is kept.
if rev.has_key?([composed, c])
$stderr.print "repeated composition definition:\n"
$stderr.print "#{[composed, c].inspect} => #{rev[[composed, c]].inspect}\n#{$_}\n"
next
end
rev[[composed, c]] = parts
#p [composed, c, parts]
}
end
# Apply the directives of 'cidbushu.alias' to the composition table. Every
# line read is echoed to stderr. The first field selects the directive:
# delete, overwrite, protect, entry-prefix-will-be-deleted,
# entry-suffix-will-be-deleted, or (default) an alias line, optionally
# prefixed with order-strong, order-weak or weak.
fp = open('cidbushu.alias')
prot = []
while fp.gets
chop!
$stderr.print "#{$_}\n"
next if $_[0..0] == '#'
gsub!(/\#.*$/, '')
gsub!(/\s*$/, '')
r = $_.split(/\s+/)
case r[0]
when 'delete'
# Remove every definition (all kind letters) of each listed name.
for i in 1...r.length
for j in 'abcdefijklmn?'.split(//)
next unless rev.has_key?([r[i], j])
parts = rev.delete([r[i], j])
end
end
when 'overwrite'
# Replace all definitions of r[1] by a single kind-'b' definition built from
# the remaining fields; a field not starting with a lowercase letter, '+' or
# '-' gets a blank position. Part names are resolved through ali four times.
for j in 'abcdefijklmn?'.split(//)
next unless rev.has_key?([r[1], j])
parts = rev.delete([r[1], j])
end
parts = r[2..-1]
parts.each_index {|i|
l = parts[i]
l = ' ' + l if l !~ /^[a-z\+\-]/
l = [l[0..0], ali[ali[ali[ali[l[1..-1], r[1]], r[1]], r[1]], r[1]]]
parts[i] = l
}
rev[[r[1], 'b']] = parts
when 'protect'
# Names listed here are exempt from the re-keying step of the next alias
# line; the list is reset after that line has been handled.
prot = r[1..-1]
# when 'order-swap'
# for i in 'abcdefijklmn?'.split(//)
# next unless rev.has_key?([r[1], i])
# parts = rev[[r[1], i]]
# parts[0..1] = [parts[1], parts[0]]
# rev[[r[1], i]] = parts
# end
# rev.each {|cc, parts|
# next unless parts[0..1].collect {|i| i[1] }.include?(r[1])
# parts[0..1] = [parts[1], parts[0]]
# rev[cc] = parts
# }
###
# when 'try-making-malanalysis'
###
when 'entry-prefix-will-be-deleted'
delp += r[1..-1]
when 'entry-suffix-will-be-deleted'
dels += r[1..-1]
else
# Alias line: r[0] is the canonical name, r[1..] are names replaced by it.
frec = nil
if r[0] == 'order-strong'
r.shift
frec = :ostrong
elsif r[0] == 'order-weak'
r.shift
frec = :oweak
elsif r[0] == 'weak'
r.shift
frec = :weak
end
for j in 1...r.length # fallback for the case where an overwrite comes later
ali[r[j]] = r[0]
end
# Rewrite part names in every definition. An alias name ending in "(inJIS1)"
# or "(inJIS2)" only applies when the composed name matches the corresponding
# character range.
rev.each {|cc, parts|
#next unless parts.collect {|i| i[1] }.include?(r[1])
fcr = false
for j in 1...r.length
parts.each {|k|
rj = r[j].dup
fquit = false
if r[j] =~ /\(inJIS1\)$/
rj.gsub!(/\(inJIS1\)$/, '')
#$stderr.puts cc.inspect
next unless cc[0] =~ /^[亜-腕]$/
elsif r[j] =~ /\(inJIS2\)$/
rj.gsub!(/\(inJIS2\)$/, '')
#$stderr.puts cc.inspect
next unless cc[0] =~ /^[弌-熙]$/
end
if k[1] == rj
# For 'weak' lines a copy of the definition is saved before the replacement.
weak[cc[0]] = [cc.collect {|l| l.dup}, parts.collect {|l| l.dup}] if frec == :weak # being aliased again later is not supported
k[1] = r[0]
order_weak |= [cc[0]] if frec == :oweak
fcr = true if r[0] == cc[0]
elsif k[1] == r[0]
order_weak |= [cc[0]] if frec == :ostrong
end
}
end
# A definition whose part became identical to its own composed name is dropped.
if fcr
#$stderr.print "deleted a crunched definition: #{cc.inspect} => #{parts.inspect}\n"
rev.delete(cc)
end
}
# Re-key definitions of the aliased names under the canonical name, unless the
# name is protected; definitions that contain the canonical name as a part
# are dropped instead.
for i in 'abcdefijklmn?'.split(//)
for j in 1...r.length
next if prot.include?(r[j])
next unless rev.has_key?([r[j], i])
parts = rev.delete([r[j], i])
fcr = false
parts.each {|k|
fcr = true if k[1] == r[0]
}
if fcr
$stderr.print "deleted a crunched definition: #{[r[0], i].inspect} => #{parts.inspect}\n"
else
if rev.has_key?([r[0], i])
$stderr.print "overwriting a collided definition by: #{[r[0], i].inspect} => #{parts.inspect}\n"
end
rev[[r[0], i]] = parts
end
end
end
prot = []
end
end
fp.close
# Turn the collected prefix/suffix lists into patterns: a listed prefix
# followed by exactly one character, or one character followed by a listed
# suffix.
delp = Regexp.new('^(' + delp.collect {|i| Regexp.escape(i) }.join('|') + ')(.)$')
dels = Regexp.new('^(.)(' + dels.collect {|i| Regexp.escape(i) }.join('|') + ')$')
reva = rev.dup
# Clean up part names and prune unusable definitions.
rev.each {|cc, parts|
# Strip the listed prefixes/suffixes from the part names (in place).
parts = parts.collect {|i| i[1] = i[1].gsub(delp) { $2 }; i }
parts = parts.collect {|i| i[1] = i[1].gsub(dels) { $1 }; i }
# Drop definitions that contain their own composed name as a part.
if parts.collect {|i| i[1] }.include?(cc[0])
$stderr.print "deleted a crunched definition: #{cc.inspect} => #{parts.inspect}\n"
rev.delete(cc)
next
end
rev[cc] = parts
# Drop definitions whose composed name or any part name has length > 2;
# they are reported on stderr unless the composed name ends in "@NNJIS".
# NOTE(review): presumably length 2 means one EUC-JP kanji under -Ke - confirm.
if cc[0].length > 2 || parts.collect {|i| i[1].length }.max > 2
$stderr.puts [cc, parts].inspect unless cc[0] =~ /@\d\dJIS$/
rev.delete(cc)
next
end
#parts.each {|i|
# if i[1] =~ /\?/
# rev.delete(cc)
# break
# end
#}
}
# Choose one kind per composed name: kinds a-c rank as themselves, any other
# kind as 'b' + kind; unless PREFER_REGULAR is set the ranks of 'a' and 'b'
# are swapped. The smallest rank wins (ties keep the earlier entry).
rev2 = {}
rev.each {|cc, parts|
if rev2.has_key?(cc[0])
p = (rev2[cc[0]]=~/^[a-c]$/ ? rev2[cc[0]] : 'b'+rev2[cc[0]])
#p cc[1]
n = (cc[1]=~/^[a-c]$/ ? cc[1] : 'b'+cc[1])
if PREFER_REGULAR
else
p = p.gsub(/^([ab])/) { $1 == 'a' ? 'b' : 'a' }
n = n.gsub(/^([ab])/) { $1 == 'a' ? 'b' : 'a' }
end
if p > n
rev2[cc[0]] = cc[1]
end
else
rev2[cc[0]] = cc[1]
end
}
# Keep only the definition of the chosen kind for every composed name.
rev.each {|cc, parts|
next if rev2[cc[0]] == cc[1]
rev.delete(cc)
}
# Resolve definitions that share the same list of part names.
# fow maps a list of part names to the [composed, kind] key that currently
# owns it; rev2 collects the surviving definitions with their positions.
fow = {}
rev2 = {}
rev.each {|cc, parts|
parts0 = parts
parts = parts.collect {|i| i[1] } #delete pos data
# Step 1: a collision where exactly one side was recorded by a 'weak' alias
# line is resolved by restoring that side's saved, pre-alias part list.
if fow.has_key?(parts)
# $stderr.puts [cc, parts].inspect, (order_weak.include?(fow[parts][0]) ? 2 : 0)+(order_weak.include?(cc[0]) ? 1 : 0) if cc[0] =~ /[壌壤]/
case (weak.has_key?(fow[parts][0]) ? 2 : 0)+(weak.has_key?(cc[0]) ? 1 : 0)
when 0
#$stderr.print "undefined priority: #{fow[parts][0]}, #{cc[0]}\n"
when 3
#$stderr.print "can't determine priority: #{fow[parts][0]}, #{cc[0]}\n"
when 1
# The current entry is the weak one.
$stderr.print "canceling a replace: #{cc[0]}, #{parts}, #{weak[cc[0]][1]}\n"
parts0 = weak[cc[0]][1].dup
parts = parts0.collect {|i| i[1] }
when 2
# The earlier owner is the weak one: the current entry takes over the slot
# and the earlier owner is processed again with its saved parts.
cc2 = fow[parts]
fow[parts] = cc
cc = cc2
parts02 = rev2.delete(cc)
rev2[fow[parts]] = parts0
parts0 = parts02
$stderr.print "canceling a replace: #{cc[0]}, #{parts}, #{weak[cc[0]][1]}\n"
parts0 = weak[cc[0]][1].dup
parts = parts0.collect {|i| i[1] }
end
end
# Step 2: a remaining collision where exactly one side is in order_weak is
# resolved by swapping the first two parts of that side.
if fow.has_key?(parts)
# $stderr.puts [cc, parts].inspect, (order_weak.include?(fow[parts][0]) ? 2 : 0)+(order_weak.include?(cc[0]) ? 1 : 0) if cc[0] =~ /[壌壤]/
case (order_weak.include?(fow[parts][0]) ? 2 : 0)+(order_weak.include?(cc[0]) ? 1 : 0)
when 0
$stderr.print "undefined order-priority: #{fow[parts][0]}, #{cc[0]}\n"
when 3
$stderr.print "can't determine order-priority: #{fow[parts][0]}, #{cc[0]}\n"
when 1
parts0 = parts0.dup
parts0[0..1] = [parts0[1], parts0[0]]
parts = parts0.collect {|i| i[1] }
when 2
cc2 = fow[parts]
fow[parts] = cc
cc = cc2
parts02 = rev2.delete(cc)
rev2[fow[parts]] = parts0
parts0 = parts02
parts0 = parts0.dup
parts0[0..1] = [parts0[1], parts0[0]]
parts = parts0.collect {|i| i[1] }
end
end
# Step 3: still colliding - report it; the later entry takes the slot in fow.
if fow.has_key?(parts)
$stderr.print "too crowded: #{fow[parts][0]}, #{cc[0]}\n"
end
fow[parts] = cc
rev2[cc] = parts0
}
# Flatten each definition into one row:
# [composed, part names..., kind, position letters...], sorted by composed.
rev2 = rev2.collect {|cc, parts|
[cc[0]] + parts.collect {|i| i[1] } + [cc[1]] + parts.collect {|i| i[0] }
}
rev2.sort! {|i, j| i[0] <=> j[0] }
# --to-rev*: print the first half of the row (composed name and part names).
# --to-thunder: print composed + first two parts followed by kind + first two
# position letters; the entry for '裁' is skipped.
# NOTE(review): presumably relies on Array#to_s concatenating the elements
# (Ruby 1.8 behaviour) - confirm before running on a newer interpreter.
rev2.each {|i|
if opt =~ /^--to-rev/
puts i[0...i.length/2].to_s
elsif opt == '--to-thunder'
next if i[0] == '裁'
puts i[0, 3].to_s + i[i.length/2, 3].to_s
end
}
# --to-clwfk / --to-chise: rename parts through the tables loaded from
# 'cidbushu.alias.by_pos'; for --to-clwfk each definition is printed as a
# "(setq ...)" line.
elsif opt == '--to-clwfk' || opt == '--to-chise'
# $renh0: name => replacement, independent of position.
# $renh1: [position letter, name] => replacement.
$renh0 = {}
$renh1 = {}
fp = open('cidbushu.alias.by_pos')
while fp.gets
chop!
next if $_[0..0] == '#'
r = $_.split(/\s+/)
# Reading stops at the first line with fewer than three fields.
break if r.length < 3
case r[0]
when '.'
$renh0[r[1]] = r[2]
when /\$$/
# First field "<prefix>$": keyed by the prefix together with the name.
$renh1[[r[0][0..-2], r[1]]] = r[2]
else
end
end
fp.close
# Read the data file: comment lines are skipped and trailing comments and
# whitespace are removed before splitting into fields.
while gets
chop!
next if $_ =~ /^\s*\#/
gsub!(/\#.*$/, '')
gsub!(/\s*$/, '')
r = $_.split(/\s+/)
# The composed name is the first non-numeric name of field 1, without a
# trailing "@90JIS", renamed through $renh0 when listed there.
l = r[0]
l = l.split(/\,/)
l.delete_if {|i| i =~ /^\d{4,}$/ }
composed = l[0].gsub(/\@90JIS$/, '')
composed = $renh0[composed] if $renh0.has_key?(composed)
r.shift
r.each_index {|i|
parts = []
# Fields starting with 'L' are skipped.
if r[i][0..0] == 'L'
next
end
l = r[i].split(/\,/)
c = l[0]
next if l.length <= 1
# Definitions whose first item after the kind is one of = / < > | are skipped.
if l[1] =~ /^[\=\/\<\>\|]$/
next
end
l.shift
l = l.collect {|j| j.gsub(/\((.*)\)$/) { $1 } } #remove parenthesis
#
# Rename the leaves of a tree built by treeize, in place.
# nd:: a leaf String or a nested Array [operator, left, right]
# pt:: position letters collected on the way down from the root
# A leaf is replaced by $renh1[[last position letter, name]] when that key
# exists, otherwise by $renh0[name] when that key exists. Always returns nil.
def renparts(nd, pt = [])
  unless nd.class == String
    op = to_oi(nd[0])
    renparts(nd[1], pt + [op[0..0]])
    renparts(nd[2], pt + [op[1..1]])
    return
  end
  by_pos = [pt.join[-1..-1], nd]
  if $renh1.has_key?(by_pos)
    nd.replace($renh1[by_pos])
  elsif $renh0.has_key?(nd)
    nd.replace($renh0[nd])
  end
  return
end
#
# Build the tree, rename its leaves in place, and strip a ".NM" suffix from
# the kind. Only --to-clwfk produces output from here on.
l = treeize(l)
m = renparts(l)
c.gsub!(/\.\d\d/, '')
if opt == '--to-clwfk'
# Render a tree built by treeize as a parenthesised expression:
# "(<name> <left> <right>)". Known operators are translated to the names
# yoko, tate, kamae, tare and nyou; unknown operators are printed as they are.
# A leaf String is returned unchanged.
def to_paren(nd)
  return nd if nd.class == String
  oh = {'lr' => 'yoko', 'ud' => 'tate', 'oi' => 'kamae',
        'ku'=>'kamae', 'mo'=>'kamae', 'ka'=>'kamae', 'ha'=>'kamae',
        'ta'=>'tare', 'tu'=>'kamae', 'ny'=>'nyou'}
  head = oh[nd[0]] || nd[0]
  left = to_paren(nd[1])
  right = to_paren(nd[2])
  '(' + [head, left, right].join(' ') + ')'
end
# One "(setq <composed> '<expression>)" line per definition.
print "(setq #{composed} '#{to_paren(l)})\n"
end
}
end
# --to-chise for the v1 format. The condition ends in "&& nil" and the option
# is already handled by an earlier branch, so this code is never executed.
# NOTE(review): `ali` and `rev` are used here without being initialised in
# this branch.
elsif opt == '--to-chise' && nil #v1
while gets
chop!
next if $_ =~ /^\s*\#/
gsub!(/\#.*$/, '')
gsub!(/\s*$/, '')
r = $_.split(/\s+/)
# Field 1: names of the glyph; later names become aliases of the first one.
l = r[0]
l = l.split(/\,/)
l.delete_if {|i| i =~ /^\d{4,}$/ }
l[1..-1].each {|i|
ali[i] = l[0].gsub(/\@90JIS$/, '')
}
composed = l[0].gsub(/\@90JIS$/, '')
r.shift
# Scan the v1 fields: a marker field sets the current kind c, other fields
# are collected as [position letter, part] until the next marker or the end
# of the line.
i = 0
parts = []
fparen = false
while i < r.length
if r[i] =~ /^([a-z\?]|\Sc)$/
# 'l' consumes the following field as well; "<char>c" markers are skipped.
if r[i] == 'l'
i += 2
next
elsif r[i] == '!c' || r[i] =~ /^\Sc$/
i += 1
next
end
c = r[i]
else
# Fields starting with one of = / < > | are skipped.
if r[i] =~ /^[\=\/\<\>\|]/
i += 1
next
end
l = r[i]
fparen = true if l =~ /\((.*)\)$/
l = ' ' + l if l !~ /^[a-z\+\-]/
l = [l[0..0], l[1..-1]]
parts << l
# End of a group: keep it only for kinds 'a'/'b' without a parenthesised
# part, and only if no definition for [composed, kind] exists yet.
if (i == r.length-1 || r[i+1] =~ /^([a-z\?]|\Sc)$/) && parts.length > 0
if c !~ /^[ab]$/ || fparen
parts = []
fparen = false
i += 1
next
end
if rev.has_key?([composed, c])
$stderr.print "repeated composition definition:\n"
$stderr.print "#{[composed, c].inspect} => #{rev[[composed, c]].inspect}\n#{$_}\n"
i += 1
next
end
rev[[composed, c]] = parts
parts = []
fparen = false
end
end
i += 1
end
end
# Formatters producing "U+2FFx(<c1>,<c2>)" description strings.

# Two components combined as U+2FFB.
def xx(c1,c2)
  "U+2FFB(#{c1},#{c2})"
end

# Two components combined as U+2FF0.
def lr(c1,c2)
  "U+2FF0(#{c1},#{c2})"
end

# Two components combined as U+2FF1.
def ud(c1,c2)
  "U+2FF1(#{c1},#{c2})"
end

# Enclosure: the code point is chosen by matching c0 against lists of names.
# Returns nil when c0 is empty or matches none of the lists.
# NOTE(review): the first pattern and the last alternative of the U+2FF9
# pattern are empty - possibly characters lost from the source.
def oi(c1,c2,c0)
  table = [
    [/^()$/, nil],
    [/^(广|厂|やまいだれ|尸|在の外|虍|广廿|厂林|厂イ|雁たれ)$/, 'U+2FF8'],
    [/^(囗四)$/, 'U+2FF4'],
    [/^(門|冂|岡ひく山|風|微かまえ|戊|戌)$/, 'U+2FF5'],
    [/^(勹|裁かまえ|武ひく止|)$/, 'U+2FF9'],
    [/^(匚|匸)$/, 'U+2FF7'],
    [/^(凵)$/, 'U+2FF6'],
    [/^(之|え|廴|走|夂)$/, 'U+2FFA'],
    #U+2FF3
    #o->u+d, cf.'udi'
    [/^(行|衣)$/, 'U+2FFB']
  ]
  # The first matching row wins; a row without a code point yields nil.
  row = table.find {|entry| c0 =~ entry[0] }
  return nil if row.nil? || row[1].nil?
  "#{row[1]}(#{c1},#{c2})"
end
# Convert every collected definition in `rev` into a "U+2FFx(...)" string,
# collected as [key, string] pairs in rev2. The parts of a definition are
# first sorted by position letter; the concatenated letters then select the
# layout. Layouts without a conversion raise NotImplementedError.
rev2 = []
position_order = %w(l r u d o i p s m x + -)
rev.each {|cc, parts|
  parts.sort! {|i, j| position_order.index(i[0]) <=> position_order.index(j[0]) }
  # join is spelled out instead of Array#to_s so that the letters are
  # concatenated regardless of how to_s formats arrays.
  shape = parts.collect {|i| i[0] }.join
  case shape
  when '+-'
    next
  when 'xx'
    rev2 << [cc, xx(parts[0][1],parts[1][1])]
  when 'mm'
    rev2 << [cc, xx(parts[0][1],parts[1][1])]
  when 'lr'
    rev2 << [cc, lr(parts[0][1],parts[1][1])]
  when 'ud'
    rev2 << [cc, ud(parts[0][1],parts[1][1])]
  when 'oi'
    rev2 << [cc, oi(parts[0][1],parts[1][1], cc[0])]
  when 'lu'
    rev2 << [cc, "U+2FF9(#{parts[1][1]},#{parts[0][1]})"]
    #u->l2+r, lr(ud(l2,l),r) or
    #u->u2+d, ud(u2,lr(l,d))
  when 'ld'
    #
  when 'ru'
    rev2 << [cc, "U+2FF8(#{parts[1][1]},#{parts[0][1]})"]
  when 'rd'
    rev2 << [cc, "U+2FFA(#{parts[1][1]},#{parts[0][1]})"]
  when 'lri'
    rev2 << [cc, "U+2FF2(#{parts[0][1]},#{parts[2][1]},#{parts[1][1]})"]
  when 'udi'
    rev2 << [cc, "U+2FF3(#{parts[0][1]},#{parts[2][1]},#{parts[1][1]})"]
  when 'lru'
    rev2 << [cc, ud(parts[2][1], lr(parts[0][1], parts[1][1]))]
  when 'lrd'
    rev2 << [cc, ud(lr(parts[0][1], parts[1][1]), parts[2][1])]
  when 'udo'
    # NOTE(review): oi is defined with three parameters (c1, c2, c0) but is
    # called with two here; the intended third argument is unclear, so the
    # call is left as found.
    rev2 << [cc, oi(parts[2][1], ud(parts[0][1], parts[1][1]))]
  when 'uo'
    #o->o2+i, oi(o2,ud(u,i))
    # The exception class is NotImplementedError; the lowercase spelling used
    # before was an undefined identifier and would have raised NameError.
    raise NotImplementedError, shape
  when 'do'
    raise NotImplementedError, shape
  else
    raise NotImplementedError, shape
  end
}
# --for-tex: wrap the input in a LaTeX document (jarticle class, utf package).
# Every input line becomes one paragraph.
elsif opt == '--for-tex'
#http://psitau.at.infoseek.co.jp/otf.html
#http://oku.edu.mie-u.ac.jp/~okumura/texwiki/?OTF
puts '\documentclass{jarticle}'
puts '\usepackage{utf}'
puts '\begin{document}'
while gets
chop!
# Drop a trailing " #".
gsub!(/\s\#$/, '')
# NOTE(review): as stored, this replaces a space with a space (a no-op);
# possibly a different whitespace character was lost from the pattern.
gsub!(/ /, ' ')
# Escape TeX special characters: < > \ are wrapped in \verb|...|, '|' becomes
# \textbar, and the others get a leading backslash.
gsub!(/[\#$%&_{}^~<>\\|]/) {|i| i =~ /[<>\\]/ ? '\verb|'+i+'|' : i == '|' ? '\textbar ' : '\\'+i }
# A leading number of four or more digits is shown small, followed by \CID{n}.
gsub!(/^\d{4,}/) { '{\small '+$&+'}\CID{'+$&+'}' }
# Every U+XXXX (four hex digits) is followed by \UTF{XXXX}.
gsub!(/U\+([0-9A-F]{4})/) { $&+'\UTF{'+$1+'}' }
puts $_
puts
end
puts '\end{document}'
# Disabled helper (the condition is the literal false): appends a
# " #c =<variant> " note to every data line whose character has an entry in
# the itaiji table read from './emacs/tcode/itaiji.maz'.
elsif false
  # Table: first whitespace-separated field => second field.
  itaiji = {}
  fp = open('./emacs/tcode/itaiji.maz')
  while fp.gets
    chop!
    i = split(/\s/)
    itaiji[i[0]] = i[1]
  end
  fp.close
  while gets
    chop!
    # Comment lines are passed through unchanged.
    if $_ =~ /^\s*\#/
      puts $_
      next
    end
    r = $_.split(/\s+/)
    # The character is the first non-numeric name of field 1, without a
    # trailing "@90JIS".
    l = r[0]
    l = l.split(/\,/)
    l.delete_if {|i| i =~ /^\d{4,}$/ }
    c = l[0].gsub(/\@90JIS$/, '')
    if itaiji[c]
      # Append the entry that was just looked up. The previous code indexed
      # the table with $1, which is not set by any capture group here.
      $_ << " #c =#{itaiji[c]} "
    end
    puts $_
  end
# Disabled helper (the condition is the literal false): converts a
# tab-separated table into "<cid> #<character(s)>" lines.
# NOTE(review): presumably the input is the Adobe CID table from the URL
# below, with column 1 the 90JIS code and column 13 the 78JIS code - confirm.
elsif false
#ftp://ftp.oreilly.com/pub/examples/nutshell/cjkv/adobe/
while gets
chop!
next if $_ =~ /^\#/
next if $_ =~ /^CID/
codes = $_.split(/\t/)
cid = codes[0].to_i
c90jis = codes[1].hex
c78jis = codes[13].hex
# Rows where both codes are zero are skipped.
next if c78jis == 0 && c90jis == 0
# Build a two-byte string from each code by adding 0x80 to both bytes.
# NOTE(review): assigning an Integer to a String index only works on
# Ruby 1.8 - confirm the target interpreter.
w = ' '
w[0] = (c90jis>>8)+0x80
w[1] = (c90jis&0xff)+0x80
c90jis = w
w = ' '
w[0] = (c78jis>>8)+0x80
w[1] = (c78jis&0xff)+0x80
c78jis = w
# Identical strings are printed once; otherwise both are printed with
# "@90JIS" / "@78JIS" suffixes.
if c90jis == c78jis
printf "%-8d\#%s\n", cid, c90jis
else
l = []
l << c90jis+"@90JIS" if c90jis.length > 0
l << c78jis+"@78JIS" if c78jis.length > 0
printf "%-8d#%s\n", cid, l.join(",")
end
end
# No recognised sub-command: print the usage text.
else
usage()
end ####