Ruby解析加工Word文档的表格

和非格式化文本打交道需要一颗相当强大的心脏。顺便抱怨一下:外国人写的文档也有乱七八糟的,而且还是随商业产品一起发布的。特此记录备忘。

#encoding:utf-8

# WIN32OLE is the bridge used below to start and drive 'Word.Application'.
require 'win32ole'

# Select the UTF-8 code page for WIN32OLE before any document text is read.
WIN32OLE.codepage = WIN32OLE::CP_UTF8

# Word document whose tables are parsed.
FilePath = 'plugindoc.docx'

# Section headings whose rows are read as two-column entries (see TypeOne).
# Each heading maps to the list of objects collected under it; the lists
# start empty and are filled while the document is parsed.
FuncTypeOne = [
  'Plug-In Primary functions',
  'Plug-In Event functions',
  'Plug-In naming functions',
  'Plug-In External FileSystem functions',
  'Plug-In Export functions',
  'Plug-In Callback function'
].each_with_object({}) { |heading, groups| groups[heading] = [] }

# Section headings whose rows are read as three-column entries (see TypeTwo).
# Same layout as FuncTypeOne: heading => list of collected objects.
FuncTypeTwo = [
  'SYSTEM Info functions',
  'IDE functions',
  'External FileSystem functions',
  'Connection functions',
  'SQL functions'
].each_with_object({}) { |heading, groups| groups[heading] = [] }

# Text helpers shared by the row classes.
#
# Every helper starts from String#inspect of the cell text, so control
# characters appear as visible escapes (\r, \t, \a, ...) and the text is
# wrapped in double quotes.
class TypeBase
  # Pulls the Delphi declaration out of a description cell.
  #
  # s - raw cell text.
  #
  # In the inspected text the \r and \t escapes become spaces and \a is
  # dropped. The rules are then tried from most to least specific; the first
  # one that changes the text supplies the result. The last two rules cover
  # declarations written without a leading keyword, so 'function ' is
  # prepended for them.
  #
  # Returns the declaration String, or 'Error...' when no rule applies.
  def self.parseDcl(s)
    flat = s.inspect.to_s.gsub(/\\r/,' ').gsub(/\\t/,' ').gsub(/\\a/,'')

    # [pattern, keyword to prepend (nil when the capture already has one)]
    rules = [
      [/.*Delphi[ \t]+(function[^\)]+\)[ \t]*:[ \t]+[^ \t]+).*/, nil],
      [/.*Delphi[ \t]+(procedure[^\)]+\)).*/, nil],
      [/.*Delphi[ \t]+(procedure[ \t]+[0-9a-zA-Z_]+).*/, nil],
      [/.*Delphi[ \t]+(function[ \t]+[0-9a-zA-Z_]+[ \t]*:[ \t]+[^ \t]+).*/, nil],
      [/.*Delphi[ \t]+([^\)]+\)[ \t]*:[ \t]+[^ \t]+).*/, 'function '],
      [/.*Delphi[ \t]+([^:]+[ \t]*:[ \t]+[^ \t]+).*/, 'function ']
    ]

    rules.each do |pattern, keyword|
      captured = flat.gsub(pattern, '\1')
      next if captured == flat # unchanged text means the rule did not match
      return keyword ? keyword + captured : captured
    end

    'Error...'
  end

  # Reduces the inspected cell text at its first run of spaces/tabs: the run,
  # the token after it and the rest of the text are replaced by that token.
  # Text without spaces or tabs comes back as the plain inspect output
  # (double quotes and escapes included).
  def self.parseFunc(s)
    inspected = s.inspect.to_s
    inspected.gsub(/[ \t]+([^ \t]+).*/,'\1')
  end

  # Returns the inspected form of the cell text, unmodified.
  def self.parseDesc(s)
    inspected = s.inspect
    inspected.to_s
  end
end

# A table row read as two columns: column 1 holds the function name,
# column 2 the description from which the Delphi declaration is extracted.
class TypeOne < TypeBase
  attr_accessor :func, :dcl, :desc, :orign

  # table - object answering Cell(row, column).Range.Text.
  # i     - row number handed to Cell.
  def initialize(table, i)
    nameText, detailText = [1, 2].map { |column| table.Cell(i, column).Range.Text }

    @orign = detailText # untouched text of column 2
    @func = TypeBase.parseFunc(nameText)
    @dcl, @desc = TypeBase.parseDcl(detailText), TypeBase.parseDesc(detailText)
  end
end

# A table row read as three columns: column 1 holds an id, column 2 the
# function name, column 3 the description from which the Delphi declaration
# is extracted.
class TypeTwo < TypeBase
  attr_accessor :id, :func, :dcl, :desc, :orign

  # table - object answering Cell(row, column).Range.Text.
  # i     - row number handed to Cell.
  def initialize(table, i)
    idText, nameText, detailText = (1..3).map { |column| table.Cell(i, column).Range.Text }

    # Inspected id with the \r\a escape pair removed; the double quotes that
    # inspect puts around the text are kept.
    @id = idText.inspect.to_s.gsub(/\\r\\a/,'')
    @orign = detailText # untouched text of column 3
    @func = TypeBase.parseFunc(nameText)
    @dcl, @desc = TypeBase.parseDcl(detailText), TypeBase.parseDesc(detailText)
  end
end

# Files +o+ under the heading +fireFuncGroup+ in whichever of the two
# registries (FuncTypeOne, FuncTypeTwo) knows that heading. Unknown headings
# are ignored.
def pushObject(o, fireFuncGroup)
  FuncTypeOne[fireFuncGroup] << o if FuncTypeOne.key?(fireFuncGroup)
  FuncTypeTwo[fireFuncGroup] << o if FuncTypeTwo.key?(fireFuncGroup)
end

# Opens FilePath in Word and walks every row of every table.
#
# A row whose first cell is exactly one of the FuncTypeOne / FuncTypeTwo
# headings followed by "\r\a" switches the current section and is otherwise
# skipped. Any other row is wrapped in TypeOne or TypeTwo, depending on which
# registry owns the current section, and stored through pushObject. Rows seen
# before the first heading are ignored.
#
# The first cell of a row is fetched from Word once and looked up in the
# registries, instead of being fetched again for every candidate heading.
#
# Word is always asked to quit, even when parsing raises.
def parseWordFile
  app = nil
  fireFuncGroup = ''
  begin
    app = WIN32OLE.new('Word.Application')
    # NOTE(review): FilePath is relative; Word may resolve it against its own
    # working folder rather than this script's directory - confirm.
    doc = app.Documents.Open(FilePath)
    doc.Tables.each do |table|
      1.upto(table.Rows.Count) do |i|
        firstCell = table.Cell(i, 1).Range.Text.to_s
        # Only text carrying the trailing "\r\a" can be a heading.
        heading = firstCell.end_with?("\r\a") ? firstCell[0...-2] : nil

        if heading && (FuncTypeOne.key?(heading) || FuncTypeTwo.key?(heading))
          fireFuncGroup = heading
          next # heading rows carry no function entry
        end

        pushObject(TypeOne.new(table, i), fireFuncGroup) if FuncTypeOne.key?(fireFuncGroup)
        pushObject(TypeTwo.new(table, i), fireFuncGroup) if FuncTypeTwo.key?(fireFuncGroup)
      end
    end
  ensure
    app.quit unless app.nil?
  end
end

#Execute
# Driver: parse the document, print a summary of what was found, then print
# the generated Delphi fragments (stub bodies, the exports list, procedural
# variable declarations and the id => address assignments).
parseWordFile

# Bare identifier of an entry. `func` holds inspected cell text (leading
# double quote, then the name, then the \r escape); text that does not have
# this shape is returned unchanged.
extractName = lambda { |item| item.func.gsub(/"([a-zA-Z0-9_]+)\\r.*/,'\1') }

# Ends a declaration with the cdecl directive, adding the separating ';'
# only when the declaration does not already end with one.
withCdecl = lambda { |decl| decl.end_with?(';') ? decl + 'cdecl;' : decl + ';cdecl;' }

# Summary: heading, number of entries, and every extracted declaration.
[FuncTypeOne, FuncTypeTwo].each do |groups|
  groups.each do |k, v|
    puts k, v.size
    v.each { |o| puts '---' + o.dcl }
  end
end

# Empty stub implementation for every entry whose declaration was recognised.
FuncTypeOne.each do |k, v|
  puts '{' + "#{k}" + '}'
  v.each do |item|
    next if item.dcl == 'Error...'
    puts '    ', withCdecl.call(item.dcl), 'begin', 'end;'
  end
end

puts 'exports'
FuncTypeOne.each_value do |v|
  v.each { |item| puts extractName.call(item) + ',' }
end

# Variable declarations: the declaration with its own identifier removed,
# prefixed by "name:". Only the first occurrence of the name is removed so a
# parameter that happens to contain the same text is left alone, and the
# cdecl suffix goes through withCdecl so a declaration that already ends in
# ';' does not produce ';;cdecl;'.
puts '-' * 70
FuncTypeTwo.each do |k, v|
  puts '{' + k + '}'
  v.each do |item|
    name = extractName.call(item)
    puts name + ':' + withCdecl.call(item.dcl.sub(name, ''))
  end
end

# Assignment lines keyed by the id column.
puts '-' * 70
FuncTypeTwo.each do |k, v|
  puts '{' + k + '}'
  v.each do |item|
    puts "#{item.id} : @#{extractName.call(item)} := Addr;"
  end
end

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值