require 'time'

# ======= HOW TO USE: ======
# ruby ttml-to-vvt.rb <inputfile.ttml> <output>
#   <output> is the output base name; ".vvt" is appended to it.
# ex:
#   command: "ruby ttml-to-vvt.rb vadirect.ttml vadirect"
#   writes:  vadirect.vvt

# Formats an offset in seconds as a WebVTT-style timestamp "HH:MM:SS.mmm".
#
# time - offset in seconds; anything that responds to #to_f (a String such
#        as "12.5", a Float, an Integer). Expected to be non-negative.
#
# Returns a String. The value is rounded to the nearest millisecond and the
# hour field is not wrapped at 24, so an offset of a day or more still
# formats correctly (90000 -> "25:00:00.000").
def format_time(time)
  # Work in whole milliseconds. Rounding here avoids the off-by-one that
  # truncating a binary float produces (0.3 s must print .300, not .299).
  total_millis = (time.to_f * 1000).round
  total_seconds, millis = total_millis.divmod(1000)
  total_minutes, seconds = total_seconds.divmod(60)
  hours, minutes = total_minutes.divmod(60)
  format("%02d:%02d:%02d.%03d", hours, minutes, seconds, millis)
end

# Writes the cues to "<filename>.vvt" as a "WEBVTT" header followed by one
# numbered block per cue.
#
# filename   - output path without extension; ".vvt" is appended.
# start_time - array of cue start offsets, each passed to format_time.
# end_time   - array of cue end offsets, parallel to start_time.
# text       - array of cue payloads, parallel to start_time.
#
# The number of cues written is the length of start_time. The file is
# written in binary mode and the number of bytes written is returned.
#
# NOTE(review): WebVTT files conventionally use ".vtt"; ".vvt" is kept
# because it is the documented output name of this script - confirm.
def convert_to_vvt(filename, start_time, end_time, text)
  cue_blocks = start_time.zip(end_time, text).each_with_index.map do |(cue_start, cue_end, caption), index|
    "#{index + 1}\n" \
      "#{format_time(cue_start)} --> #{format_time(cue_end)}\n" \
      "#{caption}\n\n"
  end
  document = "WEBVTT\n\n#{cue_blocks.join}"
  File.binwrite(filename + ".vvt", document)
end

# Extracts caption cues from a TTML file, reading it one line at a time.
#
# A cue is recognised when a line contains
#   <p begin="<seconds>s" end="<seconds>s">text
# where <seconds> is an integer or decimal number. Only the first cue on a
# line is taken, and its text runs up to the next "<".
#
# Before matching, each line is cleaned up:
#   - <br/> tags become newlines,
#   - the escapes \'92, \'93 and \'94 become ', " and " respectively,
#   - the escape \'96 is removed.
#
# filename - path of the TTML file to read.
#
# Returns three parallel arrays: [start_times, end_times, texts]. The times
# are the captured strings without the trailing "s".
def parse(filename)
  start_time = []
  end_time = []
  text = []

  # The dot is escaped so only a real decimal point is accepted, and the
  # fractional part is optional so whole-second offsets ("5s") match too.
  cue_pattern = /<p begin="(\d+(?:\.\d+)?)s"\s*end="(\d+(?:\.\d+)?)s">([^<]*)/

  # File.foreach closes the file even if an exception interrupts the read.
  File.foreach(filename) do |line|
    cleaned = line.gsub(%r{<br\s*/>}, "\n")
                  .gsub(/\\'92/, "'")
                  .gsub(/\\'9[34]/, '"')
                  .gsub(/\\'96/, '')

    cue = cue_pattern.match(cleaned)
    next unless cue

    start_time << cue[1]
    end_time << cue[2]
    text << cue[3]
  end

  [start_time, end_time, text]
end

# ======== MAIN ======== #

# Expects exactly two arguments: the TTML input path and the output base
# name. With any other argument count a usage hint is printed instead.
if ARGV.size == 2
  input_path, output_base = ARGV
  cue_starts, cue_ends, cue_texts = parse(input_path)
  convert_to_vvt(output_base, cue_starts, cue_ends, cue_texts)
else
  puts "incorrect arguments. run: 'ruby ttml-to-vvt.rb <input.ttml> <output>'"
end
