#!/usr/bin/env ruby
#
# Walks every "<name>.mbox" directory under ARCHIVE_DIR and rewrites the mbox
# file inside it so that each mail carries a Message-Id header. Generated ids
# are remembered in NEW_MESSAGE_IDS_FILE (keyed by the SHA1 of the original
# raw mail) so that re-running the script assigns the same id to the same mail.

ARCHIVE_DIR = "/var/lib/mailman/archives/private/"
NEW_MESSAGE_IDS_FILE = "new_message_ids.yml"

require 'mbox'
require 'fileutils'
require 'digest/sha1'
require 'yaml'

# keep the original content for metadata and content
# (quick and ugly way)
class Mbox
  class Mail
    # Reads one mail from +input+, starting at the next line that matches
    # options[:separator].
    #
    # input   - IO-like object positioned somewhere inside an mbox.
    # options - Hash; :separator is the pattern matching the line that starts
    #           a mail, :headers_only skips collecting the body when truthy.
    #
    # Returns a Mail built from the raw metadata String, the parsed headers
    # and the raw content String, or nil when no separator line was read.
    def self.parse(input, options = {})
      metadata = ''
      headers = Mbox::Mail::Headers.new
      content = ''

      # skip forward until a separator line (or the end of input) is reached
      next until input.eof? || (line = input.readline).match(options[:separator])

      return if !line || line.empty?

      # metadata parsing: the separator line plus any following ">" lines
      metadata << line

      until input.eof? || (line = input.readline).match(options[:separator])
        break unless line.match(/^>+/)

        metadata << line
      end

      metadata.chomp!

      # headers parsing: everything up to the first blank line
      current = ''
      begin
        break if line.strip.empty?

        current << line
      end until input.eof? || (line = input.readline).match(options[:separator])

      headers.parse(current)

      # content parsing: everything up to the next separator
      current = ''
      until input.eof? || (line = input.readline).match(options[:separator])
        next if options[:headers_only]

        current << line
      end

      unless options[:headers_only]
        content = current.chomp
      end

      # put the last separator back in its place; IO#seek works in bytes, so
      # rewind by the byte size of the line rather than its character count
      if !input.eof? && line
        input.seek(-line.bytesize, IO::SEEK_CUR)
      end

      Mail.new(metadata, headers, content)
    end
  end
end

# Serialises a mail back to text: metadata, headers and content, each
# followed by a newline.
def raw_mail(mail)
  mail.metadata.to_s + "\n" + mail.headers.to_s + "\n" + mail.content.to_s + "\n"
end

# Rewrites the archive mbox named +mbox+ so every mail has a Message-Id.
#
# mbox            - list name; the file handled is
#                   ARCHIVE_DIR/<mbox>.mbox/<mbox>.mbox.
# new_message_ids - Hash mapping the SHA1 hex digest of a mail's original raw
#                   text to the Message-Id assigned to it. Read to reuse ids
#                   from earlier runs and updated in place with new ones.
#
# When at least one mail was changed, the original file is kept next to the
# rewritten one with a ".bak" suffix. Returns nothing meaningful.
def fix_mbox(mbox, new_message_ids)
  filename = File.join(ARCHIVE_DIR, "#{mbox}.mbox", "#{mbox}.mbox")
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version
  return unless File.exist? filename

  puts "Opening mbox '#{mbox}'"
  fixed_needed = false
  fixed_filename = filename + ".fixed"
  backup_filename = filename + ".bak"

  # only work with the original file: a backup left by a previous run is
  # restored over the (already fixed) file before processing
  if File.exist? backup_filename
    FileUtils.rm(filename)
    FileUtils.mv(backup_filename, filename)
  end

  File.open(fixed_filename, "w") do |fixed_mbox|
    Mbox.open(filename).each do |mail|
      mail_raw = raw_mail(mail)

      if mail.headers['Message-Id'].nil?
        # the digest of the untouched mail is the stable lookup key
        digest = Digest::SHA1.hexdigest(mail_raw)
        n_m_i = new_message_ids[digest] || generate_message_id(mail)
        mail.headers['Message-Id'] = n_m_i

        # recalculate mail_raw after change
        mail_raw = raw_mail(mail)

        new_message_ids[digest] = n_m_i
        fixed_needed = true
      end

      fixed_mbox << mail_raw
    end
  end

  if fixed_needed
    # replace original file for import but keep original safe aside
    FileUtils.mv(filename, backup_filename)
    FileUtils.mv(fixed_filename, filename)
    puts "  FIXED"
  else
    # useless, no fix was done
    FileUtils.rm(fixed_filename)
  end
end

# Builds a new Message-Id string for +mail+ from the current time, the
# process id, the current thread's object id, the mail's object id and a
# random number.
#
# inspired from ruby-mail library
def generate_message_id(mail)
  t = Time.now
  random_tag = sprintf('%x%x_%x%x%d%x',
                       t.to_i, t.tv_usec,
                       $$, Thread.current.object_id.abs, mail.object_id, rand(255))
  "<#{random_tag}@fixed.lists.ovirt.org.mail>"
end

new_message_ids = {}
if File.exist? NEW_MESSAGE_IDS_FILE
  # an empty YAML file loads as a falsy value; fall back to an empty cache
  new_message_ids = YAML.load_file(NEW_MESSAGE_IDS_FILE) || {}
end

Dir.new(ARCHIVE_DIR).each do |dir|
  fix_mbox($1, new_message_ids) if dir =~ /^(.*)\.mbox$/
end

puts "new_message_ids: #{new_message_ids.size}"
File.open(NEW_MESSAGE_IDS_FILE, "w") do |file|
  file.write new_message_ids.to_yaml
end