ossean/trustie2/lib/tasks/sync_osp_status.rake

145 lines
4.8 KiB
Ruby
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

namespace :sync_osp_status do
desc 'sync osp composite_score & view_num'
task start: :environment do
  # Entry point of `rake sync_osp_status:start`. The Rails environment is
  # loaded first (the :environment prerequisite), then the sync loop runs in
  # the foreground of the rake process.

  # --- disabled: refuse to start while another instance owns the PID file ---
  # if File.exist?(pid_path)
  #   existing_pid = IO.read(pid_path).to_i
  #   begin
  #     Process.kill(0, existing_pid)
  #     raise(AlreadyRunningError, "Server is already running with PID #{existing_pid}")
  #   rescue Errno::ESRCH
  #     STDERR.puts("Removing stale PID file at #{pid_path}")
  #     FileUtils.rm(pid_path)
  #   end
  # end

  # --- disabled alternatives that were tried as the task body ---
  # run
  # sync_composite_score

  sync_popularity_and_composite_score

  # --- disabled: detach into a background process and record its PID ---
  # Process.fork do
  #   pid = fork do
  #     Process.setsid
  #     STDIN.reopen('/dev/null')
  #     STDOUT.reopen('/dev/null')
  #     STDERR.reopen(STDOUT)
  #     run
  #   end
  #   FileUtils.mkdir_p(pid_dir)
  #   File.open(pid_path, 'w') do |file|
  #     file << pid
  #   end
  # end
end
# Endless job: once per pass it walks every OpenSourceProject in id windows of
# `batch_size`, refreshes the project's view_num / relative_memos_num and its
# per-month OpenSourceProjectPopularity rows, then sleeps `sleep_time` seconds
# before starting the next pass. Never returns on its own.
#
# Errors are printed and skipped (best effort), but only StandardError is
# rescued so that Interrupt / SystemExit can still stop the process.
def sync_popularity_and_composite_score
  batch_size = 1000
  sleep_time = 86400 # seconds between two passes (one day)
  loop do
    begin
      # maximum(:id) is nil when the table is empty; to_i makes that "no work".
      max_id = OpenSourceProject.maximum(:id).to_i
      puts("max: " + max_id.to_s)
      cur_id = 0
      while cur_id < max_id
        upper_id = cur_id + batch_size
        begin
          OpenSourceProject.where('id > ? AND id <= ?', cur_id, upper_id).each do |osp|
            sync_osp_status_for(osp)
          end
        rescue StandardError => e
          puts e
        end
        # Advance by window, not by the last row seen: a window that is empty
        # (id gap) or that failed must not be retried forever.
        cur_id = upper_id
      end
    rescue StandardError => e
      puts e
    end
    puts 'sleep for ' + sleep_time.to_s + ' seconds!!!'
    sleep(sleep_time)
  end
end

# Syncs one project and logs its id; a failure is printed and confined to this
# project so the remaining projects of the batch are still processed.
def sync_osp_status_for(osp)
  sync_osp_memo_totals(osp)
  sync_osp_monthly_popularity(osp)
  puts("OpenSourceProject: " + osp.id.to_s)
rescue StandardError => e
  puts e
end

# Copies SUM(view_num_crawled) into view_num and COUNT(*) into
# relative_memos_num for the given project, read from its memo table.
def sync_osp_memo_totals(osp)
  table = RelativeMemoToOpenSourceProject.getTableName(osp.id)
  scope = RelativeMemoToOpenSourceProject.set_table_name(table).where(osp_id: osp.id)
  totals = scope.select("osp_id, SUM(view_num_crawled) AS total, COUNT(*) AS num").first
  return if totals.nil?

  osp.update_attribute(:view_num, totals.total)
  osp.update_attribute(:relative_memos_num, totals.num)
end

# Upserts one OpenSourceProjectPopularity row per (year, month) of the
# project's memos, storing the memo count of that month as `popularity`.
def sync_osp_monthly_popularity(osp)
  table = RelativeMemoToOpenSourceProject.getTableName(osp.id)
  scope = RelativeMemoToOpenSourceProject.set_table_name(table).where(osp_id: osp.id)
  monthly = scope.group("YEAR(created_time),MONTH(created_time)").select("osp_id, YEAR(created_time) AS year, MONTH(created_time) AS month, COUNT(*) AS num")
  monthly.each do |row|
    # Rows without a usable created_time group under NULL year/month; skip them.
    next if row.year.nil? || row.month.nil?

    key = { osp_id: osp.id, year_col: row.year, month_col: row.month }
    existing = OpenSourceProjectPopularity.where(key).first
    if existing
      existing.update_attribute(:popularity, row.num)
    else
      OpenSourceProjectPopularity.create!(key.merge(popularity: row.num))
    end
  end
end
# Recomputes composite_score for every project referenced by the 70
# relative_memo_to_open_source_projects_N tables.
#
# Pass 1 scans each table to (a) total, per relative_memo_id, the number of
# rows it has across all tables and (b) collect every osp_id seen.
# Pass 2 loads each collected project, asks calc_composite_score for its
# score, prints it and stores it.
def sync_composite_score
  memo_match_counts = Hash.new
  project_ids = Set.new
  table_prefix = 'relative_memo_to_open_source_projects_'

  1.upto(70) do |shard|
    # Count how many projects each memo matched, and gather the project set.
    table = "#{table_prefix}#{shard}"
    per_memo = RelativeMemoToOpenSourceProject.set_table_name(table).group(:relative_memo_id).select([:relative_memo_id, 'COUNT(*) AS num'])
    per_project = RelativeMemoToOpenSourceProject.set_table_name(table).group(:osp_id).select(:osp_id)

    per_memo.each do |row|
      # Insert the count on first sight, otherwise add to the running total.
      if memo_match_counts.key?(row.relative_memo_id)
        memo_match_counts[row.relative_memo_id] = memo_match_counts[row.relative_memo_id] + row.num
      else
        memo_match_counts[row.relative_memo_id] = row.num
      end
    end

    per_project.each { |row| project_ids.add(row.osp_id) }
    puts shard
  end

  project_ids.each do |project_id|
    project = OpenSourceProject.find(project_id)
    new_score = calc_composite_score(memo_match_counts, project_ids, project)
    puts new_score
    project.update_attribute(:composite_score, new_score)
  end
end
# Composite score of one project: the sum of 1 / hash[memo_id] over the
# project's memo rows whose relative_memo_id is a key of `hash`.
#
# hash - maps relative_memo_id to a count; ids that are not keys add nothing.
# set  - accepted but not read by this method.
# osp  - the project; only `osp.id` is used.
#
# Returns the accumulated score (the Integer 0 when no row contributes).
def calc_composite_score(hash, set, osp)
  table = RelativeMemoToOpenSourceProject.getTableName(osp.id)
  memo_rows = RelativeMemoToOpenSourceProject.set_table_name(table).where(osp_id: osp.id).select(:relative_memo_id)
  memo_rows.inject(0) do |total, row|
    memo_id = row.relative_memo_id
    if hash.key?(memo_id)
      total + 1 / hash[memo_id].to_f
    else
      total + 0
    end
  end
end
# Full path of the PID file: pid_file placed inside pid_dir.
def pid_path
  directory = pid_dir
  file_name = pid_file
  File.join(directory, file_name)
end
# Name of the PID file: @pid_file when it is set to a truthy value,
# otherwise the default 'sync_osp_status.pid'.
def pid_file
  return @pid_file if @pid_file

  'sync_osp_status.pid'
end
# Directory that holds the PID file, as an expanded absolute path:
# @pid_dir when set, otherwise the current working directory.
def pid_dir
  base_dir = @pid_dir ? @pid_dir : FileUtils.pwd
  File.expand_path(base_dir)
end
end