# Listing metadata (not code): 145 lines, 4.8 KiB, Ruby
namespace :sync_osp_status do
desc 'sync osp composite_score & view_num'
|
||
task :start => :environment do
|
||
|
||
#if File.exist?(pid_path)
|
||
# existing_pid = IO.read(pid_path).to_i
|
||
# begin
|
||
# Process.kill(0, existing_pid)
|
||
# raise(AlreadyRunningError, "Server is already running with PID #{existing_pid}")
|
||
# rescue Errno::ESRCH
|
||
# STDERR.puts("Removing stale PID file at #{pid_path}")
|
||
# FileUtils.rm(pid_path)
|
||
# end
|
||
#end
|
||
|
||
|
||
# run
|
||
# sync_composite_score
|
||
sync_popularity_and_composite_score
|
||
#Process.fork do
|
||
# pid = fork do
|
||
# Process.setsid
|
||
# STDIN.reopen('/dev/null')
|
||
# STDOUT.reopen('/dev/null')
|
||
# STDERR.reopen(STDOUT)
|
||
# run
|
||
# end
|
||
# FileUtils.mkdir_p(pid_dir)
|
||
# File.open(pid_path, 'w') do |file|
|
||
# file << pid
|
||
# end
|
||
#end
|
||
|
||
end
def sync_popularity_and_composite_score
|
||
batch_size = 1000
|
||
sleep_time = 86400
|
||
|
||
while true
|
||
cur_id = 0
|
||
begin
|
||
max_id = OpenSourceProject.maximum(:id)
|
||
puts("max: "+max_id.to_s)
|
||
while cur_id < max_id do
|
||
begin
|
||
OpenSourceProject.where("id > #{cur_id} AND id < #{cur_id+batch_size}").each do |osp|
|
||
|
||
# rs=[osp_id, view_num, count]
|
||
rs = RelativeMemoToOpenSourceProject.set_table_name(RelativeMemoToOpenSourceProject.getTableName(osp.id)).where(osp_id: osp.id).select("osp_id, SUM(view_num_crawled) AS total, COUNT(*) AS num")
|
||
unless rs.first.nil?
|
||
osp.update_attribute(:view_num, rs.first.total)
|
||
# osp.update_attribute(:composite_score, rs.first.num)
|
||
osp.update_attribute(:relative_memos_num, rs.first.num)
|
||
end
|
||
popularities = RelativeMemoToOpenSourceProject.set_table_name(RelativeMemoToOpenSourceProject.getTableName(osp.id)).where(osp_id: osp.id).group("YEAR(created_time),MONTH(created_time)").select("osp_id, YEAR(created_time) AS year, MONTH(created_time) AS month, COUNT(*) AS num")
|
||
popularities.each do |popularity|
|
||
unless popularity.year.nil?||popularity.month.nil?
|
||
year = popularity.year
|
||
month = popularity.month
|
||
num =popularity.num
|
||
if !OpenSourceProjectPopularity.where(osp_id: osp.id, year_col: year, month_col: month).exists?
|
||
OpenSourceProjectPopularity.create!(osp_id: osp.id, year_col: year, month_col: month, popularity: num)
|
||
else
|
||
OpenSourceProjectPopularity.where(osp_id: osp.id, year_col: year, month_col: month).first.update_attribute(:popularity, num)
|
||
end
|
||
end
|
||
end
|
||
cur_id=osp.id
|
||
puts("OpenSourceProject: "+cur_id.to_s)
|
||
end
|
||
rescue Exception => e
|
||
puts e
|
||
end
|
||
end
|
||
|
||
rescue Exception => e
|
||
puts e
|
||
end
|
||
|
||
puts 'sleep for '+sleep_time.to_s+' ms!!!'
|
||
sleep(86400)
|
||
|
||
#end
|
||
end
|
||
end
def sync_composite_score
|
||
rm_osp_count = Hash.new
|
||
osp_id_set = Set.new
|
||
|
||
range = 1..70
|
||
base_table='relative_memo_to_open_source_projects_'
|
||
range.each do |i|
|
||
|
||
####计算一段时间内的帖子新匹配多少项目以及查询项目集合
|
||
table = base_table+i.to_s
|
||
rm_records = RelativeMemoToOpenSourceProject.set_table_name(table).group(:relative_memo_id).select([:relative_memo_id, 'COUNT(*) AS num'])
|
||
osp_records = RelativeMemoToOpenSourceProject.set_table_name(table).group(:osp_id).select(:osp_id)
|
||
rm_records.each do |record|
|
||
######## rm_osp_count_local中没有则添加,有则更新
|
||
rm_osp_count[record.relative_memo_id] = ((rm_osp_count.key? record.relative_memo_id) ? (rm_osp_count[record.relative_memo_id]+record.num) : record.num)
|
||
end
|
||
|
||
osp_records.each do |record|
|
||
osp_id_set.add record.osp_id
|
||
end
|
||
|
||
puts i
|
||
end
|
||
|
||
osp_id_set.each do |osp_id|
|
||
osp = OpenSourceProject.find(osp_id)
|
||
composite_score =calc_composite_score(rm_osp_count, osp_id_set, osp)
|
||
puts composite_score
|
||
osp.update_attribute(:composite_score, composite_score)
|
||
end
|
||
end
def calc_composite_score hash, set, osp
|
||
score = 0
|
||
rm_ids=RelativeMemoToOpenSourceProject.set_table_name(RelativeMemoToOpenSourceProject.getTableName(osp.id)).where(osp_id: osp.id).select(:relative_memo_id)
|
||
rm_ids.each do |id|
|
||
#puts id.relative_memo_id
|
||
#puts 1/hash[id.relative_memo_id].to_f
|
||
#puts hash.key? id.relative_memo_id
|
||
score+=((hash.key? id.relative_memo_id) ? 1/hash[id.relative_memo_id].to_f : 0)
|
||
#puts score
|
||
end
|
||
score
|
||
end
def pid_path
|
||
File.join(pid_dir, pid_file)
|
||
end
def pid_file
|
||
@pid_file || 'sync_osp_status.pid'
|
||
end
def pid_dir
|
||
File.expand_path(@pid_dir || FileUtils.pwd)
|
||
end
end