diff --git a/project_match/src/main/java/com/ossean/MergeProjectsOld.java b/project_match/src/main/java/com/ossean/MergeProjectsOld.java index 2454c60..2a81ffa 100644 --- a/project_match/src/main/java/com/ossean/MergeProjectsOld.java +++ b/project_match/src/main/java/com/ossean/MergeProjectsOld.java @@ -1,124 +1,124 @@ -package com.ossean; - -import java.util.List; - -import javax.annotation.Resource; - -import org.apache.log4j.Logger; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.beans.factory.annotation.Qualifier; -import org.springframework.context.ApplicationContext; -import org.springframework.context.support.ClassPathXmlApplicationContext; -import org.springframework.stereotype.Component; - -import com.ossean.databaseSource.DBSource; -import com.ossean.databaseSource.GatherDao; -import com.ossean.databaseSource.UpdateControlProjectsDao; -import com.ossean.model.GatherProjectsModel; -import com.ossean.util.MergeProjectNew; -import com.ossean.util.MergeProjectsUtil; - -@Component -public class MergeProjectsOld { - Logger logger = Logger.getLogger(this.getClass()); - @Resource - private DBSource dbSource; - @Resource - private GatherDao gatherDao; - @Resource - private UpdateControlProjectsDao updateControlDao; - - @Qualifier("mergeProjectNew") - @Autowired - private MergeProjectNew mergeProjectNew; - //private MergeProjectsUtil mergeProjectsUtil; - - private int startId1; - - private static String pointerTableName = "edd_pointers_test"; - private static String sourceTableName = "gather_projects_test"; - private static String targetTableName = "edd_relations_test1"; - - private static int batchSize = 500; - - //读指针 - public int readPointer(String table, String source, String target, int minId){ - int pointer = minId; - try { - pointer = dbSource.getPointer(table, source, target); - } catch(Exception e) { - logger.info("No such pointer! Create one"); - dbSource.insertPointer(table, source, target, pointer); - } - return pointer; - } - - public void start(String edd_relations_pointer){ - int minId = 1; - int maxId = Integer.MAX_VALUE; - if(edd_relations_pointer.equals("edd_relations")){ - minId = 1; - maxId = 200000; - }else if(edd_relations_pointer.equals("edd_relations_pointer2")){ - minId = 200001; - maxId = 400000; - }else if(edd_relations_pointer.equals("edd_relations_pointer3")){ - minId = 400001; - maxId = 600000; - }else if(edd_relations_pointer.equals("edd_relations_pointer4")){ - minId = 600001; - maxId = 800000; - }else if(edd_relations_pointer.equals("edd_relations_pointer_others")){ - minId = 800001; - }else{} - while(true){ - startId1 = readPointer(pointerTableName, sourceTableName, edd_relations_pointer, minId); - List gpmList1 = gatherDao.selectGPMList(sourceTableName, batchSize); - if(gpmList1.size() == 0){ - try { - logger.info("no increase item to be handled! handle update items"); - //查看有没有更新的记录需要处理 通过gather_projects表中的update_mark字段进行判断 为1表示更新了数据 -// List updateProjectList = updateControlDao.findAllItems(updateControlProjectsTableName); - //不考虑更新了 湛云没有使用update_control_projects表控制更新 -// if(updateProjectList.size() != 0){ -// //表示有需要更新的项目 -// for(UpdateControlProjects updateProject:updateProjectList){ -// //重新处理更新项目 -// GatherProjectsModel model = gatherDao.selectGPMById(sourceTableName, updateProject.getId()); -// mergeProjectsUtil.handleNewProject(model, true); -// updateControlDao.deleteOneItem(updateControlProjectsTableName, updateProject.getId()); -// } -// } - List updateProjectList = gatherDao.selectUpdateProjects(sourceTableName); - for(GatherProjectsModel model:updateProjectList){ - mergeProjectNew.handleNewProject(model, true); - gatherDao.updateUpdateMark(sourceTableName, 1, model.getId()); - } - logger.info("all update items have been solved. Sleep 1h"); - Thread.sleep(3600*1000); - continue; - } catch (InterruptedException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } - for(GatherProjectsModel model1:gpmList1){ - logger.info("handling project : " + model1.getId()); -// List sameProjects = gatherDao.selectGPMByUrlMD5(sourceTableName, model1.getHomepageMD5()); - - mergeProjectNew.handleNewProject(model1, false); - - //如果是对更新表中的数据进行操作 则不更新指针 - dbSource.updatePointer(pointerTableName, sourceTableName, edd_relations_pointer, model1.getId() + 1); - gatherDao.updateUpdateMark(sourceTableName, 1, model1.getId()); //需要更新update_mark字段 因为按顺序取的时候可能取到1的 - } - } - } - - - public static void main(String[] args){ - ApplicationContext applicationContext = new ClassPathXmlApplicationContext("classpath:/applicationContext*.xml"); - MergeProjectsOld Main = applicationContext.getBean(MergeProjectsOld.class); - Main.start(args[0]); - } -} +package com.ossean; + +import java.util.List; + +import javax.annotation.Resource; + +import org.apache.log4j.Logger; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.context.ApplicationContext; +import org.springframework.context.support.ClassPathXmlApplicationContext; +import org.springframework.stereotype.Component; + +import com.ossean.databaseSource.DBSource; +import com.ossean.databaseSource.GatherDao; +import com.ossean.databaseSource.UpdateControlProjectsDao; +import com.ossean.model.GatherProjectsModel; +import com.ossean.util.MergeProjectNew; +import com.ossean.util.MergeProjectsUtil; + +@Component +public class MergeProjectsOld { + Logger logger = Logger.getLogger(this.getClass()); + @Resource + private DBSource dbSource; + @Resource + private GatherDao gatherDao; + @Resource + private UpdateControlProjectsDao updateControlDao; + + @Qualifier("mergeProjectNew") + @Autowired + private MergeProjectNew mergeProjectNew; + //private MergeProjectsUtil mergeProjectsUtil; + + private int startId1; + + private static String pointerTableName = "edd_pointers_test"; + private static String sourceTableName = "gather_projects_test"; + private static String targetTableName = "edd_relations_test1"; + + private static int batchSize = 500; + + //读指针 + public int readPointer(String table, String source, String target, int minId){ + int pointer = minId; + try { + pointer = dbSource.getPointer(table, source, target); + } catch(Exception e) { + logger.info("No such pointer! Create one"); + dbSource.insertPointer(table, source, target, pointer); + } + return pointer; + } + + public void start(String edd_relations_pointer){ + int minId = 1; + int maxId = Integer.MAX_VALUE; + if(edd_relations_pointer.equals("edd_relations")){ + minId = 1; + maxId = 200000; + }else if(edd_relations_pointer.equals("edd_relations_pointer2")){ + minId = 200001; + maxId = 400000; + }else if(edd_relations_pointer.equals("edd_relations_pointer3")){ + minId = 400001; + maxId = 600000; + }else if(edd_relations_pointer.equals("edd_relations_pointer4")){ + minId = 600001; + maxId = 800000; + }else if(edd_relations_pointer.equals("edd_relations_pointer_others")){ + minId = 800001; + }else{} + while(true){ + startId1 = readPointer(pointerTableName, sourceTableName, edd_relations_pointer, minId); + List gpmList1 = gatherDao.selectGPMList(sourceTableName, batchSize); + if(gpmList1.size() == 0){ + try { + logger.info("no increase item to be handled! handle update items"); + //查看有没有更新的记录需要处理 通过gather_projects表中的update_mark字段进行判断 为1表示更新了数据 +// List updateProjectList = updateControlDao.findAllItems(updateControlProjectsTableName); + //不考虑更新了 湛云没有使用update_control_projects表控制更新 +// if(updateProjectList.size() != 0){ +// //表示有需要更新的项目 +// for(UpdateControlProjects updateProject:updateProjectList){ +// //重新处理更新项目 +// GatherProjectsModel model = gatherDao.selectGPMById(sourceTableName, updateProject.getId()); +// mergeProjectsUtil.handleNewProject(model, true); +// updateControlDao.deleteOneItem(updateControlProjectsTableName, updateProject.getId()); +// } +// } + List updateProjectList = gatherDao.selectUpdateProjects(sourceTableName,batchSize); + for(GatherProjectsModel model:updateProjectList){ + mergeProjectNew.handleNewProject(model, true); + gatherDao.updateUpdateMark(sourceTableName, 1, model.getId()); + } + logger.info("all update items have been solved. Sleep 1h"); + Thread.sleep(3600*1000); + continue; + } catch (InterruptedException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + for(GatherProjectsModel model1:gpmList1){ + logger.info("handling project : " + model1.getId()); +// List sameProjects = gatherDao.selectGPMByUrlMD5(sourceTableName, model1.getHomepageMD5()); + + mergeProjectNew.handleNewProject(model1, false); + + //如果是对更新表中的数据进行操作 则不更新指针 + dbSource.updatePointer(pointerTableName, sourceTableName, edd_relations_pointer, model1.getId() + 1); + gatherDao.updateUpdateMark(sourceTableName, 1, model1.getId()); //需要更新update_mark字段 因为按顺序取的时候可能取到1的 + } + } + } + + + public static void main(String[] args){ + ApplicationContext applicationContext = new ClassPathXmlApplicationContext("classpath:/applicationContext*.xml"); + MergeProjectsOld Main = applicationContext.getBean(MergeProjectsOld.class); + Main.start(args[0]); + } +} diff --git a/project_match/src/main/java/com/ossean/databaseDest/DBDest.java b/project_match/src/main/java/com/ossean/databaseDest/DBDest.java index d4fa82b..5304e9d 100644 --- a/project_match/src/main/java/com/ossean/databaseDest/DBDest.java +++ b/project_match/src/main/java/com/ossean/databaseDest/DBDest.java @@ -1,57 +1,57 @@ -package com.ossean.databaseDest; - -import org.apache.ibatis.annotations.Delete; -import org.apache.ibatis.annotations.Insert; -import org.apache.ibatis.annotations.Param; -import org.apache.ibatis.annotations.Select; - -import com.ossean.model.OpenSourceProject; -import com.ossean.model.Taggings; - - - -public interface DBDest { - - //删除open_source_projects表中对应id的数据 - @Delete("delete from ${table} where id=#{id}") - public void deleteOpenSourceProjectsItem(@Param("table") String table, @Param("id") int id); - - //查找open_source_projects表对应id的记录 - @Select("select * from ${table} where id=#{id}") - public OpenSourceProject selectOpenSourceProjectsItem(@Param("table") String table, @Param("id") int id); - - - /** - * 下面是transferProjects程序的函数 - */ - //向open_source_projects表中插入对象数据 - @Insert("insert into ${table} (id,name,description," - + "url,language,category," - + "source,created_time,updated_time," - + "tags,tags_for_search,synonyms,update_mark) values (#{model.id},#{model.name},#{model.description}," - + "#{model.url},,#{model.language}," - + "#{model.category},#{model.source}," - + "#{model.created_time},#{model.updated_time},#{model.tags}," - + "#{model.tags_for_search},#{model.synonyms},#{model.update_mark})") - public void insertOsp(@Param("table") String table, @Param("model") OpenSourceProject model); - - //查找刚刚插入open_source_projects表中的记录id - @Select("select id from ${table} where name=#{model.name} order by id desc limit 1") - public int getAutoIncrementOspId(@Param("table") String table, @Param("model") OpenSourceProject model); - - //插入tag - @Insert("insert ignore into ${table} (name) values (#{name})") - public void insertTag(@Param("table") String table, @Param("name") String name); - - //根据tag name查找id - @Select("select id from ${table} where name=#{name}") - public int selectTagIdByName(@Param("table") String table, @Param("name") String name); - - //插入tagging - @Insert("insert ignore into ${table} (tag_id,taggable_id,taggable_type,context,created_at,disagree_num) values (#{model.tag_id},#{model.taggable_id},#{model.taggable_type},#{model.context},#{model.created_at},#{model.disagree_num})") - public void insertTagging(@Param("table") String table, @Param("model") Taggings model); - - //删除对应ospId的匹配结果 - @Delete("delete from ${table} where osp_id = ${ospId}") - public void deleteMatchResult(@Param("ospId") int ospId, @Param("table") String table); +package com.ossean.databaseDest; + +import org.apache.ibatis.annotations.Delete; +import org.apache.ibatis.annotations.Insert; +import org.apache.ibatis.annotations.Param; +import org.apache.ibatis.annotations.Select; + +import com.ossean.model.OpenSourceProject; +import com.ossean.model.Taggings; + + + +public interface DBDest { + + //删除open_source_projects表中对应id的数据 + @Delete("delete from ${table} where id=#{id}") + public void deleteOpenSourceProjectsItem(@Param("table") String table, @Param("id") int id); + + //查找open_source_projects表对应id的记录 + @Select("select * from ${table} where id=#{id}") + public OpenSourceProject selectOpenSourceProjectsItem(@Param("table") String table, @Param("id") int id); + + + /** + * 下面是transferProjects程序的函数 + */ + //向open_source_projects表中插入对象数据 + @Insert("insert into ${table} (id,name,description," + + "url,url_md5,language,category,homepage,license," + + "source,created_time,updated_time,extracted_time," + + "tags,tags_for_search,synonyms,update_mark) values (#{model.id},#{model.name},#{model.description}," + + "#{model.url},#{model.url_md5},#{model.language}," + + "#{model.category},#{model.homepage},#{model.license},#{model.source}," + + "#{model.created_time},#{model.updated_time},#{model.extracted_time},#{model.tags}," + + "#{model.tags_for_search},#{model.synonyms},#{model.update_mark})") + public void insertOsp(@Param("table") String table, @Param("model") OpenSourceProject model); + + //查找刚刚插入open_source_projects表中的记录id + @Select("select id from ${table} where name=#{model.name} order by id desc limit 1") + public int getAutoIncrementOspId(@Param("table") String table, @Param("model") OpenSourceProject model); + + //插入tag + @Insert("insert ignore into ${table} (name) values (#{name})") + public void insertTag(@Param("table") String table, @Param("name") String name); + + //根据tag name查找id + @Select("select id from ${table} where name=#{name}") + public int selectTagIdByName(@Param("table") String table, @Param("name") String name); + + //插入tagging + @Insert("insert ignore into ${table} (tag_id,taggable_id,taggable_type,context,created_at,disagree_num) values (#{model.tag_id},#{model.taggable_id},#{model.taggable_type},#{model.context},#{model.created_at},#{model.disagree_num})") + public void insertTagging(@Param("table") String table, @Param("model") Taggings model); + + //删除对应ospId的匹配结果 + @Delete("delete from ${table} where osp_id = ${ospId}") + public void deleteMatchResult(@Param("ospId") int ospId, @Param("table") String table); } \ No newline at end of file