Merge branch 'tmp'

This commit is contained in:
zhanyun 2016-11-18 13:50:12 +08:00
commit 165f6ff28a
2 changed files with 180 additions and 180 deletions

View File

@ -1,124 +1,124 @@
package com.ossean;
import java.util.List;
import javax.annotation.Resource;
import org.apache.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.springframework.stereotype.Component;
import com.ossean.databaseSource.DBSource;
import com.ossean.databaseSource.GatherDao;
import com.ossean.databaseSource.UpdateControlProjectsDao;
import com.ossean.model.GatherProjectsModel;
import com.ossean.util.MergeProjectNew;
import com.ossean.util.MergeProjectsUtil;
/**
 * Long-running worker that feeds rows of the gathered-projects table into
 * {@link MergeProjectNew#handleNewProject}.
 *
 * <p>Each cycle of {@link #start(String)} reads (or creates) the progress pointer, fetches a
 * batch of projects and hands every one of them to the merger. When the batch is empty it
 * re-processes the projects returned by {@code GatherDao.selectUpdateProjects} and then sleeps
 * for one hour.
 */
@Component
public class MergeProjectsOld {
    final Logger logger = Logger.getLogger(this.getClass());

    @Resource
    private DBSource dbSource;
    @Resource
    private GatherDao gatherDao;
    // Injected, but no active code in this class uses it (the update-control flow was retired).
    @Resource
    private UpdateControlProjectsDao updateControlDao;
    @Qualifier("mergeProjectNew")
    @Autowired
    private MergeProjectNew mergeProjectNew;

    // Pointer value returned by the latest readPointer() call; assigned in start() and not
    // read anywhere else in this class.
    private int startId1;

    private static final String pointerTableName = "edd_pointers_test";
    private static final String sourceTableName = "gather_projects_test";
    // Not referenced by any code in this class.
    private static final String targetTableName = "edd_relations_test1";
    private static final int batchSize = 500;
    // Idle time between polls when there is no new project to handle (one hour).
    private static final long IDLE_SLEEP_MILLIS = 3600L * 1000L;

    /**
     * Reads the progress pointer, creating it when the lookup fails.
     *
     * @param table  pointer table name
     * @param source source table name stored with the pointer
     * @param target target/pointer name stored with the pointer
     * @param minId  value used to seed a newly created pointer
     * @return the stored pointer, or {@code minId} when a new pointer row had to be inserted
     */
    public int readPointer(String table, String source, String target, int minId) {
        int pointer = minId;
        try {
            pointer = dbSource.getPointer(table, source, target);
        } catch (Exception e) {
            // Every lookup failure is treated as "pointer does not exist yet".
            // NOTE(review): this also hides unrelated database errors - confirm that is intended.
            logger.info("No such pointer! Create one");
            dbSource.insertPointer(table, source, target, pointer);
        }
        return pointer;
    }

    /**
     * Runs the merge loop until the thread is interrupted.
     *
     * @param edd_relations_pointer name of the pointer this worker owns; it also selects the
     *                              id used to seed a missing pointer row
     */
    public void start(String edd_relations_pointer) {
        int minId = resolveMinId(edd_relations_pointer);
        while (true) {
            // The call creates the pointer row when it is missing, so it must stay even though
            // the returned value is only stored.
            startId1 = readPointer(pointerTableName, sourceTableName, edd_relations_pointer, minId);
            List<GatherProjectsModel> gpmList1 = gatherDao.selectGPMList(sourceTableName, batchSize);
            if (gpmList1.isEmpty()) {
                try {
                    logger.info("no increase item to be handled! handle update items");
                    // Updated records are taken from the source table itself (per the original
                    // note, flagged through its update_mark column). The older flow based on the
                    // update_control_projects table is no longer used.
                    List<GatherProjectsModel> updateProjectList = gatherDao.selectUpdateProjects(sourceTableName);
                    for (GatherProjectsModel model : updateProjectList) {
                        mergeProjectNew.handleNewProject(model, true);
                        gatherDao.updateUpdateMark(sourceTableName, 1, model.getId());
                    }
                    logger.info("all update items have been solved. Sleep 1h");
                    Thread.sleep(IDLE_SLEEP_MILLIS);
                    continue;
                } catch (InterruptedException e) {
                    // Restore the flag and stop: swallowing the interrupt would make the worker
                    // impossible to shut down cleanly.
                    Thread.currentThread().interrupt();
                    logger.warn("Interrupted while idle; stopping merge loop", e);
                    return;
                }
            }
            for (GatherProjectsModel model1 : gpmList1) {
                logger.info("handling project : " + model1.getId());
                mergeProjectNew.handleNewProject(model1, false);
                // The pointer only advances for newly gathered rows, never for re-processed updates.
                dbSource.updatePointer(pointerTableName, sourceTableName, edd_relations_pointer, model1.getId() + 1);
                // update_mark is written as well because a sequential fetch may return rows
                // whose mark is already 1 (per the original note).
                gatherDao.updateUpdateMark(sourceTableName, 1, model1.getId());
            }
        }
    }

    /**
     * Maps a pointer name to the first id of its partition.
     *
     * <p>The partitions are 1-200000, 200001-400000, 400001-600000, 600001-800000 and
     * 800001 onwards. Only the lower bound is used by this class; the upper bounds were
     * computed by the previous code but never read. Unknown names start at 1.
     */
    private static int resolveMinId(String pointerName) {
        if (pointerName.equals("edd_relations")) {
            return 1;
        } else if (pointerName.equals("edd_relations_pointer2")) {
            return 200001;
        } else if (pointerName.equals("edd_relations_pointer3")) {
            return 400001;
        } else if (pointerName.equals("edd_relations_pointer4")) {
            return 600001;
        } else if (pointerName.equals("edd_relations_pointer_others")) {
            return 800001;
        }
        return 1;
    }

    /**
     * Boots the Spring context and starts the worker.
     *
     * @param args {@code args[0]} is the pointer name passed to {@link #start(String)}
     * @throws IllegalArgumentException when no pointer name is supplied
     */
    public static void main(String[] args) {
        // Fail before the (expensive) context start-up, with a message that says what is missing.
        if (args == null || args.length == 0) {
            throw new IllegalArgumentException(
                    "Usage: MergeProjectsOld <pointer-name> (e.g. edd_relations, edd_relations_pointer2)");
        }
        ApplicationContext applicationContext = new ClassPathXmlApplicationContext("classpath:/applicationContext*.xml");
        MergeProjectsOld worker = applicationContext.getBean(MergeProjectsOld.class);
        worker.start(args[0]);
    }
}
package com.ossean;
import java.util.List;
import javax.annotation.Resource;
import org.apache.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.springframework.stereotype.Component;
import com.ossean.databaseSource.DBSource;
import com.ossean.databaseSource.GatherDao;
import com.ossean.databaseSource.UpdateControlProjectsDao;
import com.ossean.model.GatherProjectsModel;
import com.ossean.util.MergeProjectNew;
import com.ossean.util.MergeProjectsUtil;
/**
 * Long-running worker that feeds rows of the gathered-projects table into
 * {@link MergeProjectNew#handleNewProject}.
 *
 * <p>Each cycle of {@link #start(String)} reads (or creates) the progress pointer, fetches a
 * batch of projects and hands every one of them to the merger. When the batch is empty it
 * re-processes one batch of the projects returned by {@code GatherDao.selectUpdateProjects}
 * and then sleeps for one hour.
 */
@Component
public class MergeProjectsOld {
    final Logger logger = Logger.getLogger(this.getClass());

    @Resource
    private DBSource dbSource;
    @Resource
    private GatherDao gatherDao;
    // Injected, but no active code in this class uses it (the update-control flow was retired).
    @Resource
    private UpdateControlProjectsDao updateControlDao;
    @Qualifier("mergeProjectNew")
    @Autowired
    private MergeProjectNew mergeProjectNew;

    // Pointer value returned by the latest readPointer() call; assigned in start() and not
    // read anywhere else in this class.
    private int startId1;

    private static final String pointerTableName = "edd_pointers_test";
    private static final String sourceTableName = "gather_projects_test";
    // Not referenced by any code in this class.
    private static final String targetTableName = "edd_relations_test1";
    private static final int batchSize = 500;
    // Idle time between polls when there is no new project to handle (one hour).
    private static final long IDLE_SLEEP_MILLIS = 3600L * 1000L;

    /**
     * Reads the progress pointer, creating it when the lookup fails.
     *
     * @param table  pointer table name
     * @param source source table name stored with the pointer
     * @param target target/pointer name stored with the pointer
     * @param minId  value used to seed a newly created pointer
     * @return the stored pointer, or {@code minId} when a new pointer row had to be inserted
     */
    public int readPointer(String table, String source, String target, int minId) {
        int pointer = minId;
        try {
            pointer = dbSource.getPointer(table, source, target);
        } catch (Exception e) {
            // Every lookup failure is treated as "pointer does not exist yet".
            // NOTE(review): this also hides unrelated database errors - confirm that is intended.
            logger.info("No such pointer! Create one");
            dbSource.insertPointer(table, source, target, pointer);
        }
        return pointer;
    }

    /**
     * Runs the merge loop until the thread is interrupted.
     *
     * @param edd_relations_pointer name of the pointer this worker owns; it also selects the
     *                              id used to seed a missing pointer row
     */
    public void start(String edd_relations_pointer) {
        int minId = resolveMinId(edd_relations_pointer);
        while (true) {
            // The call creates the pointer row when it is missing, so it must stay even though
            // the returned value is only stored.
            startId1 = readPointer(pointerTableName, sourceTableName, edd_relations_pointer, minId);
            List<GatherProjectsModel> gpmList1 = gatherDao.selectGPMList(sourceTableName, batchSize);
            if (gpmList1.isEmpty()) {
                try {
                    logger.info("no increase item to be handled! handle update items");
                    // Updated records are taken from the source table itself (per the original
                    // note, flagged through its update_mark column). The older flow based on the
                    // update_control_projects table is no longer used.
                    // NOTE(review): the query is limited to one batch, so updates beyond
                    // batchSize presumably wait for the next hourly cycle - confirm.
                    List<GatherProjectsModel> updateProjectList = gatherDao.selectUpdateProjects(sourceTableName,batchSize);
                    for (GatherProjectsModel model : updateProjectList) {
                        mergeProjectNew.handleNewProject(model, true);
                        gatherDao.updateUpdateMark(sourceTableName, 1, model.getId());
                    }
                    logger.info("all update items have been solved. Sleep 1h");
                    Thread.sleep(IDLE_SLEEP_MILLIS);
                    continue;
                } catch (InterruptedException e) {
                    // Restore the flag and stop: swallowing the interrupt would make the worker
                    // impossible to shut down cleanly.
                    Thread.currentThread().interrupt();
                    logger.warn("Interrupted while idle; stopping merge loop", e);
                    return;
                }
            }
            for (GatherProjectsModel model1 : gpmList1) {
                logger.info("handling project : " + model1.getId());
                mergeProjectNew.handleNewProject(model1, false);
                // The pointer only advances for newly gathered rows, never for re-processed updates.
                dbSource.updatePointer(pointerTableName, sourceTableName, edd_relations_pointer, model1.getId() + 1);
                // update_mark is written as well because a sequential fetch may return rows
                // whose mark is already 1 (per the original note).
                gatherDao.updateUpdateMark(sourceTableName, 1, model1.getId());
            }
        }
    }

    /**
     * Maps a pointer name to the first id of its partition.
     *
     * <p>The partitions are 1-200000, 200001-400000, 400001-600000, 600001-800000 and
     * 800001 onwards. Only the lower bound is used by this class; the upper bounds were
     * computed by the previous code but never read. Unknown names start at 1.
     */
    private static int resolveMinId(String pointerName) {
        if (pointerName.equals("edd_relations")) {
            return 1;
        } else if (pointerName.equals("edd_relations_pointer2")) {
            return 200001;
        } else if (pointerName.equals("edd_relations_pointer3")) {
            return 400001;
        } else if (pointerName.equals("edd_relations_pointer4")) {
            return 600001;
        } else if (pointerName.equals("edd_relations_pointer_others")) {
            return 800001;
        }
        return 1;
    }

    /**
     * Boots the Spring context and starts the worker.
     *
     * @param args {@code args[0]} is the pointer name passed to {@link #start(String)}
     * @throws IllegalArgumentException when no pointer name is supplied
     */
    public static void main(String[] args) {
        // Fail before the (expensive) context start-up, with a message that says what is missing.
        if (args == null || args.length == 0) {
            throw new IllegalArgumentException(
                    "Usage: MergeProjectsOld <pointer-name> (e.g. edd_relations, edd_relations_pointer2)");
        }
        ApplicationContext applicationContext = new ClassPathXmlApplicationContext("classpath:/applicationContext*.xml");
        MergeProjectsOld worker = applicationContext.getBean(MergeProjectsOld.class);
        worker.start(args[0]);
    }
}

View File

@ -1,57 +1,57 @@
package com.ossean.databaseDest;
import org.apache.ibatis.annotations.Delete;
import org.apache.ibatis.annotations.Insert;
import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.annotations.Select;
import com.ossean.model.OpenSourceProject;
import com.ossean.model.Taggings;
public interface DBDest {
/**
 * Deletes the row with the given id from the given table (per the original note, the
 * open_source_projects table).
 *
 * @param table table name; inserted into the SQL text as-is, so it must come from trusted code
 * @param id    id of the row to delete
 */
@Delete("delete from ${table} where id=#{id}")
void deleteOpenSourceProjectsItem(
        @Param("table") String table,
        @Param("id") int id);
/**
 * Loads the row with the given id from the given table (per the original note, the
 * open_source_projects table).
 *
 * @param table table name; inserted into the SQL text as-is, so it must come from trusted code
 * @param id    id of the row to load
 * @return the row mapped to an {@link OpenSourceProject}
 */
@Select("select * from ${table} where id=#{id}")
OpenSourceProject selectOpenSourceProjectsItem(
        @Param("table") String table,
        @Param("id") int id);
// ---- The methods below are used by the transferProjects program ----

/**
 * Inserts one project into the given table (per the original note, the
 * open_source_projects table).
 *
 * <p>The VALUES list previously contained an empty slot ({@code #{model.url},,#{model.language}}),
 * which is invalid SQL; it now supplies exactly one value for each of the 13 listed columns.
 *
 * @param table table name; inserted into the SQL text as-is, so it must come from trusted code
 * @param model project whose properties are bound to the column values
 */
@Insert("insert into ${table} (id,name,description,"
+ "url,language,category,"
+ "source,created_time,updated_time,"
+ "tags,tags_for_search,synonyms,update_mark) values (#{model.id},#{model.name},#{model.description},"
+ "#{model.url},#{model.language},"
+ "#{model.category},#{model.source},"
+ "#{model.created_time},#{model.updated_time},#{model.tags},"
+ "#{model.tags_for_search},#{model.synonyms},#{model.update_mark})")
public void insertOsp(@Param("table") String table, @Param("model") OpenSourceProject model);
/**
 * Returns the highest id among the rows whose name equals {@code model.name}; the original
 * note describes this as looking up the id of the record that was just inserted.
 *
 * @param table table name; inserted into the SQL text as-is, so it must come from trusted code
 * @param model project whose {@code name} is used for the lookup
 * @return the matching id
 */
// NOTE(review): presumably fails when no row matches, since the return type is a primitive - confirm.
@Select("select id from ${table} where name=#{model.name} order by id desc limit 1")
int getAutoIncrementOspId(
        @Param("table") String table,
        @Param("model") OpenSourceProject model);
/**
 * Inserts a tag name using {@code insert ignore}.
 *
 * @param table table name; inserted into the SQL text as-is, so it must come from trusted code
 * @param name  tag name to insert
 */
@Insert("insert ignore into ${table} (name) values (#{name})")
void insertTag(
        @Param("table") String table,
        @Param("name") String name);
/**
 * Looks up the id of the tag with the given name.
 *
 * @param table table name; inserted into the SQL text as-is, so it must come from trusted code
 * @param name  tag name to look up
 * @return the id of the matching tag
 */
// NOTE(review): presumably fails when the tag is missing, since the return type is a primitive - confirm.
@Select("select id from ${table} where name=#{name}")
int selectTagIdByName(
        @Param("table") String table,
        @Param("name") String name);
/**
 * Inserts a tagging row using {@code insert ignore}.
 *
 * @param table table name; inserted into the SQL text as-is, so it must come from trusted code
 * @param model tagging whose properties are bound to the column values
 */
@Insert("insert ignore into ${table} "
        + "(tag_id,taggable_id,taggable_type,context,created_at,disagree_num) "
        + "values (#{model.tag_id},#{model.taggable_id},#{model.taggable_type},"
        + "#{model.context},#{model.created_at},#{model.disagree_num})")
void insertTagging(
        @Param("table") String table,
        @Param("model") Taggings model);
/**
 * Deletes the match results that belong to the given project id.
 *
 * <p>The id is bound as a statement parameter ({@code #{ospId}}), like every other value in
 * this mapper, instead of being pasted into the SQL text.
 *
 * @param ospId value compared against the {@code osp_id} column
 * @param table table name; inserted into the SQL text as-is, so it must come from trusted code
 */
@Delete("delete from ${table} where osp_id = #{ospId}")
public void deleteMatchResult(@Param("ospId") int ospId, @Param("table") String table);
package com.ossean.databaseDest;
import org.apache.ibatis.annotations.Delete;
import org.apache.ibatis.annotations.Insert;
import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.annotations.Select;
import com.ossean.model.OpenSourceProject;
import com.ossean.model.Taggings;
/**
 * MyBatis mapper for the destination database.
 *
 * <p>Every method takes the table name as a parameter and substitutes it into the SQL text with
 * {@code ${table}}, so table names must come from trusted code. All other values are bound as
 * statement parameters with {@code #{...}}.
 */
public interface DBDest {

    /**
     * Deletes the row with the given id from the given table (per the original note, the
     * open_source_projects table).
     *
     * @param table table name
     * @param id    id of the row to delete
     */
    @Delete("delete from ${table} where id=#{id}")
    public void deleteOpenSourceProjectsItem(@Param("table") String table, @Param("id") int id);

    /**
     * Loads the row with the given id from the given table (per the original note, the
     * open_source_projects table).
     *
     * @param table table name
     * @param id    id of the row to load
     * @return the row mapped to an {@link OpenSourceProject}
     */
    @Select("select * from ${table} where id=#{id}")
    public OpenSourceProject selectOpenSourceProjectsItem(@Param("table") String table, @Param("id") int id);

    // ---- The methods below are used by the transferProjects program ----

    /**
     * Inserts one project into the given table (per the original note, the
     * open_source_projects table); 17 columns, 17 bound values.
     *
     * @param table table name
     * @param model project whose properties are bound to the column values
     */
    @Insert("insert into ${table} (id,name,description,"
            + "url,url_md5,language,category,homepage,license,"
            + "source,created_time,updated_time,extracted_time,"
            + "tags,tags_for_search,synonyms,update_mark) values (#{model.id},#{model.name},#{model.description},"
            + "#{model.url},#{model.url_md5},#{model.language},"
            + "#{model.category},#{model.homepage},#{model.license},#{model.source},"
            + "#{model.created_time},#{model.updated_time},#{model.extracted_time},#{model.tags},"
            + "#{model.tags_for_search},#{model.synonyms},#{model.update_mark})")
    public void insertOsp(@Param("table") String table, @Param("model") OpenSourceProject model);

    /**
     * Returns the highest id among the rows whose name equals {@code model.name}; the original
     * note describes this as looking up the id of the record that was just inserted.
     *
     * @param table table name
     * @param model project whose {@code name} is used for the lookup
     * @return the matching id
     */
    // NOTE(review): presumably fails when no row matches, since the return type is a primitive - confirm.
    @Select("select id from ${table} where name=#{model.name} order by id desc limit 1")
    public int getAutoIncrementOspId(@Param("table") String table, @Param("model") OpenSourceProject model);

    /**
     * Inserts a tag name using {@code insert ignore}.
     *
     * @param table table name
     * @param name  tag name to insert
     */
    @Insert("insert ignore into ${table} (name) values (#{name})")
    public void insertTag(@Param("table") String table, @Param("name") String name);

    /**
     * Looks up the id of the tag with the given name.
     *
     * @param table table name
     * @param name  tag name to look up
     * @return the id of the matching tag
     */
    // NOTE(review): presumably fails when the tag is missing, since the return type is a primitive - confirm.
    @Select("select id from ${table} where name=#{name}")
    public int selectTagIdByName(@Param("table") String table, @Param("name") String name);

    /**
     * Inserts a tagging row using {@code insert ignore}.
     *
     * @param table table name
     * @param model tagging whose properties are bound to the column values
     */
    @Insert("insert ignore into ${table} (tag_id,taggable_id,taggable_type,context,created_at,disagree_num) values (#{model.tag_id},#{model.taggable_id},#{model.taggable_type},#{model.context},#{model.created_at},#{model.disagree_num})")
    public void insertTagging(@Param("table") String table, @Param("model") Taggings model);

    /**
     * Deletes the match results that belong to the given project id.
     *
     * <p>The id is bound as a statement parameter ({@code #{ospId}}), like every other value in
     * this mapper, instead of being pasted into the SQL text.
     *
     * @param ospId value compared against the {@code osp_id} column
     * @param table table name
     */
    @Delete("delete from ${table} where osp_id = #{ospId}")
    public void deleteMatchResult(@Param("ospId") int ospId, @Param("table") String table);
}