gather_program bugs
This commit is contained in:
parent
91d339344e
commit
a0d3875a7c
|
@ -5,9 +5,9 @@
|
|||
<entry key="pointerTableName">pointers</entry>
|
||||
<entry key="sourceTableName">oschina_project,openhub_project,sourceforge_project,apache,freecode_project</entry>
|
||||
<entry key="targetTableName">gather_projects</entry>
|
||||
<entry key="sourceFields">id,name,tags,url,url_md5,description,language,source,license,homepage,now(),extracted_time,category,created_time</entry>
|
||||
<entry key="sourceFields">id,name,tags,url,url_md5,description,language,source,license,homepage,now(),extracted_time,created_time</entry>
|
||||
<!-- <entry key="targetFields">url,crawled_time,tags,license,name,description,language,platform,source,registered_time,urlMD5</entry> -->
|
||||
<entry key="targetFields">id,name,tags,url,url_md5,description,language,source,license,homepage,updated_time,extracted_time,category,created_time</entry>
|
||||
<entry key="targetFields">id,name,tags,url,url_md5,description,language,source,license,homepage,updated_time,extracted_time,created_time</entry>
|
||||
<entry key="waitDataTime">3600000</entry>
|
||||
<entry key="andWhere"> </entry>
|
||||
<entry key="idsBegin">1</entry>
|
||||
|
|
|
@ -5,9 +5,9 @@
|
|||
<entry key="pointerTableName">pointers</entry>
|
||||
<entry key="sourceTableName">job_requirements</entry>
|
||||
<entry key="targetTableName">job_requirements</entry>
|
||||
<entry key="sourceFields">id,title,content,created_time,type,tags,url,url_md5,author,author_url,category,view_num,review_num,extracted_time,source,similar_position,work_place,experience,salary,scale,domain,finance,style,education</entry>
|
||||
<entry key="sourceFields">id,title,content,created_time,type,tags,url,url_md5,author,author_url,view_num,review_num,extracted_time,source,similar_position,work_place,experience,salary,scale,domain,finance,style,education</entry>
|
||||
<!-- <entry key="targetFields">url,crawled_time,tags,license,name,description,language,platform,source,registered_time,urlMD5</entry> -->
|
||||
<entry key="targetFields">id,title,content,created_time,type,tags,url,url_md5,author,author_url,category,view_num,review_num,extracted_time,source,similar_position,work_place,experience,salary,scale,domain,finance,style,education</entry>
|
||||
<entry key="targetFields">id,title,content,created_time,type,tags,url,url_md5,author,author_url,view_num,review_num,extracted_time,source,similar_position,work_place,experience,salary,scale,domain,finance,style,education</entry>
|
||||
<entry key="waitDataTime">3600000</entry>
|
||||
<entry key="andWhere"> </entry>
|
||||
<entry key="idsBegin">1</entry>
|
||||
|
|
|
@ -17,7 +17,6 @@ public class GatherProject {
|
|||
private String homepage;
|
||||
private String updated_time;
|
||||
private String extracted_time;
|
||||
private String category;
|
||||
private int composite_score;
|
||||
private int relative_memos_num;
|
||||
private String created_time;
|
||||
|
@ -111,12 +110,6 @@ public class GatherProject {
|
|||
public void setExtracted_time(String extracted_time) {
|
||||
this.extracted_time = extracted_time;
|
||||
}
|
||||
public String getCategory() {
|
||||
return category;
|
||||
}
|
||||
public void setCategory(String category) {
|
||||
this.category = category;
|
||||
}
|
||||
public int getComposite_score() {
|
||||
return composite_score;
|
||||
}
|
||||
|
|
|
@ -11,7 +11,6 @@ public class JobRequirement {
|
|||
private String url_md5 = null;
|
||||
private String author = null;
|
||||
private String author_url = null;
|
||||
private String category = null;
|
||||
private int view_num = 0;
|
||||
private int review_num = 0;
|
||||
private String extracted_time = null;
|
||||
|
@ -106,14 +105,6 @@ public class JobRequirement {
|
|||
this.author_url = author_url;
|
||||
}
|
||||
|
||||
public String getCategory() {
|
||||
return category;
|
||||
}
|
||||
|
||||
public void setCategory(String category) {
|
||||
this.category = category;
|
||||
}
|
||||
|
||||
public int getView_num() {
|
||||
return view_num;
|
||||
}
|
||||
|
|
|
@ -134,7 +134,7 @@ public class GatherThread implements Runnable {
|
|||
String urlMD5 = model.getUrl_md5();// 通过urlMD5判断是不是已经存在该帖子
|
||||
// 是否更新
|
||||
int postId = 0;
|
||||
RelativeMemo samePost = gatherDao.findPostByUrlMD5(
|
||||
RelativeMemo samePost = targetDao.findPostByUrlMD5(
|
||||
conf.getTargetTableName(), urlMD5);
|
||||
if (samePost == null) {
|
||||
// 不存在 插入
|
||||
|
@ -199,10 +199,10 @@ public class GatherThread implements Runnable {
|
|||
conf.getAndWhere());
|
||||
for (int i = 0; i < dataGet.size(); i++) {
|
||||
GatherProject model = dataGet.get(i);
|
||||
String urlMD5 = model.getUrl_md5();// 通过urlMD5判断是不是已经存在该帖子
|
||||
String urlMD5 = model.getUrl_md5();// 通过urlMD5判断是不是已经存在该项目
|
||||
// 是否更新
|
||||
int prjId = 0;
|
||||
GatherProject samePrj = gatherDao.findPrjByUrlMD5(
|
||||
GatherProject samePrj = targetDao.findPrjByUrlMD5(
|
||||
conf.getTargetTableName(), urlMD5);
|
||||
if (samePrj == null) {
|
||||
// 不存在 插入
|
||||
|
@ -270,7 +270,7 @@ public class GatherThread implements Runnable {
|
|||
String urlMD5 = model.getUrl_md5();// 通过urlMD5判断是不是已经存在该帖子
|
||||
// 是否更新
|
||||
int postId = 0;
|
||||
JobRequirement sameJob = gatherDao.findJobByUrlMD5(
|
||||
JobRequirement sameJob = targetDao.findJobByUrlMD5(
|
||||
conf.getTargetTableName(), urlMD5);
|
||||
if (sameJob == null) {
|
||||
// 不存在 插入
|
||||
|
|
|
@ -75,21 +75,6 @@ public interface GatherDao {
|
|||
@Param("sourceTableName") String sourceTableName,
|
||||
@Param("targetTableName") String targetTableName);
|
||||
|
||||
// 根据urlMD5码找到RelativeMemo对象
|
||||
@Select("select * from ${table} where url_md5=#{urlMD5}")
|
||||
public RelativeMemo findPostByUrlMD5(@Param("table") String table,
|
||||
@Param("urlMD5") String urlMD5);
|
||||
|
||||
// Look up the GatherProject whose url_md5 equals the given urlMD5
|
||||
@Select("select * from ${table} where url_md5=#{urlMD5}")
|
||||
public GatherProject findPrjByUrlMD5(@Param("table") String table,
|
||||
@Param("urlMD5") String urlMD5);
|
||||
|
||||
// 根据urlMD5码找到JobRequirement对象
|
||||
@Select("select * from ${table} where url_md5=#{urlMD5}")
|
||||
public JobRequirement findJobByUrlMD5(@Param("table") String table,
|
||||
@Param("urlMD5") String urlMD5);
|
||||
|
||||
// 批量获得JobRequirement
|
||||
@Select("SELECT * from job_requirements WHERE id > #{id} limit #{batchSize}")
|
||||
public List<JobRequirement> getJobRequirementList(@Param("id") int id,
|
||||
|
|
|
@ -64,7 +64,7 @@ public interface TargetDao {
|
|||
|
||||
// Insert the data obtained from the data source into the target table
|
||||
@Insert("INSERT INTO ${targetTable} (${targetFields}) VALUES (#{model.id},#{model.name},#{model.tags},#{model.url},#{model.url_md5},"
|
||||
+ "#{model.description},#{model.language},#{model.source},#{model.license},#{model.homepage},#{model.updated_time},#{model.extracted_time},#{model.category},#{model.created_time})")
|
||||
+ "#{model.description},#{model.language},#{model.source},#{model.license},#{model.homepage},#{model.updated_time},#{model.extracted_time},#{model.created_time})")
|
||||
public int insertOpenSourceProject(
|
||||
@Param("targetTable") String targetTableName,
|
||||
@Param("targetFields") String targetFields,
|
||||
|
@ -73,14 +73,14 @@ public interface TargetDao {
|
|||
// 对urlMD5相同的数据进行update操作
|
||||
@Update("update ${targetTable} set id=#{model.id},name=#{model.name},tags=#{model.tags},url=#{model.url},url_md5=#{model.url_md5},"
|
||||
+ "description=#{model.description},language=#{model.language},source=#{model.source},license=#{model.license},homepage=#{model.homepage},updated_time=#{model.updated_time},extracted_time=#{model.extracted_time},"
|
||||
+ "category=#{model.category},created_time=#{model.created_time} where id=#{id}")
|
||||
+ "created_time=#{model.created_time} where id=#{id}")
|
||||
public void updateOpenSourceProject(
|
||||
@Param("targetTable") String targetTableName,
|
||||
@Param("model") GatherProject model, @Param("id") int id);
|
||||
|
||||
// Insert the data obtained from the data source into the target table
|
||||
@Insert("INSERT INTO ${targetTable} (${targetFields}) VALUES (#{model.id},#{model.title},#{model.content},#{model.created_time},#{model.type},"
|
||||
+ "#{model.tags},#{model.url},#{model.url_md5},#{model.author},#{model.author_url},#{model.category},#{model.view_num},#{model.review_num},#{model.extracted_time}"
|
||||
+ "#{model.tags},#{model.url},#{model.url_md5},#{model.author},#{model.author_url},#{model.view_num},#{model.review_num},#{model.extracted_time}"
|
||||
+ ",#{model.source},#{model.similar_position},#{model.work_place},#{model.experience},#{model.salary},#{model.scale},#{model.domain}"
|
||||
+ ",#{model.finance},#{model.style},#{model.education})")
|
||||
public int insertJobRequirement(
|
||||
|
@ -90,7 +90,7 @@ public interface TargetDao {
|
|||
|
||||
// 对urlMD5相同的数据进行update操作
|
||||
@Update("update ${targetTable} set id=#{model.id},title=#{model.title},content=#{model.content},created_time=#{model.created_time},type=#{model.type},"
|
||||
+ "tags=#{model.tags},url=#{model.url},url_md5=#{model.url_md5},author=#{model.author},author_url=#{model.author_url},category=#{model.category},view_num=#{model.view_num},"
|
||||
+ "tags=#{model.tags},url=#{model.url},url_md5=#{model.url_md5},author=#{model.author},author_url=#{model.author_url},view_num=#{model.view_num},"
|
||||
+ "review_num=#{model.review_num},extracted_time=#{model.extracted_time},source=#{model.source},similar_position=#{model.similar_position}"
|
||||
+ ",work_place=#{model.work_place},experience=#{model.experience},salary=#{model.salary},scale=#{model.scale},domain=#{model.domain}"
|
||||
+ ",finance=#{model.finance},style=#{model.style},education=#{model.education} where id=#{id}")
|
||||
|
@ -103,4 +103,19 @@ public interface TargetDao {
|
|||
public int getAutoIncrementOspId(@Param("table") String table,
|
||||
@Param("model") GatherProject model);
|
||||
|
||||
// 根据urlMD5码找到RelativeMemo对象
|
||||
@Select("select * from ${table} where url_md5=#{urlMD5}")
|
||||
public RelativeMemo findPostByUrlMD5(@Param("table") String table,
|
||||
@Param("urlMD5") String urlMD5);
|
||||
|
||||
// Look up the GatherProject whose url_md5 equals the given urlMD5
|
||||
@Select("select * from ${table} where url_md5=#{urlMD5}")
|
||||
public GatherProject findPrjByUrlMD5(@Param("table") String table,
|
||||
@Param("urlMD5") String urlMD5);
|
||||
|
||||
// 根据urlMD5码找到JobRequirement对象
|
||||
@Select("select * from ${table} where url_md5=#{urlMD5}")
|
||||
public JobRequirement findJobByUrlMD5(@Param("table") String table,
|
||||
@Param("urlMD5") String urlMD5);
|
||||
|
||||
}
|
||||
|
|
|
@ -5,9 +5,9 @@
|
|||
<entry key="pointerTableName">pointers</entry>
|
||||
<entry key="sourceTableName">oschina_project,openhub_project</entry>
|
||||
<entry key="targetTableName">gather_projects</entry>
|
||||
<entry key="sourceFields">id,name,tags,url,url_md5,description,language,source,license,homepage,now(),extracted_time,category,created_time</entry>
|
||||
<entry key="sourceFields">id,name,tags,url,url_md5,description,language,source,license,homepage,now(),extracted_time,created_time</entry>
|
||||
<!-- <entry key="targetFields">url,crawled_time,tags,license,name,description,language,platform,source,registered_time,urlMD5</entry> -->
|
||||
<entry key="targetFields">id,name,tags,url,url_md5,description,language,source,license,homepage,updated_time,extracted_time,category,created_time</entry>
|
||||
<entry key="targetFields">id,name,tags,url,url_md5,description,language,source,license,homepage,updated_time,extracted_time,created_time</entry>
|
||||
<entry key="waitDataTime">3600000</entry>
|
||||
<entry key="andWhere"> </entry>
|
||||
<entry key="idsBegin">1</entry>
|
||||
|
|
|
@ -5,9 +5,9 @@
|
|||
<entry key="pointerTableName">pointers</entry>
|
||||
<entry key="sourceTableName">job_requirements</entry>
|
||||
<entry key="targetTableName">job_requirements_new</entry>
|
||||
<entry key="sourceFields">id,title,content,created_time,type,tags,url,url_md5,author,author_url,category,view_num,review_num,extracted_time,source,similar_position,work_place,experience,salary,scale,domain,finance,style,education</entry>
|
||||
<entry key="sourceFields">id,title,content,created_time,type,tags,url,url_md5,author,author_url,view_num,review_num,extracted_time,source,similar_position,work_place,experience,salary,scale,domain,finance,style,education</entry>
|
||||
<!-- <entry key="targetFields">url,crawled_time,tags,license,name,description,language,platform,source,registered_time,urlMD5</entry> -->
|
||||
<entry key="targetFields">id,title,content,created_time,type,tags,url,url_md5,author,author_url,category,view_num,review_num,extracted_time,source,similar_position,work_place,experience,salary,scale,domain,finance,style,education</entry>
|
||||
<entry key="targetFields">id,title,content,created_time,type,tags,url,url_md5,author,author_url,view_num,review_num,extracted_time,source,similar_position,work_place,experience,salary,scale,domain,finance,style,education</entry>
|
||||
<entry key="waitDataTime">3600000</entry>
|
||||
<entry key="andWhere"> </entry>
|
||||
<entry key="idsBegin">1</entry>
|
||||
|
|
Loading…
Reference in New Issue