gather_posts
This commit is contained in:
parent
e5c17cb0ef
commit
f7ac1a3cb2
|
@ -0,0 +1,26 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<classpath>
|
||||
<classpathentry kind="src" output="target/classes" path="src/main/java">
|
||||
<attributes>
|
||||
<attribute name="optional" value="true"/>
|
||||
<attribute name="maven.pomderived" value="true"/>
|
||||
</attributes>
|
||||
</classpathentry>
|
||||
<classpathentry kind="src" output="target/test-classes" path="src/test/java">
|
||||
<attributes>
|
||||
<attribute name="optional" value="true"/>
|
||||
<attribute name="maven.pomderived" value="true"/>
|
||||
</attributes>
|
||||
</classpathentry>
|
||||
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5">
|
||||
<attributes>
|
||||
<attribute name="maven.pomderived" value="true"/>
|
||||
</attributes>
|
||||
</classpathentry>
|
||||
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
|
||||
<attributes>
|
||||
<attribute name="maven.pomderived" value="true"/>
|
||||
</attributes>
|
||||
</classpathentry>
|
||||
<classpathentry kind="output" path="target/classes"/>
|
||||
</classpath>
|
|
@ -0,0 +1 @@
|
|||
/target/
|
|
@ -0,0 +1,23 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>gather_posts</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.jdt.core.javabuilder</name>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.m2e.core.maven2Builder</name>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
</buildSpec>
|
||||
<natures>
|
||||
<nature>org.eclipse.jdt.core.javanature</nature>
|
||||
<nature>org.eclipse.m2e.core.maven2Nature</nature>
|
||||
</natures>
|
||||
</projectDescription>
|
|
@ -0,0 +1,4 @@
|
|||
eclipse.preferences.version=1
|
||||
encoding//src/main/java=UTF-8
|
||||
encoding//src/test/java=UTF-8
|
||||
encoding/<project>=UTF-8
|
|
@ -0,0 +1,5 @@
|
|||
eclipse.preferences.version=1
|
||||
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
|
||||
org.eclipse.jdt.core.compiler.compliance=1.5
|
||||
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
|
||||
org.eclipse.jdt.core.compiler.source=1.5
|
|
@ -0,0 +1,4 @@
|
|||
activeProfiles=
|
||||
eclipse.preferences.version=1
|
||||
resolveWorkspaceProjects=true
|
||||
version=1
|
|
@ -0,0 +1,23 @@
|
|||
#!/bin/bash
# Starts the TableFlow migration task named in $task as a background JVM and
# appends its stdout/stderr to log/<task>.log.
# All paths are relative, so run this from the project root.

task=51cto_blog_to_t_knowledge

# Strip resource files (*.properties, *.xml, *.dic and the spring/ directory)
# out of the compiled classes; presumably so the copies under ./bin/resources
# are the ones loaded (verify against the build).
# -exec ... {} + passes names straight to rm, so paths containing whitespace
# are not split apart the way they were by `find | xargs`.
find ./target/classes \( -name "*.properties" -o -name "*.xml" -o -name "*.dic" \) -exec rm -f {} +
rm -rf ./target/classes/spring

# Classpath order: dependency jars, compiled classes, ./bin/resources, then any
# inherited CLASSPATH. The '*' is a JVM classpath wildcard, so it stays quoted
# to reach java without shell expansion.
tmp='./target/tFlow-1.0-SNAPSHOT-jar-with-dependencies-without-resources/*:./target/classes:./bin/resources'
CLASSPATH=$tmp:$CLASSPATH

echo "$CLASSPATH"
JVM_ARGS="-Xmn48m -Xmx128m -Xms128m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTenuringThreshold=2"

# Without log/ the redirection below fails and java is never started.
mkdir -p log

# JVM_ARGS is intentionally unquoted so it splits into separate options.
java $JVM_ARGS -classpath "$CLASSPATH" net.trustie.db.TableFlow "$task" >>"log/${task}.log" 2>&1 &
|
|
@ -0,0 +1,23 @@
|
|||
#!/bin/bash
# Starts the TableFlow migration task named in $task as a background JVM and
# appends its stdout/stderr to log/<task>.log.
# All paths are relative, so run this from the project root.

task=cnblog_news_to_t_knowledge

# Strip resource files (*.properties, *.xml, *.dic and the spring/ directory)
# out of the compiled classes; presumably so the copies under ./bin/resources
# are the ones loaded (verify against the build).
# -exec ... {} + passes names straight to rm, so paths containing whitespace
# are not split apart the way they were by `find | xargs`.
find ./target/classes \( -name "*.properties" -o -name "*.xml" -o -name "*.dic" \) -exec rm -f {} +
rm -rf ./target/classes/spring

# Classpath order: dependency jars, compiled classes, ./bin/resources, then any
# inherited CLASSPATH. The '*' is a JVM classpath wildcard, so it stays quoted
# to reach java without shell expansion.
tmp='./target/tFlow-1.0-SNAPSHOT-jar-with-dependencies-without-resources/*:./target/classes:./bin/resources'
CLASSPATH=$tmp:$CLASSPATH

echo "$CLASSPATH"
JVM_ARGS="-Xmn48m -Xmx128m -Xms128m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTenuringThreshold=2"

# Without log/ the redirection below fails and java is never started.
mkdir -p log

# JVM_ARGS is intentionally unquoted so it splits into separate options.
java $JVM_ARGS -classpath "$CLASSPATH" net.trustie.db.TableFlow "$task" >>"log/${task}.log" 2>&1 &
|
|
@ -0,0 +1,24 @@
|
|||
#!/bin/bash
# Starts the TableFlow migration task named in $task as a background JVM and
# appends its stdout/stderr to log/<task>.log.
# All paths are relative, so run this from the project root.

task=cnblog_question_to_t_knowledge

# Strip resource files (*.properties, *.xml, *.dic and the spring/ directory)
# out of the compiled classes; presumably so the copies under ./bin/resources
# are the ones loaded (verify against the build).
# -exec ... {} + passes names straight to rm, so paths containing whitespace
# are not split apart the way they were by `find | xargs`.
find ./target/classes \( -name "*.properties" -o -name "*.xml" -o -name "*.dic" \) -exec rm -f {} +
rm -rf ./target/classes/spring

# Classpath order: dependency jars, compiled classes, ./bin/resources, then any
# inherited CLASSPATH. The '*' is a JVM classpath wildcard, so it stays quoted
# to reach java without shell expansion.
tmp='./target/tFlow-1.0-SNAPSHOT-jar-with-dependencies-without-resources/*:./target/classes:./bin/resources'
CLASSPATH=$tmp:$CLASSPATH

echo "$CLASSPATH"
JVM_ARGS="-Xmn48m -Xmx128m -Xms128m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTenuringThreshold=2"

# Without log/ the redirection below fails and java is never started.
mkdir -p log

# JVM_ARGS is intentionally unquoted so it splits into separate options.
java $JVM_ARGS -classpath "$CLASSPATH" net.trustie.db.TableFlow "$task" >>"log/${task}.log" 2>&1 &
|
|
@ -0,0 +1,24 @@
|
|||
#!/bin/bash
# Starts the TableFlow migration task named in $task as a background JVM and
# appends its stdout/stderr to log/<task>.log.
# All paths are relative, so run this from the project root.

task=csdn_ask_to_t_knowledge

# Strip resource files (*.properties, *.xml, *.dic and the spring/ directory)
# out of the compiled classes; presumably so the copies under ./bin/resources
# are the ones loaded (verify against the build).
# -exec ... {} + passes names straight to rm, so paths containing whitespace
# are not split apart the way they were by `find | xargs`.
find ./target/classes \( -name "*.properties" -o -name "*.xml" -o -name "*.dic" \) -exec rm -f {} +
rm -rf ./target/classes/spring

# Classpath order: dependency jars, compiled classes, ./bin/resources, then any
# inherited CLASSPATH. The '*' is a JVM classpath wildcard, so it stays quoted
# to reach java without shell expansion.
tmp='./target/tFlow-1.0-SNAPSHOT-jar-with-dependencies-without-resources/*:./target/classes:./bin/resources'
CLASSPATH=$tmp:$CLASSPATH

echo "$CLASSPATH"
JVM_ARGS="-Xmn48m -Xmx128m -Xms128m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTenuringThreshold=2"

# Without log/ the redirection below fails and java is never started.
mkdir -p log

# JVM_ARGS is intentionally unquoted so it splits into separate options.
java $JVM_ARGS -classpath "$CLASSPATH" net.trustie.db.TableFlow "$task" >>"log/${task}.log" 2>&1 &
|
|
@ -0,0 +1,24 @@
|
|||
#!/bin/bash
# Starts the TableFlow migration task named in $task as a background JVM and
# appends its stdout/stderr to log/<task>.log.
# All paths are relative, so run this from the project root.

task=csdn_blogs_to_t_knowledge

# Strip resource files (*.properties, *.xml, *.dic and the spring/ directory)
# out of the compiled classes; presumably so the copies under ./bin/resources
# are the ones loaded (verify against the build).
# -exec ... {} + passes names straight to rm, so paths containing whitespace
# are not split apart the way they were by `find | xargs`.
find ./target/classes \( -name "*.properties" -o -name "*.xml" -o -name "*.dic" \) -exec rm -f {} +
rm -rf ./target/classes/spring

# Classpath order: dependency jars, compiled classes, ./bin/resources, then any
# inherited CLASSPATH. The '*' is a JVM classpath wildcard, so it stays quoted
# to reach java without shell expansion.
tmp='./target/tFlow-1.0-SNAPSHOT-jar-with-dependencies-without-resources/*:./target/classes:./bin/resources'
CLASSPATH=$tmp:$CLASSPATH

echo "$CLASSPATH"
JVM_ARGS="-Xmn48m -Xmx128m -Xms128m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTenuringThreshold=2"

# Without log/ the redirection below fails and java is never started.
mkdir -p log

# JVM_ARGS is intentionally unquoted so it splits into separate options.
java $JVM_ARGS -classpath "$CLASSPATH" net.trustie.db.TableFlow "$task" >>"log/${task}.log" 2>&1 &
|
|
@ -0,0 +1,24 @@
|
|||
#!/bin/bash
# Starts the TableFlow migration task named in $task as a background JVM and
# appends its stdout/stderr to log/<task>.log.
# All paths are relative, so run this from the project root.

task=csdn_topics_to_t_knowledge

# Strip resource files (*.properties, *.xml, *.dic and the spring/ directory)
# out of the compiled classes; presumably so the copies under ./bin/resources
# are the ones loaded (verify against the build).
# -exec ... {} + passes names straight to rm, so paths containing whitespace
# are not split apart the way they were by `find | xargs`.
find ./target/classes \( -name "*.properties" -o -name "*.xml" -o -name "*.dic" \) -exec rm -f {} +
rm -rf ./target/classes/spring

# Classpath order: dependency jars, compiled classes, ./bin/resources, then any
# inherited CLASSPATH. The '*' is a JVM classpath wildcard, so it stays quoted
# to reach java without shell expansion.
tmp='./target/tFlow-1.0-SNAPSHOT-jar-with-dependencies-without-resources/*:./target/classes:./bin/resources'
CLASSPATH=$tmp:$CLASSPATH

echo "$CLASSPATH"
JVM_ARGS="-Xmn48m -Xmx128m -Xms128m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTenuringThreshold=2"

# Without log/ the redirection below fails and java is never started.
mkdir -p log

# JVM_ARGS is intentionally unquoted so it splits into separate options.
java $JVM_ARGS -classpath "$CLASSPATH" net.trustie.db.TableFlow "$task" >>"log/${task}.log" 2>&1 &
|
|
@ -0,0 +1,24 @@
|
|||
#!/bin/bash
# Starts the TableFlow migration task named in $task as a background JVM and
# appends its stdout/stderr to log/<task>.log.
# All paths are relative, so run this from the project root.

task=dewen_question_to_t_knowledge

# Strip resource files (*.properties, *.xml, *.dic and the spring/ directory)
# out of the compiled classes; presumably so the copies under ./bin/resources
# are the ones loaded (verify against the build).
# -exec ... {} + passes names straight to rm, so paths containing whitespace
# are not split apart the way they were by `find | xargs`.
find ./target/classes \( -name "*.properties" -o -name "*.xml" -o -name "*.dic" \) -exec rm -f {} +
rm -rf ./target/classes/spring

# Classpath order: dependency jars, compiled classes, ./bin/resources, then any
# inherited CLASSPATH. The '*' is a JVM classpath wildcard, so it stays quoted
# to reach java without shell expansion.
tmp='./target/tFlow-1.0-SNAPSHOT-jar-with-dependencies-without-resources/*:./target/classes:./bin/resources'
CLASSPATH=$tmp:$CLASSPATH

echo "$CLASSPATH"
JVM_ARGS="-Xmn48m -Xmx128m -Xms128m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTenuringThreshold=2"

# Without log/ the redirection below fails and java is never started.
mkdir -p log

# JVM_ARGS is intentionally unquoted so it splits into separate options.
java $JVM_ARGS -classpath "$CLASSPATH" net.trustie.db.TableFlow "$task" >>"log/${task}.log" 2>&1 &
|
|
@ -0,0 +1,24 @@
|
|||
#!/bin/bash
# Starts the TableFlow migration task named in $task as a background JVM and
# appends its stdout/stderr to log/<task>.log.
# All paths are relative, so run this from the project root.

task=freecode_projects_to_t_project

# Strip resource files (*.properties, *.xml, *.dic and the spring/ directory)
# out of the compiled classes; presumably so the copies under ./bin/resources
# are the ones loaded (verify against the build).
# -exec ... {} + passes names straight to rm, so paths containing whitespace
# are not split apart the way they were by `find | xargs`.
find ./target/classes \( -name "*.properties" -o -name "*.xml" -o -name "*.dic" \) -exec rm -f {} +
rm -rf ./target/classes/spring

# Classpath order: dependency jars, compiled classes, ./bin/resources, then any
# inherited CLASSPATH. The '*' is a JVM classpath wildcard, so it stays quoted
# to reach java without shell expansion.
tmp='./target/tFlow-1.0-SNAPSHOT-jar-with-dependencies-without-resources/*:./target/classes:./bin/resources'
CLASSPATH=$tmp:$CLASSPATH

echo "$CLASSPATH"
JVM_ARGS="-Xmn48m -Xmx128m -Xms128m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTenuringThreshold=2"

# Without log/ the redirection below fails and java is never started.
mkdir -p log

# JVM_ARGS is intentionally unquoted so it splits into separate options.
java $JVM_ARGS -classpath "$CLASSPATH" net.trustie.db.TableFlow "$task" >>"log/${task}.log" 2>&1 &
|
|
@ -0,0 +1,24 @@
|
|||
#!/bin/bash
# Starts the TableFlow migration task named in $task as a background JVM and
# appends its stdout/stderr to log/<task>.log.
# All paths are relative, so run this from the project root.

task=iteye_ask_to_t_knowledge

# Strip resource files (*.properties, *.xml, *.dic and the spring/ directory)
# out of the compiled classes; presumably so the copies under ./bin/resources
# are the ones loaded (verify against the build).
# -exec ... {} + passes names straight to rm, so paths containing whitespace
# are not split apart the way they were by `find | xargs`.
find ./target/classes \( -name "*.properties" -o -name "*.xml" -o -name "*.dic" \) -exec rm -f {} +
rm -rf ./target/classes/spring

# Classpath order: dependency jars, compiled classes, ./bin/resources, then any
# inherited CLASSPATH. The '*' is a JVM classpath wildcard, so it stays quoted
# to reach java without shell expansion.
tmp='./target/tFlow-1.0-SNAPSHOT-jar-with-dependencies-without-resources/*:./target/classes:./bin/resources'
CLASSPATH=$tmp:$CLASSPATH

echo "$CLASSPATH"
JVM_ARGS="-Xmn48m -Xmx128m -Xms128m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTenuringThreshold=2"

# Without log/ the redirection below fails and java is never started.
mkdir -p log

# JVM_ARGS is intentionally unquoted so it splits into separate options.
java $JVM_ARGS -classpath "$CLASSPATH" net.trustie.db.TableFlow "$task" >>"log/${task}.log" 2>&1 &
|
|
@ -0,0 +1,23 @@
|
|||
#!/bin/bash
# Starts the TableFlow migration task named in $task as a background JVM and
# appends its stdout/stderr to log/<task>.log.
# All paths are relative, so run this from the project root.

task=linuxtone_posts_to_t_knowledge

# Strip resource files (*.properties, *.xml, *.dic and the spring/ directory)
# out of the compiled classes; presumably so the copies under ./bin/resources
# are the ones loaded (verify against the build).
# -exec ... {} + passes names straight to rm, so paths containing whitespace
# are not split apart the way they were by `find | xargs`.
find ./target/classes \( -name "*.properties" -o -name "*.xml" -o -name "*.dic" \) -exec rm -f {} +
rm -rf ./target/classes/spring

# Classpath order: dependency jars, compiled classes, ./bin/resources, then any
# inherited CLASSPATH. The '*' is a JVM classpath wildcard, so it stays quoted
# to reach java without shell expansion.
tmp='./target/tFlow-1.0-SNAPSHOT-jar-with-dependencies-without-resources/*:./target/classes:./bin/resources'
CLASSPATH=$tmp:$CLASSPATH

echo "$CLASSPATH"
JVM_ARGS="-Xmn48m -Xmx128m -Xms128m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTenuringThreshold=2"

# Without log/ the redirection below fails and java is never started.
mkdir -p log

# JVM_ARGS is intentionally unquoted so it splits into separate options.
java $JVM_ARGS -classpath "$CLASSPATH" net.trustie.db.TableFlow "$task" >>"log/${task}.log" 2>&1 &
|
|
@ -0,0 +1,24 @@
|
|||
#!/bin/bash
# Starts the TableFlow migration task named in $task as a background JVM and
# appends its stdout/stderr to log/<task>.log.
# All paths are relative, so run this from the project root.

task=openhub_project_to_t_project

# Strip resource files (*.properties, *.xml, *.dic and the spring/ directory)
# out of the compiled classes; presumably so the copies under ./bin/resources
# are the ones loaded (verify against the build).
# -exec ... {} + passes names straight to rm, so paths containing whitespace
# are not split apart the way they were by `find | xargs`.
find ./target/classes \( -name "*.properties" -o -name "*.xml" -o -name "*.dic" \) -exec rm -f {} +
rm -rf ./target/classes/spring

# Classpath order: dependency jars, compiled classes, ./bin/resources, then any
# inherited CLASSPATH. The '*' is a JVM classpath wildcard, so it stays quoted
# to reach java without shell expansion.
tmp='./target/tFlow-1.0-SNAPSHOT-jar-with-dependencies-without-resources/*:./target/classes:./bin/resources'
CLASSPATH=$tmp:$CLASSPATH

echo "$CLASSPATH"
JVM_ARGS="-Xmn48m -Xmx128m -Xms128m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTenuringThreshold=2"

# Without log/ the redirection below fails and java is never started.
mkdir -p log

# JVM_ARGS is intentionally unquoted so it splits into separate options.
java $JVM_ARGS -classpath "$CLASSPATH" net.trustie.db.TableFlow "$task" >>"log/${task}.log" 2>&1 &
|
|
@ -0,0 +1,24 @@
|
|||
#!/bin/bash
# Starts the TableFlow migration task named in $task as a background JVM and
# appends its stdout/stderr to log/<task>.log.
# All paths are relative, so run this from the project root.

task=oschina_project_to_t_project

# Strip resource files (*.properties, *.xml, *.dic and the spring/ directory)
# out of the compiled classes; presumably so the copies under ./bin/resources
# are the ones loaded (verify against the build).
# -exec ... {} + passes names straight to rm, so paths containing whitespace
# are not split apart the way they were by `find | xargs`.
find ./target/classes \( -name "*.properties" -o -name "*.xml" -o -name "*.dic" \) -exec rm -f {} +
rm -rf ./target/classes/spring

# Classpath order: dependency jars, compiled classes, ./bin/resources, then any
# inherited CLASSPATH. The '*' is a JVM classpath wildcard, so it stays quoted
# to reach java without shell expansion.
tmp='./target/tFlow-1.0-SNAPSHOT-jar-with-dependencies-without-resources/*:./target/classes:./bin/resources'
CLASSPATH=$tmp:$CLASSPATH

echo "$CLASSPATH"
JVM_ARGS="-Xmn48m -Xmx128m -Xms128m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTenuringThreshold=2"

# Without log/ the redirection below fails and java is never started.
mkdir -p log

# JVM_ARGS is intentionally unquoted so it splits into separate options.
java $JVM_ARGS -classpath "$CLASSPATH" net.trustie.db.TableFlow "$task" >>"log/${task}.log" 2>&1 &
|
|
@ -0,0 +1,24 @@
|
|||
#!/bin/bash
# Starts the TableFlow migration task named in $task as a background JVM and
# appends its stdout/stderr to log/<task>.log.
# All paths are relative, so run this from the project root.

task=oschina_question_to_t_knowledge

# Strip resource files (*.properties, *.xml, *.dic and the spring/ directory)
# out of the compiled classes; presumably so the copies under ./bin/resources
# are the ones loaded (verify against the build).
# -exec ... {} + passes names straight to rm, so paths containing whitespace
# are not split apart the way they were by `find | xargs`.
find ./target/classes \( -name "*.properties" -o -name "*.xml" -o -name "*.dic" \) -exec rm -f {} +
rm -rf ./target/classes/spring

# Classpath order: dependency jars, compiled classes, ./bin/resources, then any
# inherited CLASSPATH. The '*' is a JVM classpath wildcard, so it stays quoted
# to reach java without shell expansion.
tmp='./target/tFlow-1.0-SNAPSHOT-jar-with-dependencies-without-resources/*:./target/classes:./bin/resources'
CLASSPATH=$tmp:$CLASSPATH

echo "$CLASSPATH"
JVM_ARGS="-Xmn48m -Xmx128m -Xms128m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTenuringThreshold=2"

# Without log/ the redirection below fails and java is never started.
mkdir -p log

# JVM_ARGS is intentionally unquoted so it splits into separate options.
java $JVM_ARGS -classpath "$CLASSPATH" net.trustie.db.TableFlow "$task" >>"log/${task}.log" 2>&1 &
|
|
@ -0,0 +1,23 @@
|
|||
#!/bin/bash
# Starts the TableFlow migration task named in $task as a background JVM and
# appends its stdout/stderr to log/<task>.log.
# All paths are relative, so run this from the project root.

task=phpchina_posts_to_t_knowledge

# Strip resource files (*.properties, *.xml, *.dic and the spring/ directory)
# out of the compiled classes; presumably so the copies under ./bin/resources
# are the ones loaded (verify against the build).
# -exec ... {} + passes names straight to rm, so paths containing whitespace
# are not split apart the way they were by `find | xargs`.
find ./target/classes \( -name "*.properties" -o -name "*.xml" -o -name "*.dic" \) -exec rm -f {} +
rm -rf ./target/classes/spring

# Classpath order: dependency jars, compiled classes, ./bin/resources, then any
# inherited CLASSPATH. The '*' is a JVM classpath wildcard, so it stays quoted
# to reach java without shell expansion.
tmp='./target/tFlow-1.0-SNAPSHOT-jar-with-dependencies-without-resources/*:./target/classes:./bin/resources'
CLASSPATH=$tmp:$CLASSPATH

echo "$CLASSPATH"
JVM_ARGS="-Xmn48m -Xmx128m -Xms128m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTenuringThreshold=2"

# Without log/ the redirection below fails and java is never started.
mkdir -p log

# JVM_ARGS is intentionally unquoted so it splits into separate options.
java $JVM_ARGS -classpath "$CLASSPATH" net.trustie.db.TableFlow "$task" >>"log/${task}.log" 2>&1 &
|
|
@ -0,0 +1,20 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
|
||||
<properties>
|
||||
<comment>TableFlow</comment>
|
||||
<entry key="pointerTableName">pointers</entry>
|
||||
<entry key="migrationTask">migrationTask</entry>
|
||||
<entry key="sourceTableName">51cto_blog</entry>
|
||||
<entry key="targetTableName">t_knowledge</entry>
|
||||
<entry key="sourceFields">deal_title,author,content,tags,reply_num,NULL,vote_up,NULL,extractTime,Url,created_time,"51cto_blog",author_url,NULL,"blogs",MD5(Url)</entry>
|
||||
<entry key="targetFields">title,author,content,tags,reply_num,view_num,vote_num,collection_num,crawled_time,url,created_time,source,author_url,last_time,type,url_md5</entry>
|
||||
<entry key="waitDataTime">10000</entry>
|
||||
<entry key="andWhere"></entry>
|
||||
|
||||
<!--需要转移开始Id值-->
|
||||
<entry key="idsBegin">1</entry>
|
||||
<!--需要转移结束Id值-->
|
||||
<entry key="idsEnd">500000</entry>
|
||||
<!--每次转移的Id量-->
|
||||
<entry key="idsIncrement">500</entry>
|
||||
</properties>
|
|
@ -0,0 +1,20 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
|
||||
<properties>
|
||||
<comment>TableFlow</comment>
|
||||
<entry key="pointerTableName">pointers</entry>
|
||||
<entry key="migrationTask">migrationTask</entry>
|
||||
<entry key="sourceTableName">cnblog_news</entry>
|
||||
<entry key="targetTableName">t_knowledge</entry>
|
||||
<entry key="sourceFields">deal_title,author,content,tags,reply_num,view_num,NULL,collection_num,extractTime,url,created_time,"cnblogs",author_url,last_time,"news", MD5(Url)</entry>
|
||||
<entry key="targetFields">title,author,content,tags,reply_num,view_num,vote_num,collection_num,crawled_time,url,created_time,source,author_url,last_time,type,url_md5</entry>
|
||||
<entry key="waitDataTime">10000</entry>
|
||||
<entry key="andWhere"></entry>
|
||||
|
||||
<!--需要转移开始Id值-->
|
||||
<entry key="idsBegin">1</entry>
|
||||
<!--需要转移结束Id值-->
|
||||
<entry key="idsEnd">500000</entry>
|
||||
<!--每次转移的Id量-->
|
||||
<entry key="idsIncrement">500</entry>
|
||||
</properties>
|
|
@ -0,0 +1,20 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
|
||||
<properties>
|
||||
<comment>TableFlow</comment>
|
||||
<entry key="pointerTableName">pointers</entry>
|
||||
<entry key="migrationTask">migrationTask</entry>
|
||||
<entry key="sourceTableName">cnblog_question</entry>
|
||||
<entry key="targetTableName">t_knowledge</entry>
|
||||
<entry key="sourceFields">deal_title,author,content,tags,reply_num,view_num,vote_num,collection_num,extractTime,url,created_time,"cnblogs",author_url,last_time,"topic",MD5(Url)</entry>
|
||||
<entry key="targetFields">title,author,content,tags,reply_num,view_num,vote_num,collection_num,crawled_time,url,created_time,source,author_url,last_time,type,url_md5</entry>
|
||||
<entry key="waitDataTime">10000</entry>
|
||||
<entry key="andWhere"></entry>
|
||||
|
||||
<!--需要转移开始Id值-->
|
||||
<entry key="idsBegin">1</entry>
|
||||
<!--需要转移结束Id值-->
|
||||
<entry key="idsEnd">500000</entry>
|
||||
<!--每次转移的Id量-->
|
||||
<entry key="idsIncrement">500</entry>
|
||||
</properties>
|
|
@ -0,0 +1,20 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
|
||||
<properties>
|
||||
<comment>TableFlow</comment>
|
||||
<entry key="pointerTableName">pointers</entry>
|
||||
<entry key="migrationTask">migrationTask</entry>
|
||||
<entry key="sourceTableName">csdn_ask</entry>
|
||||
<entry key="targetTableName">t_knowledge</entry>
|
||||
<entry key="sourceFields">deal_title,author,content,tags,reply_num,view_num,vote_num,collection_num,extractTime,url,created_time,"CSDN",author_url,last_time,"topic",MD5(Url)</entry>
|
||||
<entry key="targetFields">title,author,content,tags,reply_num,view_num,vote_num,collection_num,crawled_time,url,created_time,source,author_url,last_time,type,url_md5</entry>
|
||||
<entry key="waitDataTime">10000</entry>
|
||||
<entry key="andWhere"></entry>
|
||||
|
||||
<!--需要转移开始Id值-->
|
||||
<entry key="idsBegin">1</entry>
|
||||
<!--需要转移结束Id值-->
|
||||
<entry key="idsEnd">500000</entry>
|
||||
<!--每次转移的Id量-->
|
||||
<entry key="idsIncrement">500</entry>
|
||||
</properties>
|
|
@ -0,0 +1,20 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
|
||||
<properties>
|
||||
<comment>TableFlow</comment>
|
||||
<entry key="pointerTableName">pointers</entry>
|
||||
<entry key="migrationTask">migrationTask</entry>
|
||||
<entry key="sourceTableName">csdn_blogs</entry>
|
||||
<entry key="targetTableName">t_knowledge</entry>
|
||||
<entry key="sourceFields">deal_title,author,content,tags,reply_num,view_num,(supportNum-opposeNum),NULL,extractTime,url,created_time,"CSDN",author_url,NULL,"blog",MD5(Url)</entry>
|
||||
<entry key="targetFields">title,author,content,tags,reply_num,view_num,vote_num,collection_num,crawled_time,url,created_time,source,author_url,last_time,type,url_md5</entry>
|
||||
<entry key="waitDataTime">10000</entry>
|
||||
<entry key="andWhere"></entry>
|
||||
|
||||
<!--需要转移开始Id值-->
|
||||
<entry key="idsBegin">1</entry>
|
||||
<!--需要转移结束Id值-->
|
||||
<entry key="idsEnd">500000</entry>
|
||||
<!--每次转移的Id量-->
|
||||
<entry key="idsIncrement">500</entry>
|
||||
</properties>
|
|
@ -0,0 +1,20 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
|
||||
<properties>
|
||||
<comment>TableFlow</comment>
|
||||
<entry key="pointerTableName">pointers</entry>
|
||||
<entry key="migrationTask">migrationTask</entry>
|
||||
<entry key="sourceTableName">csdn_topics</entry>
|
||||
<entry key="targetTableName">t_knowledge</entry>
|
||||
<entry key="sourceFields">deal_title,author,content,tags,replyNum,view_num,vote_num,collection_num,extractTime,url,created_time,"CSDN",author_url,NULL,"topic",MD5(Url)</entry>
|
||||
<entry key="targetFields">title,author,content,tags,reply_num,view_num,vote_num,collection_num,crawled_time,url,created_time,source,author_url,last_time,type,url_md5</entry>
|
||||
<entry key="waitDataTime">10000</entry>
|
||||
<entry key="andWhere"></entry>
|
||||
|
||||
<!--需要转移开始Id值-->
|
||||
<entry key="idsBegin">1</entry>
|
||||
<!--需要转移结束Id值-->
|
||||
<entry key="idsEnd">500000</entry>
|
||||
<!--每次转移的Id量-->
|
||||
<entry key="idsIncrement">500</entry>
|
||||
</properties>
|
|
@ -0,0 +1,20 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
|
||||
<properties>
|
||||
<comment>TableFlow</comment>
|
||||
<entry key="pointerTableName">pointers</entry>
|
||||
<entry key="migrationTask">migrationTask</entry>
|
||||
<entry key="sourceTableName">dewen_question</entry>
|
||||
<entry key="targetTableName">t_knowledge</entry>
|
||||
<entry key="sourceFields">deal_title,author,content,tags,(reply_num+answerNum),view_num,vote_num,collection_num,extractTime,url,created_time,"DEWEN",author_url,NULL,"topic",MD5(Url)</entry>
|
||||
<entry key="targetFields">title,author,content,tags,reply_num,view_num,vote_num,collection_num,crawled_time,url,created_time,source,author_url,last_time,type,url_md5</entry>
|
||||
<entry key="waitDataTime">10000</entry>
|
||||
<entry key="andWhere"></entry>
|
||||
|
||||
<!--需要转移开始Id值-->
|
||||
<entry key="idsBegin">1</entry>
|
||||
<!--需要转移结束Id值-->
|
||||
<entry key="idsEnd">500000</entry>
|
||||
<!--每次转移的Id量-->
|
||||
<entry key="idsIncrement">500</entry>
|
||||
</properties>
|
|
@ -0,0 +1,17 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
|
||||
<properties>
|
||||
<comment>TableFlow</comment>
|
||||
<entry key="pointerTableName">pointer</entry>
|
||||
<entry key="migrationTask">migrationTask</entry>
|
||||
<entry key="sourceTableName">src</entry>
|
||||
<entry key="targetTableName">dest</entry>
|
||||
<entry key="sourceFields">id,name,url</entry>
|
||||
<entry key="targetFields">proj_id, proj_name, proj_url</entry>
|
||||
<!--需要转移开始Id值-->
|
||||
<entry key="idsBegin">1</entry>
|
||||
<!--需要转移结束Id值-->
|
||||
<entry key="idsEnd">500000</entry>
|
||||
<!--每次转移的Id量-->
|
||||
<entry key="idsIncrement">500</entry>
|
||||
</properties>
|
|
@ -0,0 +1,20 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
|
||||
<properties>
|
||||
<comment>TableFlow</comment>
|
||||
<entry key="pointerTableName">pointer</entry>
|
||||
<entry key="migrationTask">migrationTask</entry>
|
||||
<entry key="sourceTableName">source</entry>
|
||||
<entry key="targetTableName">target</entry>
|
||||
<entry key="sourceFields">sField1,sField2,sField3</entry>
|
||||
<entry key="targetFields">tField1,tField2,tField3</entry>
|
||||
<entry key="waitDataTime">10000</entry>
|
||||
<entry key="andWhere"></entry>
|
||||
|
||||
<!--需要转移开始Id值-->
|
||||
<entry key="idsBegin">1</entry>
|
||||
<!--需要转移结束Id值-->
|
||||
<entry key="idsEnd">500000</entry>
|
||||
<!--每次转移的Id量-->
|
||||
<entry key="idsIncrement">5000</entry>
|
||||
</properties>
|
|
@ -0,0 +1,16 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
|
||||
<properties>
|
||||
<comment>TableFlow</comment>
|
||||
<entry key="pointerTableName">pointers</entry>
|
||||
<entry key="migrationTask">migrationTask</entry>
|
||||
<entry key="sourceTableName">freecode_projects</entry>
|
||||
<entry key="targetTableName">t_project</entry>
|
||||
<entry key="sourceFields">Url,extract_time,tag,license,project_title,project_des,implementation,operate_system,"FreeCode",post_date,MD5(Url)</entry>
|
||||
<entry key="targetFields">url,crawled_time,tags,license,name,description,language,platform,source,registered_time,urlMD5</entry>
|
||||
<entry key="waitDataTime">3600000</entry>
|
||||
<entry key="andWhere">AND tag is NOT NULL AND project_des IS NOT NULL</entry>
|
||||
<entry key="idsBegin">1</entry>
|
||||
<entry key="idsEnd">500000</entry>
|
||||
<entry key="idsIncrement">500</entry>
|
||||
</properties>
|
|
@ -0,0 +1,20 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
|
||||
<properties>
|
||||
<comment>TableFlow</comment>
|
||||
<entry key="pointerTableName">pointers</entry>
|
||||
<entry key="migrationTask">migrationTask</entry>
|
||||
<entry key="sourceTableName">iteye_ask</entry>
|
||||
<entry key="targetTableName">t_knowledge</entry>
|
||||
<entry key="sourceFields">deal_title,author,content,tags,reply_num,view_num,(vote_num-voteDown),collection_num,extractTime,url,created_time,"ITeye",author_url,NULL,"topic",MD5(Url)</entry>
|
||||
<entry key="targetFields">title,author,content,tags,reply_num,view_num,vote_num,collection_num,crawled_time,url,created_time,source,author_url,last_time,type,url_md5</entry>
|
||||
<entry key="waitDataTime">10000</entry>
|
||||
<entry key="andWhere"></entry>
|
||||
|
||||
<!--需要转移开始Id值-->
|
||||
<entry key="idsBegin">1</entry>
|
||||
<!--需要转移结束Id值-->
|
||||
<entry key="idsEnd">500000</entry>
|
||||
<!--每次转移的Id量-->
|
||||
<entry key="idsIncrement">500</entry>
|
||||
</properties>
|
|
@ -0,0 +1,20 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
|
||||
<properties>
|
||||
<comment>TableFlow</comment>
|
||||
<entry key="pointerTableName">pointers</entry>
|
||||
<entry key="migrationTask">migrationTask</entry>
|
||||
<entry key="sourceTableName">linuxtone</entry>
|
||||
<entry key="targetTableName">t_knowledge</entry>
|
||||
<entry key="sourceFields">deal_title,author,content,tags,reply_num,view_num,vote_num,collection_num,extractTime,url,created_time,"linuxtone",author_url,NULL,"posts",MD5(Url)</entry>
|
||||
<entry key="targetFields">title,author,content,tags,reply_num,view_num,vote_num,collection_num,crawled_time,url,created_time,source,author_url,last_time,type,url_md5</entry>
|
||||
<entry key="waitDataTime">10000</entry>
|
||||
<entry key="andWhere"></entry>
|
||||
|
||||
<!--需要转移开始Id值-->
|
||||
<entry key="idsBegin">1</entry>
|
||||
<!--需要转移结束Id值-->
|
||||
<entry key="idsEnd">500000</entry>
|
||||
<!--每次转移的Id量-->
|
||||
<entry key="idsIncrement">500</entry>
|
||||
</properties>
|
|
@ -0,0 +1,61 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
<!-- log4j 1.x configuration: a console appender, a daily-rolling file that
     keeps ERROR and above, a daily-rolling general log file, and an SMTP
     appender that is defined but not attached to any logger. -->
<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/">

    <!-- Console output. A "threshold" param used to sit inside <layout>, where
         it is not a PatternLayout property and so never filtered anything; it
         is dropped to keep the effective behaviour (console receives every
         event its loggers pass on). If ERROR-only console output is wanted,
         add the threshold param as a direct child of this appender. -->
    <appender name="stdout" class="org.apache.log4j.ConsoleAppender">
        <layout class="org.apache.log4j.PatternLayout">
            <param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p ## %m%n" />
        </layout>
    </appender>

    <!-- Error log: only events at ERROR or above are written. -->
    <appender name="file" class="org.apache.log4j.DailyRollingFileAppender">
        <param name="File" value="./log/error.log" />
        <param name="threshold" value="ERROR" />
        <layout class="org.apache.log4j.PatternLayout">
            <param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} ## %m%n" />
        </layout>
    </appender>

    <!-- General log: no threshold of its own. -->
    <appender name="file_log" class="org.apache.log4j.DailyRollingFileAppender">
        <param name="File" value="./log/tflow.log" />
        <layout class="org.apache.log4j.PatternLayout">
            <param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} ## %m%n" />
        </layout>
    </appender>

    <!-- E-mail appender (mail is sent only when an ERROR occurs). -->
    <appender name="MAIL" class="org.apache.log4j.net.SMTPAppender">
        <!-- Lowest level admitted into the mail buffer; alternative kept from
             the original: <param name="threshold" value="fatal"/> -->
        <param name="threshold" value="debug" />
        <!-- BufferSize is the number of logging events held in the buffer, not
             a size in K as the earlier comment stated. -->
        <param name="BufferSize" value="1" />
        <param name="From" value="ossean_debug@163.com" />
        <param name="SMTPHost" value="smtp.163.com" />
        <param name="Subject" value="ossean-crawler-debug-log4jMessage" />
        <param name="To" value="getbox@126.com" />
        <!-- NOTE(review): SMTP credentials are committed here in plain text;
             rotate them and supply them from outside version control. -->
        <param name="SMTPUsername" value="ossean_debug" />
        <param name="SMTPPassword" value="goodwell123" />
        <layout class="org.apache.log4j.PatternLayout">
            <param name="ConversionPattern" value="%-d{yyyy-MM-dd HH:mm:ss.SSS} [%p]-[%c] %m%n" />
        </layout>
    </appender>

    <!-- org.apache.* loggers: WARN and above, console only, not passed to root. -->
    <logger name="org.apache" additivity="false">
        <level value="warn" />
        <appender-ref ref="stdout" />
    </logger>

    <root>
        <level value="info" />
        <appender-ref ref="stdout" />
        <appender-ref ref="file" />
        <appender-ref ref="file_log" />
        <!-- <appender-ref ref="MAIL" />-->
        <!-- A reference to an appender named DATABASE was removed: this file
             defines no such appender, so the reference could not be resolved.
             Restore it together with a DATABASE appender definition. -->
    </root>

</log4j:configuration>
|
|
@ -0,0 +1,16 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
|
||||
<properties>
|
||||
<comment>TableFlow</comment>
|
||||
<entry key="pointerTableName">pointers</entry>
|
||||
<entry key="migrationTask">migrationTask</entry>
|
||||
<entry key="sourceTableName">openhub_project</entry>
|
||||
<entry key="targetTableName">t_project</entry>
|
||||
<entry key="sourceFields">name,description,tags,Url,licenses,contributorNum,firstCommitTime,lastCommitTime,"OpenHub",crawlerTime,MD5(Url)</entry>
|
||||
<entry key="targetFields">name,description,tags,url,license,contributors_num,registered_time,last_update_time,source,crawled_time,urlMD5</entry>
|
||||
<entry key="waitDataTime">3600000</entry>
|
||||
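<!-- NOTE(review): unlike the freecode task's andWhere value, this clause has no leading AND; confirm how TableFlow appends andWhere to its WHERE clause. -->
<entry key="andWhere"> useCount > 0</entry>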
|
||||
<entry key="idsBegin">1</entry>
|
||||
<entry key="idsEnd">500000</entry>
|
||||
<entry key="idsIncrement">500</entry>
|
||||
</properties>
|
|
@ -0,0 +1,16 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
|
||||
<properties>
|
||||
<comment>TableFlow</comment>
|
||||
<entry key="pointerTableName">pointers</entry>
|
||||
<entry key="migrationTask">migrationTask</entry>
|
||||
<entry key="sourceTableName">oschina_project</entry>
|
||||
<entry key="targetTableName">t_project</entry>
|
||||
<entry key="sourceFields">projectShortName,Url,projectDesc,projectCategory,projectLicenses,projectLanguage,exteactTime,pageMD5,history,projectOS,"OSChina",MD5(Url)</entry>
|
||||
<entry key="targetFields">name,url,description,category,license,language,crawled_time,pageMD5,history,platform,source,urlMD5</entry>
|
||||
<entry key="waitDataTime">3600000</entry>
|
||||
<entry key="andWhere"></entry>
|
||||
<entry key="idsBegin">1</entry>
|
||||
<entry key="idsEnd">500000</entry>
|
||||
<entry key="idsIncrement">500</entry>
|
||||
</properties>
|
|
@ -0,0 +1,20 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
|
||||
<properties>
|
||||
<comment>TableFlow</comment>
|
||||
<entry key="pointerTableName">pointers</entry>
|
||||
<entry key="migrationTask">migrationTask</entry>
|
||||
<entry key="sourceTableName">oschina_question</entry>
|
||||
<entry key="targetTableName">t_knowledge</entry>
|
||||
<entry key="sourceFields">deal_title,author,content,tags,reply_num,view_num,vote_num,collection_num,extractTime,url,created_time,"OSChina",author_url,NULL,"topic",MD5(Url)</entry>
|
||||
<entry key="targetFields">title,author,content,tags,reply_num,view_num,vote_num,collection_num,crawled_time,url,created_time,source,author_url,last_time,type,url_md5</entry>
|
||||
<entry key="waitDataTime">10000</entry>
|
||||
<entry key="andWhere"></entry>
|
||||
|
||||
<!-- First id to migrate -->
|
||||
<entry key="idsBegin">1</entry>
|
||||
<!-- Last id to migrate -->
|
||||
<entry key="idsEnd">500000</entry>
|
||||
<!-- Number of ids migrated per batch -->
|
||||
<entry key="idsIncrement">500</entry>
|
||||
</properties>
|
|
@ -0,0 +1,20 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
|
||||
<properties>
|
||||
<comment>TableFlow</comment>
|
||||
<entry key="pointerTableName">pointers</entry>
|
||||
<entry key="migrationTask">migrationTask</entry>
|
||||
<entry key="sourceTableName">phpchina</entry>
|
||||
<entry key="targetTableName">t_knowledge</entry>
|
||||
<entry key="sourceFields">deal_title,author,content,tags,reply_num,NULL,vote_num,collection_num,extractTime,url,created_time,"PhpChina",author_url,NULL,"posts",MD5(Url)</entry>
|
||||
<entry key="targetFields">title,author,content,tags,reply_num,view_num,vote_num,collection_num,crawled_time,url,created_time,source,author_url,last_time,type,url_md5</entry>
|
||||
<entry key="waitDataTime">10000</entry>
|
||||
<entry key="andWhere"></entry>
|
||||
|
||||
<!-- First id to migrate -->
|
||||
<entry key="idsBegin">1</entry>
|
||||
<!-- Last id to migrate -->
|
||||
<entry key="idsEnd">500000</entry>
|
||||
<!-- Number of ids migrated per batch -->
|
||||
<entry key="idsIncrement">500</entry>
|
||||
</properties>
|
|
@ -0,0 +1,20 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
|
||||
<properties>
|
||||
<comment>TableFlow</comment>
|
||||
<entry key="pointerTableName">pointers</entry>
|
||||
<entry key="migrationTask">migrationTask</entry>
|
||||
<entry key="sourceTableName">slashdot</entry>
|
||||
<entry key="targetTableName">t_knowledge</entry>
|
||||
<entry key="sourceFields">deal_title,author,content,tags,reply_num,view_num,vote_num,collection_num,extractTime,url,created_time,"slashdot",author_url,NULL,"posts",MD5(Url)</entry>
|
||||
<entry key="targetFields">title,author,content,tags,reply_num,view_num,vote_num,collection_num,crawled_time,url,created_time,source,author_url,last_time,type,url_md5</entry>
|
||||
<entry key="waitDataTime">10000</entry>
|
||||
<entry key="andWhere"></entry>
|
||||
|
||||
<!-- First id to migrate -->
|
||||
<entry key="idsBegin">1</entry>
|
||||
<!-- Last id to migrate -->
|
||||
<entry key="idsEnd">500000</entry>
|
||||
<!-- Number of ids migrated per batch -->
|
||||
<entry key="idsIncrement">500</entry>
|
||||
</properties>
|
|
@ -0,0 +1,16 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
|
||||
<properties>
|
||||
<comment>TableFlow</comment>
|
||||
<entry key="pointerTableName">pointers</entry>
|
||||
<entry key="migrationTask">migrationTask</entry>
|
||||
<entry key="sourceTableName">sourceforge_project</entry>
|
||||
<entry key="targetTableName">t_project</entry>
|
||||
<entry key="sourceFields">name,downloadCount,lastUpdate,`desc`,categories,license,programmingLanguage,registeredTime,pageMd5,history,platform,Url,"SourceForge",MD5(Url)</entry>
|
||||
<entry key="targetFields">name,download_num,last_update_time,description,category,license,language,registered_time,pageMD5,history,platform,url,source,urlMD5</entry>
|
||||
<entry key="waitDataTime">3600000</entry>
|
||||
<entry key="andWhere">AND stars > 0 AND downloadCount > 0</entry>
|
||||
<entry key="idsBegin">1</entry>
|
||||
<entry key="idsEnd">500000</entry>
|
||||
<entry key="idsIncrement">500</entry>
|
||||
</properties>
|
|
@ -0,0 +1,20 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
|
||||
<properties>
|
||||
<comment>TableFlow</comment>
|
||||
<entry key="pointerTableName">pointers</entry>
|
||||
<entry key="migrationTask">migrationTask</entry>
|
||||
<entry key="sourceTableName">stackoverflow_q</entry>
|
||||
<entry key="targetTableName">t_knowledge</entry>
|
||||
<entry key="sourceFields">questionTitle,author,questionContent,tag,answerNum,viewNum,voteNum,likeNum,extractTime,Url,postTime,"StackOverflow",authorUrl,activeTime,"topic",MD5(Url)</entry>
|
||||
<entry key="targetFields">title,author,content,tags,reply_num,view_num,vote_num,collection_num,crawled_time,url,created_time,source,author_url,last_time,type,url_md5</entry>
|
||||
<entry key="waitDataTime">10000</entry>
|
||||
<entry key="andWhere"></entry>
|
||||
|
||||
<!-- First id to migrate -->
|
||||
<entry key="idsBegin">1</entry>
|
||||
<!-- Last id to migrate -->
|
||||
<entry key="idsEnd">500000</entry>
|
||||
<!-- Number of ids migrated per batch -->
|
||||
<entry key="idsIncrement">500</entry>
|
||||
</properties>
|
|
@ -0,0 +1,23 @@
|
|||
#!/bin/bash
# Starts net.trustie.db.TableFlow for a single migration task as a background
# JVM and appends its stdout/stderr to log/<task>.log.

task=slashdot_posts_to_t_kowledge

# Remove resource files from the compiled output directory.
# NOTE(review): presumably so the copies under ./bin/resources are the ones
# picked up from the classpath - confirm.
find ./target/classes -name "*.properties" -exec rm -f {} +
find ./target/classes -name "*.xml" -exec rm -f {} +
find ./target/classes -name "*.dic" -exec rm -f {} +
rm -rf ./target/classes/spring
#export CLASSPATH=$CURR_DIR/lib:$CURR_DIR:$JAVA_HOME/lib:$JAVA_HOME/jre/lib

# Build the classpath: dependency jars first, then classes, then resources.
tmp='./bin/resources'
tmp='./target/classes':$tmp
tmp='./target/tFlow-1.0-SNAPSHOT-jar-with-dependencies-without-resources/*':$tmp

CLASSPATH=$tmp:$CLASSPATH

# Quoted so the literal '*' reaches java instead of being globbed by the shell.
echo "$CLASSPATH"
JVM_ARGS="-Xmn48m -Xmx128m -Xms128m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTenuringThreshold=2"
#echo JVM_ARGS=$JVM_ARGS
#ulimit -n 400000
#echo "" > nohup.out

# The redirect below fails (and java never starts) if the directory is missing.
mkdir -p log

# $JVM_ARGS is intentionally unquoted so it splits into separate options.
java $JVM_ARGS -classpath "$CLASSPATH" net.trustie.db.TableFlow "$task" >>"log/${task}.log" 2>&1 &
|
|
@ -0,0 +1,24 @@
|
|||
#!/bin/bash
# Starts net.trustie.db.TableFlow for a single migration task as a background
# JVM and appends its stdout/stderr to log/<task>.log.

task=sourceforge_project_to_t_project

# Remove resource files from the compiled output directory.
# NOTE(review): presumably so the copies under ./bin/resources are the ones
# picked up from the classpath - confirm.
find ./target/classes -name "*.properties" -exec rm -f {} +
find ./target/classes -name "*.xml" -exec rm -f {} +
find ./target/classes -name "*.dic" -exec rm -f {} +
rm -rf ./target/classes/spring

#export CLASSPATH=$CURR_DIR/lib:$CURR_DIR:$JAVA_HOME/lib:$JAVA_HOME/jre/lib

# Build the classpath: dependency jars first, then classes, then resources.
tmp='./bin/resources'
tmp='./target/classes':$tmp
tmp='./target/tFlow-1.0-SNAPSHOT-jar-with-dependencies-without-resources/*':$tmp

CLASSPATH=$tmp:$CLASSPATH

# Quoted so the literal '*' reaches java instead of being globbed by the shell.
echo "$CLASSPATH"
JVM_ARGS="-Xmn48m -Xmx128m -Xms128m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTenuringThreshold=2"
#echo JVM_ARGS=$JVM_ARGS
#ulimit -n 400000
#echo "" > nohup.out

# The redirect below fails (and java never starts) if the directory is missing.
mkdir -p log

# $JVM_ARGS is intentionally unquoted so it splits into separate options.
java $JVM_ARGS -classpath "$CLASSPATH" net.trustie.db.TableFlow "$task" >>"log/${task}.log" 2>&1 &
|
|
@ -0,0 +1,24 @@
|
|||
#!/bin/bash
# Starts net.trustie.db.TableFlow for a single migration task as a background
# JVM and appends its stdout/stderr to log/<task>.log.

task=stackoverflow_q_to_t_knowledge

# Remove resource files from the compiled output directory.
# NOTE(review): presumably so the copies under ./bin/resources are the ones
# picked up from the classpath - confirm.
find ./target/classes -name "*.properties" -exec rm -f {} +
find ./target/classes -name "*.xml" -exec rm -f {} +
find ./target/classes -name "*.dic" -exec rm -f {} +
rm -rf ./target/classes/spring

#export CLASSPATH=$CURR_DIR/lib:$CURR_DIR:$JAVA_HOME/lib:$JAVA_HOME/jre/lib

# Build the classpath: dependency jars first, then classes, then resources.
tmp='./bin/resources'
tmp='./target/classes':$tmp
tmp='./target/tFlow-1.0-SNAPSHOT-jar-with-dependencies-without-resources/*':$tmp

CLASSPATH=$tmp:$CLASSPATH

# Quoted so the literal '*' reaches java instead of being globbed by the shell.
echo "$CLASSPATH"
JVM_ARGS="-Xmn48m -Xmx128m -Xms128m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTenuringThreshold=2"
#echo JVM_ARGS=$JVM_ARGS
#ulimit -n 400000
#echo "" > nohup.out

# The redirect below fails (and java never starts) if the directory is missing.
mkdir -p log

# $JVM_ARGS is intentionally unquoted so it splits into separate options.
java $JVM_ARGS -classpath "$CLASSPATH" net.trustie.db.TableFlow "$task" >>"log/${task}.log" 2>&1 &
|
|
@ -0,0 +1,24 @@
|
|||
#!/bin/bash
# Starts net.trustie.db.TableFlow for a single migration task as a background
# JVM and appends its stdout/stderr to log/<task>.log.

task=test

# Remove resource files from the compiled output directory.
# NOTE(review): presumably so the copies under ./bin/resources are the ones
# picked up from the classpath - confirm.
find ./target/classes -name "*.properties" -exec rm -f {} +
find ./target/classes -name "*.xml" -exec rm -f {} +
find ./target/classes -name "*.dic" -exec rm -f {} +
rm -rf ./target/classes/spring

#export CLASSPATH=$CURR_DIR/lib:$CURR_DIR:$JAVA_HOME/lib:$JAVA_HOME/jre/lib

# Build the classpath: dependency jars first, then classes, then resources.
tmp='./bin/resources'
tmp='./target/classes':$tmp
tmp='./target/tFlow-1.0-SNAPSHOT-jar-with-dependencies-without-resources/*':$tmp

CLASSPATH=$tmp:$CLASSPATH

# Quoted so the literal '*' reaches java instead of being globbed by the shell.
echo "$CLASSPATH"
JVM_ARGS="-Xmn48m -Xmx128m -Xms128m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTenuringThreshold=2"
#echo JVM_ARGS=$JVM_ARGS
#ulimit -n 400000
#echo "" > nohup.out

# The redirect below fails (and java never starts) if the directory is missing.
mkdir -p log

# $JVM_ARGS is intentionally unquoted so it splits into separate options.
java $JVM_ARGS -classpath "$CLASSPATH" net.trustie.db.TableFlow "$task" >>"log/${task}.log" 2>&1 &
|
|
@ -0,0 +1,19 @@
|
|||
#!/bin/bash
# Runs every table-migration launcher script under bin/, one after another.
# Must be started from the project root because all paths are relative.
# NOTE(review): the misspelt names (t_konwlege, t_kowledge) are kept as-is
# because they appear to match the actual script/task names - confirm before
# renaming.

sh bin/cnblog_news_to_t_knowledge.sh
sh bin/cnblog_question_to_t_knowledge.sh
sh bin/csdn_ask_to_t_knowledge.sh
sh bin/csdn_blogs_to_t_knowledge.sh
sh bin/csdn_topics_to_t_knowledge.sh
sh bin/dewen_question_to_t_knowledge.sh
sh bin/freecode_projects_to_t_project.sh
sh bin/iteye_ask_to_t_knowledge.sh
sh bin/openhub_project_to_t_project.sh
sh bin/oschina_project_to_t_project.sh
sh bin/oschina_question_to_t_knowledge.sh
sh bin/sourceforge_project_to_t_project.sh
sh bin/stackoverflow_q_to_t_knowledge.sh
sh bin/51cto_blog_to_t_knowledge.sh
sh bin/linuxtone_posts_to_t_konwlege.sh
sh bin/slashdot_posts_to_t_kowledge.sh
# Extension separator was a comma (",sh"), so this launcher was never found.
sh bin/phpchina_posts_to_t_knowledge.sh
|
|
@ -0,0 +1,93 @@
|
|||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>cn.edu.zhanyun</groupId>
|
||||
<artifactId>gather_posts</artifactId>
|
||||
<version>0.0.1-SNAPSHOT</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<name>gather_posts</name>
|
||||
<url>http://maven.apache.org</url>
|
||||
|
||||
<build>
|
||||
<sourceDirectory>${basedir}/src/main/java</sourceDirectory>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<configuration>
|
||||
<source>1.7</source>
|
||||
<target>1.7</target>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-assembly-plugin</artifactId>
|
||||
<version>2.5.1</version>
|
||||
<configuration>
|
||||
<descriptors>
|
||||
<descriptor>src/main/assembly/assembly.xml</descriptor>
|
||||
</descriptors>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>3.8.1</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-log4j12</artifactId>
|
||||
<version>1.7.7</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-collections</groupId>
|
||||
<artifactId>commons-collections</artifactId>
|
||||
<version>3.2.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-io</artifactId>
|
||||
<version>1.3.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework</groupId>
|
||||
<artifactId>spring-context</artifactId>
|
||||
<version>4.1.4.RELEASE</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-lang3</artifactId>
|
||||
<version>3.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>mysql</groupId>
|
||||
<artifactId>mysql-connector-java</artifactId>
|
||||
<version>5.1.18</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-dbcp</groupId>
|
||||
<artifactId>commons-dbcp</artifactId>
|
||||
<version>1.3</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mybatis</groupId>
|
||||
<artifactId>mybatis</artifactId>
|
||||
<version>3.1.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mybatis</groupId>
|
||||
<artifactId>mybatis-spring</artifactId>
|
||||
<version>1.1.1</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
|
@ -0,0 +1,12 @@
|
|||
package org.ossean.transfertknowledgeandtagmatch.dao;
|
||||
|
||||
import java.util.List;
|
||||
import org.apache.ibatis.annotations.Param;
|
||||
import org.apache.ibatis.annotations.Select;
|
||||
import org.ossean.transfertknowledgeandtagmatch.model.JobRequirement;
|
||||
|
||||
public interface JobRequirementDAO {
|
||||
@Select("SELECT * from job_requirements WHERE id > #{id} limit #{batchSize}")
|
||||
public List<JobRequirement> getJobRequirementList(@Param("id") int id, @Param("batchSize") int batchSize);
|
||||
|
||||
}
|
|
@ -0,0 +1,20 @@
|
|||
package org.ossean.transfertknowledgeandtagmatch.dao;
|
||||
|
||||
|
||||
import org.apache.ibatis.annotations.Insert;
|
||||
import org.apache.ibatis.annotations.Param;
|
||||
import org.apache.ibatis.annotations.Select;
|
||||
import org.apache.ibatis.annotations.Update;
|
||||
|
||||
public interface PointerDAO {
|
||||
|
||||
@Select("select Pointer from ${table} where SourceTableName=#{SourceTableName} and TargetTableName=#{TargetTableName}")
|
||||
public int getPointer(@Param("table") String table, @Param("SourceTableName") String SourceTableName, @Param("TargetTableName") String TargetTableName);
|
||||
|
||||
@Update("update ${table} set Pointer=#{Pointer} where SourceTableName=#{SourceTableName} and TargetTableName=#{TargetTableName}")
|
||||
public void updatePointer(@Param("table") String table, @Param("SourceTableName") String SourceTableName, @Param("TargetTableName") String TargetTableName, @Param("Pointer") int Pointer);
|
||||
|
||||
@Insert("insert into ${table} (`SourceTableName`,`TargetTableName`,`Pointer`) values (#{SourceTableName},#{TargetTableName},#{Pointer})")
|
||||
public void insertPointer(@Param("table") String table, @Param("SourceTableName") String SourceTableName, @Param("TargetTableName") String TargetTableName, @Param("Pointer") int Pointer);
|
||||
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
package org.ossean.transfertknowledgeandtagmatch.dao;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.ibatis.annotations.Param;
|
||||
import org.apache.ibatis.annotations.Select;
|
||||
import org.ossean.transfertknowledgeandtagmatch.model.RelativeMemo;
|
||||
|
||||
public interface RelativeMemoDao {
|
||||
|
||||
//批量获取帖子对象
|
||||
@Select("select * from relative_memos where id>#{startId} AND id<=#{endId}")
|
||||
public List<RelativeMemo> getMemoInfo(@Param("startId") int startId, @Param("endId") int endId);
|
||||
|
||||
//获得relative_memos中已存在的url_md5集合
|
||||
@Select("SELECT DISTINCT(url_md5) AS md5 FROM relative_memos")
|
||||
public Set<String> initUrlSet();
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,328 @@
|
|||
package org.ossean.transfertknowledgeandtagmatch.model;
|
||||
|
||||
|
||||
/**
 * Mutable data holder for one job-requirement record.
 *
 * <p>Property names keep their underscore form (for example {@code author_url}); the accessor
 * names are derived from them and must not be renamed.
 * NOTE(review): the names presumably mirror database column names - confirm against the schema.
 */
public class JobRequirement {

    private int id;
    private String author_url;
    private String author;
    private String url;
    private String deal_title;
    private String tags;
    private String content;
    private String salary;
    private String experience;
    private String comp_name;
    private String comp_url;
    private String addr;
    private String comp_scale;
    private String comp_field;
    private String financing;
    private String comp_vision;
    private String created_time;
    private String extractTime;
    private int history;
    private String relative_osps;
    private int reply_num;
    private int view_num;
    private int vote_num;
    private int collection_num;
    private String abstractString;
    private String type;
    private String crawled_time;
    private String category;
    private String source;
    private String last_time;
    private String url_md5;

    /**
     * Creates a fully populated record; each argument is stored in the field of the same name.
     */
    public JobRequirement(int id, String author_url, String author, String url,
            String deal_title, String tags, String content, String salary,
            String experience, String comp_name, String comp_url, String addr,
            String comp_scale, String comp_field, String financing,
            String comp_vision, String created_time, String extractTime,
            int history, String relative_osps, int reply_num, int view_num,
            int vote_num, int collection_num, String abstractString,
            String type, String crawled_time, String category, String source,
            String last_time, String url_md5) {
        // Identity and authorship
        this.id = id;
        this.author_url = author_url;
        this.author = author;
        this.url = url;
        this.url_md5 = url_md5;
        // Posting text
        this.deal_title = deal_title;
        this.tags = tags;
        this.content = content;
        this.abstractString = abstractString;
        this.salary = salary;
        this.experience = experience;
        // Company details
        this.comp_name = comp_name;
        this.comp_url = comp_url;
        this.addr = addr;
        this.comp_scale = comp_scale;
        this.comp_field = comp_field;
        this.financing = financing;
        this.comp_vision = comp_vision;
        // Timestamps
        this.created_time = created_time;
        this.extractTime = extractTime;
        this.crawled_time = crawled_time;
        this.last_time = last_time;
        // Counters and classification
        this.history = history;
        this.relative_osps = relative_osps;
        this.reply_num = reply_num;
        this.view_num = view_num;
        this.vote_num = vote_num;
        this.collection_num = collection_num;
        this.type = type;
        this.category = category;
        this.source = source;
    }

    public int getId() { return id; }
    public void setId(int id) { this.id = id; }

    public String getAuthor_url() { return author_url; }
    public void setAuthor_url(String author_url) { this.author_url = author_url; }

    public String getAuthor() { return author; }
    public void setAuthor(String author) { this.author = author; }

    public String getUrl() { return url; }
    public void setUrl(String url) { this.url = url; }

    public String getDeal_title() { return deal_title; }
    public void setDeal_title(String deal_title) { this.deal_title = deal_title; }

    public String getTags() { return tags; }
    public void setTags(String tags) { this.tags = tags; }

    public String getContent() { return content; }
    public void setContent(String content) { this.content = content; }

    public String getSalary() { return salary; }
    public void setSalary(String salary) { this.salary = salary; }

    public String getExperience() { return experience; }
    public void setExperience(String experience) { this.experience = experience; }

    public String getComp_name() { return comp_name; }
    public void setComp_name(String comp_name) { this.comp_name = comp_name; }

    public String getComp_url() { return comp_url; }
    public void setComp_url(String comp_url) { this.comp_url = comp_url; }

    public String getAddr() { return addr; }
    public void setAddr(String addr) { this.addr = addr; }

    public String getComp_scale() { return comp_scale; }
    public void setComp_scale(String comp_scale) { this.comp_scale = comp_scale; }

    public String getComp_field() { return comp_field; }
    public void setComp_field(String comp_field) { this.comp_field = comp_field; }

    public String getFinancing() { return financing; }
    public void setFinancing(String financing) { this.financing = financing; }

    public String getComp_vision() { return comp_vision; }
    public void setComp_vision(String comp_vision) { this.comp_vision = comp_vision; }

    public String getCreated_time() { return created_time; }
    public void setCreated_time(String created_time) { this.created_time = created_time; }

    public String getExtractTime() { return extractTime; }
    public void setExtractTime(String extractTime) { this.extractTime = extractTime; }

    public int getHistory() { return history; }
    public void setHistory(int history) { this.history = history; }

    public String getRelative_osps() { return relative_osps; }
    public void setRelative_osps(String relative_osps) { this.relative_osps = relative_osps; }

    public int getReply_num() { return reply_num; }
    public void setReply_num(int reply_num) { this.reply_num = reply_num; }

    public int getView_num() { return view_num; }
    public void setView_num(int view_num) { this.view_num = view_num; }

    public int getVote_num() { return vote_num; }
    public void setVote_num(int vote_num) { this.vote_num = vote_num; }

    public int getCollection_num() { return collection_num; }
    public void setCollection_num(int collection_num) { this.collection_num = collection_num; }

    public String getAbstractString() { return abstractString; }
    public void setAbstractString(String abstractString) { this.abstractString = abstractString; }

    public String getType() { return type; }
    public void setType(String type) { this.type = type; }

    public String getCrawled_time() { return crawled_time; }
    public void setCrawled_time(String crawled_time) { this.crawled_time = crawled_time; }

    public String getCategory() { return category; }
    public void setCategory(String category) { this.category = category; }

    public String getSource() { return source; }
    public void setSource(String source) { this.source = source; }

    public String getLast_time() { return last_time; }
    public void setLast_time(String last_time) { this.last_time = last_time; }

    public String getUrl_md5() { return url_md5; }
    public void setUrl_md5(String url_md5) { this.url_md5 = url_md5; }

}
|
|
@ -0,0 +1,209 @@
|
|||
package org.ossean.transfertknowledgeandtagmatch.model;
|
||||
|
||||
/**
 * Mutable data holder for one post ("memo") record.
 *
 * <p>Property names keep their underscore form (for example {@code url_md5}); the accessor
 * names are derived from them and must not be renamed.
 * NOTE(review): the names presumably mirror the columns of the relative_memos table - confirm
 * against the schema.
 */
public class RelativeMemo {

    private int id;
    private int osp_id;
    private String subject;
    private String content;
    private String author;
    private int replies_num;
    private int lock;
    private int sticky;
    private String created_time;
    private String updated_time;
    private String url;
    private int view_num_crawled;
    private int vote_up_num;
    private int collection_num;
    private String abstractText;
    private String memo_type;
    private String source;
    private String category;
    private int view_num_trustie;
    private int author_id;
    private int parent_id;
    private int last_reply_id;
    private int is_quote;
    private String username;
    private String userhomeurl;
    private String crawled_time;
    private String author_url;
    private String url_md5;
    private String tags;

    public int getId() { return id; }
    public void setId(int id) { this.id = id; }

    public int getOsp_id() { return osp_id; }
    public void setOsp_id(int osp_id) { this.osp_id = osp_id; }

    public String getSubject() { return subject; }
    public void setSubject(String subject) { this.subject = subject; }

    public String getContent() { return content; }
    public void setContent(String content) { this.content = content; }

    public String getAuthor() { return author; }
    public void setAuthor(String author) { this.author = author; }

    public int getReplies_num() { return replies_num; }
    public void setReplies_num(int replies_num) { this.replies_num = replies_num; }

    public int getLock() { return lock; }
    public void setLock(int lock) { this.lock = lock; }

    public int getSticky() { return sticky; }
    public void setSticky(int sticky) { this.sticky = sticky; }

    public String getCreated_time() { return created_time; }
    public void setCreated_time(String created_time) { this.created_time = created_time; }

    public String getUpdated_time() { return updated_time; }
    public void setUpdated_time(String updated_time) { this.updated_time = updated_time; }

    public String getUrl() { return url; }
    public void setUrl(String url) { this.url = url; }

    public int getView_num_crawled() { return view_num_crawled; }
    public void setView_num_crawled(int view_num_crawled) { this.view_num_crawled = view_num_crawled; }

    public int getVote_up_num() { return vote_up_num; }
    public void setVote_up_num(int vote_up_num) { this.vote_up_num = vote_up_num; }

    public int getCollection_num() { return collection_num; }
    public void setCollection_num(int collection_num) { this.collection_num = collection_num; }

    public String getAbstractText() { return abstractText; }
    public void setAbstractText(String abstractText) { this.abstractText = abstractText; }

    public String getMemo_type() { return memo_type; }
    public void setMemo_type(String memo_type) { this.memo_type = memo_type; }

    public String getSource() { return source; }
    public void setSource(String source) { this.source = source; }

    public String getCategory() { return category; }
    public void setCategory(String category) { this.category = category; }

    public int getView_num_trustie() { return view_num_trustie; }
    public void setView_num_trustie(int view_num_trustie) { this.view_num_trustie = view_num_trustie; }

    public int getAuthor_id() { return author_id; }
    public void setAuthor_id(int author_id) { this.author_id = author_id; }

    public int getParent_id() { return parent_id; }
    public void setParent_id(int parent_id) { this.parent_id = parent_id; }

    public int getLast_reply_id() { return last_reply_id; }
    public void setLast_reply_id(int last_reply_id) { this.last_reply_id = last_reply_id; }

    public int getIs_quote() { return is_quote; }
    public void setIs_quote(int is_quote) { this.is_quote = is_quote; }

    public String getUsername() { return username; }
    public void setUsername(String username) { this.username = username; }

    public String getUserhomeurl() { return userhomeurl; }
    public void setUserhomeurl(String userhomeurl) { this.userhomeurl = userhomeurl; }

    public String getCrawled_time() { return crawled_time; }
    public void setCrawled_time(String crawled_time) { this.crawled_time = crawled_time; }

    public String getAuthor_url() { return author_url; }
    public void setAuthor_url(String author_url) { this.author_url = author_url; }

    public String getUrl_md5() { return url_md5; }
    public void setUrl_md5(String url_md5) { this.url_md5 = url_md5; }

    public String getTags() { return tags; }
    public void setTags(String tags) { this.tags = tags; }

}
|
|
@ -0,0 +1,306 @@
|
|||
package org.ossean.transfertknowledgeandtagmatch.model;
|
||||
|
||||
|
||||
public class TKnowledge {
|
||||
private int id = 0;
|
||||
private String title=null;
|
||||
private String author =null;
|
||||
private String content=null;
|
||||
private String tags = null;
|
||||
private int replyNum=0;
|
||||
private int viewNum=0;
|
||||
private int voteNum=0;
|
||||
private int collectionNum=0;
|
||||
private String strAbstract=null;
|
||||
private String type = null;
|
||||
private String crawledTime =null;
|
||||
private String url = null;
|
||||
private String createdTime = null;
|
||||
private String category=null;
|
||||
private String source =null;
|
||||
private String authorUrl=null;
|
||||
private String lastTime = null;
|
||||
private String urlMd5 =null;
|
||||
/**
 * Creates a fully populated instance by copying every argument into the
 * field of the same name. No argument is validated or transformed.
 *
 * @param id            initial value of {@code id}
 * @param title         initial value of {@code title}
 * @param author        initial value of {@code author}
 * @param content       initial value of {@code content}
 * @param tags          initial value of {@code tags}
 * @param replyNum      initial value of {@code replyNum}
 * @param viewNum       initial value of {@code viewNum}
 * @param voteNum       initial value of {@code voteNum}
 * @param collectionNum initial value of {@code collectionNum}
 * @param strAbstract   initial value of {@code strAbstract}
 * @param type          initial value of {@code type}
 * @param crawledTime   initial value of {@code crawledTime}
 * @param url           initial value of {@code url}
 * @param createdTime   initial value of {@code createdTime}
 * @param category      initial value of {@code category}
 * @param source        initial value of {@code source}
 * @param authorUrl     initial value of {@code authorUrl}
 * @param lastTime      initial value of {@code lastTime}
 * @param urlMd5        initial value of {@code urlMd5}
 */
public TKnowledge(int id, String title, String author, String content,
        String tags, int replyNum, int viewNum, int voteNum,
        int collectionNum, String strAbstract, String type,
        String crawledTime, String url, String createdTime, String category,
        String source, String authorUrl, String lastTime, String urlMd5) {
    // Identity and text.
    this.id = id;
    this.title = title;
    this.author = author;
    this.content = content;
    this.tags = tags;

    // Counters.
    this.replyNum = replyNum;
    this.viewNum = viewNum;
    this.voteNum = voteNum;
    this.collectionNum = collectionNum;

    // Remaining string attributes.
    this.strAbstract = strAbstract;
    this.type = type;
    this.crawledTime = crawledTime;
    this.url = url;
    this.createdTime = createdTime;
    this.category = category;
    this.source = source;
    this.authorUrl = authorUrl;
    this.lastTime = lastTime;
    this.urlMd5 = urlMd5;

    // Quote characters in title, content and author are stored as given;
    // no escaping is applied in this constructor.
}
|
||||
/**
|
||||
* @return the id
|
||||
*/
|
||||
public int getId() {
|
||||
return id;
|
||||
}
|
||||
/**
|
||||
* @return the title
|
||||
*/
|
||||
public String getTitle() {
|
||||
return title;
|
||||
}
|
||||
/**
|
||||
* @return the author
|
||||
*/
|
||||
public String getAuthor() {
|
||||
return author;
|
||||
}
|
||||
/**
|
||||
* @return the content
|
||||
*/
|
||||
public String getContent() {
|
||||
return content;
|
||||
}
|
||||
/**
|
||||
* @return the tags
|
||||
*/
|
||||
public String getTags() {
|
||||
return tags;
|
||||
}
|
||||
/**
|
||||
* @return the replyNum
|
||||
*/
|
||||
public int getReplyNum() {
|
||||
return replyNum;
|
||||
}
|
||||
/**
|
||||
* @return the viewNum
|
||||
*/
|
||||
public int getViewNum() {
|
||||
return viewNum;
|
||||
}
|
||||
/**
|
||||
* @return the voteNum
|
||||
*/
|
||||
public int getVoteNum() {
|
||||
return voteNum;
|
||||
}
|
||||
/**
|
||||
* @return the collectionNum
|
||||
*/
|
||||
public int getCollectionNum() {
|
||||
return collectionNum;
|
||||
}
|
||||
/**
|
||||
* @return the strAbstract
|
||||
*/
|
||||
public String getStrAbstract() {
|
||||
return strAbstract;
|
||||
}
|
||||
/**
|
||||
* @return the type
|
||||
*/
|
||||
public String getType() {
|
||||
return type;
|
||||
}
|
||||
/**
|
||||
* @return the crawledTime
|
||||
*/
|
||||
public String getCrawledTime() {
|
||||
return crawledTime;
|
||||
}
|
||||
/**
|
||||
* @return the url
|
||||
*/
|
||||
public String getUrl() {
|
||||
return url;
|
||||
}
|
||||
/**
|
||||
* @return the createdTime
|
||||
*/
|
||||
public String getCreatedTime() {
|
||||
return createdTime;
|
||||
}
|
||||
/**
|
||||
* @return the category
|
||||
*/
|
||||
public String getCategory() {
|
||||
return category;
|
||||
}
|
||||
/**
|
||||
* @return the source
|
||||
*/
|
||||
public String getSource() {
|
||||
return source;
|
||||
}
|
||||
/**
|
||||
* @return the authorUrl
|
||||
*/
|
||||
public String getAuthorUrl() {
|
||||
return authorUrl;
|
||||
}
|
||||
/**
|
||||
* @return the lastTime
|
||||
*/
|
||||
public String getLastTime() {
|
||||
return lastTime;
|
||||
}
|
||||
/**
|
||||
* @return the urlMd5
|
||||
*/
|
||||
public String getUrlMd5() {
|
||||
return urlMd5;
|
||||
}
|
||||
/**
|
||||
* @param id the id to set
|
||||
*/
|
||||
public void setId(int id) {
|
||||
this.id = id;
|
||||
}
|
||||
/**
|
||||
* @param title the title to set
|
||||
*/
|
||||
public void setTitle(String title) {
|
||||
this.title = title;
|
||||
}
|
||||
/**
|
||||
* @param author the author to set
|
||||
*/
|
||||
public void setAuthor(String author) {
|
||||
this.author = author;
|
||||
}
|
||||
/**
|
||||
* @param content the content to set
|
||||
*/
|
||||
public void setContent(String content) {
|
||||
this.content = content;
|
||||
}
|
||||
/**
|
||||
* @param tags the tags to set
|
||||
*/
|
||||
public void setTags(String tags) {
|
||||
this.tags = tags;
|
||||
}
|
||||
/**
|
||||
* @param replyNum the replyNum to set
|
||||
*/
|
||||
public void setReplyNum(int replyNum) {
|
||||
this.replyNum = replyNum;
|
||||
}
|
||||
/**
|
||||
* @param viewNum the viewNum to set
|
||||
*/
|
||||
public void setViewNum(int viewNum) {
|
||||
this.viewNum = viewNum;
|
||||
}
|
||||
/**
|
||||
* @param voteNum the voteNum to set
|
||||
*/
|
||||
public void setVoteNum(int voteNum) {
|
||||
this.voteNum = voteNum;
|
||||
}
|
||||
/**
|
||||
* @param collectionNum the collectionNum to set
|
||||
*/
|
||||
public void setCollectionNum(int collectionNum) {
|
||||
this.collectionNum = collectionNum;
|
||||
}
|
||||
/**
|
||||
* @param strAbstract the strAbstract to set
|
||||
*/
|
||||
public void setStrAbstract(String strAbstract) {
|
||||
this.strAbstract = strAbstract;
|
||||
}
|
||||
/**
|
||||
* @param type the type to set
|
||||
*/
|
||||
public void setType(String type) {
|
||||
this.type = type;
|
||||
}
|
||||
/**
|
||||
* @param crawledTime the crawledTime to set
|
||||
*/
|
||||
public void setCrawledTime(String crawledTime) {
|
||||
this.crawledTime = crawledTime;
|
||||
}
|
||||
/**
|
||||
* @param url the url to set
|
||||
*/
|
||||
public void setUrl(String url) {
|
||||
this.url = url;
|
||||
}
|
||||
/**
|
||||
* @param createdTime the createdTime to set
|
||||
*/
|
||||
public void setCreatedTime(String createdTime) {
|
||||
this.createdTime = createdTime;
|
||||
}
|
||||
/**
|
||||
* @param category the category to set
|
||||
*/
|
||||
public void setCategory(String category) {
|
||||
this.category = category;
|
||||
}
|
||||
/**
|
||||
* @param source the source to set
|
||||
*/
|
||||
public void setSource(String source) {
|
||||
this.source = source;
|
||||
}
|
||||
/**
|
||||
* @param authorUrl the authorUrl to set
|
||||
*/
|
||||
public void setAuthorUrl(String authorUrl) {
|
||||
this.authorUrl = authorUrl;
|
||||
}
|
||||
/**
|
||||
* @param lastTime the lastTime to set
|
||||
*/
|
||||
public void setLastTime(String lastTime) {
|
||||
this.lastTime = lastTime;
|
||||
}
|
||||
/**
|
||||
* @param urlMd5 the urlMd5 to set
|
||||
*/
|
||||
public void setUrlMd5(String urlMd5) {
|
||||
this.urlMd5 = urlMd5;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
package cn.edu.zhanyun.gather_posts;
|
||||
|
||||
import junit.framework.Test;
|
||||
import junit.framework.TestCase;
|
||||
import junit.framework.TestSuite;
|
||||
|
||||
/**
|
||||
* Unit test for simple App.
|
||||
*/
|
||||
public class AppTest
|
||||
extends TestCase
|
||||
{
|
||||
/**
|
||||
* Create the test case
|
||||
*
|
||||
* @param testName name of the test case
|
||||
*/
|
||||
public AppTest( String testName )
|
||||
{
|
||||
super( testName );
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the suite of tests being tested
|
||||
*/
|
||||
public static Test suite()
|
||||
{
|
||||
return new TestSuite( AppTest.class );
|
||||
}
|
||||
|
||||
/**
|
||||
* Rigourous Test :-)
|
||||
*/
|
||||
public void testApp()
|
||||
{
|
||||
assertTrue( true );
|
||||
}
|
||||
}
|
|
@ -31,7 +31,7 @@ public class PointerDAO {
|
|||
try {
|
||||
pst.close();
|
||||
} catch (SQLException e) {
|
||||
logger.info("error:" + e);
|
||||
logger.info("sql error! " + e);
|
||||
}
|
||||
}
|
||||
return pointer;
|
||||
|
@ -75,7 +75,7 @@ public class PointerDAO {
|
|||
try {
|
||||
pst.close();
|
||||
} catch (SQLException e) {
|
||||
logger.info("sql error: ", e);
|
||||
logger.info("sql error锛<EFBFBD>", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue