This commit is contained in:
wrzzx 2017-02-16 11:08:00 +08:00
parent e682fcf652
commit 614027dec0
7 changed files with 1135 additions and 348 deletions

View File

@ -2,51 +2,11 @@
<project version="4">
<component name="ChangeListManager">
<list default="true" id="f59140c6-b612-48dd-baf7-0162af9922f0" name="Default" comment="">
<change type="DELETED" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__commons_io_commons_io_1_3_2.xml" afterPath="" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/compiler.xml" afterPath="$PROJECT_DIR$/.idea/compiler.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/fetchnetworks.iml" afterPath="$PROJECT_DIR$/fetchnetworks.iml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/compiler.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/compiler.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__aopalliance_aopalliance_1_0.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__aopalliance_aopalliance_1_0.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__com_alibaba_fastjson_1_1_37.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__com_alibaba_fastjson_1_1_37.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__com_google_guava_guava_15_0.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__com_google_guava_guava_15_0.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__com_jayway_jsonpath_json_path_0_8_1.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__com_jayway_jsonpath_json_path_0_8_1.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__commons_codec_commons_codec_1_6.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__commons_codec_commons_codec_1_6.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__commons_collections_commons_collections_3_2_1.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__commons_collections_commons_collections_3_2_1.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__commons_dbcp_commons_dbcp_1_4.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__commons_dbcp_commons_dbcp_1_4.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__commons_lang_commons_lang_2_6.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__commons_lang_commons_lang_2_6.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__commons_logging_commons_logging_1_1_3.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__commons_logging_commons_logging_1_1_3.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__commons_pool_commons_pool_1_5_4.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__commons_pool_commons_pool_1_5_4.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__junit_junit_dep_4_10.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__junit_junit_dep_4_10.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__log4j_log4j_1_2_17.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__log4j_log4j_1_2_17.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__mysql_mysql_connector_java_5_1_18.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__mysql_mysql_connector_java_5_1_18.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__net_minidev_json_smart_1_1_1.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__net_minidev_json_smart_1_1_1.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_apache_commons_commons_lang3_3_1.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_apache_commons_commons_lang3_3_1.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_apache_httpcomponents_httpclient_4_3_3.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_apache_httpcomponents_httpclient_4_3_3.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_apache_httpcomponents_httpcore_4_3_2.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_apache_httpcomponents_httpcore_4_3_2.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_assertj_assertj_core_1_5_0.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_assertj_assertj_core_1_5_0.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_hamcrest_hamcrest_core_1_1.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_hamcrest_hamcrest_core_1_1.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_jsoup_jsoup_1_7_2.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_jsoup_jsoup_1_7_2.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_mybatis_mybatis_3_1_1.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_mybatis_mybatis_3_1_1.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_mybatis_mybatis_spring_1_1_1.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_mybatis_mybatis_spring_1_1_1.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_slf4j_slf4j_api_1_7_6.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_slf4j_slf4j_api_1_7_6.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_slf4j_slf4j_log4j12_1_7_6.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_slf4j_slf4j_log4j12_1_7_6.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_springframework_spring_aop_3_1_1_RELEASE.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_springframework_spring_aop_3_1_1_RELEASE.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_springframework_spring_asm_3_1_1_RELEASE.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_springframework_spring_asm_3_1_1_RELEASE.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_springframework_spring_beans_3_1_1_RELEASE.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_springframework_spring_beans_3_1_1_RELEASE.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_springframework_spring_context_3_1_1_RELEASE.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_springframework_spring_context_3_1_1_RELEASE.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_springframework_spring_core_3_1_1_RELEASE.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_springframework_spring_core_3_1_1_RELEASE.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_springframework_spring_expression_3_1_1_RELEASE.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_springframework_spring_expression_3_1_1_RELEASE.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_springframework_spring_jdbc_3_1_1_RELEASE.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_springframework_spring_jdbc_3_1_1_RELEASE.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_springframework_spring_tx_3_1_1_RELEASE.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__org_springframework_spring_tx_3_1_1_RELEASE.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__redis_clients_jedis_2_0_0.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__redis_clients_jedis_2_0_0.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__us_codecraft_webmagic_core_0_5_2.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__us_codecraft_webmagic_core_0_5_2.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__us_codecraft_webmagic_extension_0_5_2.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__us_codecraft_webmagic_extension_0_5_2.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__us_codecraft_xsoup_0_2_4.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/libraries/Maven__us_codecraft_xsoup_0_2_4.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/misc.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/misc.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/workspace.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/.idea/workspace.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/osseanextractor.iml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/osseanextractor.iml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/resources/spring/applicationContext-myBatis.xml" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/resources/spring/applicationContext-myBatis.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/src/main/java/net/trustie/dao/OpenHubProject_Dao.java" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/src/main/java/net/trustie/dao/OpenHubProject_Dao.java" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/src/main/java/net/trustie/model/openhub_Model.java" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/src/main/java/net/trustie/model/openhub_Model.java" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/../../../new_osseanextractor/src/main/java/net/trustie/one/ExtractThread.java" afterPath="$PROJECT_DIR$/../../../new_osseanextractor/src/main/java/net/trustie/one/ExtractThread.java" />
</list>
<ignored path="$PROJECT_DIR$/target/" />
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
@ -86,9 +46,11 @@
<file leaf-file-name="ListHtmlProcessor.java" pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/src/main/java/net/trustie/webmagic/one/ListHtmlProcessor.java">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="102">
<caret line="74" column="41" lean-forward="true" selection-start-line="74" selection-start-column="41" selection-end-line="74" selection-end-column="41" />
<folding />
<state relative-caret-position="238">
<caret line="44" column="22" lean-forward="true" selection-start-line="44" selection-start-column="22" selection-end-line="44" selection-end-column="22" />
<folding>
<element signature="e#4827#4835#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
@ -107,6 +69,7 @@
<option name="CHANGED_PATHS">
<list>
<option value="$PROJECT_DIR$/pom.xml" />
<option value="$PROJECT_DIR$/src/main/java/net/trustie/webmagic/one/ListHtmlProcessor.java" />
</list>
</option>
</component>
@ -181,7 +144,7 @@
<foldersAlwaysOnTop value="true" />
</navigator>
<panes>
<pane id="Scope" />
<pane id="PackagesPane" />
<pane id="ProjectPane">
<subPane>
<PATH>
@ -194,119 +157,9 @@
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
</PATH>
<PATH>
<PATH_ELEMENT>
<option name="myItemId" value="fetchnetworks" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="fetch_networks" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="src" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
</PATH>
<PATH>
<PATH_ELEMENT>
<option name="myItemId" value="fetchnetworks" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="fetch_networks" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="src" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="main" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
</PATH>
<PATH>
<PATH_ELEMENT>
<option name="myItemId" value="fetchnetworks" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="fetch_networks" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="src" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="main" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="java" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="webmagic" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
</PATH>
<PATH>
<PATH_ELEMENT>
<option name="myItemId" value="fetchnetworks" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="fetch_networks" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="src" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="main" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="java" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="webmagic" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="one" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
</PATH>
<PATH>
<PATH_ELEMENT>
<option name="myItemId" value="fetchnetworks" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="fetch_networks" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="src" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="main" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="java" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
</PATH>
</subPane>
</pane>
<pane id="PackagesPane" />
<pane id="Scope" />
<pane id="Scratches" />
</panes>
</component>
@ -319,7 +172,7 @@
<property name="js.eslint.eslintPackage" value="" />
<property name="js-jscs-nodeInterpreter" value="C:\Program Files (x86)\nodejs\node.exe" />
</component>
<component name="RunManager" selected="Application.list_openhub">
<component name="RunManager" selected="Application.detail_openhub">
<configuration default="true" type="#org.jetbrains.idea.devkit.run.PluginConfigurationType" factoryName="Plugin">
<module name="" />
<option name="VM_PARAMETERS" value="-Xmx512m -Xms256m -XX:MaxPermSize=250m -ea" />
@ -715,9 +568,9 @@
<envs />
<method />
</configuration>
<configuration default="false" name="list_openhub" type="Application" factoryName="Application">
<configuration default="false" name="detail_openhub" type="Application" factoryName="Application">
<extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea" />
<option name="MAIN_CLASS_NAME" value="net.trustie.webmagic.one.ListHtmlCrawler" />
<option name="MAIN_CLASS_NAME" value="net.trustie.webmagic.one.DetailHtmlCrawler" />
<option name="VM_PARAMETERS" value="" />
<option name="PROGRAM_PARAMETERS" value="openhub" />
<option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$" />
@ -747,7 +600,7 @@
</configuration>
<list size="3">
<item index="0" class="java.lang.String" itemvalue="Application.list_stackoverflow" />
<item index="1" class="java.lang.String" itemvalue="Application.list_openhub" />
<item index="1" class="java.lang.String" itemvalue="Application.detail_openhub" />
<item index="2" class="java.lang.String" itemvalue="Application.detail_stackoverflow" />
</list>
</component>
@ -770,11 +623,12 @@
<workItem from="1486632270490" duration="2139000" />
<workItem from="1486695485959" duration="431000" />
<workItem from="1486708709471" duration="13000" />
<workItem from="1487174066127" duration="4746000" />
</task>
<servers />
</component>
<component name="TimeTrackingManager">
<option name="totallyTimeSpent" value="8910000" />
<option name="totallyTimeSpent" value="13656000" />
</component>
<component name="TodoView">
<todo-panel id="selected-file">
@ -787,36 +641,36 @@
</component>
<component name="ToolWindowManager">
<frame x="-8" y="-8" width="1382" height="744" extended-state="6" />
<editor active="true" />
<editor active="false" />
<layout>
<window_info id="Palette" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" />
<window_info id="Nl-Palette" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Messages" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.32761577" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
<window_info id="Palette&#9;" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
<window_info id="Image Layers" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Capture Analysis" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
<window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="7" side_tool="true" content_ui="tabs" />
<window_info id="Maven Projects" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.41509435" sideWeight="0.5" order="11" side_tool="false" content_ui="tabs" />
<window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.32933104" sideWeight="0.5" order="8" side_tool="false" content_ui="tabs" />
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.24356775" sideWeight="0.5" order="11" side_tool="false" content_ui="tabs" />
<window_info id="Properties" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
<window_info id="Spring" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="9" side_tool="false" content_ui="tabs" />
<window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="10" side_tool="false" content_ui="tabs" />
<window_info id="Capture Tool" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Designer" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.22995462" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
<window_info id="Project" active="true" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.23373677" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
<window_info id="Database" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="UI Designer" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Theme Preview" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="true" content_ui="tabs" />
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
<window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
<window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
<window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" />
<window_info id="Messages" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.32876712" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
<window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" />
<window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
</layout>
@ -848,10 +702,30 @@
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="0">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<folding>
<element signature="e#4827#4835#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/java/net/trustie/webmagic/one/ListHtmlCrawler.java">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="442">
<caret line="72" column="0" lean-forward="false" selection-start-line="72" selection-start-column="0" selection-end-line="72" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/java/net/trustie/webmagic/one/ListHtmlProcessor.java">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="0">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<folding>
<element signature="e#4827#4835#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/java/net/trustie/webmagic/one/ListHtmlCrawler.java">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="0">
@ -958,9 +832,11 @@
</entry>
<entry file="file://$PROJECT_DIR$/src/main/java/net/trustie/webmagic/one/ListHtmlProcessor.java">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="102">
<caret line="74" column="41" lean-forward="true" selection-start-line="74" selection-start-column="41" selection-end-line="74" selection-end-column="41" />
<folding />
<state relative-caret-position="238">
<caret line="44" column="22" lean-forward="true" selection-start-line="44" selection-start-column="22" selection-end-line="44" selection-end-column="22" />
<folding>
<element signature="e#4827#4835#0" expanded="true" />
</folding>
</state>
</provider>
</entry>

File diff suppressed because it is too large Load Diff

View File

@ -25,7 +25,7 @@ public interface OpenHubProject_Dao {
@Insert("insert into openhub_project_test"
@Insert("insert into openhub_project"
+"(`url_md5`,`source`,`name`,`url`,`user_num`,`description`,`tags`,`organization`,"
+ "`license`,`similar_project`,`manager`,"
+ "`language`,`active_degree`,"

View File

@ -49,12 +49,9 @@ public class openhub_Model implements AfterExtractor, ValidateExtractor {
// "| //*[@id='projects_show_page']/div[2]/div[3]/div[2]/div/*/*/a/regex(\"<a href='(.*?)'>.*Homepage</a>\",1)/@href" //jquery multi links
// +"| //*[@id='projects_show_page']/div[2]/div[4]/div[2]/div/*/*/a/regex(\"<a href='(.*?)'>.*Homepage</a>\",1)/@href "
//)
@ExtractBy(value="//a/regex(\"<a.*href=\"(.*?)\".*Homepage.*</a>\",1) " )
/*+
@ExtractBy(value="//a/regex(\"<a.*href=\"(.*?)\".*Homepage.*</a>\",1) " +
" | //a/regex(\"<a.*href=\"(.*?)\".*Download.*</a>\",1)" +
" | //a/regex(\"<a.*href=\"(.*?)\".*Other.*</a>\",1)")
*/
private List<String> homepages = new ArrayList<String>();
private static String homepage ="";
@ -426,12 +423,9 @@ private List<String> homepages = new ArrayList<String>();
this.rateLevel = this.rateLevel.substring(0, this.rateLevel.indexOf("/"));
if (StringHandler.isAtLeastOneBlank(this.name, this.activity
/*
,this.description
*/
/*
,this.
description
,getHomepage()
*/
/* ,
* this.licenses
*/)) {
@ -1280,7 +1274,7 @@ private List<String> homepages = new ArrayList<String>();
public String getValidHomepage(String homePage){
String result = homePage;
//目前的Homepage抽取规则/开头的如"/p/jQuery"均是多链接的情形
//目前的Homepage抽取规则/开头的如"/p/jQuery"均是多链接的情形
if(!homePage.equals("") && homePage.startsWith("/")){
//获取Homepage列表中的Homepage
result = new ExtractMutilLink4Openhub().extractLinks(homePage);

View File

@ -87,14 +87,19 @@ public class ExtractThread implements Runnable{
for( RawPage page : pages){
try{
long startTime=System.currentTimeMillis(); //获取开始时间
result = extractor.extract(page,pageModel);
long endTime=System.currentTimeMillis(); //获取结束时间
System.out.println("页面抽取时间: "+(endTime-startTime)+"ms");
startTime=System.currentTimeMillis(); //获取开始时间
//持久化 更新抽取历史
saveResult(site,result);
endTime=System.currentTimeMillis(); //获取结束时间
System.out.println("结果保存时间: "+(endTime-startTime)+"ms");
}catch (Exception e){
e.printStackTrace();
pageErrorOutPut.returnErrorPage(page, e);
// pageErrorOutPut.returnErrorPage(page, e);错误页面
}
}
updateLastId(site,lastId + pages.size());

View File

@ -0,0 +1,61 @@
package net.trustie.core;
import net.trustie.utils.ExtractMutilLink4Openhub;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.model.OOSpider;
import us.codecraft.webmagic.model.annotation.ExtractBy;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Selectable;
import java.util.regex.Matcher;
import java.util.List;
/**
 * WebMagic {@link PageProcessor} that collects the link targets found on an
 * OpenHub project "links" page (for example {@code /p/jQuery/links}).
 *
 * <p>Despite the class name, every URL used here points at
 * {@code www.openhub.net}, not GitHub.
 *
 * <p>NOTE(review): {@link #process(Page)} is presumably invoked on the Spider's
 * worker threads while the result is read by the caller after {@code run()}
 * returns; confirm that WebMagic guarantees visibility of the collected links.
 */
public class GithubRepoPageProcessor implements PageProcessor {

    /** Site root prepended to site-relative paths such as {@code /p/jQuery/links}. */
    private static final String OPENHUB_BASE_URL = "https://www.openhub.net";

    /** XPath selecting the anchor elements whose targets are collected. */
    private static final String LINK_ANCHOR_XPATH =
            "//div[@class='links center-block']/div[1]/div[2]/div/h5/a";

    private Site site = Site.me().setRetryTimes(3).setSleepTime(1000);

    /**
     * Links collected by the most recent {@link #process(Page)} call. Starts out
     * empty (never null) so callers can safely query it even when no page was
     * processed, e.g. because the download failed.
     */
    private List<String> homepage = java.util.Collections.<String>emptyList();

    /**
     * Stores the raw {@code href} selection under the result field "links" and
     * remembers the extracted link list for {@link #getHomepage()}.
     *
     * @param page the downloaded page handed over by the spider
     */
    @Override
    public void process(Page page) {
        page.putField("links", page.getHtml().xpath(LINK_ANCHOR_XPATH + "/@href"));
        homepage = page.getHtml().xpath(LINK_ANCHOR_XPATH).links().all();
    }

    @Override
    public Site getSite() {
        return site;
    }

    /**
     * @return the links collected by the last processed page; empty if no page
     *         has been processed yet
     */
    public List<String> getHomepage() {
        return this.homepage;
    }

    /**
     * Crawls the given OpenHub links page and returns every collected link,
     * joined with {@code ';'}.
     *
     * @param url absolute URL, or a site-relative path starting with {@code '/'}
     *            that is resolved against {@code https://www.openhub.net}
     * @return the collected links separated by {@code ';'}, or an empty string
     *         when {@code url} is blank or no link was found
     */
    public static String extractHomepage(String url) {
        if (url == null || url.trim().isEmpty()) {
            return "";
        }
        GithubRepoPageProcessor processor = new GithubRepoPageProcessor();
        // Crawl the page the caller asked for rather than a fixed sample project.
        Spider.create(processor).addUrl(toAbsoluteUrl(url)).thread(5).run();

        StringBuilder joined = new StringBuilder();
        for (String link : processor.getHomepage()) {
            if (joined.length() > 0) {
                joined.append(';');
            }
            joined.append(link);
        }
        return joined.toString();
    }

    /** Resolves a site-relative path against the OpenHub root; other values pass through unchanged. */
    private static String toAbsoluteUrl(String url) {
        return url.startsWith("/") ? OPENHUB_BASE_URL + url : url;
    }

    /** Manual smoke test: crawls the jQuery links page and prints what was extracted. */
    public static void main(String[] args) {
        GithubRepoPageProcessor githubRepoPageProcessor = new GithubRepoPageProcessor();
        String url = "/p/jQuery/links";
        Spider.create(githubRepoPageProcessor).addUrl(toAbsoluteUrl(url)).thread(5).run();

        // Guard against an empty result so a failed download does not crash the demo.
        List<String> found = githubRepoPageProcessor.getHomepage();
        if (!found.isEmpty()) {
            System.out.print(found.get(0));
        }

        String result = new ExtractMutilLink4Openhub().extractLink("/p/jQuery/links");
        System.out.println(result);
    }
}

View File

@ -0,0 +1,19 @@
package net.trustie.core;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Utility for checking whether a string looks like an absolute HTTP or HTTPS URL.
 *
 * <p>Created by LouAnt on 2017/2/7.
 */
public class URLCheck {

    /**
     * Pattern for {@code http://} or {@code https://} URLs, compiled once.
     *
     * <ul>
     *   <li>host: a dotted name ending in a 2-6 letter suffix, or four dotted
     *       groups of 1-3 digits (the digit groups are not range-checked);</li>
     *   <li>port: at most one {@code :digits} section of 1-5 digits, so ports
     *       up to 65535 are accepted and repeated sections such as
     *       {@code :80:81} are rejected;</li>
     *   <li>path: optional, starting with {@code '/'}. Inside the character
     *       class, {@code /-~} is a range from {@code '/'} to {@code '~'}, so
     *       characters such as {@code '?'} and {@code '='} are accepted as well.
     *       NOTE(review): possibly unintended, but kept because narrowing it
     *       would reject URLs that currently pass.</li>
     * </ul>
     */
    private static final Pattern URL_PATTERN = Pattern.compile(
            "^([hH][tT]{2}[pP]://|[hH][tT]{2}[pP][sS]://)"
                    + "(([a-zA-Z0-9\\._-]+\\.[a-zA-Z]{2,6})"
                    + "|([0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}))"
                    + "(:[0-9]{1,5})?"
                    + "(/[a-zA-Z0-9\\&%_\\./-~-]*)?",
            Pattern.CASE_INSENSITIVE);

    /**
     * Tells whether the whole of {@code url} matches the HTTP/HTTPS URL pattern.
     *
     * @param url candidate URL; may be {@code null}
     * @return {@code true} if {@code url} is non-null and matches entirely,
     *         {@code false} otherwise
     */
    public static boolean isValidUrl(String url) {
        if (url == null) {
            return false;
        }
        Matcher m = URL_PATTERN.matcher(url);
        return m.matches();
    }

    /** Manual check: prints whether a sample URL is accepted. */
    public static void main(String[] args) {
        String url = "http://www.leniel.net/2013/04/manage-folders-and-files-in-your-aspnet-mvc-project-with-elfinder-net-file-manager.html";
        boolean res = isValidUrl(url);
        System.out.println(res);
    }
}