init
This commit is contained in:
commit
78b627930f
|
@ -0,0 +1,33 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<classpath>
|
||||
<classpathentry excluding="net/trustie/modeltest/SlashdotNewsTest.java" kind="src" output="target/classes" path="src/main/java">
|
||||
<attributes>
|
||||
<attribute name="optional" value="true"/>
|
||||
<attribute name="maven.pomderived" value="true"/>
|
||||
</attributes>
|
||||
</classpathentry>
|
||||
<classpathentry kind="src" output="target/test-classes" path="src/test/java">
|
||||
<attributes>
|
||||
<attribute name="optional" value="true"/>
|
||||
<attribute name="maven.pomderived" value="true"/>
|
||||
</attributes>
|
||||
</classpathentry>
|
||||
<classpathentry kind="src" path="src/main/assembly"/>
|
||||
<classpathentry kind="src" path="resources"/>
|
||||
<classpathentry exported="true" kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
|
||||
<attributes>
|
||||
<attribute name="maven.pomderived" value="true"/>
|
||||
</attributes>
|
||||
</classpathentry>
|
||||
<classpathentry exported="true" kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7">
|
||||
<attributes>
|
||||
<attribute name="maven.pomderived" value="true"/>
|
||||
</attributes>
|
||||
</classpathentry>
|
||||
<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources">
|
||||
<attributes>
|
||||
<attribute name="maven.pomderived" value="true"/>
|
||||
</attributes>
|
||||
</classpathentry>
|
||||
<classpathentry kind="output" path="target/classes"/>
|
||||
</classpath>
|
|
@ -0,0 +1,8 @@
|
|||
/target
|
||||
/.settings
|
||||
/log/*
|
||||
/error
|
||||
/src/main/resources
|
||||
/cursor
|
||||
/src/test/java/net/trustie/modeltest
|
||||
.DS_Store
|
|
@ -0,0 +1 @@
|
|||
osseanextractor
|
|
@ -0,0 +1,32 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="CompilerConfiguration">
|
||||
<resourceExtensions />
|
||||
<wildcardResourcePatterns>
|
||||
<entry name="!?*.java" />
|
||||
<entry name="!?*.form" />
|
||||
<entry name="!?*.class" />
|
||||
<entry name="!?*.groovy" />
|
||||
<entry name="!?*.scala" />
|
||||
<entry name="!?*.flex" />
|
||||
<entry name="!?*.kt" />
|
||||
<entry name="!?*.clj" />
|
||||
<entry name="!?*.aj" />
|
||||
</wildcardResourcePatterns>
|
||||
<annotationProcessing>
|
||||
<profile default="true" name="Default" enabled="false">
|
||||
<processorPath useClasspath="true" />
|
||||
</profile>
|
||||
<profile default="false" name="Annotation profile for osseanextractor" enabled="true">
|
||||
<sourceOutputDir name="target\generated-sources\annotations" />
|
||||
<sourceTestOutputDir name="target\generated-test-sources\test-annotations" />
|
||||
<outputRelativeToContentRoot value="true" />
|
||||
<processorPath useClasspath="true" />
|
||||
<module name="osseanextractor" />
|
||||
</profile>
|
||||
</annotationProcessing>
|
||||
<bytecodeTargetLevel>
|
||||
<module name="osseanextractor" target="1.7" />
|
||||
</bytecodeTargetLevel>
|
||||
</component>
|
||||
</project>
|
|
@ -0,0 +1,3 @@
|
|||
<component name="CopyrightManager">
|
||||
<settings default="" />
|
||||
</component>
|
|
@ -0,0 +1,6 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="Encoding">
|
||||
<file url="file://$PROJECT_DIR$" charset="UTF-8" />
|
||||
</component>
|
||||
</project>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: aopalliance:aopalliance:1.0">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/aopalliance/aopalliance/1.0/aopalliance-1.0.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/aopalliance/aopalliance/1.0/aopalliance-1.0-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/aopalliance/aopalliance/1.0/aopalliance-1.0-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: com.alibaba:fastjson:1.1.37">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/com/alibaba/fastjson/1.1.37/fastjson-1.1.37.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/com/alibaba/fastjson/1.1.37/fastjson-1.1.37-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/com/alibaba/fastjson/1.1.37/fastjson-1.1.37-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: com.google.guava:guava:15.0">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/com/google/guava/guava/15.0/guava-15.0.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/com/google/guava/guava/15.0/guava-15.0-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/com/google/guava/guava/15.0/guava-15.0-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: com.jayway.jsonpath:json-path:0.8.1">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/com/jayway/jsonpath/json-path/0.8.1/json-path-0.8.1.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/com/jayway/jsonpath/json-path/0.8.1/json-path-0.8.1-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/com/jayway/jsonpath/json-path/0.8.1/json-path-0.8.1-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: commons-codec:commons-codec:1.6">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/commons-codec/commons-codec/1.6/commons-codec-1.6.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/commons-codec/commons-codec/1.6/commons-codec-1.6-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/commons-codec/commons-codec/1.6/commons-codec-1.6-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: commons-collections:commons-collections:3.2.1">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/commons-collections/commons-collections/3.2.1/commons-collections-3.2.1.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/commons-collections/commons-collections/3.2.1/commons-collections-3.2.1-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/commons-collections/commons-collections/3.2.1/commons-collections-3.2.1-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: commons-dbcp:commons-dbcp:1.4">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/commons-dbcp/commons-dbcp/1.4/commons-dbcp-1.4.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/commons-dbcp/commons-dbcp/1.4/commons-dbcp-1.4-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/commons-dbcp/commons-dbcp/1.4/commons-dbcp-1.4-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: commons-io:commons-io:1.3.2">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/commons-io/commons-io/1.3.2/commons-io-1.3.2.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/commons-io/commons-io/1.3.2/commons-io-1.3.2-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/commons-io/commons-io/1.3.2/commons-io-1.3.2-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: commons-lang:commons-lang:2.6">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/commons-lang/commons-lang/2.6/commons-lang-2.6.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/commons-lang/commons-lang/2.6/commons-lang-2.6-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/commons-lang/commons-lang/2.6/commons-lang-2.6-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: commons-logging:commons-logging:1.1.3">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/commons-logging/commons-logging/1.1.3/commons-logging-1.1.3.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/commons-logging/commons-logging/1.1.3/commons-logging-1.1.3-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/commons-logging/commons-logging/1.1.3/commons-logging-1.1.3-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: commons-pool:commons-pool:1.5.4">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/commons-pool/commons-pool/1.5.4/commons-pool-1.5.4.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/commons-pool/commons-pool/1.5.4/commons-pool-1.5.4-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/commons-pool/commons-pool/1.5.4/commons-pool-1.5.4-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: junit:junit-dep:4.10">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/junit/junit-dep/4.10/junit-dep-4.10.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/junit/junit-dep/4.10/junit-dep-4.10-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/junit/junit-dep/4.10/junit-dep-4.10-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: log4j:log4j:1.2.17">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/log4j/log4j/1.2.17/log4j-1.2.17.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/log4j/log4j/1.2.17/log4j-1.2.17-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/log4j/log4j/1.2.17/log4j-1.2.17-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: mysql:mysql-connector-java:5.1.18">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/mysql/mysql-connector-java/5.1.18/mysql-connector-java-5.1.18.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/mysql/mysql-connector-java/5.1.18/mysql-connector-java-5.1.18-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/mysql/mysql-connector-java/5.1.18/mysql-connector-java-5.1.18-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: net.minidev:json-smart:1.1.1">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/net/minidev/json-smart/1.1.1/json-smart-1.1.1.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/net/minidev/json-smart/1.1.1/json-smart-1.1.1-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/net/minidev/json-smart/1.1.1/json-smart-1.1.1-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: org.apache.commons:commons-lang3:3.1">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/apache/commons/commons-lang3/3.1/commons-lang3-3.1.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/apache/commons/commons-lang3/3.1/commons-lang3-3.1-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/apache/commons/commons-lang3/3.1/commons-lang3-3.1-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: org.apache.httpcomponents:httpclient:4.3.3">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/apache/httpcomponents/httpclient/4.3.3/httpclient-4.3.3.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/apache/httpcomponents/httpclient/4.3.3/httpclient-4.3.3-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/apache/httpcomponents/httpclient/4.3.3/httpclient-4.3.3-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: org.apache.httpcomponents:httpcore:4.3.2">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/apache/httpcomponents/httpcore/4.3.2/httpcore-4.3.2.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/apache/httpcomponents/httpcore/4.3.2/httpcore-4.3.2-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/apache/httpcomponents/httpcore/4.3.2/httpcore-4.3.2-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: org.assertj:assertj-core:1.5.0">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/assertj/assertj-core/1.5.0/assertj-core-1.5.0.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/assertj/assertj-core/1.5.0/assertj-core-1.5.0-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/assertj/assertj-core/1.5.0/assertj-core-1.5.0-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: org.hamcrest:hamcrest-core:1.1">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/hamcrest/hamcrest-core/1.1/hamcrest-core-1.1.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/hamcrest/hamcrest-core/1.1/hamcrest-core-1.1-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/hamcrest/hamcrest-core/1.1/hamcrest-core-1.1-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: org.jsoup:jsoup:1.7.2">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/jsoup/jsoup/1.7.2/jsoup-1.7.2.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/jsoup/jsoup/1.7.2/jsoup-1.7.2-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/jsoup/jsoup/1.7.2/jsoup-1.7.2-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: org.mybatis:mybatis:3.1.1">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/mybatis/mybatis/3.1.1/mybatis-3.1.1.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/mybatis/mybatis/3.1.1/mybatis-3.1.1-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/mybatis/mybatis/3.1.1/mybatis-3.1.1-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: org.mybatis:mybatis-spring:1.1.1">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/mybatis/mybatis-spring/1.1.1/mybatis-spring-1.1.1.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/mybatis/mybatis-spring/1.1.1/mybatis-spring-1.1.1-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/mybatis/mybatis-spring/1.1.1/mybatis-spring-1.1.1-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: org.slf4j:slf4j-api:1.7.6">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/slf4j/slf4j-api/1.7.6/slf4j-api-1.7.6.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/slf4j/slf4j-api/1.7.6/slf4j-api-1.7.6-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/slf4j/slf4j-api/1.7.6/slf4j-api-1.7.6-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: org.slf4j:slf4j-log4j12:1.7.6">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/slf4j/slf4j-log4j12/1.7.6/slf4j-log4j12-1.7.6.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/slf4j/slf4j-log4j12/1.7.6/slf4j-log4j12-1.7.6-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/slf4j/slf4j-log4j12/1.7.6/slf4j-log4j12-1.7.6-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: org.springframework:spring-aop:3.1.1.RELEASE">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/springframework/spring-aop/3.1.1.RELEASE/spring-aop-3.1.1.RELEASE.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/springframework/spring-aop/3.1.1.RELEASE/spring-aop-3.1.1.RELEASE-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/springframework/spring-aop/3.1.1.RELEASE/spring-aop-3.1.1.RELEASE-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: org.springframework:spring-asm:3.1.1.RELEASE">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/springframework/spring-asm/3.1.1.RELEASE/spring-asm-3.1.1.RELEASE.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/springframework/spring-asm/3.1.1.RELEASE/spring-asm-3.1.1.RELEASE-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/springframework/spring-asm/3.1.1.RELEASE/spring-asm-3.1.1.RELEASE-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: org.springframework:spring-beans:3.1.1.RELEASE">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/springframework/spring-beans/3.1.1.RELEASE/spring-beans-3.1.1.RELEASE.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/springframework/spring-beans/3.1.1.RELEASE/spring-beans-3.1.1.RELEASE-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/springframework/spring-beans/3.1.1.RELEASE/spring-beans-3.1.1.RELEASE-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: org.springframework:spring-context:3.1.1.RELEASE">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/springframework/spring-context/3.1.1.RELEASE/spring-context-3.1.1.RELEASE.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/springframework/spring-context/3.1.1.RELEASE/spring-context-3.1.1.RELEASE-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/springframework/spring-context/3.1.1.RELEASE/spring-context-3.1.1.RELEASE-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: org.springframework:spring-core:3.1.1.RELEASE">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/springframework/spring-core/3.1.1.RELEASE/spring-core-3.1.1.RELEASE.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/springframework/spring-core/3.1.1.RELEASE/spring-core-3.1.1.RELEASE-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/springframework/spring-core/3.1.1.RELEASE/spring-core-3.1.1.RELEASE-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: org.springframework:spring-expression:3.1.1.RELEASE">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/springframework/spring-expression/3.1.1.RELEASE/spring-expression-3.1.1.RELEASE.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/springframework/spring-expression/3.1.1.RELEASE/spring-expression-3.1.1.RELEASE-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/springframework/spring-expression/3.1.1.RELEASE/spring-expression-3.1.1.RELEASE-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: org.springframework:spring-jdbc:3.1.1.RELEASE">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/springframework/spring-jdbc/3.1.1.RELEASE/spring-jdbc-3.1.1.RELEASE.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/springframework/spring-jdbc/3.1.1.RELEASE/spring-jdbc-3.1.1.RELEASE-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/springframework/spring-jdbc/3.1.1.RELEASE/spring-jdbc-3.1.1.RELEASE-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: org.springframework:spring-tx:3.1.1.RELEASE">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/springframework/spring-tx/3.1.1.RELEASE/spring-tx-3.1.1.RELEASE.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/springframework/spring-tx/3.1.1.RELEASE/spring-tx-3.1.1.RELEASE-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/org/springframework/spring-tx/3.1.1.RELEASE/spring-tx-3.1.1.RELEASE-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: redis.clients:jedis:2.0.0">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/redis/clients/jedis/2.0.0/jedis-2.0.0.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/redis/clients/jedis/2.0.0/jedis-2.0.0-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/redis/clients/jedis/2.0.0/jedis-2.0.0-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: us.codecraft:webmagic-core:0.5.2">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/us/codecraft/webmagic-core/0.5.2/webmagic-core-0.5.2.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/us/codecraft/webmagic-core/0.5.2/webmagic-core-0.5.2-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/us/codecraft/webmagic-core/0.5.2/webmagic-core-0.5.2-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: us.codecraft:webmagic-extension:0.5.2">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/us/codecraft/webmagic-extension/0.5.2/webmagic-extension-0.5.2.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/us/codecraft/webmagic-extension/0.5.2/webmagic-extension-0.5.2-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/us/codecraft/webmagic-extension/0.5.2/webmagic-extension-0.5.2-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,13 @@
|
|||
<component name="libraryTable">
|
||||
<library name="Maven: us.codecraft:xsoup:0.2.4">
|
||||
<CLASSES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/us/codecraft/xsoup/0.2.4/xsoup-0.2.4.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/us/codecraft/xsoup/0.2.4/xsoup-0.2.4-javadoc.jar!/" />
|
||||
</JAVADOC>
|
||||
<SOURCES>
|
||||
<root url="jar://$USER_HOME$/.m2/repository/us/codecraft/xsoup/0.2.4/xsoup-0.2.4-sources.jar!/" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -0,0 +1,42 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="EntryPointsManager">
|
||||
<entry_points version="2.0" />
|
||||
</component>
|
||||
<component name="MavenProjectsManager">
|
||||
<option name="originalFiles">
|
||||
<list>
|
||||
<option value="$PROJECT_DIR$/pom.xml" />
|
||||
</list>
|
||||
</option>
|
||||
</component>
|
||||
<component name="ProjectLevelVcsManager" settingsEditedManually="false">
|
||||
<OptionsSetting value="true" id="Add" />
|
||||
<OptionsSetting value="true" id="Remove" />
|
||||
<OptionsSetting value="true" id="Checkout" />
|
||||
<OptionsSetting value="true" id="Update" />
|
||||
<OptionsSetting value="true" id="Status" />
|
||||
<OptionsSetting value="true" id="Edit" />
|
||||
<ConfirmationsSetting value="0" id="Add" />
|
||||
<ConfirmationsSetting value="0" id="Remove" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" default="true" assert-keyword="true" jdk-15="true" project-jdk-name="1.8" project-jdk-type="JavaSDK">
|
||||
<output url="file://$PROJECT_DIR$/out" />
|
||||
</component>
|
||||
<component name="masterDetails">
|
||||
<states>
|
||||
<state key="ProjectJDKs.UI">
|
||||
<settings>
|
||||
<last-edited>1.8</last-edited>
|
||||
<splitter-proportions>
|
||||
<option name="proportions">
|
||||
<list>
|
||||
<option value="0.2" />
|
||||
</list>
|
||||
</option>
|
||||
</splitter-proportions>
|
||||
</settings>
|
||||
</state>
|
||||
</states>
|
||||
</component>
|
||||
</project>
|
|
@ -0,0 +1,8 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/osseanextractor.iml" filepath="$PROJECT_DIR$/osseanextractor.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
|
@ -0,0 +1,6 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
|
@ -0,0 +1,400 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ChangeListManager">
|
||||
<list default="true" id="08302292-1670-461c-87ed-da9c7726ec87" name="Default" comment="">
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/.classpath" afterPath="$PROJECT_DIR$/.classpath" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/src/main/java/net/trustie/extractor/CNblogNews_Extractor.java" afterPath="$PROJECT_DIR$/src/main/java/net/trustie/extractor/CNblogNews_Extractor.java" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/src/main/java/net/trustie/comment/model/CsdnAsk_Comment_Model.java" afterPath="$PROJECT_DIR$/src/main/java/net/trustie/comment/model/CsdnAsk_Comment_Model.java" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/src/main/java/net/trustie/extractor/CsdnAsk_Extractor.java" afterPath="$PROJECT_DIR$/src/main/java/net/trustie/extractor/CsdnAsk_Extractor.java" />
|
||||
</list>
|
||||
<ignored path="osseanextractor.iws" />
|
||||
<ignored path=".idea/workspace.xml" />
|
||||
<ignored path="$PROJECT_DIR$/target/" />
|
||||
<ignored path=".idea/dataSources.local.xml" />
|
||||
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
|
||||
<option name="TRACKING_ENABLED" value="true" />
|
||||
<option name="SHOW_DIALOG" value="false" />
|
||||
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
||||
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
||||
<option name="LAST_RESOLUTION" value="IGNORE" />
|
||||
</component>
|
||||
<component name="ChangesViewManager" flattened_view="true" show_ignored="false" />
|
||||
<component name="CreatePatchCommitExecutor">
|
||||
<option name="PATCH_PATH" value="" />
|
||||
</component>
|
||||
<component name="ExecutionTargetManager" SELECTED_TARGET="default_target" />
|
||||
<component name="FavoritesManager">
|
||||
<favorites_list name="osseanextractor" />
|
||||
</component>
|
||||
<component name="FileEditorManager">
|
||||
<leaf>
|
||||
<file leaf-file-name="CNblogNews_Extractor.java" pinned="false" current-in-tab="true">
|
||||
<entry file="file://$PROJECT_DIR$/src/main/java/net/trustie/extractor/CNblogNews_Extractor.java">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state vertical-scroll-proportion="0.7441253">
|
||||
<caret line="30" column="27" selection-start-line="30" selection-start-column="27" selection-end-line="30" selection-end-column="27" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
</leaf>
|
||||
</component>
|
||||
<component name="Git.Settings">
|
||||
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
|
||||
</component>
|
||||
<component name="GradleLocalSettings">
|
||||
<option name="externalProjectsViewState">
|
||||
<projects_view />
|
||||
</option>
|
||||
</component>
|
||||
<component name="IdeDocumentHistory">
|
||||
<option name="CHANGED_PATHS">
|
||||
<list>
|
||||
<option value="$PROJECT_DIR$/src/main/java/net/trustie/extractor/CNblogNews_Extractor.java" />
|
||||
</list>
|
||||
</option>
|
||||
</component>
|
||||
<component name="JsBuildToolGruntFileManager" detection-done="true" />
|
||||
<component name="JsGulpfileManager">
|
||||
<detection-done>true</detection-done>
|
||||
</component>
|
||||
<component name="MavenImportPreferences">
|
||||
<option name="generalSettings">
|
||||
<MavenGeneralSettings>
|
||||
<option name="mavenHome" value="Bundled (Maven 3)" />
|
||||
</MavenGeneralSettings>
|
||||
</option>
|
||||
</component>
|
||||
<component name="NamedScopeManager">
|
||||
<order />
|
||||
</component>
|
||||
<component name="ProjectFrameBounds">
|
||||
<option name="x" value="-53" />
|
||||
<option name="y" value="218" />
|
||||
<option name="width" value="1382" />
|
||||
<option name="height" value="744" />
|
||||
</component>
|
||||
<component name="ProjectLevelVcsManager" settingsEditedManually="false">
|
||||
<OptionsSetting value="true" id="Add" />
|
||||
<OptionsSetting value="true" id="Remove" />
|
||||
<OptionsSetting value="true" id="Checkout" />
|
||||
<OptionsSetting value="true" id="Update" />
|
||||
<OptionsSetting value="true" id="Status" />
|
||||
<OptionsSetting value="true" id="Edit" />
|
||||
<ConfirmationsSetting value="1" id="Add" />
|
||||
<ConfirmationsSetting value="0" id="Remove" />
|
||||
</component>
|
||||
<component name="ProjectView">
|
||||
<navigator currentView="ProjectPane" proportions="" version="1">
|
||||
<flattenPackages />
|
||||
<showMembers />
|
||||
<showModules />
|
||||
<showLibraryContents />
|
||||
<hideEmptyPackages />
|
||||
<abbreviatePackageNames />
|
||||
<autoscrollToSource />
|
||||
<autoscrollFromSource />
|
||||
<sortByType />
|
||||
</navigator>
|
||||
<panes>
|
||||
<pane id="ProjectPane">
|
||||
<subPane>
|
||||
<PATH>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="osseanextractor" />
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
|
||||
</PATH_ELEMENT>
|
||||
</PATH>
|
||||
</subPane>
|
||||
</pane>
|
||||
<pane id="Scope" />
|
||||
<pane id="Scratches" />
|
||||
<pane id="PackagesPane" />
|
||||
</panes>
|
||||
</component>
|
||||
<component name="PropertiesComponent">
|
||||
<property name="aspect.path.notification.shown" value="true" />
|
||||
<property name="WebServerToolWindowFactoryState" value="false" />
|
||||
<property name="FileHistory.git4idea.history.GitHistoryProvider_flatWidth0" value="110" />
|
||||
<property name="FileHistory.git4idea.history.GitHistoryProvider_flatOrder0" value="0" />
|
||||
<property name="FileHistory.git4idea.history.GitHistoryProvider_flatWidth1" value="140" />
|
||||
<property name="FileHistory.git4idea.history.GitHistoryProvider_flatOrder1" value="1" />
|
||||
<property name="FileHistory.git4idea.history.GitHistoryProvider_flatWidth2" value="140" />
|
||||
<property name="FileHistory.git4idea.history.GitHistoryProvider_flatOrder2" value="2" />
|
||||
<property name="FileHistory.git4idea.history.GitHistoryProvider_flatWidth3" value="933" />
|
||||
<property name="FileHistory.git4idea.history.GitHistoryProvider_flatOrder3" value="3" />
|
||||
<property name="FileHistory.git4idea.history.GitHistoryProvider_treeWidth0" value="128" />
|
||||
<property name="FileHistory.git4idea.history.GitHistoryProvider_treeOrder0" value="0" />
|
||||
<property name="FileHistory.git4idea.history.GitHistoryProvider_treeWidth1" value="134" />
|
||||
<property name="FileHistory.git4idea.history.GitHistoryProvider_treeOrder1" value="1" />
|
||||
<property name="FileHistory.git4idea.history.GitHistoryProvider_treeWidth2" value="134" />
|
||||
<property name="FileHistory.git4idea.history.GitHistoryProvider_treeOrder2" value="2" />
|
||||
<property name="FileHistory.git4idea.history.GitHistoryProvider_treeWidth3" value="927" />
|
||||
<property name="FileHistory.git4idea.history.GitHistoryProvider_treeOrder3" value="3" />
|
||||
<property name="FullScreen" value="false" />
|
||||
</component>
|
||||
<component name="RunManager">
|
||||
<configuration default="true" type="AndroidRunConfigurationType" factoryName="Android Application">
|
||||
<module name="" />
|
||||
<option name="ACTIVITY_CLASS" value="" />
|
||||
<option name="MODE" value="default_activity" />
|
||||
<option name="DEPLOY" value="true" />
|
||||
<option name="ARTIFACT_NAME" value="" />
|
||||
<option name="TARGET_SELECTION_MODE" value="EMULATOR" />
|
||||
<option name="USE_LAST_SELECTED_DEVICE" value="false" />
|
||||
<option name="PREFERRED_AVD" value="" />
|
||||
<option name="USE_COMMAND_LINE" value="true" />
|
||||
<option name="COMMAND_LINE" value="" />
|
||||
<option name="WIPE_USER_DATA" value="false" />
|
||||
<option name="DISABLE_BOOT_ANIMATION" value="false" />
|
||||
<option name="NETWORK_SPEED" value="full" />
|
||||
<option name="NETWORK_LATENCY" value="none" />
|
||||
<option name="CLEAR_LOGCAT" value="false" />
|
||||
<option name="SHOW_LOGCAT_AUTOMATICALLY" value="true" />
|
||||
<option name="FILTER_LOGCAT_AUTOMATICALLY" value="true" />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="AndroidTestRunConfigurationType" factoryName="Android Tests">
|
||||
<module name="" />
|
||||
<option name="TESTING_TYPE" value="0" />
|
||||
<option name="INSTRUMENTATION_RUNNER_CLASS" value="" />
|
||||
<option name="METHOD_NAME" value="" />
|
||||
<option name="CLASS_NAME" value="" />
|
||||
<option name="PACKAGE_NAME" value="" />
|
||||
<option name="TARGET_SELECTION_MODE" value="EMULATOR" />
|
||||
<option name="USE_LAST_SELECTED_DEVICE" value="false" />
|
||||
<option name="PREFERRED_AVD" value="" />
|
||||
<option name="USE_COMMAND_LINE" value="true" />
|
||||
<option name="COMMAND_LINE" value="" />
|
||||
<option name="WIPE_USER_DATA" value="false" />
|
||||
<option name="DISABLE_BOOT_ANIMATION" value="false" />
|
||||
<option name="NETWORK_SPEED" value="full" />
|
||||
<option name="NETWORK_LATENCY" value="none" />
|
||||
<option name="CLEAR_LOGCAT" value="false" />
|
||||
<option name="SHOW_LOGCAT_AUTOMATICALLY" value="true" />
|
||||
<option name="FILTER_LOGCAT_AUTOMATICALLY" value="true" />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="Applet" factoryName="Applet">
|
||||
<option name="WIDTH" value="400" />
|
||||
<option name="HEIGHT" value="300" />
|
||||
<option name="POLICY_FILE" value="$APPLICATION_HOME_DIR$/bin/appletviewer.policy" />
|
||||
<module />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="Application" factoryName="Application">
|
||||
<extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea" />
|
||||
<option name="MAIN_CLASS_NAME" />
|
||||
<option name="VM_PARAMETERS" />
|
||||
<option name="PROGRAM_PARAMETERS" />
|
||||
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
|
||||
<option name="ALTERNATIVE_JRE_PATH_ENABLED" value="false" />
|
||||
<option name="ALTERNATIVE_JRE_PATH" />
|
||||
<option name="ENABLE_SWING_INSPECTOR" value="false" />
|
||||
<option name="ENV_VARIABLES" />
|
||||
<option name="PASS_PARENT_ENVS" value="true" />
|
||||
<module name="" />
|
||||
<envs />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="CucumberJavaRunConfigurationType" factoryName="Cucumber java">
|
||||
<extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea" />
|
||||
<option name="myFilePath" />
|
||||
<option name="GLUE" />
|
||||
<option name="myNameFilter" />
|
||||
<option name="myGeneratedName" />
|
||||
<option name="MAIN_CLASS_NAME" />
|
||||
<option name="VM_PARAMETERS" />
|
||||
<option name="PROGRAM_PARAMETERS" />
|
||||
<option name="WORKING_DIRECTORY" />
|
||||
<option name="ALTERNATIVE_JRE_PATH_ENABLED" value="false" />
|
||||
<option name="ALTERNATIVE_JRE_PATH" />
|
||||
<option name="ENABLE_SWING_INSPECTOR" value="false" />
|
||||
<option name="ENV_VARIABLES" />
|
||||
<option name="PASS_PARENT_ENVS" value="true" />
|
||||
<module name="" />
|
||||
<envs />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="FlashRunConfigurationType" factoryName="Flash App">
|
||||
<option name="BCName" value="" />
|
||||
<option name="IOSSimulatorSdkPath" value="" />
|
||||
<option name="adlOptions" value="" />
|
||||
<option name="airProgramParameters" value="" />
|
||||
<option name="appDescriptorForEmulator" value="Android" />
|
||||
<option name="debugTransport" value="USB" />
|
||||
<option name="debuggerSdkRaw" value="BC SDK" />
|
||||
<option name="emulator" value="NexusOne" />
|
||||
<option name="emulatorAdlOptions" value="" />
|
||||
<option name="fastPackaging" value="true" />
|
||||
<option name="fullScreenHeight" value="0" />
|
||||
<option name="fullScreenWidth" value="0" />
|
||||
<option name="launchUrl" value="false" />
|
||||
<option name="launcherParameters">
|
||||
<LauncherParameters>
|
||||
<option name="browser" value="a7bb68e0-33c0-4d6f-a81a-aac1fdb870c8" />
|
||||
<option name="launcherType" value="OSDefault" />
|
||||
<option name="newPlayerInstance" value="false" />
|
||||
<option name="playerPath" value="FlashPlayerDebugger.exe" />
|
||||
</LauncherParameters>
|
||||
</option>
|
||||
<option name="mobileRunTarget" value="Emulator" />
|
||||
<option name="moduleName" value="" />
|
||||
<option name="overriddenMainClass" value="" />
|
||||
<option name="overriddenOutputFileName" value="" />
|
||||
<option name="overrideMainClass" value="false" />
|
||||
<option name="runTrusted" value="true" />
|
||||
<option name="screenDpi" value="0" />
|
||||
<option name="screenHeight" value="0" />
|
||||
<option name="screenWidth" value="0" />
|
||||
<option name="url" value="http://" />
|
||||
<option name="usbDebugPort" value="7936" />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="FlexUnitRunConfigurationType" factoryName="FlexUnit" appDescriptorForEmulator="Android" class_name="" emulatorAdlOptions="" method_name="" package_name="" scope="Class">
|
||||
<option name="BCName" value="" />
|
||||
<option name="launcherParameters">
|
||||
<LauncherParameters>
|
||||
<option name="browser" value="a7bb68e0-33c0-4d6f-a81a-aac1fdb870c8" />
|
||||
<option name="launcherType" value="OSDefault" />
|
||||
<option name="newPlayerInstance" value="false" />
|
||||
<option name="playerPath" value="FlashPlayerDebugger.exe" />
|
||||
</LauncherParameters>
|
||||
</option>
|
||||
<option name="moduleName" value="" />
|
||||
<option name="trusted" value="true" />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="GradleRunConfiguration" factoryName="Gradle">
|
||||
<ExternalSystemSettings>
|
||||
<option name="executionName" />
|
||||
<option name="externalProjectPath" />
|
||||
<option name="externalSystemIdString" value="GRADLE" />
|
||||
<option name="scriptParameters" />
|
||||
<option name="taskDescriptions">
|
||||
<list />
|
||||
</option>
|
||||
<option name="taskNames">
|
||||
<list />
|
||||
</option>
|
||||
<option name="vmOptions" />
|
||||
</ExternalSystemSettings>
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="GrailsRunConfigurationType" factoryName="Grails">
|
||||
<module name="" />
|
||||
<setting name="vmparams" value="" />
|
||||
<setting name="cmdLine" value="run-app" />
|
||||
<setting name="depsClasspath" value="false" />
|
||||
<setting name="passParentEnv" value="true" />
|
||||
<extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea" />
|
||||
<setting name="launchBrowser" value="false" />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="JavascriptDebugType" factoryName="JavaScript Debug">
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot">
|
||||
<extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea" />
|
||||
<module name="" />
|
||||
<envs />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="js.build_tools.gulp" factoryName="Gulp.js">
|
||||
<node-options />
|
||||
<gulpfile />
|
||||
<tasks />
|
||||
<arguments />
|
||||
<pass-parent-envs>true</pass-parent-envs>
|
||||
<envs />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="osgi.bnd.run" factoryName="Run Launcher">
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="osgi.bnd.run" factoryName="Test Launcher (JUnit)">
|
||||
<method />
|
||||
</configuration>
|
||||
</component>
|
||||
<component name="ShelveChangesManager" show_recycled="false" />
|
||||
<component name="SvnConfiguration">
|
||||
<configuration />
|
||||
</component>
|
||||
<component name="TaskManager">
|
||||
<task active="true" id="Default" summary="Default task">
|
||||
<changelist id="08302292-1670-461c-87ed-da9c7726ec87" name="Default" comment="" />
|
||||
<created>1432444136539</created>
|
||||
<option name="number" value="Default" />
|
||||
<updated>1432444136539</updated>
|
||||
<workItem from="1432444138435" duration="203000" />
|
||||
<workItem from="1436271132941" duration="1652000" />
|
||||
</task>
|
||||
<servers />
|
||||
</component>
|
||||
<component name="TimeTrackingManager">
|
||||
<option name="totallyTimeSpent" value="1855000" />
|
||||
</component>
|
||||
<component name="ToolWindowManager">
|
||||
<frame x="-53" y="218" width="1382" height="744" extended-state="0" />
|
||||
<editor active="true" />
|
||||
<layout>
|
||||
<window_info id="Palette" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Palette	" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="7" side_tool="true" content_ui="tabs" />
|
||||
<window_info id="Application Servers" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Maven Projects" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" weight="0.32789558" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Designer" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" weight="0.24281392" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
|
||||
<window_info id="Database" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="UI Designer" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="2" side_tool="true" content_ui="tabs" />
|
||||
<window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="SLIDING" type="SLIDING" visible="false" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" />
|
||||
<window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
|
||||
</layout>
|
||||
</component>
|
||||
<component name="Vcs.Log.UiProperties">
|
||||
<option name="RECENTLY_FILTERED_USER_GROUPS">
|
||||
<collection />
|
||||
</option>
|
||||
<option name="RECENTLY_FILTERED_BRANCH_GROUPS">
|
||||
<collection />
|
||||
</option>
|
||||
</component>
|
||||
<component name="VcsContentAnnotationSettings">
|
||||
<option name="myLimit" value="2678400000" />
|
||||
</component>
|
||||
<component name="XDebuggerManager">
|
||||
<breakpoint-manager />
|
||||
<watches-manager />
|
||||
</component>
|
||||
<component name="editorHistoryManager">
|
||||
<entry file="file://$PROJECT_DIR$/src/main/java/net/trustie/extractor/CNblogNews_Extractor.java">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state vertical-scroll-proportion="0.0">
|
||||
<caret line="0" column="0" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/src/main/java/net/trustie/extractor/CNblogNews_Extractor.java">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state vertical-scroll-proportion="0.7441253">
|
||||
<caret line="30" column="27" selection-start-line="30" selection-start-column="27" selection-end-line="30" selection-end-column="27" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</component>
|
||||
</project>
|
|
@ -0,0 +1,23 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>ossean</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.jdt.core.javabuilder</name>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.m2e.core.maven2Builder</name>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
</buildSpec>
|
||||
<natures>
|
||||
<nature>org.eclipse.jdt.core.javanature</nature>
|
||||
<nature>org.eclipse.m2e.core.maven2Nature</nature>
|
||||
</natures>
|
||||
</projectDescription>
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
#
# Starts net.trustie.extractor.CTO51Blogs_Extractor as a background JVM and
# appends its stdout and stderr to log/${SITE}.log under the install root.

SITE='51cto_blog'

# Install root; export OSSEAN_HOME beforehand to run from another location.
OSSEAN_HOME="${OSSEAN_HOME:-/home/song/hx/new_osseanextractor}"

# Delete every *.xml below target/classes before start-up. "-exec ... +" copes
# with whitespace in file names, which piping find into xargs does not.
find "$OSSEAN_HOME/target/classes" -name "*.xml" -exec rm -f {} +

# Class path entries, first to last: the jar directory wildcard, the compiled
# classes, bin/resources, then any CLASSPATH already set by the caller.
tmp="$OSSEAN_HOME/bin/resources"
tmp="$OSSEAN_HOME/target/classes:$tmp"
tmp="$OSSEAN_HOME/target/osseanextractor-0.0.1-jar-with-dependencies-without-resources/*:$tmp"

CLASSPATH="$tmp:$CLASSPATH"

JAVA_OPTS="-Xms256m -Xmx256m -Xmn128m"

# Quoted so the shell never glob-expands or splits the '*' entry; the java
# launcher expands the wildcard itself.
echo "$CLASSPATH"

# The >> redirection below fails (and the JVM never starts) when log/ is missing.
mkdir -p "$OSSEAN_HOME/log"

# JAVA_OPTS stays unquoted on purpose: it must split into separate JVM flags.
java $JAVA_OPTS -DlogFilePath="${SITE}" -classpath "$CLASSPATH" net.trustie.extractor.CTO51Blogs_Extractor >>"$OSSEAN_HOME/log/${SITE}.log" 2>&1 &
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
#
# Starts net.trustie.extractor.OpenHubProject_Extractor as a background JVM and
# appends its stdout and stderr to log/${SITE}.log under the install root.

SITE='OpenHub_project'

# Install root; export OSSEAN_HOME beforehand to run from another location.
OSSEAN_HOME="${OSSEAN_HOME:-/home/song/hx/new_osseanextractor}"

# Delete every *.xml below target/classes before start-up. "-exec ... +" copes
# with whitespace in file names, which piping find into xargs does not.
find "$OSSEAN_HOME/target/classes" -name "*.xml" -exec rm -f {} +

# Class path entries, first to last: the jar directory wildcard, the compiled
# classes, bin/resources, then any CLASSPATH already set by the caller.
tmp="$OSSEAN_HOME/bin/resources"
tmp="$OSSEAN_HOME/target/classes:$tmp"
tmp="$OSSEAN_HOME/target/osseanextractor-0.0.1-jar-with-dependencies-without-resources/*:$tmp"

CLASSPATH="$tmp:$CLASSPATH"

JAVA_OPTS="-Xms256m -Xmx256m -Xmn128m"

# Quoted so the shell never glob-expands or splits the '*' entry; the java
# launcher expands the wildcard itself.
echo "$CLASSPATH"

# The >> redirection below fails (and the JVM never starts) when log/ is missing.
mkdir -p "$OSSEAN_HOME/log"

# JAVA_OPTS stays unquoted on purpose: it must split into separate JVM flags.
java $JAVA_OPTS -DlogFilePath="${SITE}" -classpath "$CLASSPATH" net.trustie.extractor.OpenHubProject_Extractor >>"$OSSEAN_HOME/log/${SITE}.log" 2>&1 &
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
#
# Starts net.trustie.extractor.SFProject_Extractor as a background JVM and
# appends its stdout and stderr to log/${SITE}.log under the install root.

SITE='sourceforge_project'

# Install root; export OSSEAN_HOME beforehand to run from another location.
OSSEAN_HOME="${OSSEAN_HOME:-/home/song/hx/new_osseanextractor}"

# Delete every *.xml below target/classes before start-up. "-exec ... +" copes
# with whitespace in file names, which piping find into xargs does not.
find "$OSSEAN_HOME/target/classes" -name "*.xml" -exec rm -f {} +

# Class path entries, first to last: the jar directory wildcard, the compiled
# classes, bin/resources, then any CLASSPATH already set by the caller.
tmp="$OSSEAN_HOME/bin/resources"
tmp="$OSSEAN_HOME/target/classes:$tmp"
tmp="$OSSEAN_HOME/target/osseanextractor-0.0.1-jar-with-dependencies-without-resources/*:$tmp"

CLASSPATH="$tmp:$CLASSPATH"

JAVA_OPTS="-Xms256m -Xmx256m -Xmn128m"

# Quoted so the shell never glob-expands or splits the '*' entry; the java
# launcher expands the wildcard itself.
echo "$CLASSPATH"

# The >> redirection below fails (and the JVM never starts) when log/ is missing.
mkdir -p "$OSSEAN_HOME/log"

# JAVA_OPTS stays unquoted on purpose: it must split into separate JVM flags.
java $JAVA_OPTS -DlogFilePath="${SITE}" -classpath "$CLASSPATH" net.trustie.extractor.SFProject_Extractor >>"$OSSEAN_HOME/log/${SITE}.log" 2>&1 &
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
#
# Starts net.trustie.extractor.ApacheProject_Extractor as a background JVM and
# appends its stdout and stderr to log/${SITE}.log under the install root.

SITE='apache'

# Install root; export OSSEAN_HOME beforehand to run from another location.
OSSEAN_HOME="${OSSEAN_HOME:-/home/song/hx/new_osseanextractor}"

# Delete every *.xml below target/classes before start-up. "-exec ... +" copes
# with whitespace in file names, which piping find into xargs does not.
find "$OSSEAN_HOME/target/classes" -name "*.xml" -exec rm -f {} +

# Class path entries, first to last: the jar directory wildcard, the compiled
# classes, bin/resources, then any CLASSPATH already set by the caller.
tmp="$OSSEAN_HOME/bin/resources"
tmp="$OSSEAN_HOME/target/classes:$tmp"
tmp="$OSSEAN_HOME/target/osseanextractor-0.0.1-jar-with-dependencies-without-resources/*:$tmp"

CLASSPATH="$tmp:$CLASSPATH"

JAVA_OPTS="-Xms256m -Xmx256m -Xmn128m"

# Quoted so the shell never glob-expands or splits the '*' entry; the java
# launcher expands the wildcard itself.
echo "$CLASSPATH"

# The >> redirection below fails (and the JVM never starts) when log/ is missing.
mkdir -p "$OSSEAN_HOME/log"

# JAVA_OPTS stays unquoted on purpose: it must split into separate JVM flags.
java $JAVA_OPTS -DlogFilePath="${SITE}" -classpath "$CLASSPATH" net.trustie.extractor.ApacheProject_Extractor >>"$OSSEAN_HOME/log/${SITE}.log" 2>&1 &
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
#
# Starts net.trustie.extractor.CNblogNews_Extractor as a background JVM and
# appends its stdout and stderr to log/${SITE}.log under the install root.

SITE='cnblog_news'

# Install root; export OSSEAN_HOME beforehand to run from another location.
OSSEAN_HOME="${OSSEAN_HOME:-/home/song/hx/new_osseanextractor}"

# Delete every *.xml below target/classes before start-up. "-exec ... +" copes
# with whitespace in file names, which piping find into xargs does not.
find "$OSSEAN_HOME/target/classes" -name "*.xml" -exec rm -f {} +

# Class path entries, first to last: the jar directory wildcard, the compiled
# classes, bin/resources, then any CLASSPATH already set by the caller.
tmp="$OSSEAN_HOME/bin/resources"
tmp="$OSSEAN_HOME/target/classes:$tmp"
tmp="$OSSEAN_HOME/target/osseanextractor-0.0.1-jar-with-dependencies-without-resources/*:$tmp"

CLASSPATH="$tmp:$CLASSPATH"

JAVA_OPTS="-Xms256m -Xmx256m -Xmn128m"

# Quoted so the shell never glob-expands or splits the '*' entry; the java
# launcher expands the wildcard itself.
echo "$CLASSPATH"

# The >> redirection below fails (and the JVM never starts) when log/ is missing.
mkdir -p "$OSSEAN_HOME/log"

# JAVA_OPTS stays unquoted on purpose: it must split into separate JVM flags.
java $JAVA_OPTS -DlogFilePath="${SITE}" -classpath "$CLASSPATH" net.trustie.extractor.CNblogNews_Extractor >>"$OSSEAN_HOME/log/${SITE}.log" 2>&1 &
|
|
@ -0,0 +1,18 @@
|
|||
#!/bin/bash
#
# Starts net.trustie.extractor.CNblogQSolve_Extractor as a background JVM and
# appends its stdout and stderr to log/${SITE}.log under the install root.

SITE='cnblog_q_solve'

# Install root; export OSSEAN_HOME beforehand to run from another location.
OSSEAN_HOME="${OSSEAN_HOME:-/home/song/hx/new_osseanextractor}"

# Delete every *.xml below target/classes before start-up. "-exec ... +" copes
# with whitespace in file names, which piping find into xargs does not.
find "$OSSEAN_HOME/target/classes" -name "*.xml" -exec rm -f {} +

# Class path entries, first to last: the jar directory wildcard, the compiled
# classes, bin/resources, then any CLASSPATH already set by the caller.
tmp="$OSSEAN_HOME/bin/resources"
tmp="$OSSEAN_HOME/target/classes:$tmp"
tmp="$OSSEAN_HOME/target/osseanextractor-0.0.1-jar-with-dependencies-without-resources/*:$tmp"

CLASSPATH="$tmp:$CLASSPATH"

JAVA_OPTS="-Xms256m -Xmx256m -Xmn128m"

# Quoted so the shell never glob-expands or splits the '*' entry; the java
# launcher expands the wildcard itself.
echo "$CLASSPATH"

# The >> redirection below fails (and the JVM never starts) when log/ is missing.
mkdir -p "$OSSEAN_HOME/log"

# JAVA_OPTS stays unquoted on purpose: it must split into separate JVM flags.
java $JAVA_OPTS -DlogFilePath="${SITE}" -classpath "$CLASSPATH" net.trustie.extractor.CNblogQSolve_Extractor >>"$OSSEAN_HOME/log/${SITE}.log" 2>&1 &
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
#
# Starts net.trustie.comment.extractor.CnblogsQ_Comment_Extractor as a
# background JVM and appends its stdout and stderr to log/${SITE}.log under the
# install root.

SITE='cnblog_q_solve_comments'

# Install root; export OSSEAN_HOME beforehand to run from another location.
OSSEAN_HOME="${OSSEAN_HOME:-/home/song/hx/new_osseanextractor}"

# Delete every *.xml below target/classes before start-up. "-exec ... +" copes
# with whitespace in file names, which piping find into xargs does not.
find "$OSSEAN_HOME/target/classes" -name "*.xml" -exec rm -f {} +

# Class path entries, first to last: the jar directory wildcard, the compiled
# classes, bin/resources, then any CLASSPATH already set by the caller.
tmp="$OSSEAN_HOME/bin/resources"
tmp="$OSSEAN_HOME/target/classes:$tmp"
tmp="$OSSEAN_HOME/target/osseanextractor-0.0.1-jar-with-dependencies-without-resources/*:$tmp"

CLASSPATH="$tmp:$CLASSPATH"

JAVA_OPTS="-Xms256m -Xmx256m -Xmn128m"

# Quoted so the shell never glob-expands or splits the '*' entry; the java
# launcher expands the wildcard itself.
echo "$CLASSPATH"

# The >> redirection below fails (and the JVM never starts) when log/ is missing.
mkdir -p "$OSSEAN_HOME/log"

# JAVA_OPTS stays unquoted on purpose: it must split into separate JVM flags.
java $JAVA_OPTS -DlogFilePath="${SITE}" -classpath "$CLASSPATH" net.trustie.comment.extractor.CnblogsQ_Comment_Extractor >>"$OSSEAN_HOME/log/${SITE}.log" 2>&1 &
|
|
@ -0,0 +1,18 @@
|
|||
#!/bin/bash
#
# Starts net.trustie.extractor.CNblogQUnSolve_Extractor as a background JVM and
# appends its stdout and stderr to log/${SITE}.log under the install root.

SITE='cnblog_q_unsolve'

# Install root; export OSSEAN_HOME beforehand to run from another location.
OSSEAN_HOME="${OSSEAN_HOME:-/home/song/hx/new_osseanextractor}"

# Delete every *.xml below target/classes before start-up. "-exec ... +" copes
# with whitespace in file names, which piping find into xargs does not.
find "$OSSEAN_HOME/target/classes" -name "*.xml" -exec rm -f {} +

# Class path entries, first to last: the jar directory wildcard, the compiled
# classes, bin/resources, then any CLASSPATH already set by the caller.
tmp="$OSSEAN_HOME/bin/resources"
tmp="$OSSEAN_HOME/target/classes:$tmp"
tmp="$OSSEAN_HOME/target/osseanextractor-0.0.1-jar-with-dependencies-without-resources/*:$tmp"

CLASSPATH="$tmp:$CLASSPATH"

JAVA_OPTS="-Xms256m -Xmx256m -Xmn128m"

# Quoted so the shell never glob-expands or splits the '*' entry; the java
# launcher expands the wildcard itself.
echo "$CLASSPATH"

# The >> redirection below fails (and the JVM never starts) when log/ is missing.
mkdir -p "$OSSEAN_HOME/log"

# JAVA_OPTS stays unquoted on purpose: it must split into separate JVM flags.
java $JAVA_OPTS -DlogFilePath="${SITE}" -classpath "$CLASSPATH" net.trustie.extractor.CNblogQUnSolve_Extractor >>"$OSSEAN_HOME/log/${SITE}.log" 2>&1 &
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
#
# Starts net.trustie.extractor.Codeproject_Extractor as a background JVM and
# appends its stdout and stderr to log/${SITE}.log under the install root.

SITE='code_project'

# Install root; export OSSEAN_HOME beforehand to run from another location.
OSSEAN_HOME="${OSSEAN_HOME:-/home/song/hx/new_osseanextractor}"

# Delete every *.xml below target/classes before start-up. "-exec ... +" copes
# with whitespace in file names, which piping find into xargs does not.
find "$OSSEAN_HOME/target/classes" -name "*.xml" -exec rm -f {} +

# Class path entries, first to last: the jar directory wildcard, the compiled
# classes, bin/resources, then any CLASSPATH already set by the caller.
tmp="$OSSEAN_HOME/bin/resources"
tmp="$OSSEAN_HOME/target/classes:$tmp"
tmp="$OSSEAN_HOME/target/osseanextractor-0.0.1-jar-with-dependencies-without-resources/*:$tmp"

CLASSPATH="$tmp:$CLASSPATH"

JAVA_OPTS="-Xms256m -Xmx256m -Xmn128m"

# Quoted so the shell never glob-expands or splits the '*' entry; the java
# launcher expands the wildcard itself.
echo "$CLASSPATH"

# The >> redirection below fails (and the JVM never starts) when log/ is missing.
mkdir -p "$OSSEAN_HOME/log"

# JAVA_OPTS stays unquoted on purpose: it must split into separate JVM flags.
java $JAVA_OPTS -DlogFilePath="${SITE}" -classpath "$CLASSPATH" net.trustie.extractor.Codeproject_Extractor >>"$OSSEAN_HOME/log/${SITE}.log" 2>&1 &
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
#
# Starts net.trustie.extractor.CsdnAsk_Extractor as a background JVM and
# appends its stdout and stderr to log/${SITE}.log under the install root.

SITE='csdn_ask'

# Install root; export OSSEAN_HOME beforehand to run from another location.
OSSEAN_HOME="${OSSEAN_HOME:-/home/song/hx/new_osseanextractor}"

# Delete every *.xml below target/classes before start-up. "-exec ... +" copes
# with whitespace in file names, which piping find into xargs does not.
find "$OSSEAN_HOME/target/classes" -name "*.xml" -exec rm -f {} +

# Class path entries, first to last: the jar directory wildcard, the compiled
# classes, bin/resources, then any CLASSPATH already set by the caller.
tmp="$OSSEAN_HOME/bin/resources"
tmp="$OSSEAN_HOME/target/classes:$tmp"
tmp="$OSSEAN_HOME/target/osseanextractor-0.0.1-jar-with-dependencies-without-resources/*:$tmp"

CLASSPATH="$tmp:$CLASSPATH"

JAVA_OPTS="-Xms256m -Xmx256m -Xmn128m"

# Quoted so the shell never glob-expands or splits the '*' entry; the java
# launcher expands the wildcard itself.
echo "$CLASSPATH"

# The >> redirection below fails (and the JVM never starts) when log/ is missing.
mkdir -p "$OSSEAN_HOME/log"

# JAVA_OPTS stays unquoted on purpose: it must split into separate JVM flags.
java $JAVA_OPTS -DlogFilePath="${SITE}" -classpath "$CLASSPATH" net.trustie.extractor.CsdnAsk_Extractor >>"$OSSEAN_HOME/log/${SITE}.log" 2>&1 &
|
|
@ -0,0 +1,19 @@
|
|||
#!/bin/bash
#
# Starts net.trustie.extractor.CsdnBlogs_Extractor as a background JVM and
# appends its stdout and stderr to log/${SITE}.log under the install root.

SITE='csdn_blogs'

# Install root; export OSSEAN_HOME beforehand to run from another location.
OSSEAN_HOME="${OSSEAN_HOME:-/home/song/hx/new_osseanextractor}"

# Delete every *.xml below target/classes before start-up. "-exec ... +" copes
# with whitespace in file names, which piping find into xargs does not.
find "$OSSEAN_HOME/target/classes" -name "*.xml" -exec rm -f {} +

# Class path entries, first to last: the jar directory wildcard, the compiled
# classes, bin/resources, then any CLASSPATH already set by the caller.
tmp="$OSSEAN_HOME/bin/resources"
tmp="$OSSEAN_HOME/target/classes:$tmp"
tmp="$OSSEAN_HOME/target/osseanextractor-0.0.1-jar-with-dependencies-without-resources/*:$tmp"

CLASSPATH="$tmp:$CLASSPATH"

JAVA_OPTS="-Xms256m -Xmx256m -Xmn128m"

# Quoted so the shell never glob-expands or splits the '*' entry; the java
# launcher expands the wildcard itself.
echo "$CLASSPATH"

# The >> redirection below fails (and the JVM never starts) when log/ is missing.
mkdir -p "$OSSEAN_HOME/log"

# JAVA_OPTS stays unquoted on purpose: it must split into separate JVM flags.
java $JAVA_OPTS -DlogFilePath="${SITE}" -classpath "$CLASSPATH" net.trustie.extractor.CsdnBlogs_Extractor >>"$OSSEAN_HOME/log/${SITE}.log" 2>&1 &
|
||||
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
#!/bin/bash
# Starts the CsdnTopic_Extractor JVM in the background; output is appended to log/csdn_topic.log.

SITE='csdn_topic'
BASE='/home/song/hx/new_osseanextractor'

# Remove XML files from the compiled classes directory before launching
# (presumably so the copies under bin/resources are the ones loaded - TODO confirm).
# "-exec ... +" is safe for file names containing whitespace, unlike a bare xargs pipe.
find "$BASE/target/classes" -name "*.xml" -exec rm -f {} +

# Search order: dependency jars, compiled classes, external resources.
tmp="$BASE/target/osseanextractor-0.0.1-jar-with-dependencies-without-resources/*:$BASE/target/classes:$BASE/bin/resources"

CLASSPATH="$tmp:$CLASSPATH"

JAVA_OPTS="-Xms256m -Xmx256m -Xmn128m"

echo "$CLASSPATH"

# JAVA_OPTS stays unquoted on purpose so it splits into separate JVM flags;
# the class path is quoted so the shell never expands its '*' wildcard.
java $JAVA_OPTS "-DlogFilePath=${SITE}" -classpath "$CLASSPATH" net.trustie.extractor.CsdnTopic_Extractor >>"$BASE/log/${SITE}.log" 2>&1 &
|
||||
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
# Starts the DeWenQ_Extractor JVM in the background; output is appended to log/dewen_q.log.

SITE='dewen_q'
BASE='/home/song/hx/new_osseanextractor'

# Remove XML files from the compiled classes directory before launching
# (presumably so the copies under bin/resources are the ones loaded - TODO confirm).
# "-exec ... +" is safe for file names containing whitespace, unlike a bare xargs pipe.
find "$BASE/target/classes" -name "*.xml" -exec rm -f {} +

# Search order: dependency jars, compiled classes, external resources.
tmp="$BASE/target/osseanextractor-0.0.1-jar-with-dependencies-without-resources/*:$BASE/target/classes:$BASE/bin/resources"

CLASSPATH="$tmp:$CLASSPATH"

JAVA_OPTS="-Xms256m -Xmx256m -Xmn128m"

echo "$CLASSPATH"

# JAVA_OPTS stays unquoted on purpose so it splits into separate JVM flags;
# the class path is quoted so the shell never expands its '*' wildcard.
java $JAVA_OPTS "-DlogFilePath=${SITE}" -classpath "$CLASSPATH" net.trustie.extractor.DeWenQ_Extractor >>"$BASE/log/${SITE}.log" 2>&1 &
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
# Starts the FreeCodeProject_Extractor JVM in the background; output is appended to log/freecode_project.log.

SITE='freecode_project'
BASE='/home/song/hx/new_osseanextractor'

# Remove XML files from the compiled classes directory before launching
# (presumably so the copies under bin/resources are the ones loaded - TODO confirm).
# "-exec ... +" is safe for file names containing whitespace, unlike a bare xargs pipe.
find "$BASE/target/classes" -name "*.xml" -exec rm -f {} +

# Search order: dependency jars, compiled classes, external resources.
tmp="$BASE/target/osseanextractor-0.0.1-jar-with-dependencies-without-resources/*:$BASE/target/classes:$BASE/bin/resources"

CLASSPATH="$tmp:$CLASSPATH"

JAVA_OPTS="-Xms256m -Xmx256m -Xmn128m"

echo "$CLASSPATH"

# JAVA_OPTS stays unquoted on purpose so it splits into separate JVM flags;
# the class path is quoted so the shell never expands its '*' wildcard.
java $JAVA_OPTS "-DlogFilePath=${SITE}" -classpath "$CLASSPATH" net.trustie.extractor.FreeCodeProject_Extractor >>"$BASE/log/${SITE}.log" 2>&1 &
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
# Starts the Gna_Extractor JVM in the background; output is appended to log/gna.log.

SITE='gna'
BASE='/home/song/hx/new_osseanextractor'

# Remove XML files from the compiled classes directory before launching
# (presumably so the copies under bin/resources are the ones loaded - TODO confirm).
# "-exec ... +" is safe for file names containing whitespace, unlike a bare xargs pipe.
find "$BASE/target/classes" -name "*.xml" -exec rm -f {} +

# Search order: dependency jars, compiled classes, external resources.
tmp="$BASE/target/osseanextractor-0.0.1-jar-with-dependencies-without-resources/*:$BASE/target/classes:$BASE/bin/resources"

CLASSPATH="$tmp:$CLASSPATH"

JAVA_OPTS="-Xms256m -Xmx256m -Xmn128m"

echo "$CLASSPATH"

# JAVA_OPTS stays unquoted on purpose so it splits into separate JVM flags;
# the class path is quoted so the shell never expands its '*' wildcard.
java $JAVA_OPTS "-DlogFilePath=${SITE}" -classpath "$CLASSPATH" net.trustie.extractor.Gna_Extractor >>"$BASE/log/${SITE}.log" 2>&1 &
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
# Starts the IteyeAsk_Extractor JVM in the background; output is appended to log/iteye_ask.log.

SITE='iteye_ask'
BASE='/home/song/hx/new_osseanextractor'

# Remove XML files from the compiled classes directory before launching
# (presumably so the copies under bin/resources are the ones loaded - TODO confirm).
# "-exec ... +" is safe for file names containing whitespace, unlike a bare xargs pipe.
find "$BASE/target/classes" -name "*.xml" -exec rm -f {} +

# Search order: dependency jars, compiled classes, external resources.
tmp="$BASE/target/osseanextractor-0.0.1-jar-with-dependencies-without-resources/*:$BASE/target/classes:$BASE/bin/resources"

CLASSPATH="$tmp:$CLASSPATH"

JAVA_OPTS="-Xms256m -Xmx256m -Xmn128m"

echo "$CLASSPATH"

# JAVA_OPTS stays unquoted on purpose so it splits into separate JVM flags;
# the class path is quoted so the shell never expands its '*' wildcard.
java $JAVA_OPTS "-DlogFilePath=${SITE}" -classpath "$CLASSPATH" net.trustie.extractor.IteyeAsk_Extractor >>"$BASE/log/${SITE}.log" 2>&1 &
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
# Starts the IteyeBlog_Extractor JVM in the background; output is appended to log/iteye_blog.log.

SITE='iteye_blog'
BASE='/home/song/hx/new_osseanextractor'

# Remove XML files from the compiled classes directory before launching
# (presumably so the copies under bin/resources are the ones loaded - TODO confirm).
# "-exec ... +" is safe for file names containing whitespace, unlike a bare xargs pipe.
find "$BASE/target/classes" -name "*.xml" -exec rm -f {} +

# Search order: dependency jars, compiled classes, external resources.
tmp="$BASE/target/osseanextractor-0.0.1-jar-with-dependencies-without-resources/*:$BASE/target/classes:$BASE/bin/resources"

CLASSPATH="$tmp:$CLASSPATH"

JAVA_OPTS="-Xms256m -Xmx256m -Xmn128m"

echo "$CLASSPATH"

# JAVA_OPTS stays unquoted on purpose so it splits into separate JVM flags;
# the class path is quoted so the shell never expands its '*' wildcard.
java $JAVA_OPTS "-DlogFilePath=${SITE}" -classpath "$CLASSPATH" net.trustie.extractor.IteyeBlog_Extractor >>"$BASE/log/${SITE}.log" 2>&1 &
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
# Starts the lagou_Extractor JVM in the background; output is appended to log/lagou.log.
# Every path below is relative, so the script must be run from the project root directory.

SITE='lagou'

# Remove XML files from the compiled classes directory before launching
# (presumably so the copies under bin/resources are the ones loaded - TODO confirm).
# "-exec ... +" is safe for file names containing whitespace, unlike a bare xargs pipe.
find ./target/classes -name "*.xml" -exec rm -f {} +

# Search order: dependency jars, compiled classes, external resources.
tmp='./target/osseanextractor-0.0.1-jar-with-dependencies-without-resources/*:./target/classes:./bin/resources'

CLASSPATH="$tmp:$CLASSPATH"

JAVA_OPTS="-Xms256m -Xmx512m -Xmn128m"

echo "$CLASSPATH"

# JAVA_OPTS stays unquoted on purpose so it splits into separate JVM flags;
# the class path is quoted so the shell never expands its '*' wildcard.
java $JAVA_OPTS "-DlogFilePath=${SITE}" -classpath "$CLASSPATH" net.trustie.extractor.lagou_Extractor >>"log/${SITE}.log" 2>&1 &
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
# Starts the Linuxtone_Extractor JVM in the background; output is appended to log/linxutone.log.

# NOTE(review): spelled 'linxutone' while the class is Linuxtone_Extractor; kept as-is
# because the value names the log file and the logFilePath property.
SITE='linxutone'
BASE='/home/song/hx/new_osseanextractor'

# Remove XML files from the compiled classes directory before launching
# (presumably so the copies under bin/resources are the ones loaded - TODO confirm).
# "-exec ... +" is safe for file names containing whitespace, unlike a bare xargs pipe.
find "$BASE/target/classes" -name "*.xml" -exec rm -f {} +

# Search order: dependency jars, compiled classes, external resources.
tmp="$BASE/target/osseanextractor-0.0.1-jar-with-dependencies-without-resources/*:$BASE/target/classes:$BASE/bin/resources"

CLASSPATH="$tmp:$CLASSPATH"

JAVA_OPTS="-Xms256m -Xmx256m -Xmn128m"

echo "$CLASSPATH"

# JAVA_OPTS stays unquoted on purpose so it splits into separate JVM flags;
# the class path is quoted so the shell never expands its '*' wildcard.
java $JAVA_OPTS "-DlogFilePath=${SITE}" -classpath "$CLASSPATH" net.trustie.extractor.Linuxtone_Extractor >>"$BASE/log/${SITE}.log" 2>&1 &
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
# Starts the Lupaworld_Extractor JVM in the background; output is appended to log/lupaworld.log.

SITE='lupaworld'
BASE='/home/song/hx/new_osseanextractor'

# Remove XML files from the compiled classes directory before launching
# (presumably so the copies under bin/resources are the ones loaded - TODO confirm).
# "-exec ... +" is safe for file names containing whitespace, unlike a bare xargs pipe.
find "$BASE/target/classes" -name "*.xml" -exec rm -f {} +

# Search order: dependency jars, compiled classes, external resources.
tmp="$BASE/target/osseanextractor-0.0.1-jar-with-dependencies-without-resources/*:$BASE/target/classes:$BASE/bin/resources"

CLASSPATH="$tmp:$CLASSPATH"

JAVA_OPTS="-Xms256m -Xmx256m -Xmn128m"

echo "$CLASSPATH"

# JAVA_OPTS stays unquoted on purpose so it splits into separate JVM flags;
# the class path is quoted so the shell never expands its '*' wildcard.
java $JAVA_OPTS "-DlogFilePath=${SITE}" -classpath "$CLASSPATH" net.trustie.extractor.Lupaworld_Extractor >>"$BASE/log/${SITE}.log" 2>&1 &
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
# Starts the neitui_Extractor JVM in the background; output is appended to log/neitui.log.

SITE='neitui'
BASE='/home/song/hx/new_osseanextractor'

# Remove XML files from the compiled classes directory before launching
# (presumably so the copies under bin/resources are the ones loaded - TODO confirm).
# "-exec ... +" is safe for file names containing whitespace, unlike a bare xargs pipe.
find "$BASE/target/classes" -name "*.xml" -exec rm -f {} +

# Search order: dependency jars, compiled classes, external resources.
tmp="$BASE/target/osseanextractor-0.0.1-jar-with-dependencies-without-resources/*:$BASE/target/classes:$BASE/bin/resources"

CLASSPATH="$tmp:$CLASSPATH"

JAVA_OPTS="-Xms256m -Xmx512m -Xmn128m"

echo "$CLASSPATH"

# JAVA_OPTS stays unquoted on purpose so it splits into separate JVM flags;
# the class path is quoted so the shell never expands its '*' wildcard.
java $JAVA_OPTS "-DlogFilePath=${SITE}" -classpath "$CLASSPATH" net.trustie.extractor.neitui_Extractor >>"$BASE/log/${SITE}.log" 2>&1 &
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
# Starts the OSChinaProject_Extractor JVM in the background; output is appended to log/oschina_project.log.

SITE='oschina_project'
BASE='/home/song/hx/new_osseanextractor'

# Remove XML files from the compiled classes directory before launching
# (presumably so the copies under bin/resources are the ones loaded - TODO confirm).
# "-exec ... +" is safe for file names containing whitespace, unlike a bare xargs pipe.
find "$BASE/target/classes" -name "*.xml" -exec rm -f {} +

# Search order: dependency jars, compiled classes, external resources.
tmp="$BASE/target/osseanextractor-0.0.1-jar-with-dependencies-without-resources/*:$BASE/target/classes:$BASE/bin/resources"

CLASSPATH="$tmp:$CLASSPATH"

JAVA_OPTS="-Xms256m -Xmx256m -Xmn128m"

echo "$CLASSPATH"

# JAVA_OPTS stays unquoted on purpose so it splits into separate JVM flags;
# the class path is quoted so the shell never expands its '*' wildcard.
java $JAVA_OPTS "-DlogFilePath=${SITE}" -classpath "$CLASSPATH" net.trustie.extractor.OSChinaProject_Extractor >>"$BASE/log/${SITE}.log" 2>&1 &
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
# Starts the OSChinaQuestion_Extractor JVM in the background; output is appended to log/oschina_question.log.

SITE='oschina_question'
BASE='/home/song/hx/new_osseanextractor'

# Remove XML files from the compiled classes directory before launching
# (presumably so the copies under bin/resources are the ones loaded - TODO confirm).
# "-exec ... +" is safe for file names containing whitespace, unlike a bare xargs pipe.
find "$BASE/target/classes" -name "*.xml" -exec rm -f {} +

# Search order: dependency jars, compiled classes, external resources.
tmp="$BASE/target/osseanextractor-0.0.1-jar-with-dependencies-without-resources/*:$BASE/target/classes:$BASE/bin/resources"

CLASSPATH="$tmp:$CLASSPATH"

JAVA_OPTS="-Xms256m -Xmx256m -Xmn128m"

echo "$CLASSPATH"

# JAVA_OPTS stays unquoted on purpose so it splits into separate JVM flags;
# the class path is quoted so the shell never expands its '*' wildcard.
java $JAVA_OPTS "-DlogFilePath=${SITE}" -classpath "$CLASSPATH" net.trustie.extractor.OSChinaQuestion_Extractor >>"$BASE/log/${SITE}.log" 2>&1 &
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
# Starts the PHPChina_Extractor JVM in the background; output is appended to log/phpchina.log.

SITE='phpchina'
BASE='/home/song/hx/new_osseanextractor'

# Remove XML files from the compiled classes directory before launching
# (presumably so the copies under bin/resources are the ones loaded - TODO confirm).
# "-exec ... +" is safe for file names containing whitespace, unlike a bare xargs pipe.
find "$BASE/target/classes" -name "*.xml" -exec rm -f {} +

# Search order: dependency jars, compiled classes, external resources.
tmp="$BASE/target/osseanextractor-0.0.1-jar-with-dependencies-without-resources/*:$BASE/target/classes:$BASE/bin/resources"

CLASSPATH="$tmp:$CLASSPATH"

JAVA_OPTS="-Xms256m -Xmx256m -Xmn128m"

echo "$CLASSPATH"

# JAVA_OPTS stays unquoted on purpose so it splits into separate JVM flags;
# the class path is quoted so the shell never expands its '*' wildcard.
java $JAVA_OPTS "-DlogFilePath=${SITE}" -classpath "$CLASSPATH" net.trustie.extractor.PHPChina_Extractor >>"$BASE/log/${SITE}.log" 2>&1 &
|
|
@ -0,0 +1,30 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
|
||||
<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/">
|
||||
|
||||
<appender name="console" class="org.apache.log4j.ConsoleAppender">
    <!-- Threshold is a property of the appender, not of PatternLayout; when it sat
         inside <layout> log4j could not apply it and only logged a warning. -->
    <param name="Threshold" value="INFO" />
    <layout class="org.apache.log4j.PatternLayout">
        <param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
    </layout>
</appender>
|
||||
|
||||
<appender name="railyFile" class="org.apache.log4j.RollingFileAppender">
|
||||
<param name="File" value="./error/${logFilePath}/error.log"></param>
|
||||
<param name="ImmediateFlush" value="true" />
|
||||
<param name="Threshold" value="warn"></param>
|
||||
<param name="Append" value="true"></param>
|
||||
<param name="MaxFileSize" value="1024KB"></param>
|
||||
<param name="MaxBackupIndex" value="100"></param>
|
||||
<layout class="org.apache.log4j.PatternLayout">
|
||||
<param name="ConversionPattern" value="[%d{yyyy-MM-dd HH:mm:ss\} %-5p] {%c:%L}#%m%n"></param>
|
||||
</layout>
|
||||
</appender>
|
||||
|
||||
<root>
|
||||
<level value="info" />
|
||||
<appender-ref ref="console" />
|
||||
<appender-ref ref="railyFile" />
|
||||
</root>
|
||||
|
||||
</log4j:configuration>
|
|
@ -0,0 +1,40 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<beans xmlns="http://www.springframework.org/schema/beans"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://www.springframework.org/schema/beans
|
||||
http://www.springframework.org/schema/beans/spring-beans-3.0.xsd">
|
||||
<!--SourceData -->
|
||||
<bean id="dataSourceOne" class="org.apache.commons.dbcp.BasicDataSource"
|
||||
destroy-method="close">
|
||||
<property name="driverClassName" value="com.mysql.jdbc.Driver" />
|
||||
<property name="url"
|
||||
value="jdbc:mysql://192.168.80.104:3306/pages?characterEncoding=UTF-8" />
|
||||
<property name="username" value="influx" />
|
||||
<property name="password" value="influx1234" />
|
||||
</bean>
|
||||
<bean id="sqlSessionFactoryOne" class="org.mybatis.spring.SqlSessionFactoryBean">
|
||||
<property name="dataSource" ref="dataSourceOne" />
|
||||
</bean>
|
||||
<bean class="org.mybatis.spring.mapper.MapperScannerConfigurer">
|
||||
<property name="basePackage" value="net.trustie.downloader" />
|
||||
<property name="sqlSessionFactory" ref="sqlSessionFactoryOne"></property>
|
||||
</bean>
|
||||
|
||||
<!--DestinationData -->
|
||||
<bean id="dataSourceTwo" class="org.apache.commons.dbcp.BasicDataSource"
|
||||
destroy-method="close">
|
||||
<property name="driverClassName" value="com.mysql.jdbc.Driver" />
|
||||
<property name="url"
|
||||
value="jdbc:mysql://192.168.80.104:3306/extract_result?characterEncoding=UTF-8" />
|
||||
<property name="username" value="influx" />
|
||||
<property name="password" value="influx1234" />
|
||||
</bean>
|
||||
<bean id="sqlSessionFactoryTwo" class="org.mybatis.spring.SqlSessionFactoryBean">
|
||||
<property name="dataSource" ref="dataSourceTwo" />
|
||||
</bean>
|
||||
<bean class="org.mybatis.spring.mapper.MapperScannerConfigurer">
|
||||
<property name="basePackage" value="net.trustie.dao" />
|
||||
<property name="sqlSessionFactory" ref="sqlSessionFactoryTwo"></property>
|
||||
</bean>
|
||||
|
||||
</beans>
|
|
@ -0,0 +1,15 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<beans xmlns="http://www.springframework.org/schema/beans"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xmlns:context="http://www.springframework.org/schema/context"
|
||||
xmlns:mvc="http://www.springframework.org/schema/mvc"
|
||||
xsi:schemaLocation="http://www.springframework.org/schema/mvc
|
||||
http://www.springframework.org/schema/mvc/spring-mvc-3.0.xsd
|
||||
http://www.springframework.org/schema/beans
|
||||
http://www.springframework.org/schema/beans/spring-beans-3.0.xsd
|
||||
http://www.springframework.org/schema/context
|
||||
http://www.springframework.org/schema/context/spring-context-3.0.xsd">
|
||||
<context:annotation-config/>
|
||||
<context:component-scan base-package="net.trustie"/>
|
||||
|
||||
</beans>
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
# Starts the SlashdotNews_Extractor JVM in the background; output is appended to log/slashdot_news.log.

SITE='slashdot_news'
BASE='/home/song/hx/new_osseanextractor'

# Remove XML files from the compiled classes directory before launching
# (presumably so the copies under bin/resources are the ones loaded - TODO confirm).
# "-exec ... +" is safe for file names containing whitespace, unlike a bare xargs pipe.
find "$BASE/target/classes" -name "*.xml" -exec rm -f {} +

# Search order: dependency jars, compiled classes, external resources.
tmp="$BASE/target/osseanextractor-0.0.1-jar-with-dependencies-without-resources/*:$BASE/target/classes:$BASE/bin/resources"

CLASSPATH="$tmp:$CLASSPATH"

JAVA_OPTS="-Xms256m -Xmx512m -Xmn128m"

echo "$CLASSPATH"

# JAVA_OPTS stays unquoted on purpose so it splits into separate JVM flags;
# the class path is quoted so the shell never expands its '*' wildcard.
java $JAVA_OPTS "-DlogFilePath=${SITE}" -classpath "$CLASSPATH" net.trustie.extractor.SlashdotNews_Extractor >>"$BASE/log/${SITE}.log" 2>&1 &
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
# Starts the SoftPedia_Extractor JVM in the background; output is appended to log/softpedia.log.

SITE='softpedia'
BASE='/home/song/hx/new_osseanextractor'

# Remove XML files from the compiled classes directory before launching
# (presumably so the copies under bin/resources are the ones loaded - TODO confirm).
# "-exec ... +" is safe for file names containing whitespace, unlike a bare xargs pipe.
find "$BASE/target/classes" -name "*.xml" -exec rm -f {} +

# Search order: dependency jars, compiled classes, external resources.
tmp="$BASE/target/osseanextractor-0.0.1-jar-with-dependencies-without-resources/*:$BASE/target/classes:$BASE/bin/resources"

CLASSPATH="$tmp:$CLASSPATH"

JAVA_OPTS="-Xms256m -Xmx256m -Xmn128m"

echo "$CLASSPATH"

# JAVA_OPTS stays unquoted on purpose so it splits into separate JVM flags;
# the class path is quoted so the shell never expands its '*' wildcard.
java $JAVA_OPTS "-DlogFilePath=${SITE}" -classpath "$CLASSPATH" net.trustie.extractor.SoftPedia_Extractor >>"$BASE/log/${SITE}.log" 2>&1 &
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
# Starts the StackOverflow_Extractor JVM in the background; output is appended to log/stackoverflow_q.log.

SITE='stackoverflow_q'
BASE='/home/song/hx/new_osseanextractor'

# Remove XML files from the compiled classes directory before launching
# (presumably so the copies under bin/resources are the ones loaded - TODO confirm).
# "-exec ... +" is safe for file names containing whitespace, unlike a bare xargs pipe.
find "$BASE/target/classes" -name "*.xml" -exec rm -f {} +

# Search order: dependency jars, compiled classes, external resources.
tmp="$BASE/target/osseanextractor-0.0.1-jar-with-dependencies-without-resources/*:$BASE/target/classes:$BASE/bin/resources"

CLASSPATH="$tmp:$CLASSPATH"

JAVA_OPTS="-Xms256m -Xmx256m -Xmn128m"

echo "$CLASSPATH"

# JAVA_OPTS stays unquoted on purpose so it splits into separate JVM flags;
# the class path is quoted so the shell never expands its '*' wildcard.
java $JAVA_OPTS "-DlogFilePath=${SITE}" -classpath "$CLASSPATH" net.trustie.extractor.StackOverflow_Extractor >>"$BASE/log/${SITE}.log" 2>&1 &
|
|
@ -0,0 +1,25 @@
|
|||
#!/bin/bash
# Runs every per-site launcher script, one after another, in the fixed order below.

bin_dir=/home/song/hx/new_osseanextractor/bin

for launcher in \
    code_project \
    cnblog_news \
    cnblog_q_solve_comment \
    cnblog_q_solve \
    cnblog_q_unsolve \
    csdn_ask \
    csdn_blog \
    csdn_topic \
    dewen_q \
    freecode_pro \
    iteye_ask \
    iteye_blog \
    OpenHub_pro \
    oschina_project \
    oschina_question \
    SFProject \
    stackoverflow_q \
    lupaworld \
    gna \
    51cto_blog \
    apache \
    phpchina_post \
    softpedia
do
    # Each launcher is executed through sh, exactly as a direct "sh <path>" call would.
    sh "$bin_dir/$launcher.sh"
done
|
|
@ -0,0 +1,4 @@
|
|||
51cto_blog 51cto_blog_html_detail
|
||||
apache apache_html_detail
|
||||
cnblog_q_unsolve cnblogs_q_unsolved_html_detail
|
||||
codeproject codeproject_html_detail
|
|
@ -0,0 +1,52 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
|
||||
<component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_7" inherit-compiler-output="false">
|
||||
<output url="file://$MODULE_DIR$/target/classes" />
|
||||
<output-test url="file://$MODULE_DIR$/target/test-classes" />
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/target" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
<orderEntry type="library" name="Maven: junit:junit-dep:4.10" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.hamcrest:hamcrest-core:1.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: us.codecraft:webmagic-core:0.5.2" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpclient:4.3.3" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpcore:4.3.2" level="project" />
|
||||
<orderEntry type="library" name="Maven: commons-logging:commons-logging:1.1.3" level="project" />
|
||||
<orderEntry type="library" name="Maven: commons-codec:commons-codec:1.6" level="project" />
|
||||
<orderEntry type="library" name="Maven: com.google.guava:guava:15.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.commons:commons-lang3:3.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: us.codecraft:xsoup:0.2.4" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.7.6" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.slf4j:slf4j-log4j12:1.7.6" level="project" />
|
||||
<orderEntry type="library" name="Maven: log4j:log4j:1.2.17" level="project" />
|
||||
<orderEntry type="library" name="Maven: commons-collections:commons-collections:3.2.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.assertj:assertj-core:1.5.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.jsoup:jsoup:1.7.2" level="project" />
|
||||
<orderEntry type="library" name="Maven: commons-io:commons-io:1.3.2" level="project" />
|
||||
<orderEntry type="library" name="Maven: com.jayway.jsonpath:json-path:0.8.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: net.minidev:json-smart:1.1.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: commons-lang:commons-lang:2.6" level="project" />
|
||||
<orderEntry type="library" name="Maven: com.alibaba:fastjson:1.1.37" level="project" />
|
||||
<orderEntry type="library" name="Maven: us.codecraft:webmagic-extension:0.5.2" level="project" />
|
||||
<orderEntry type="library" name="Maven: redis.clients:jedis:2.0.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework:spring-context:3.1.1.RELEASE" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework:spring-aop:3.1.1.RELEASE" level="project" />
|
||||
<orderEntry type="library" name="Maven: aopalliance:aopalliance:1.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework:spring-beans:3.1.1.RELEASE" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework:spring-core:3.1.1.RELEASE" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework:spring-expression:3.1.1.RELEASE" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework:spring-asm:3.1.1.RELEASE" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.mybatis:mybatis:3.1.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.mybatis:mybatis-spring:1.1.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework:spring-tx:3.1.1.RELEASE" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework:spring-jdbc:3.1.1.RELEASE" level="project" />
|
||||
<orderEntry type="library" name="Maven: commons-dbcp:commons-dbcp:1.4" level="project" />
|
||||
<orderEntry type="library" name="Maven: commons-pool:commons-pool:1.5.4" level="project" />
|
||||
<orderEntry type="library" name="Maven: mysql:mysql-connector-java:5.1.18" level="project" />
|
||||
</component>
|
||||
</module>
|
|
@ -0,0 +1,87 @@
|
|||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>org.ossean</groupId>
|
||||
<artifactId>osseanextractor</artifactId>
|
||||
<version>0.0.1</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<name>osseanextractor</name>
|
||||
<url>http://maven.apache.org</url>
|
||||
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
</properties>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<configuration>
|
||||
<source>1.7</source>
|
||||
<target>1.7</target>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-assembly-plugin</artifactId>
|
||||
<version>2.5.1</version>
|
||||
<configuration>
|
||||
<descriptors>
|
||||
<descriptor>src/main/assembly/assembly.xml</descriptor>
|
||||
</descriptors>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit-dep</artifactId>
|
||||
<version>4.10</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>us.codecraft</groupId>
|
||||
<artifactId>webmagic-core</artifactId>
|
||||
<version>0.5.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>us.codecraft</groupId>
|
||||
<artifactId>webmagic-extension</artifactId>
|
||||
<version>0.5.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework</groupId>
|
||||
<artifactId>spring-context</artifactId>
|
||||
<version>3.1.1.RELEASE</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mybatis</groupId>
|
||||
<artifactId>mybatis</artifactId>
|
||||
<version>3.1.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mybatis</groupId>
|
||||
<artifactId>mybatis-spring</artifactId>
|
||||
<version>1.1.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-dbcp</groupId>
|
||||
<artifactId>commons-dbcp</artifactId>
|
||||
<version>1.4</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>mysql</groupId>
|
||||
<artifactId>mysql-connector-java</artifactId>
|
||||
<version>5.1.18</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-io</groupId>
|
||||
<artifactId>commons-io</artifactId>
|
||||
<version>2.4</version>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
</project>
|
|
@ -0,0 +1,40 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<beans xmlns="http://www.springframework.org/schema/beans"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://www.springframework.org/schema/beans
|
||||
http://www.springframework.org/schema/beans/spring-beans-3.0.xsd">
|
||||
<!--SourceData -->
|
||||
<bean id="dataSourceOne" class="org.apache.commons.dbcp.BasicDataSource"
|
||||
destroy-method="close">
|
||||
<property name="driverClassName" value="com.mysql.jdbc.Driver" />
|
||||
<property name="url"
|
||||
value="jdbc:mysql://localhost:3306/pages?characterEncoding=UTF-8" />
|
||||
<property name="username" value="root" />
|
||||
<property name="password" value="root" />
|
||||
</bean>
|
||||
<bean id="sqlSessionFactoryOne" class="org.mybatis.spring.SqlSessionFactoryBean">
|
||||
<property name="dataSource" ref="dataSourceOne" />
|
||||
</bean>
|
||||
<bean class="org.mybatis.spring.mapper.MapperScannerConfigurer">
|
||||
<property name="basePackage" value="net.trustie.downloader" />
|
||||
<property name="sqlSessionFactory" ref="sqlSessionFactoryOne"></property>
|
||||
</bean>
|
||||
|
||||
<!--DestinationData -->
|
||||
<bean id="dataSourceTwo" class="org.apache.commons.dbcp.BasicDataSource"
|
||||
destroy-method="close">
|
||||
<property name="driverClassName" value="com.mysql.jdbc.Driver" />
|
||||
<property name="url"
|
||||
value="jdbc:mysql://localhost:3306/extract_result?characterEncoding=UTF-8" />
|
||||
<property name="username" value="root" />
|
||||
<property name="password" value="root" />
|
||||
</bean>
|
||||
<bean id="sqlSessionFactoryTwo" class="org.mybatis.spring.SqlSessionFactoryBean">
|
||||
<property name="dataSource" ref="dataSourceTwo" />
|
||||
</bean>
|
||||
<bean class="org.mybatis.spring.mapper.MapperScannerConfigurer">
|
||||
<property name="basePackage" value="net.trustie.dao" />
|
||||
<property name="sqlSessionFactory" ref="sqlSessionFactoryTwo"></property>
|
||||
</bean>
|
||||
|
||||
</beans>
|
|
@ -0,0 +1,15 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<beans xmlns="http://www.springframework.org/schema/beans"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xmlns:context="http://www.springframework.org/schema/context"
|
||||
xmlns:mvc="http://www.springframework.org/schema/mvc"
|
||||
xsi:schemaLocation="http://www.springframework.org/schema/mvc
|
||||
http://www.springframework.org/schema/mvc/spring-mvc-3.0.xsd
|
||||
http://www.springframework.org/schema/beans
|
||||
http://www.springframework.org/schema/beans/spring-beans-3.0.xsd
|
||||
http://www.springframework.org/schema/context
|
||||
http://www.springframework.org/schema/context/spring-context-3.0.xsd">
|
||||
<context:annotation-config/>
|
||||
<context:component-scan base-package="net.trustie"/>
|
||||
|
||||
</beans>
|
|
@ -0,0 +1,25 @@
|
|||
<assembly
|
||||
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
|
||||
<!-- TODO: a jarjar format would be better -->
|
||||
<id>jar-with-dependencies-without-resources</id>
|
||||
<formats>
|
||||
<format>zip</format>
|
||||
</formats>
|
||||
<includeBaseDirectory>false</includeBaseDirectory>
|
||||
<dependencySets>
|
||||
<dependencySet>
|
||||
<outputDirectory>/</outputDirectory>
|
||||
<useProjectArtifact>false</useProjectArtifact>
|
||||
<unpack>false</unpack>
|
||||
<scope>runtime</scope>
|
||||
</dependencySet>
|
||||
<dependencySet>
|
||||
<outputDirectory>/</outputDirectory>
|
||||
<useProjectArtifact>false</useProjectArtifact>
|
||||
<unpack>false</unpack>
|
||||
<scope>system</scope>
|
||||
</dependencySet>
|
||||
</dependencySets>
|
||||
</assembly>
|
|
@ -0,0 +1,7 @@
|
|||
package core;
|
||||
|
||||
public interface AfterExtractor {
|
||||
|
||||
public void afterProcess(Page page);
|
||||
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
package core;
|
||||
|
||||
import java.lang.reflect.Field;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.lang.reflect.Method;
|
||||
import java.util.Set;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import us.codecraft.webmagic.Task;
|
||||
import us.codecraft.webmagic.utils.ClassUtils;
|
||||
public class ConsolePipeline implements Pipeline {
|
||||
|
||||
@SuppressWarnings("rawtypes")
|
||||
public void process(ResultItems resultItems, Task task) {
|
||||
Set<String> keySet = resultItems.getAll().keySet();
|
||||
for (String key : keySet) {
|
||||
Object object = resultItems.get(key);
|
||||
Class clazz = object.getClass();
|
||||
if (AfterExtractor.class.isAssignableFrom(clazz)) {
|
||||
for (Field field : ClassUtils.getFieldsIncludeSuperClass(clazz)) {
|
||||
field.setAccessible(true);
|
||||
prinfField(field, clazz, object);
|
||||
}
|
||||
} else
|
||||
System.out.println(key + ":\t" + resultItems.get(key));
|
||||
System.out.println("*****************************");
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings({ "rawtypes", "unchecked" })
|
||||
public void prinfField(Field field, Class clazz, Object object) {
|
||||
String fieldName = field.getName();
|
||||
String outPut = fieldName + ":\t";
|
||||
fieldName = "get" + StringUtils.capitalize(field.getName());
|
||||
try {
|
||||
Method method = clazz.getMethod(fieldName);
|
||||
System.out.println(outPut + method.invoke(object));
|
||||
|
||||
} catch (NoSuchMethodException e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
} catch (SecurityException e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
} catch (IllegalAccessException e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
} catch (IllegalArgumentException e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
} catch (InvocationTargetException e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,52 @@
|
|||
package core;
|
||||
|
||||
import us.codecraft.webmagic.selector.Selector;
|
||||
|
||||
/**
|
||||
* The object contains 'ExtractBy' information.
|
||||
*
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @since 0.2.0
|
||||
*/
|
||||
class Extractor {
|
||||
|
||||
protected Selector selector;
|
||||
|
||||
protected final Source source;
|
||||
|
||||
protected final boolean notNull;
|
||||
|
||||
protected final boolean multi;
|
||||
|
||||
static enum Source {
|
||||
Html, Url, RawHtml
|
||||
}
|
||||
|
||||
public Extractor(Selector selector, Source source, boolean notNull,
|
||||
boolean multi) {
|
||||
this.selector = selector;
|
||||
this.source = source;
|
||||
this.notNull = notNull;
|
||||
this.multi = multi;
|
||||
}
|
||||
|
||||
Selector getSelector() {
|
||||
return selector;
|
||||
}
|
||||
|
||||
Source getSource() {
|
||||
return source;
|
||||
}
|
||||
|
||||
boolean isNotNull() {
|
||||
return notNull;
|
||||
}
|
||||
|
||||
boolean isMulti() {
|
||||
return multi;
|
||||
}
|
||||
|
||||
void setSelector(Selector selector) {
|
||||
this.selector = selector;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,8 @@
|
|||
package core;
|
||||
|
||||
/**
 * Contract with a run entry point and a boolean check.
 * NOTE(review): no implementation is visible here, so the exact semantics of
 * both methods should be confirmed against the implementing classes.
 */
public interface ExtractorBegin {

    /** Entry point of the work; presumably executed on a worker thread - confirm with implementers. */
    void threadRun();

    /**
     * @return result of the implementation-defined check
     */
    boolean check();
}
|
|
@ -0,0 +1,60 @@
|
|||
package core;
|
||||
|
||||
import java.lang.reflect.Field;
|
||||
import java.lang.reflect.Method;
|
||||
|
||||
import us.codecraft.webmagic.model.formatter.ObjectFormatter;
|
||||
import us.codecraft.webmagic.selector.Selector;
|
||||
|
||||
/**
|
||||
* Wrapper of field and extractor.
|
||||
*
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @since 0.2.0
|
||||
*/
|
||||
class FieldExtractor extends Extractor {
|
||||
|
||||
private final Field field;
|
||||
|
||||
private Method setterMethod;
|
||||
|
||||
private ObjectFormatter<?> objectFormatter;
|
||||
|
||||
public FieldExtractor(Field field, Selector selector, Source source,
|
||||
boolean notNull, boolean multi) {
|
||||
super(selector, source, notNull, multi);
|
||||
this.field = field;
|
||||
}
|
||||
|
||||
Field getField() {
|
||||
return field;
|
||||
}
|
||||
|
||||
Selector getSelector() {
|
||||
return selector;
|
||||
}
|
||||
|
||||
Source getSource() {
|
||||
return source;
|
||||
}
|
||||
|
||||
void setSetterMethod(Method setterMethod) {
|
||||
this.setterMethod = setterMethod;
|
||||
}
|
||||
|
||||
Method getSetterMethod() {
|
||||
return setterMethod;
|
||||
}
|
||||
|
||||
boolean isNotNull() {
|
||||
return notNull;
|
||||
}
|
||||
|
||||
ObjectFormatter<?> getObjectFormatter() {
|
||||
return objectFormatter;
|
||||
}
|
||||
|
||||
void setObjectFormatter(ObjectFormatter<?> objectFormatter) {
|
||||
this.objectFormatter = objectFormatter;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,39 @@
|
|||
package core;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class ModelPageProcessor implements PageProcessor {
|
||||
private List<PageModelExtractor> pageModelExtractorList = new ArrayList<PageModelExtractor>();
|
||||
|
||||
public static ModelPageProcessor create(Class<?>... clazzs) {
|
||||
ModelPageProcessor modelPageProcessor = new ModelPageProcessor();
|
||||
for (Class<?> clazz : clazzs) {
|
||||
modelPageProcessor.addPageModel(clazz);
|
||||
}
|
||||
return modelPageProcessor;
|
||||
}
|
||||
|
||||
public ModelPageProcessor addPageModel(Class<?> clazz) {
|
||||
PageModelExtractor pageModelExtractor = PageModelExtractor
|
||||
.create(clazz);
|
||||
pageModelExtractorList.add(pageModelExtractor);
|
||||
return this;
|
||||
}
|
||||
|
||||
public void process(Page page) {
|
||||
// TODO Auto-generated method stub
|
||||
for (PageModelExtractor pageModelExtractor : pageModelExtractorList) {
|
||||
Object process = pageModelExtractor.process(page);
|
||||
if (process == null
|
||||
|| (process instanceof List && ((List<?>) process).size() == 0)) {
|
||||
continue;
|
||||
}
|
||||
page.putField(pageModelExtractor.getClazz().getCanonicalName(),
|
||||
process);
|
||||
}
|
||||
if (page.getResultItems().getAll().size() == 0) {
|
||||
page.getResultItems().setSkip(true);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,60 @@
|
|||
package core;
|
||||
|
||||
import java.lang.annotation.Annotation;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
import us.codecraft.webmagic.Task;
|
||||
import us.codecraft.webmagic.model.annotation.ExtractBy;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* The extension to Pipeline for page model extractor.
|
||||
*
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @since 0.2.0
|
||||
*/
|
||||
public class ModelPipeline implements Pipeline {
|
||||
|
||||
@SuppressWarnings("rawtypes")
|
||||
private Map<Class, PageModelPipeline> pageModelPipelines = new ConcurrentHashMap<Class, PageModelPipeline>();
|
||||
|
||||
public ModelPipeline() {
|
||||
}
|
||||
|
||||
public ModelPipeline put(Class<?> clazz,
|
||||
PageModelPipeline<?> pageModelPipeline) {
|
||||
pageModelPipelines.put(clazz, pageModelPipeline);
|
||||
return this;
|
||||
}
|
||||
|
||||
@SuppressWarnings({ "rawtypes", "unchecked", "deprecation" })
|
||||
public void process(ResultItems resultItems, Task task) {
|
||||
for (Map.Entry<Class, PageModelPipeline> classPageModelPipelineEntry : pageModelPipelines
|
||||
.entrySet()) {
|
||||
boolean isSkip = resultItems
|
||||
.getFieldSkip(classPageModelPipelineEntry.getKey()
|
||||
.getCanonicalName());
|
||||
if (isSkip) {
|
||||
continue;
|
||||
}
|
||||
Object o = resultItems.get(classPageModelPipelineEntry.getKey()
|
||||
.getCanonicalName());
|
||||
if (o != null) {
|
||||
Annotation annotation = classPageModelPipelineEntry.getKey()
|
||||
.getAnnotation(ExtractBy.class);
|
||||
if (annotation == null || !((ExtractBy) annotation).multi()) {
|
||||
classPageModelPipelineEntry.getValue().process(o, task);
|
||||
} else {
|
||||
List<Object> list = (List<Object>) o;
|
||||
for (Object o1 : list) {
|
||||
classPageModelPipelineEntry.getValue()
|
||||
.process(o1, task);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,161 @@
|
|||
package core;
|
||||
|
||||
import java.util.Date;
|
||||
|
||||
import us.codecraft.webmagic.selector.Html;
|
||||
import us.codecraft.webmagic.selector.Json;
|
||||
import us.codecraft.webmagic.selector.Selectable;
|
||||
import us.codecraft.webmagic.utils.UrlUtils;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author Administrator
|
||||
*
|
||||
*/
|
||||
public class Page {
|
||||
private ResultItems resultItems = new ResultItems();
|
||||
|
||||
private Html html;
|
||||
|
||||
private Json json;
|
||||
|
||||
private String rawText;
|
||||
|
||||
private Selectable url;
|
||||
|
||||
private String pageUrl;
|
||||
|
||||
private Date time;
|
||||
|
||||
public Page() {
|
||||
|
||||
}
|
||||
|
||||
public Page setSkip(boolean skip) {
|
||||
resultItems.setSkip(skip);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* store extract results
|
||||
*
|
||||
* @param key
|
||||
* @param field
|
||||
*/
|
||||
public void putField(String key, Object field) {
|
||||
resultItems.put(key, field);
|
||||
}
|
||||
|
||||
/**
|
||||
* decide the extract results produced by single model are store;
|
||||
*
|
||||
* @param key
|
||||
* @param value
|
||||
*/
|
||||
public void setResultSkip(Object object, boolean value) {
|
||||
resultItems.putFieldSkip(object.getClass().getCanonicalName(), value);
|
||||
}
|
||||
|
||||
/**
|
||||
* get results is Skiped
|
||||
*/
|
||||
public boolean getResultSkip(Object object) {
|
||||
return resultItems.getFieldSkip(object.getClass().getCanonicalName());
|
||||
}
|
||||
|
||||
/**
|
||||
* resultItems is all skip
|
||||
* @return
|
||||
*/
|
||||
public boolean isAllResultSkip(){
|
||||
return resultItems.isAllFieldSkip();
|
||||
}
|
||||
|
||||
/**
|
||||
* resultItems is all skip find by name;
|
||||
* @param names
|
||||
* @return
|
||||
*/
|
||||
public boolean isAllResultSkip(String...names){
|
||||
return resultItems.isAllFieldSkip(names);
|
||||
}
|
||||
|
||||
/**
|
||||
* get html content of page
|
||||
*
|
||||
* @return html
|
||||
*/
|
||||
public Html getHtml() {
|
||||
if (html == null) {
|
||||
html = new Html(UrlUtils.fixAllRelativeHrefs(rawText, pageUrl));
|
||||
}
|
||||
return html;
|
||||
}
|
||||
|
||||
/**
|
||||
* get json content of page
|
||||
*
|
||||
* @return json
|
||||
* @since 0.5.0
|
||||
*/
|
||||
public Json getJson() {
|
||||
if (json == null) {
|
||||
json = new Json(rawText);
|
||||
}
|
||||
return json;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param html
|
||||
* @deprecated since 0.4.0 The html is parse just when first time of calling
|
||||
* {@link #getHtml()}, so use {@link #setRawText(String)}
|
||||
* instead.
|
||||
*/
|
||||
public void setHtml(Html html) {
|
||||
this.html = html;
|
||||
}
|
||||
|
||||
public String getRawText() {
|
||||
return rawText;
|
||||
}
|
||||
|
||||
public Page setRawText(String rawText) {
|
||||
this.rawText = rawText;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Selectable getUrl() {
|
||||
return url;
|
||||
}
|
||||
|
||||
public void setUrl(Selectable url) {
|
||||
this.url = url;
|
||||
}
|
||||
|
||||
public ResultItems getResultItems() {
|
||||
return resultItems;
|
||||
}
|
||||
|
||||
public String getPageUrl() {
|
||||
return pageUrl;
|
||||
}
|
||||
|
||||
public void setPageUrl(String pageUrl) {
|
||||
this.pageUrl = pageUrl;
|
||||
}
|
||||
|
||||
public Date getTime() {
|
||||
return time;
|
||||
}
|
||||
|
||||
public void setTime(Date time) {
|
||||
this.time = time;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Page{" + "pageUrl=" + pageUrl + ", resultItems=" + resultItems
|
||||
+ ", rawText='" + rawText + '\'' + ", url=" + url + '}';
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,326 @@
|
|||
package core;
|
||||
|
||||
import java.lang.annotation.Annotation;
|
||||
import java.lang.reflect.Field;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.lang.reflect.Method;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import us.codecraft.webmagic.model.annotation.ExtractBy;
|
||||
import us.codecraft.webmagic.model.annotation.Formatter;
|
||||
import us.codecraft.webmagic.model.formatter.BasicTypeFormatter;
|
||||
import us.codecraft.webmagic.model.formatter.ObjectFormatter;
|
||||
import us.codecraft.webmagic.model.formatter.ObjectFormatters;
|
||||
import us.codecraft.webmagic.selector.Selector;
|
||||
import us.codecraft.webmagic.selector.XpathSelector;
|
||||
import us.codecraft.webmagic.utils.ClassUtils;
|
||||
import us.codecraft.webmagic.utils.ExtractorUtils;
|
||||
|
||||
public class PageModelExtractor {
|
||||
private Class<?> clazz;
|
||||
private Extractor objectExtractor;
|
||||
private List<FieldExtractor> fieldExtractors;
|
||||
private Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
public static PageModelExtractor create(Class<?> clazz) {
|
||||
PageModelExtractor pageModelExtractor = new PageModelExtractor();
|
||||
pageModelExtractor.init(clazz);
|
||||
return pageModelExtractor;
|
||||
}
|
||||
|
||||
private void init(Class<?> clazz) {
|
||||
this.clazz = clazz;
|
||||
initClassExtractors();
|
||||
fieldExtractors = new ArrayList<FieldExtractor>();
|
||||
for (Field field : ClassUtils.getFieldsIncludeSuperClass(clazz)) {
|
||||
field.setAccessible(true);
|
||||
FieldExtractor fieldExtractor = getAnnotationExtractBy(clazz, field);
|
||||
if (fieldExtractor != null) {
|
||||
checkFormat(field, fieldExtractor);
|
||||
fieldExtractors.add(fieldExtractor);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void checkFormat(Field field, FieldExtractor fieldExtractor) {
|
||||
// TODO Auto-generated method stub
|
||||
Formatter formatter = field.getAnnotation(Formatter.class);
|
||||
if (formatter != null
|
||||
&& !formatter.formatter().equals(ObjectFormatter.class)) {
|
||||
if (formatter != null) {
|
||||
if (!formatter.formatter().equals(ObjectFormatter.class)) {
|
||||
ObjectFormatter<?> objectFormatter = initFormatter(formatter
|
||||
.formatter());
|
||||
objectFormatter.initParam(formatter.value());
|
||||
fieldExtractor.setObjectFormatter(objectFormatter);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!fieldExtractor.isMulti()
|
||||
&& !String.class.isAssignableFrom(field.getType())) {
|
||||
Class<?> fieldClazz = BasicTypeFormatter.detectBasicClass(field
|
||||
.getType());
|
||||
ObjectFormatter<?> objectFormatter = getObjectFormatter(field,
|
||||
fieldClazz, formatter);
|
||||
if (objectFormatter == null) {
|
||||
throw new IllegalStateException(
|
||||
"Can't find formatter for field " + field.getName()
|
||||
+ " of type " + fieldClazz);
|
||||
} else {
|
||||
fieldExtractor.setObjectFormatter(objectFormatter);
|
||||
}
|
||||
} else if (fieldExtractor.isMulti()) {
|
||||
if (!List.class.isAssignableFrom(field.getType())) {
|
||||
throw new IllegalStateException("Field " + field.getName()
|
||||
+ " must be list");
|
||||
}
|
||||
if (formatter != null) {
|
||||
if (!formatter.subClazz().equals(Void.class)) {
|
||||
ObjectFormatter<?> objectFormatter = getObjectFormatter(
|
||||
field, formatter.subClazz(), formatter);
|
||||
if (objectFormatter == null) {
|
||||
throw new IllegalStateException(
|
||||
"Can't find formatter for field "
|
||||
+ field.getName() + " of type "
|
||||
+ formatter.subClazz());
|
||||
} else {
|
||||
fieldExtractor.setObjectFormatter(objectFormatter);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings("rawtypes")
|
||||
private ObjectFormatter<?> initFormatter(
|
||||
Class<? extends ObjectFormatter> formatter) {
|
||||
// TODO Auto-generated method stub
|
||||
try {
|
||||
return formatter.newInstance();
|
||||
} catch (InstantiationException e) {
|
||||
logger.error("init ObjectFormatter fail", e);
|
||||
} catch (IllegalAccessException e) {
|
||||
logger.error("init ObjectFormatter fail", e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private ObjectFormatter<?> getObjectFormatter(Field field,
|
||||
Class<?> subClazz, Formatter formatter) {
|
||||
// TODO Auto-generated method stub
|
||||
return initFormatter(ObjectFormatters.get(subClazz));
|
||||
}
|
||||
|
||||
@SuppressWarnings("deprecation")
|
||||
private FieldExtractor getAnnotationExtractBy(Class<?> clazz, Field field) {
|
||||
FieldExtractor fieldExtractor = null;
|
||||
ExtractBy extractBy = field.getAnnotation(ExtractBy.class);
|
||||
if (extractBy != null) {
|
||||
Selector selector = ExtractorUtils.getSelector(extractBy);
|
||||
fieldExtractor = new FieldExtractor(
|
||||
field,
|
||||
selector,
|
||||
extractBy.source() == ExtractBy.Source.RawHtml ? FieldExtractor.Source.RawHtml
|
||||
: FieldExtractor.Source.Html, extractBy.notNull(),
|
||||
extractBy.multi()
|
||||
|| List.class.isAssignableFrom(field.getType()));
|
||||
Method setterMethod = getSetterMethod(clazz, field);
|
||||
if (setterMethod != null) {
|
||||
fieldExtractor.setSetterMethod(setterMethod);
|
||||
}
|
||||
}
|
||||
return fieldExtractor;
|
||||
}
|
||||
|
||||
private Method getSetterMethod(Class<?> clazz, Field field) {
|
||||
// TODO Auto-generated method stub
|
||||
String name = "set" + StringUtils.capitalize(field.getName());
|
||||
try {
|
||||
Method declaredMethod = clazz.getDeclaredMethod(name,
|
||||
field.getType());
|
||||
declaredMethod.setAccessible(true);
|
||||
return declaredMethod;
|
||||
} catch (NoSuchMethodException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings("deprecation")
|
||||
private void initClassExtractors() {
|
||||
// TODO Auto-generated method stub
|
||||
Annotation annotation = clazz.getAnnotation(ExtractBy.class);
|
||||
if (annotation != null) {
|
||||
ExtractBy extractBy = (ExtractBy) annotation;
|
||||
objectExtractor = new Extractor(
|
||||
new XpathSelector(extractBy.value()),
|
||||
Extractor.Source.Html, extractBy.notNull(),
|
||||
extractBy.multi());
|
||||
}
|
||||
}
|
||||
|
||||
public Object process(Page page) {
|
||||
if (objectExtractor == null) {
|
||||
return processSingle(page, null, true);
|
||||
} else {
|
||||
if (objectExtractor.multi) {
|
||||
List<Object> os = new ArrayList<Object>();
|
||||
List<String> list = objectExtractor.getSelector().selectList(
|
||||
page.getRawText());
|
||||
for (String s : list) {
|
||||
Object o = processSingle(page, s, false);
|
||||
if (o != null) {
|
||||
os.add(o);
|
||||
}
|
||||
}
|
||||
return os;
|
||||
} else {
|
||||
String select = objectExtractor.getSelector().select(
|
||||
page.getRawText());
|
||||
Object o = processSingle(page, select, false);
|
||||
return o;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private Object processSingle(Page page, String html, boolean isRaw) {
|
||||
// TODO Auto-generated method stub
|
||||
Object o = null;
|
||||
try {
|
||||
o = clazz.newInstance();//将所写的model实例化
|
||||
for (FieldExtractor fieldExtractor : fieldExtractors) {
|
||||
if (fieldExtractor.isMulti()) {
|
||||
List<String> value;
|
||||
switch (fieldExtractor.getSource()) {
|
||||
case RawHtml:
|
||||
value = page.getHtml().selectDocumentForList(
|
||||
fieldExtractor.getSelector());
|
||||
break;
|
||||
case Html:
|
||||
if (isRaw) {
|
||||
value = page.getHtml().selectDocumentForList(
|
||||
fieldExtractor.getSelector());
|
||||
} else {
|
||||
value = fieldExtractor.getSelector().selectList(
|
||||
html);
|
||||
}
|
||||
break;
|
||||
case Url:
|
||||
value = fieldExtractor.getSelector().selectList(
|
||||
page.getUrl().toString());
|
||||
break;
|
||||
default:
|
||||
value = fieldExtractor.getSelector().selectList(html);
|
||||
}
|
||||
if ((value == null || value.size() == 0)
|
||||
&& fieldExtractor.isNotNull()) {
|
||||
return null;
|
||||
}
|
||||
if (fieldExtractor.getObjectFormatter() != null) {
|
||||
List<Object> converted = convert(value,
|
||||
fieldExtractor.getObjectFormatter());
|
||||
setField(o, fieldExtractor, converted);
|
||||
} else {
|
||||
setField(o, fieldExtractor, value);
|
||||
}
|
||||
} else {
|
||||
String value;
|
||||
switch (fieldExtractor.getSource()) {
|
||||
case RawHtml:
|
||||
value = page.getHtml().selectDocument(
|
||||
fieldExtractor.getSelector());
|
||||
break;
|
||||
case Html:
|
||||
if (isRaw) {
|
||||
value = page.getHtml().selectDocument(
|
||||
fieldExtractor.getSelector());
|
||||
} else {
|
||||
value = fieldExtractor.getSelector().select(html);
|
||||
}
|
||||
break;
|
||||
case Url:
|
||||
value = fieldExtractor.getSelector().select(
|
||||
page.getUrl().toString());
|
||||
break;
|
||||
default:
|
||||
value = fieldExtractor.getSelector().select(html);
|
||||
}
|
||||
if (value == null && fieldExtractor.isNotNull()) {
|
||||
return null;
|
||||
}
|
||||
if (fieldExtractor.getObjectFormatter() != null) {
|
||||
|
||||
Object converted = convert(value,
|
||||
fieldExtractor.getObjectFormatter());
|
||||
if (converted == null && fieldExtractor.isNotNull()) {
|
||||
return null;
|
||||
}
|
||||
setField(o, fieldExtractor, converted);
|
||||
} else {
|
||||
setField(o, fieldExtractor, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (AfterExtractor.class.isAssignableFrom(clazz)) {
|
||||
((AfterExtractor) o).afterProcess(page);
|
||||
}
|
||||
if (ValidateExtractor.class.isAssignableFrom(clazz)) {
|
||||
((ValidateExtractor) o).validate(page);
|
||||
}
|
||||
} catch (InstantiationException e) {
|
||||
logger.error("extract fail", e);
|
||||
} catch (IllegalAccessException e) {
|
||||
logger.error("extract fail", e);
|
||||
} catch (InvocationTargetException e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
}
|
||||
return o;
|
||||
}
|
||||
|
||||
private List<Object> convert(List<String> values,
|
||||
ObjectFormatter<?> objectFormatter) {
|
||||
List<Object> objects = new ArrayList<Object>();
|
||||
for (String value : values) {
|
||||
Object converted = convert(value, objectFormatter);
|
||||
if (converted != null) {
|
||||
objects.add(converted);
|
||||
}
|
||||
}
|
||||
return objects;
|
||||
}
|
||||
|
||||
private Object convert(String value, ObjectFormatter<?> objectFormatter) {
|
||||
// TODO Auto-generated method stub
|
||||
try {
|
||||
Object format = objectFormatter.format(value);
|
||||
logger.debug("String {} is converted to {}", value, format);
|
||||
return format;
|
||||
} catch (Exception e) {
|
||||
logger.error("convert " + value + " to " + objectFormatter.clazz()
|
||||
+ " error!", e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private void setField(Object o, FieldExtractor fieldExtractor, Object value)
|
||||
throws IllegalAccessException, InvocationTargetException {
|
||||
if (value == null) {
|
||||
return;
|
||||
}
|
||||
if (fieldExtractor.getSetterMethod() != null) {
|
||||
fieldExtractor.getSetterMethod().invoke(o, value);
|
||||
}
|
||||
fieldExtractor.getField().set(o, value);
|
||||
}
|
||||
|
||||
Class<?> getClazz() {
|
||||
return clazz;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
package core;
|
||||
|
||||
import us.codecraft.webmagic.Task;
|
||||
|
||||
/**
|
||||
* Implements PageModelPipeline to persistent your page model.
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @since 0.2.0
|
||||
*/
|
||||
public interface PageModelPipeline<T> {
|
||||
|
||||
public void process(T t, Task task);
|
||||
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
package core;
|
||||
|
||||
/**
|
||||
* Interface to be implemented to customize a crawler.<br>
|
||||
* <br>
|
||||
* In PageProcessor, you can customize:
|
||||
* <p/>
|
||||
* start urls and other settings in {@link Site}<br>
|
||||
* how the urls to fetch are detected <br>
|
||||
* how the data are extracted and stored <br>
|
||||
*
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @see Site
|
||||
* @see Page
|
||||
* @since 0.1.0
|
||||
*/
|
||||
public interface PageProcessor {
|
||||
|
||||
/**
|
||||
* process the page, extract urls to fetch, extract the data and store
|
||||
*
|
||||
* @param page
|
||||
*/
|
||||
public void process(Page page);
|
||||
}
|
|
@ -0,0 +1,23 @@
|
|||
package core;
|
||||
|
||||
import us.codecraft.webmagic.Task;
|
||||
|
||||
/**
|
||||
* Pipeline is the persistent and offline process part of crawler.<br>
|
||||
* The interface Pipeline can be implemented to customize ways of persistent.
|
||||
*
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @since 0.1.0
|
||||
* @see ConsolePipeline
|
||||
* @see FilePipeline
|
||||
*/
|
||||
public interface Pipeline {
|
||||
|
||||
/**
|
||||
* Process extracted results.
|
||||
*
|
||||
* @param resultItems
|
||||
* @param task
|
||||
*/
|
||||
public void process(ResultItems resultItems, Task task);
|
||||
}
|
|
@ -0,0 +1,119 @@
|
|||
package core;
|
||||
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/**
 * Container for extraction results.<br>
 * Holds the named result values, a per-name skip flag, a skip flag for the
 * whole set, and the page URL.
 *
 * @author code4crafter@gmail.com <br>
 * @since 0.1.0
 * @see Page
 */
public class ResultItems {

    /** Result values by name, in insertion order. */
    private Map<String, Object> fields = new LinkedHashMap<String, Object>();

    /** Per-name skip flags, in insertion order. */
    private Map<String, Boolean> isFieldSkip = new LinkedHashMap<String, Boolean>();

    private boolean skip;

    private String pageUrl;

    /**
     * Looks up a result value.
     *
     * @param key result name
     * @return the stored value cast to the caller's type, or {@code null} if
     *         nothing is stored under the name
     */
    @SuppressWarnings("unchecked")
    public <T> T get(String key) {
        return (T) fields.get(key);
    }

    /**
     * Stores a result value.
     *
     * @param key   result name
     * @param value result value
     * @return this
     */
    public <T> ResultItems put(String key, T value) {
        fields.put(key, value);
        return this;
    }

    /**
     * @return the live map of all result values
     */
    public Map<String, Object> getAll() {
        return fields;
    }

    /**
     * @param key result name
     * @return the skip flag stored for the name, {@code false} if none is stored
     */
    public boolean getFieldSkip(String key) {
        Boolean flag = isFieldSkip.get(key);
        return flag != null && flag.booleanValue();
    }

    /**
     * Stores the skip flag for one result name.
     *
     * @param key   result name
     * @param value skip flag
     * @return this
     */
    public ResultItems putFieldSkip(String key, boolean value) {
        isFieldSkip.put(key, value);
        return this;
    }

    /**
     * Whether to skip the result.<br>
     * Result which is skipped will not be processed by Pipeline.
     *
     * @return whether to skip the result
     */
    public boolean isSkip() {
        return skip;
    }

    /**
     * Set whether to skip the result.<br>
     * Result which is skipped will not be processed by Pipeline.
     *
     * @param skip
     *            whether to skip the result
     * @return this
     */
    public ResultItems setSkip(boolean skip) {
        this.skip = skip;
        return this;
    }

    /**
     * Reports whether every result is skipped: there must be at least as many
     * skip flags as result values, and every stored flag must be
     * {@code true}.
     *
     * @return {@code true} if both conditions hold
     */
    public boolean isAllFieldSkip() {
        boolean enoughFlags = fields.size() <= isFieldSkip.size();
        if (!enoughFlags) {
            return false;
        }
        for (Boolean flag : isFieldSkip.values()) {
            if (!flag.booleanValue()) {
                return false;
            }
        }
        return true;
    }

    /**
     * Reports whether every one of the named results is flagged as skipped.
     *
     * @param fieldName result names to check
     * @return {@code true} if each name has a {@code true} skip flag
     */
    public boolean isAllFieldSkip(String... fieldName) {
        for (int i = 0; i < fieldName.length; i++) {
            if (!getFieldSkip(fieldName[i])) {
                return false;
            }
        }
        return true;
    }

    public String getUrl() {
        return pageUrl;
    }

    public void setUrl(String url) {
        this.pageUrl = url;
    }

    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("ResultItems{");
        text.append("fields=").append(fields);
        text.append(",url=").append(pageUrl);
        text.append(", skip=").append(skip);
        text.append('}');
        return text.toString();
    }
}
|
|
@ -0,0 +1,42 @@
|
|||
package core;
|
||||
|
||||
/**
 * Fluent holder for three settings: first time, period and result number.
 * NOTE(review): the units of {@code firstTime} and {@code period} are not
 * visible in this class (the default period of 10000 looks like
 * milliseconds) - confirm with the callers.
 */
public class Site {

    /** Defaults to 0. */
    private int firstTime = 0;

    /** Defaults to 10000. */
    private long period = 10L * 1000L;

    /** Defaults to 100. */
    private int resultNum = 100;

    /**
     * @return a new instance with default settings
     */
    public static Site me() {
        Site site = new Site();
        return site;
    }

    /**
     * @param num new result number
     * @return this
     */
    public Site setResultNum(int num) {
        resultNum = num;
        return this;
    }

    /**
     * @param firstTime new first-time value
     * @return this
     */
    public Site setFirstTime(int firstTime) {
        this.firstTime = firstTime;
        return this;
    }

    /**
     * @param period new period
     * @return this
     */
    public Site setPeriod(long period) {
        this.period = period;
        return this;
    }

    public long getPeriod() {
        return period;
    }

    public int getFirstTime() {
        return firstTime;
    }

    public int getResultNum() {
        return resultNum;
    }

}
|
|
@ -0,0 +1,14 @@
|
|||
package core;
|
||||
|
||||
/**
|
||||
* Interface to be implemented by page models that need to do something after
|
||||
* fields are extracted.<br>
|
||||
*
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @since 0.2.0
|
||||
*/
|
||||
public interface ValidateExtractor {
|
||||
|
||||
public void validate(Page page);
|
||||
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue