/usr/java/latest/bin/java -Dspark.master=local[*] -Dspline.persistence.factory=za.co.absa.spline.persistence.mongo.MongoPersistenceFactory -Dspline.mongodb.url=mongodb://localhost -Dspline.mongodb.name=spline -Didea.launcher.port=7532 -Didea.launcher.bin.path=/opt/idea-IU-141.3058.30/bin -Dfile.encoding=UTF-8 -classpath /usr/java/latest/jre/lib/rt.jar:/usr/java/latest/jre/lib/management-agent.jar:/usr/java/latest/jre/lib/jce.jar:/usr/java/latest/jre/lib/plugin.jar:/usr/java/latest/jre/lib/javaws.jar:/usr/java/latest/jre/lib/jsse.jar:/usr/java/latest/jre/lib/resources.jar:/usr/java/latest/jre/lib/jfxswt.jar:/usr/java/latest/jre/lib/charsets.jar:/usr/java/latest/jre/lib/deploy.jar:/usr/java/latest/jre/lib/jfr.jar:/usr/java/latest/jre/lib/ext/sunjce_provider.jar:/usr/java/latest/jre/lib/ext/cldrdata.jar:/usr/java/latest/jre/lib/ext/zipfs.jar:/usr/java/latest/jre/lib/ext/jfxrt.jar:/usr/java/latest/jre/lib/ext/nashorn.jar:/usr/java/latest/jre/lib/ext/localedata.jar:/usr/java/latest/jre/lib/ext/dnsns.jar:/usr/java/latest/jre/lib/ext/sunec.jar:/usr/java/latest/jre/lib/ext/sunpkcs11.jar:/home/awajda/Projects/absaoss/spline/sample/target/classes:/home/awajda/.m2/repository/org/apache/spark/spark-core_2.11/2.2.0/spark-core_2.11-2.2.0.jar:/home/awajda/.m2/repository/org/apache/avro/avro/1.7.7/avro-1.7.7.jar:/home/awajda/.m2/repository/org/codehaus/jackson/jackson-core-asl/1.9.13/jackson-core-asl-1.9.13.jar:/home/awajda/.m2/repository/org/codehaus/jackson/jackson-mapper-asl/1.9.13/jackson-mapper-asl-1.9.13.jar:/home/awajda/.m2/repository/com/thoughtworks/paranamer/paranamer/2.3/paranamer-2.3.jar:/home/awajda/.m2/repository/org/apache/commons/commons-compress/1.4.1/commons-compress-1.4.1.jar:/home/awajda/.m2/repository/org/tukaani/xz/1.0/xz-1.0.jar:/home/awajda/.m2/repository/org/apache/avro/avro-mapred/1.7.7/avro-mapred-1.7.7-hadoop2.jar:/home/awajda/.m2/repository/org/apache/avro/avro-ipc/1.7.7/avro-ipc-1.7.7.jar:/home/awajda/.m2/repository/org/apache/avro/avro-ipc/1.7.7/avro-ipc-1.7.7-tests.jar:/home/awajda/.m2/repository/com/twitter/chill_2.11/0.8.0/chill_2.11-0.8.0.jar:/home/awajda/.m2/repository/com/esotericsoftware/kryo-shaded/3.0.3/kryo-shaded-3.0.3.jar:/home/awajda/.m2/repository/com/esotericsoftware/minlog/1.3.0/minlog-1.3.0.jar:/home/awajda/.m2/repository/org/objenesis/objenesis/2.1/objenesis-2.1.jar:/home/awajda/.m2/repository/com/twitter/chill-java/0.8.0/chill-java-0.8.0.jar:/home/awajda/.m2/repository/org/apache/xbean/xbean-asm5-shaded/4.4/xbean-asm5-shaded-4.4.jar:/home/awajda/.m2/repository/org/apache/hadoop/hadoop-client/2.6.5/hadoop-client-2.6.5.jar:/home/awajda/.m2/repository/org/apache/hadoop/hadoop-common/2.6.5/hadoop-common-2.6.5.jar:/home/awajda/.m2/repository/commons-cli/commons-cli/1.2/commons-cli-1.2.jar:/home/awajda/.m2/repository/xmlenc/xmlenc/0.52/xmlenc-0.52.jar:/home/awajda/.m2/repository/commons-httpclient/commons-httpclient/3.1/commons-httpclient-3.1.jar:/home/awajda/.m2/repository/commons-io/commons-io/2.4/commons-io-2.4.jar:/home/awajda/.m2/repository/commons-collections/commons-collections/3.2.2/commons-collections-3.2.2.jar:/home/awajda/.m2/repository/commons-lang/commons-lang/2.6/commons-lang-2.6.jar:/home/awajda/.m2/repository/com/google/protobuf/protobuf-java/2.5.0/protobuf-java-2.5.0.jar:/home/awajda/.m2/repository/org/apache/hadoop/hadoop-auth/2.6.5/hadoop-auth-2.6.5.jar:/home/awajda/.m2/repository/org/apache/directory/server/apacheds-kerberos-codec/2.0.0-M15/apacheds-kerberos-codec-2.0.0-M15.jar:/home/awajda/.m2/repository/org/apache/directory/server/apacheds-i18n/2.0.0-M15/apacheds-i18n-2.0.0-M15.jar:/home/awajda/.m2/repository/org/apache/directory/api/api-asn1-api/1.0.0-M20/api-asn1-api-1.0.0-M20.jar:/home/awajda/.m2/repository/org/apache/directory/api/api-util/1.0.0-M20/api-util-1.0.0-M20.jar:/home/awajda/.m2/repository/org/apache/curator/curator-client/2.6.0/curator-client-2.6.0.jar:/home/awajda/.m2/repository/org/htrace/htrace-core/3.0.4/htrace-core-3.0.4.jar:/home/awajda/.m2/repository/org/apache/hadoop/hadoop-hdfs/2.6.5/hadoop-hdfs-2.6.5.jar:/home/awajda/.m2/repository/org/mortbay/jetty/jetty-util/6.1.26/jetty-util-6.1.26.jar:/home/awajda/.m2/repository/xerces/xercesImpl/2.9.1/xercesImpl-2.9.1.jar:/home/awajda/.m2/repository/xml-apis/xml-apis/1.3.04/xml-apis-1.3.04.jar:/home/awajda/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-app/2.6.5/hadoop-mapreduce-client-app-2.6.5.jar:/home/awajda/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-common/2.6.5/hadoop-mapreduce-client-common-2.6.5.jar:/home/awajda/.m2/repository/org/apache/hadoop/hadoop-yarn-client/2.6.5/hadoop-yarn-client-2.6.5.jar:/home/awajda/.m2/repository/org/apache/hadoop/hadoop-yarn-server-common/2.6.5/hadoop-yarn-server-common-2.6.5.jar:/home/awajda/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-shuffle/2.6.5/hadoop-mapreduce-client-shuffle-2.6.5.jar:/home/awajda/.m2/repository/org/apache/hadoop/hadoop-yarn-api/2.6.5/hadoop-yarn-api-2.6.5.jar:/home/awajda/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-core/2.6.5/hadoop-mapreduce-client-core-2.6.5.jar:/home/awajda/.m2/repository/org/apache/hadoop/hadoop-yarn-common/2.6.5/hadoop-yarn-common-2.6.5.jar:/home/awajda/.m2/repository/javax/xml/bind/jaxb-api/2.2.2/jaxb-api-2.2.2.jar:/home/awajda/.m2/repository/javax/xml/stream/stax-api/1.0-2/stax-api-1.0-2.jar:/home/awajda/.m2/repository/org/codehaus/jackson/jackson-xc/1.9.13/jackson-xc-1.9.13.jar:/home/awajda/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-jobclient/2.6.5/hadoop-mapreduce-client-jobclient-2.6.5.jar:/home/awajda/.m2/repository/org/apache/hadoop/hadoop-annotations/2.6.5/hadoop-annotations-2.6.5.jar:/home/awajda/.m2/repository/org/apache/spark/spark-launcher_2.11/2.2.0/spark-launcher_2.11-2.2.0.jar:/home/awajda/.m2/repository/org/apache/spark/spark-network-common_2.11/2.2.0/spark-network-common_2.11-2.2.0.jar:/home/awajda/.m2/repository/org/fusesource/leveldbjni/leveldbjni-all/1.8/leveldbjni-all-1.8.jar:/home/awajda/.m2/repository/com/fasterxml/jackson/core/jackson-annotations/2.6.5/jackson-annotations-2.6.5.jar:/home/awajda/.m2/repository/org/apache/spark/spark-network-shuffle_2.11/2.2.0/spark-network-shuffle_2.11-2.2.0.jar:/home/awajda/.m2/repository/org/apache/spark/spark-unsafe_2.11/2.2.0/spark-unsafe_2.11-2.2.0.jar:/home/awajda/.m2/repository/net/java/dev/jets3t/jets3t/0.9.3/jets3t-0.9.3.jar:/home/awajda/.m2/repository/org/apache/httpcomponents/httpcore/4.3.3/httpcore-4.3.3.jar:/home/awajda/.m2/repository/org/apache/httpcomponents/httpclient/4.3.6/httpclient-4.3.6.jar:/home/awajda/.m2/repository/commons-codec/commons-codec/1.10/commons-codec-1.10.jar:/home/awajda/.m2/repository/javax/activation/activation/1.1.1/activation-1.1.1.jar:/home/awajda/.m2/repository/mx4j/mx4j/3.0.2/mx4j-3.0.2.jar:/home/awajda/.m2/repository/javax/mail/mail/1.4.7/mail-1.4.7.jar:/home/awajda/.m2/repository/org/bouncycastle/bcprov-jdk15on/1.51/bcprov-jdk15on-1.51.jar:/home/awajda/.m2/repository/com/jamesmurty/utils/java-xmlbuilder/1.0/java-xmlbuilder-1.0.jar:/home/awajda/.m2/repository/net/iharder/base64/2.3.8/base64-2.3.8.jar:/home/awajda/.m2/repository/org/apache/curator/curator-recipes/2.6.0/curator-recipes-2.6.0.jar:/home/awajda/.m2/repository/org/apache/curator/curator-framework/2.6.0/curator-framework-2.6.0.jar:/home/awajda/.m2/repository/org/apache/zookeeper/zookeeper/3.4.6/zookeeper-3.4.6.jar:/home/awajda/.m2/repository/com/google/guava/guava/16.0.1/guava-16.0.1.jar:/home/awajda/.m2/repository/javax/servlet/javax.servlet-api/3.1.0/javax.servlet-api-3.1.0.jar:/home/awajda/.m2/repository/org/apache/commons/commons-lang3/3.5/commons-lang3-3.5.jar:/home/awajda/.m2/repository/org/apache/commons/commons-math3/3.4.1/commons-math3-3.4.1.jar:/home/awajda/.m2/repository/com/google/code/findbugs/jsr305/1.3.9/jsr305-1.3.9.jar:/home/awajda/.m2/repository/org/slf4j/slf4j-api/1.7.25/slf4j-api-1.7.25.jar:/home/awajda/.m2/repository/org/slf4j/jul-to-slf4j/1.7.25/jul-to-slf4j-1.7.25.jar:/home/awajda/.m2/repository/org/slf4j/jcl-over-slf4j/1.7.25/jcl-over-slf4j-1.7.25.jar:/home/awajda/.m2/repository/log4j/log4j/1.2.17/log4j-1.2.17.jar:/home/awajda/.m2/repository/org/slf4j/slf4j-log4j12/1.7.25/slf4j-log4j12-1.7.25.jar:/home/awajda/.m2/repository/com/ning/compress-lzf/1.0.3/compress-lzf-1.0.3.jar:/home/awajda/.m2/repository/org/xerial/snappy/snappy-java/1.1.2.6/snappy-java-1.1.2.6.jar:/home/awajda/.m2/repository/net/jpountz/lz4/lz4/1.3.0/lz4-1.3.0.jar:/home/awajda/.m2/repository/org/roaringbitmap/RoaringBitmap/0.5.11/RoaringBitmap-0.5.11.jar:/home/awajda/.m2/repository/commons-net/commons-net/2.2/commons-net-2.2.jar:/home/awajda/.m2/repository/org/json4s/json4s-jackson_2.11/3.2.11/json4s-jackson_2.11-3.2.11.jar:/home/awajda/.m2/repository/org/json4s/json4s-core_2.11/3.2.11/json4s-core_2.11-3.2.11.jar:/home/awajda/.m2/repository/org/json4s/json4s-ast_2.11/3.2.11/json4s-ast_2.11-3.2.11.jar:/home/awajda/.m2/repository/org/glassfish/jersey/core/jersey-client/2.22.2/jersey-client-2.22.2.jar:/home/awajda/.m2/repository/javax/ws/rs/javax.ws.rs-api/2.0.1/javax.ws.rs-api-2.0.1.jar:/home/awajda/.m2/repository/org/glassfish/hk2/hk2-api/2.4.0-b34/hk2-api-2.4.0-b34.jar:/home/awajda/.m2/repository/org/glassfish/hk2/hk2-utils/2.4.0-b34/hk2-utils-2.4.0-b34.jar:/home/awajda/.m2/repository/org/glassfish/hk2/external/aopalliance-repackaged/2.4.0-b34/aopalliance-repackaged-2.4.0-b34.jar:/home/awajda/.m2/repository/org/glassfish/hk2/external/javax.inject/2.4.0-b34/javax.inject-2.4.0-b34.jar:/home/awajda/.m2/repository/org/glassfish/hk2/hk2-locator/2.4.0-b34/hk2-locator-2.4.0-b34.jar:/home/awajda/.m2/repository/org/javassist/javassist/3.18.1-GA/javassist-3.18.1-GA.jar:/home/awajda/.m2/repository/org/glassfish/jersey/core/jersey-common/2.22.2/jersey-common-2.22.2.jar:/home/awajda/.m2/repository/javax/annotation/javax.annotation-api/1.2/javax.annotation-api-1.2.jar:/home/awajda/.m2/repository/org/glassfish/jersey/bundles/repackaged/jersey-guava/2.22.2/jersey-guava-2.22.2.jar:/home/awajda/.m2/repository/org/glassfish/hk2/osgi-resource-locator/1.0.1/osgi-resource-locator-1.0.1.jar:/home/awajda/.m2/repository/org/glassfish/jersey/core/jersey-server/2.22.2/jersey-server-2.22.2.jar:/home/awajda/.m2/repository/org/glassfish/jersey/media/jersey-media-jaxb/2.22.2/jersey-media-jaxb-2.22.2.jar:/home/awajda/.m2/repository/javax/validation/validation-api/1.1.0.Final/validation-api-1.1.0.Final.jar:/home/awajda/.m2/repository/org/glassfish/jersey/containers/jersey-container-servlet/2.22.2/jersey-container-servlet-2.22.2.jar:/home/awajda/.m2/repository/org/glassfish/jersey/containers/jersey-container-servlet-core/2.22.2/jersey-container-servlet-core-2.22.2.jar:/home/awajda/.m2/repository/io/netty/netty-all/4.0.43.Final/netty-all-4.0.43.Final.jar:/home/awajda/.m2/repository/io/netty/netty/3.9.9.Final/netty-3.9.9.Final.jar:/home/awajda/.m2/repository/com/clearspring/analytics/stream/2.7.0/stream-2.7.0.jar:/home/awajda/.m2/repository/io/dropwizard/metrics/metrics-core/3.1.2/metrics-core-3.1.2.jar:/home/awajda/.m2/repository/io/dropwizard/metrics/metrics-jvm/3.1.2/metrics-jvm-3.1.2.jar:/home/awajda/.m2/repository/io/dropwizard/metrics/metrics-json/3.1.2/metrics-json-3.1.2.jar:/home/awajda/.m2/repository/io/dropwizard/metrics/metrics-graphite/3.1.2/metrics-graphite-3.1.2.jar:/home/awajda/.m2/repository/com/fasterxml/jackson/core/jackson-databind/2.6.5/jackson-databind-2.6.5.jar:/home/awajda/.m2/repository/com/fasterxml/jackson/core/jackson-core/2.6.5/jackson-core-2.6.5.jar:/home/awajda/.m2/repository/com/fasterxml/jackson/module/jackson-module-scala_2.11/2.6.5/jackson-module-scala_2.11-2.6.5.jar:/home/awajda/.m2/repository/com/fasterxml/jackson/module/jackson-module-paranamer/2.6.5/jackson-module-paranamer-2.6.5.jar:/home/awajda/.m2/repository/org/apache/ivy/ivy/2.4.0/ivy-2.4.0.jar:/home/awajda/.m2/repository/oro/oro/2.0.8/oro-2.0.8.jar:/home/awajda/.m2/repository/net/razorvine/pyrolite/4.13/pyrolite-4.13.jar:/home/awajda/.m2/repository/net/sf/py4j/py4j/0.10.4/py4j-0.10.4.jar:/home/awajda/.m2/repository/org/apache/spark/spark-tags_2.11/2.2.0/spark-tags_2.11-2.2.0.jar:/home/awajda/.m2/repository/org/apache/commons/commons-crypto/1.0.0/commons-crypto-1.0.0.jar:/home/awajda/.m2/repository/org/spark-project/spark/unused/1.0.0/unused-1.0.0.jar:/home/awajda/.m2/repository/org/apache/spark/spark-sql_2.11/2.2.0/spark-sql_2.11-2.2.0.jar:/home/awajda/.m2/repository/com/univocity/univocity-parsers/2.2.1/univocity-parsers-2.2.1.jar:/home/awajda/.m2/repository/org/apache/spark/spark-sketch_2.11/2.2.0/spark-sketch_2.11-2.2.0.jar:/home/awajda/.m2/repository/org/apache/spark/spark-catalyst_2.11/2.2.0/spark-catalyst_2.11-2.2.0.jar:/home/awajda/.m2/repository/org/codehaus/janino/janino/3.0.0/janino-3.0.0.jar:/home/awajda/.m2/repository/org/codehaus/janino/commons-compiler/3.0.0/commons-compiler-3.0.0.jar:/home/awajda/.m2/repository/org/antlr/antlr4-runtime/4.5.3/antlr4-runtime-4.5.3.jar:/home/awajda/.m2/repository/org/apache/parquet/parquet-column/1.8.2/parquet-column-1.8.2.jar:/home/awajda/.m2/repository/org/apache/parquet/parquet-common/1.8.2/parquet-common-1.8.2.jar:/home/awajda/.m2/repository/org/apache/parquet/parquet-encoding/1.8.2/parquet-encoding-1.8.2.jar:/home/awajda/.m2/repository/org/apache/parquet/parquet-hadoop/1.8.2/parquet-hadoop-1.8.2.jar:/home/awajda/.m2/repository/org/apache/parquet/parquet-format/2.3.1/parquet-format-2.3.1.jar:/home/awajda/.m2/repository/org/apache/parquet/parquet-jackson/1.8.2/parquet-jackson-1.8.2.jar:/home/awajda/Projects/absaoss/spline/core/target/classes:/home/awajda/Projects/absaoss/spline/commons/target/classes:/home/awajda/.m2/repository/commons-configuration/commons-configuration/1.10/commons-configuration-1.10.jar:/home/awajda/.m2/repository/commons-logging/commons-logging/1.1.1/commons-logging-1.1.1.jar:/home/awajda/Projects/absaoss/spline/model/target/classes:/home/awajda/.m2/repository/com/github/salat/salat-util_2.11/1.11.2/salat-util_2.11-1.11.2.jar:/home/awajda/Projects/absaoss/spline/persistence/api/target/classes:/home/awajda/Projects/absaoss/spline/persistence/mongo/target/classes:/home/awajda/.m2/repository/com/github/salat/salat-core_2.11/1.11.2/salat-core_2.11-1.11.2.jar:/home/awajda/.m2/repository/org/mongodb/casbah-core_2.11/3.1.1/casbah-core_2.11-3.1.1.jar:/home/awajda/.m2/repository/org/mongodb/casbah-commons_2.11/3.1.1/casbah-commons_2.11-3.1.1.jar:/home/awajda/.m2/repository/org/mongodb/mongo-java-driver/3.2.2/mongo-java-driver-3.2.2.jar:/home/awajda/.m2/repository/org/mongodb/casbah-query_2.11/3.1.1/casbah-query_2.11-3.1.1.jar:/home/awajda/.m2/repository/org/json4s/json4s-native_2.11/3.2.9/json4s-native_2.11-3.2.9.jar:/home/awajda/Projects/absaoss/spline/persistence/atlas/target/classes:/home/awajda/.m2/repository/org/apache/atlas/atlas-notification/0.8-incubating/atlas-notification-0.8-incubating.jar:/home/awajda/.m2/repository/org/apache/atlas/atlas-client/0.8-incubating/atlas-client-0.8-incubating.jar:/home/awajda/.m2/repository/com/sun/jersey/jersey-client/1.19/jersey-client-1.19.jar:/home/awajda/.m2/repository/com/sun/jersey/jersey-core/1.19/jersey-core-1.19.jar:/home/awajda/.m2/repository/javax/ws/rs/jsr311-api/1.1.1/jsr311-api-1.1.1.jar:/home/awajda/.m2/repository/org/apache/atlas/atlas-common/0.8-incubating/atlas-common-0.8-incubating.jar:/home/awajda/.m2/repository/com/google/inject/guice/4.1.0/guice-4.1.0.jar:/home/awajda/.m2/repository/aopalliance/aopalliance/1.0/aopalliance-1.0.jar:/home/awajda/.m2/repository/org/springframework/spring-beans/3.1.3.RELEASE/spring-beans-3.1.3.RELEASE.jar:/home/awajda/.m2/repository/org/springframework/spring-core/3.1.3.RELEASE/spring-core-3.1.3.RELEASE.jar:/home/awajda/.m2/repository/org/springframework/spring-asm/3.1.3.RELEASE/spring-asm-3.1.3.RELEASE.jar:/home/awajda/.m2/repository/org/apache/atlas/atlas-server-api/0.8-incubating/atlas-server-api-0.8-incubating.jar:/home/awajda/.m2/repository/org/aspectj/aspectjrt/1.8.7/aspectjrt-1.8.7.jar:/home/awajda/.m2/repository/org/apache/kafka/kafka-clients/0.10.0.0/kafka-clients-0.10.0.0.jar:/home/awajda/.m2/repository/org/apache/kafka/kafka_2.11/0.10.0.0/kafka_2.11-0.10.0.0.jar:/home/awajda/.m2/repository/com/yammer/metrics/metrics-core/2.2.0/metrics-core-2.2.0.jar:/home/awajda/.m2/repository/net/sf/jopt-simple/jopt-simple/4.9/jopt-simple-4.9.jar:/home/awajda/.m2/repository/com/google/inject/extensions/guice-multibindings/4.1.0/guice-multibindings-4.1.0.jar:/home/awajda/.m2/repository/com/101tec/zkclient/0.8/zkclient-0.8.jar:/home/awajda/.m2/repository/org/apache/atlas/atlas-typesystem/0.8-incubating/atlas-typesystem-0.8-incubating.jar:/home/awajda/.m2/repository/org/codehaus/jettison/jettison/1.3.7/jettison-1.3.7.jar:/home/awajda/.m2/repository/stax/stax-api/1.0.1/stax-api-1.0.1.jar:/home/awajda/.m2/repository/org/scala-lang/scala-compiler/2.11.8/scala-compiler-2.11.8.jar:/home/awajda/.m2/repository/org/scala-lang/scala-actors/2.11.8/scala-actors-2.11.8.jar:/home/awajda/.m2/repository/org/scala-lang/scalap/2.11.8/scalap-2.11.8.jar:/home/awajda/.m2/repository/com/github/nscala-time/nscala-time_2.11/1.6.0/nscala-time_2.11-1.6.0.jar:/home/awajda/.m2/repository/com/typesafe/config/1.2.1/config-1.2.1.jar:/home/awajda/.m2/repository/com/google/code/gson/gson/2.5/gson-2.5.jar:/home/awajda/.m2/repository/it/unimi/dsi/fastutil/6.5.16/fastutil-6.5.16.jar:/home/awajda/.m2/repository/org/apache/atlas/atlas-intg/0.8-incubating/atlas-intg-0.8-incubating.jar:/home/awajda/.m2/repository/org/codehaus/jackson/jackson-jaxrs/1.9.13/jackson-jaxrs-1.9.13.jar:/home/awajda/.m2/repository/javax/inject/javax.inject/1/javax.inject-1.jar:/home/awajda/Projects/absaoss/spline/persistence/hdfs/target/classes:/home/awajda/.m2/repository/org/json4s/json4s-ext_2.11/3.2.11/json4s-ext_2.11-3.2.11.jar:/home/awajda/.m2/repository/joda-time/joda-time/2.3/joda-time-2.3.jar:/home/awajda/.m2/repository/org/joda/joda-convert/1.6/joda-convert-1.6.jar:/home/awajda/.m2/repository/com/databricks/spark-xml_2.11/0.4.1/spark-xml_2.11-0.4.1.jar:/home/awajda/.m2/repository/org/scala-lang/scala-library/2.11.8/scala-library-2.11.8.jar:/home/awajda/.m2/repository/org/scala-lang/scala-reflect/2.11.8/scala-reflect-2.11.8.jar:/home/awajda/.m2/repository/org/scala-lang/modules/scala-xml_2.11/1.0.5/scala-xml_2.11-1.0.5.jar:/home/awajda/.m2/repository/org/scala-lang/modules/scala-parser-combinators_2.11/1.0.4/scala-parser-combinators_2.11-1.0.4.jar:/opt/idea-IU-141.3058.30/lib/idea_rt.jar com.intellij.rt.execution.application.AppMain za.co.absa.spline.sample.SampleJob1
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
17/10/25 00:47:38 INFO SparkContext: Running Spark version 2.2.0
17/10/25 00:47:39 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
17/10/25 00:47:40 WARN Utils: Your hostname, lenovo-lnx resolves to a loopback address: 127.0.0.1; using 172.16.3.75 instead (on interface wlp3s0)
17/10/25 00:47:40 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
17/10/25 00:47:40 INFO SparkContext: Submitted application: Sample Job 1
17/10/25 00:47:40 INFO SecurityManager: Changing view acls to: awajda
17/10/25 00:47:40 INFO SecurityManager: Changing modify acls to: awajda
17/10/25 00:47:40 INFO SecurityManager: Changing view acls groups to:
17/10/25 00:47:40 INFO SecurityManager: Changing modify acls groups to:
17/10/25 00:47:40 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(awajda); groups with view permissions: Set(); users with modify permissions: Set(awajda); groups with modify permissions: Set()
17/10/25 00:47:40 INFO Utils: Successfully started service 'sparkDriver' on port 36383.
17/10/25 00:47:40 INFO SparkEnv: Registering MapOutputTracker
17/10/25 00:47:40 INFO SparkEnv: Registering BlockManagerMaster
17/10/25 00:47:40 INFO BlockManagerMasterEndpoint: Using org.apache.spark.storage.DefaultTopologyMapper for getting topology information
17/10/25 00:47:40 INFO BlockManagerMasterEndpoint: BlockManagerMasterEndpoint up
17/10/25 00:47:40 INFO DiskBlockManager: Created local directory at /tmp/blockmgr-0528bcea-10b2-48df-aa46-d5abb55cdbd2
17/10/25 00:47:40 INFO MemoryStore: MemoryStore started with capacity 858.0 MB
17/10/25 00:47:41 INFO SparkEnv: Registering OutputCommitCoordinator
17/10/25 00:47:41 INFO Utils: Successfully started service 'SparkUI' on port 4040.
17/10/25 00:47:41 INFO SparkUI: Bound SparkUI to 0.0.0.0, and started at http://172.16.3.75:4040
17/10/25 00:47:41 INFO Executor: Starting executor ID driver on host localhost
17/10/25 00:47:41 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 35721.
17/10/25 00:47:41 INFO NettyBlockTransferService: Server created on 172.16.3.75:35721
17/10/25 00:47:41 INFO BlockManager: Using org.apache.spark.storage.RandomBlockReplicationPolicy for block replication policy
17/10/25 00:47:41 INFO BlockManagerMaster: Registering BlockManager BlockManagerId(driver, 172.16.3.75, 35721, None)
17/10/25 00:47:41 INFO BlockManagerMasterEndpoint: Registering block manager 172.16.3.75:35721 with 858.0 MB RAM, BlockManagerId(driver, 172.16.3.75, 35721, None)
17/10/25 00:47:41 INFO BlockManagerMaster: Registered BlockManager BlockManagerId(driver, 172.16.3.75, 35721, None)
17/10/25 00:47:41 INFO BlockManager: Initialized BlockManager: BlockManagerId(driver, 172.16.3.75, 35721, None)
17/10/25 00:47:42 INFO SharedState: Setting hive.metastore.warehouse.dir ('null') to the value of spark.sql.warehouse.dir ('file:/home/awajda/Projects/absaoss/spline/sample/spark-warehouse/').
17/10/25 00:47:42 INFO SharedState: Warehouse path is 'file:/home/awajda/Projects/absaoss/spline/sample/spark-warehouse/'.
17/10/25 00:47:43 INFO StateStoreCoordinatorRef: Registered StateStoreCoordinator endpoint
17/10/25 00:47:48 INFO FileSourceStrategy: Pruning directories with:
17/10/25 00:47:48 INFO FileSourceStrategy: Post-Scan Filters: (length(trim(value#0)) > 0)
17/10/25 00:47:48 INFO FileSourceStrategy: Output Data Schema: struct<value: string>
17/10/25 00:47:48 INFO FileSourceScanExec: Pushed Filters:
17/10/25 00:47:49 INFO CodeGenerator: Code generated in 357.367102 ms
17/10/25 00:47:49 INFO MemoryStore: Block broadcast_0 stored as values in memory (estimated size 219.9 KB, free 857.8 MB)
17/10/25 00:47:49 INFO MemoryStore: Block broadcast_0_piece0 stored as bytes in memory (estimated size 20.6 KB, free 857.8 MB)
17/10/25 00:47:49 INFO BlockManagerInfo: Added broadcast_0_piece0 in memory on 172.16.3.75:35721 (size: 20.6 KB, free: 858.0 MB)
17/10/25 00:47:49 INFO SparkContext: Created broadcast 0 from csv at SampleJob1.scala:35
17/10/25 00:47:49 INFO FileSourceScanExec: Planning scan with bin packing, max size: 4194304 bytes, open cost is considered as scanning 4194304 bytes.
17/10/25 00:47:49 INFO SparkContext: Starting job: csv at SampleJob1.scala:35
17/10/25 00:47:49 INFO DAGScheduler: Got job 0 (csv at SampleJob1.scala:35) with 1 output partitions
17/10/25 00:47:49 INFO DAGScheduler: Final stage: ResultStage 0 (csv at SampleJob1.scala:35)
17/10/25 00:47:49 INFO DAGScheduler: Parents of final stage: List()
17/10/25 00:47:49 INFO DAGScheduler: Missing parents: List()
17/10/25 00:47:49 INFO DAGScheduler: Submitting ResultStage 0 (MapPartitionsRDD[2] at csv at SampleJob1.scala:35), which has no missing parents
17/10/25 00:47:49 INFO MemoryStore: Block broadcast_1 stored as values in memory (estimated size 8.2 KB, free 857.8 MB)
17/10/25 00:47:49 INFO MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 4.3 KB, free 857.8 MB)
17/10/25 00:47:49 INFO BlockManagerInfo: Added broadcast_1_piece0 in memory on 172.16.3.75:35721 (size: 4.3 KB, free: 858.0 MB)
17/10/25 00:47:49 INFO SparkContext: Created broadcast 1 from broadcast at DAGScheduler.scala:1006
17/10/25 00:47:49 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 0 (MapPartitionsRDD[2] at csv at SampleJob1.scala:35) (first 15 tasks are for partitions Vector(0))
17/10/25 00:47:49 INFO TaskSchedulerImpl: Adding task set 0.0 with 1 tasks
17/10/25 00:47:50 INFO TaskSetManager: Starting task 0.0 in stage 0.0 (TID 0, localhost, executor driver, partition 0, PROCESS_LOCAL, 5311 bytes)
17/10/25 00:47:50 INFO Executor: Running task 0.0 in stage 0.0 (TID 0)
17/10/25 00:47:50 INFO FileScanRDD: Reading File path: file:///home/awajda/Projects/absaoss/spline/sample/data/input/wikidata.csv, range: 0-342116, partition values: [empty row]
17/10/25 00:47:50 INFO CodeGenerator: Code generated in 19.573494 ms
17/10/25 00:47:50 INFO Executor: Finished task 0.0 in stage 0.0 (TID 0). 1323 bytes result sent to driver
17/10/25 00:47:50 INFO TaskSetManager: Finished task 0.0 in stage 0.0 (TID 0) in 268 ms on localhost (executor driver) (1/1)
17/10/25 00:47:50 INFO TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool
17/10/25 00:47:50 INFO DAGScheduler: ResultStage 0 (csv at SampleJob1.scala:35) finished in 0.301 s
17/10/25 00:47:50 INFO DAGScheduler: Job 0 finished: csv at SampleJob1.scala:35, took 0.512895 s
17/10/25 00:47:50 INFO CodeGenerator: Code generated in 25.027275 ms
17/10/25 00:47:50 INFO FileSourceStrategy: Pruning directories with:
17/10/25 00:47:50 INFO FileSourceStrategy: Post-Scan Filters:
17/10/25 00:47:50 INFO FileSourceStrategy: Output Data Schema: struct<value: string>
17/10/25 00:47:50 INFO FileSourceScanExec: Pushed Filters:
17/10/25 00:47:50 INFO CodeGenerator: Code generated in 12.551125 ms
17/10/25 00:47:50 INFO MemoryStore: Block broadcast_2 stored as values in memory (estimated size 219.9 KB, free 857.5 MB)
17/10/25 00:47:50 INFO MemoryStore: Block broadcast_2_piece0 stored as bytes in memory (estimated size 20.6 KB, free 857.5 MB)
17/10/25 00:47:50 INFO BlockManagerInfo: Added broadcast_2_piece0 in memory on 172.16.3.75:35721 (size: 20.6 KB, free: 858.0 MB)
17/10/25 00:47:50 INFO SparkContext: Created broadcast 2 from csv at SampleJob1.scala:35
17/10/25 00:47:50 INFO FileSourceScanExec: Planning scan with bin packing, max size: 4194304 bytes, open cost is considered as scanning 4194304 bytes.
17/10/25 00:47:50 INFO SparkContext: Starting job: csv at SampleJob1.scala:35
17/10/25 00:47:50 INFO DAGScheduler: Got job 1 (csv at SampleJob1.scala:35) with 1 output partitions
17/10/25 00:47:50 INFO DAGScheduler: Final stage: ResultStage 1 (csv at SampleJob1.scala:35)
17/10/25 00:47:50 INFO DAGScheduler: Parents of final stage: List()
17/10/25 00:47:50 INFO DAGScheduler: Missing parents: List()
17/10/25 00:47:50 INFO DAGScheduler: Submitting ResultStage 1 (MapPartitionsRDD[7] at csv at SampleJob1.scala:35), which has no missing parents
17/10/25 00:47:50 INFO MemoryStore: Block broadcast_3 stored as values in memory (estimated size 14.6 KB, free 857.5 MB)
17/10/25 00:47:50 INFO MemoryStore: Block broadcast_3_piece0 stored as bytes in memory (estimated size 8.4 KB, free 857.5 MB)
17/10/25 00:47:50 INFO BlockManagerInfo: Added broadcast_3_piece0 in memory on 172.16.3.75:35721 (size: 8.4 KB, free: 857.9 MB)
17/10/25 00:47:50 INFO SparkContext: Created broadcast 3 from broadcast at DAGScheduler.scala:1006
17/10/25 00:47:50 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 1 (MapPartitionsRDD[7] at csv at SampleJob1.scala:35) (first 15 tasks are for partitions Vector(0))
17/10/25 00:47:50 INFO TaskSchedulerImpl: Adding task set 1.0 with 1 tasks
17/10/25 00:47:50 INFO TaskSetManager: Starting task 0.0 in stage 1.0 (TID 1, localhost, executor driver, partition 0, PROCESS_LOCAL, 5311 bytes)
17/10/25 00:47:50 INFO Executor: Running task 0.0 in stage 1.0 (TID 1)
17/10/25 00:47:50 INFO FileScanRDD: Reading File path: file:///home/awajda/Projects/absaoss/spline/sample/data/input/wikidata.csv, range: 0-342116, partition values: [empty row]
17/10/25 00:47:51 INFO Executor: Finished task 0.0 in stage 1.0 (TID 1). 1635 bytes result sent to driver
17/10/25 00:47:51 INFO TaskSetManager: Finished task 0.0 in stage 1.0 (TID 1) in 500 ms on localhost (executor driver) (1/1)
17/10/25 00:47:51 INFO TaskSchedulerImpl: Removed TaskSet 1.0, whose tasks have all completed, from pool
17/10/25 00:47:51 INFO DAGScheduler: ResultStage 1 (csv at SampleJob1.scala:35) finished in 0.494 s
17/10/25 00:47:51 INFO DAGScheduler: Job 1 finished: csv at SampleJob1.scala:35, took 0.528656 s
17/10/25 00:47:51 INFO FileSourceStrategy: Pruning directories with:
17/10/25 00:47:51 INFO FileSourceStrategy: Post-Scan Filters: (length(trim(value#26)) > 0)
17/10/25 00:47:51 INFO FileSourceStrategy: Output Data Schema: struct<value: string>
17/10/25 00:47:51 INFO FileSourceScanExec: Pushed Filters:
17/10/25 00:47:51 INFO MemoryStore: Block broadcast_4 stored as values in memory (estimated size 219.9 KB, free 857.3 MB)
17/10/25 00:47:51 INFO MemoryStore: Block broadcast_4_piece0 stored as bytes in memory (estimated size 20.6 KB, free 857.3 MB)
17/10/25 00:47:51 INFO BlockManagerInfo: Added broadcast_4_piece0 in memory on 172.16.3.75:35721 (size: 20.6 KB, free: 857.9 MB)
17/10/25 00:47:51 INFO SparkContext: Created broadcast 4 from csv at SampleJob1.scala:43
17/10/25 00:47:51 INFO FileSourceScanExec: Planning scan with bin packing, max size: 4194304 bytes, open cost is considered as scanning 4194304 bytes.
17/10/25 00:47:51 INFO SparkContext: Starting job: csv at SampleJob1.scala:43
17/10/25 00:47:51 INFO DAGScheduler: Got job 2 (csv at SampleJob1.scala:43) with 1 output partitions
17/10/25 00:47:51 INFO DAGScheduler: Final stage: ResultStage 2 (csv at SampleJob1.scala:43)
17/10/25 00:47:51 INFO DAGScheduler: Parents of final stage: List()
17/10/25 00:47:51 INFO DAGScheduler: Missing parents: List()
17/10/25 00:47:51 INFO DAGScheduler: Submitting ResultStage 2 (MapPartitionsRDD[10] at csv at SampleJob1.scala:43), which has no missing parents
17/10/25 00:47:51 INFO MemoryStore: Block broadcast_5 stored as values in memory (estimated size 8.2 KB, free 857.3 MB)
17/10/25 00:47:51 INFO MemoryStore: Block broadcast_5_piece0 stored as bytes in memory (estimated size 4.3 KB, free 857.2 MB)
17/10/25 00:47:51 INFO BlockManagerInfo: Added broadcast_5_piece0 in memory on 172.16.3.75:35721 (size: 4.3 KB, free: 857.9 MB)
17/10/25 00:47:51 INFO SparkContext: Created broadcast 5 from broadcast at DAGScheduler.scala:1006
17/10/25 00:47:51 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 2 (MapPartitionsRDD[10] at csv at SampleJob1.scala:43) (first 15 tasks are for partitions Vector(0))
17/10/25 00:47:51 INFO TaskSchedulerImpl: Adding task set 2.0 with 1 tasks
17/10/25 00:47:51 INFO TaskSetManager: Starting task 0.0 in stage 2.0 (TID 2, localhost, executor driver, partition 0, PROCESS_LOCAL, 5309 bytes)
17/10/25 00:47:51 INFO Executor: Running task 0.0 in stage 2.0 (TID 2)
17/10/25 00:47:51 INFO FileScanRDD: Reading File path: file:///home/awajda/Projects/absaoss/spline/sample/data/input/domain.csv, range: 0-83, partition values: [empty row]
17/10/25 00:47:51 INFO Executor: Finished task 0.0 in stage 2.0 (TID 2). 1275 bytes result sent to driver
17/10/25 00:47:51 INFO TaskSetManager: Finished task 0.0 in stage 2.0 (TID 2) in 22 ms on localhost (executor driver) (1/1)
17/10/25 00:47:51 INFO DAGScheduler: ResultStage 2 (csv at SampleJob1.scala:43) finished in 0.022 s
17/10/25 00:47:51 INFO DAGScheduler: Job 2 finished: csv at SampleJob1.scala:43, took 0.041221 s
17/10/25 00:47:51 INFO TaskSchedulerImpl: Removed TaskSet 2.0, whose tasks have all completed, from pool
17/10/25 00:47:51 INFO FileSourceStrategy: Pruning directories with:
17/10/25 00:47:51 INFO FileSourceStrategy: Post-Scan Filters:
17/10/25 00:47:51 INFO FileSourceStrategy: Output Data Schema: struct<value: string>
17/10/25 00:47:51 INFO FileSourceScanExec: Pushed Filters:
17/10/25 00:47:51 INFO MemoryStore: Block broadcast_6 stored as values in memory (estimated size 219.9 KB, free 857.0 MB)
17/10/25 00:47:51 INFO MemoryStore: Block broadcast_6_piece0 stored as bytes in memory (estimated size 20.6 KB, free 857.0 MB)
17/10/25 00:47:51 INFO BlockManagerInfo: Added broadcast_6_piece0 in memory on 172.16.3.75:35721 (size: 20.6 KB, free: 857.9 MB)
17/10/25 00:47:51 INFO SparkContext: Created broadcast 6 from csv at SampleJob1.scala:43
17/10/25 00:47:51 INFO FileSourceScanExec: Planning scan with bin packing, max size: 4194304 bytes, open cost is considered as scanning 4194304 bytes.
17/10/25 00:47:51 INFO SparkContext: Starting job: csv at SampleJob1.scala:43
17/10/25 00:47:51 INFO DAGScheduler: Got job 3 (csv at SampleJob1.scala:43) with 1 output partitions
17/10/25 00:47:51 INFO DAGScheduler: Final stage: ResultStage 3 (csv at SampleJob1.scala:43)
17/10/25 00:47:51 INFO DAGScheduler: Parents of final stage: List()
17/10/25 00:47:51 INFO DAGScheduler: Missing parents: List()
17/10/25 00:47:51 INFO DAGScheduler: Submitting ResultStage 3 (MapPartitionsRDD[15] at csv at SampleJob1.scala:43), which has no missing parents
17/10/25 00:47:51 INFO MemoryStore: Block broadcast_7 stored as values in memory (estimated size 14.5 KB, free 857.0 MB)
17/10/25 00:47:51 INFO MemoryStore: Block broadcast_7_piece0 stored as bytes in memory (estimated size 8.4 KB, free 857.0 MB)
17/10/25 00:47:51 INFO BlockManagerInfo: Added broadcast_7_piece0 in memory on 172.16.3.75:35721 (size: 8.4 KB, free: 857.9 MB)
17/10/25 00:47:51 INFO SparkContext: Created broadcast 7 from broadcast at DAGScheduler.scala:1006
17/10/25 00:47:51 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 3 (MapPartitionsRDD[15] at csv at SampleJob1.scala:43) (first 15 tasks are for partitions Vector(0))
17/10/25 00:47:51 INFO TaskSchedulerImpl: Adding task set 3.0 with 1 tasks
17/10/25 00:47:51 INFO TaskSetManager: Starting task 0.0 in stage 3.0 (TID 3, localhost, executor driver, partition 0, PROCESS_LOCAL, 5309 bytes)
17/10/25 00:47:51 INFO Executor: Running task 0.0 in stage 3.0 (TID 3)
17/10/25 00:47:51 INFO FileScanRDD: Reading File path: file:///home/awajda/Projects/absaoss/spline/sample/data/input/domain.csv, range: 0-83, partition values: [empty row]
17/10/25 00:47:51 INFO Executor: Finished task 0.0 in stage 3.0 (TID 3). 1473 bytes result sent to driver
17/10/25 00:47:51 INFO TaskSetManager: Finished task 0.0 in stage 3.0 (TID 3) in 43 ms on localhost (executor driver) (1/1)
17/10/25 00:47:51 INFO DAGScheduler: ResultStage 3 (csv at SampleJob1.scala:43) finished in 0.044 s
17/10/25 00:47:51 INFO DAGScheduler: Job 3 finished: csv at SampleJob1.scala:43, took 0.060091 s
17/10/25 00:47:51 INFO TaskSchedulerImpl: Removed TaskSet 3.0, whose tasks have all completed, from pool
17/10/25 00:47:51 INFO FileSourceStrategy: Pruning directories with:
17/10/25 00:47:51 INFO FileSourceStrategy: Post-Scan Filters: isnotnull(total_response_size#16),(total_response_size#16 > 1000),isnotnull(count_views#15),(count_views#15 > 10)
17/10/25 00:47:51 INFO FileSourceStrategy: Output Data Schema: struct<domain_code: string, page_title: string, count_views: int, total_response_size: int ... 2 more fields>
17/10/25 00:47:51 INFO FileSourceScanExec: Pushed Filters: IsNotNull(total_response_size),GreaterThan(total_response_size,1000),IsNotNull(count_views),GreaterThan(count_views,10)
17/10/25 00:47:51 INFO FileSourceStrategy: Pruning directories with:
17/10/25 00:47:51 INFO FileSourceStrategy: Post-Scan Filters:
17/10/25 00:47:51 INFO FileSourceStrategy: Output Data Schema: struct<d_code: string, d_name: string>
17/10/25 00:47:51 INFO FileSourceScanExec: Pushed Filters:
17/10/25 00:47:52 INFO ParquetFileFormat: Using default output committer for Parquet: org.apache.parquet.hadoop.ParquetOutputCommitter
17/10/25 00:47:52 INFO SQLHadoopMapReduceCommitProtocol: Using user defined output committer class org.apache.parquet.hadoop.ParquetOutputCommitter
17/10/25 00:47:52 INFO SQLHadoopMapReduceCommitProtocol: Using output committer class org.apache.parquet.hadoop.ParquetOutputCommitter
17/10/25 00:47:52 INFO MemoryStore: Block broadcast_8 stored as values in memory (estimated size 221.0 KB, free 856.8 MB)
17/10/25 00:47:52 INFO MemoryStore: Block broadcast_8_piece0 stored as bytes in memory (estimated size 20.7 KB, free 856.8 MB)
17/10/25 00:47:52 INFO BlockManagerInfo: Added broadcast_8_piece0 in memory on 172.16.3.75:35721 (size: 20.7 KB, free: 857.9 MB)
17/10/25 00:47:52 INFO SparkContext: Created broadcast 8 from run at ThreadPoolExecutor.java:1149
17/10/25 00:47:52 INFO FileSourceScanExec: Planning scan with bin packing, max size: 4194304 bytes, open cost is considered as scanning 4194304 bytes.
17/10/25 00:47:52 INFO SparkContext: Starting job: run at ThreadPoolExecutor.java:1149
17/10/25 00:47:52 INFO DAGScheduler: Got job 4 (run at ThreadPoolExecutor.java:1149) with 1 output partitions
17/10/25 00:47:52 INFO DAGScheduler: Final stage: ResultStage 4 (run at ThreadPoolExecutor.java:1149)
17/10/25 00:47:52 INFO DAGScheduler: Parents of final stage: List()
17/10/25 00:47:52 INFO DAGScheduler: Missing parents: List()
17/10/25 00:47:52 INFO DAGScheduler: Submitting ResultStage 4 (MapPartitionsRDD[18] at run at ThreadPoolExecutor.java:1149), which has no missing parents
17/10/25 00:47:52 INFO MemoryStore: Block broadcast_9 stored as values in memory (estimated size 11.2 KB, free 856.7 MB)
17/10/25 00:47:52 INFO MemoryStore: Block broadcast_9_piece0 stored as bytes in memory (estimated size 6.8 KB, free 856.7 MB)
17/10/25 00:47:52 INFO BlockManagerInfo: Added broadcast_9_piece0 in memory on 172.16.3.75:35721 (size: 6.8 KB, free: 857.9 MB)
17/10/25 00:47:52 INFO SparkContext: Created broadcast 9 from broadcast at DAGScheduler.scala:1006
17/10/25 00:47:52 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 4 (MapPartitionsRDD[18] at run at ThreadPoolExecutor.java:1149) (first 15 tasks are for partitions Vector(0))
17/10/25 00:47:52 INFO TaskSchedulerImpl: Adding task set 4.0 with 1 tasks
17/10/25 00:47:52 INFO TaskSetManager: Starting task 0.0 in stage 4.0 (TID 4, localhost, executor driver, partition 0, PROCESS_LOCAL, 5309 bytes)
17/10/25 00:47:52 INFO Executor: Running task 0.0 in stage 4.0 (TID 4)
17/10/25 00:47:52 INFO FileScanRDD: Reading File path: file:///home/awajda/Projects/absaoss/spline/sample/data/input/domain.csv, range: 0-83, partition values: [empty row]
17/10/25 00:47:52 INFO CodeGenerator: Code generated in 41.275284 ms
17/10/25 00:47:52 INFO Executor: Finished task 0.0 in stage 4.0 (TID 4). 1306 bytes result sent to driver
17/10/25 00:47:52 INFO TaskSetManager: Finished task 0.0 in stage 4.0 (TID 4) in 138 ms on localhost (executor driver) (1/1)
17/10/25 00:47:52 INFO TaskSchedulerImpl: Removed TaskSet 4.0, whose tasks have all completed, from pool
17/10/25 00:47:52 INFO DAGScheduler: ResultStage 4 (run at ThreadPoolExecutor.java:1149) finished in 0.098 s
17/10/25 00:47:52 INFO DAGScheduler: Job 4 finished: run at ThreadPoolExecutor.java:1149, took 0.159900 s
17/10/25 00:47:52 INFO MemoryStore: Block broadcast_10 stored as values in memory (estimated size 8.0 MB, free 848.7 MB)
17/10/25 00:47:52 INFO MemoryStore: Block broadcast_10_piece0 stored as bytes in memory (estimated size 284.0 B, free 848.7 MB)
17/10/25 00:47:52 INFO BlockManagerInfo: Added broadcast_10_piece0 in memory on 172.16.3.75:35721 (size: 284.0 B, free: 857.9 MB)
17/10/25 00:47:52 INFO ContextCleaner: Cleaned accumulator 114
17/10/25 00:47:52 INFO SparkContext: Created broadcast 10 from run at ThreadPoolExecutor.java:1149
17/10/25 00:47:52 INFO ContextCleaner: Cleaned accumulator 86
17/10/25 00:47:52 INFO BlockManagerInfo: Removed broadcast_1_piece0 on 172.16.3.75:35721 in memory (size: 4.3 KB, free: 857.9 MB)
17/10/25 00:47:52 INFO ContextCleaner: Cleaned accumulator 87
17/10/25 00:47:52 INFO ContextCleaner: Cleaned accumulator 84
17/10/25 00:47:52 INFO BlockManagerInfo: Removed broadcast_3_piece0 on 172.16.3.75:35721 in memory (size: 8.4 KB, free: 857.9 MB)
17/10/25 00:47:52 INFO ContextCleaner: Cleaned accumulator 83
17/10/25 00:47:52 INFO ContextCleaner: Cleaned accumulator 115
17/10/25 00:47:52 INFO BlockManagerInfo: Removed broadcast_4_piece0 on 172.16.3.75:35721 in memory (size: 20.6 KB, free: 857.9 MB)
17/10/25 00:47:52 INFO BlockManagerInfo: Removed broadcast_6_piece0 on 172.16.3.75:35721 in memory (size: 20.6 KB, free: 857.9 MB)
17/10/25 00:47:52 INFO ContextCleaner: Cleaned accumulator 116
17/10/25 00:47:52 INFO ContextCleaner: Cleaned accumulator 117
17/10/25 00:47:52 INFO ContextCleaner: Cleaned accumulator 85
17/10/25 00:47:52 INFO BlockManagerInfo: Removed broadcast_7_piece0 on 172.16.3.75:35721 in memory (size: 8.4 KB, free: 857.9 MB)
17/10/25 00:47:52 INFO BlockManagerInfo: Removed broadcast_5_piece0 on 172.16.3.75:35721 in memory (size: 4.3 KB, free: 857.9 MB)
17/10/25 00:47:52 INFO ContextCleaner: Cleaned accumulator 113
17/10/25 00:47:52 INFO BlockManagerInfo: Removed broadcast_9_piece0 on 172.16.3.75:35721 in memory (size: 6.8 KB, free: 857.9 MB)
17/10/25 00:47:52 INFO ContextCleaner: Cleaned accumulator 88
17/10/25 00:47:52 INFO CodeGenerator: Code generated in 71.147939 ms
17/10/25 00:47:52 INFO MemoryStore: Block broadcast_11 stored as values in memory (estimated size 221.0 KB, free 849.1 MB)
17/10/25 00:47:52 INFO MemoryStore: Block broadcast_11_piece0 stored as bytes in memory (estimated size 20.7 KB, free 849.1 MB)
17/10/25 00:47:52 INFO BlockManagerInfo: Added broadcast_11_piece0 in memory on 172.16.3.75:35721 (size: 20.7 KB, free: 857.9 MB)
17/10/25 00:47:52 INFO SparkContext: Created broadcast 11 from parquet at SampleJob1.scala:50
17/10/25 00:47:52 INFO FileSourceScanExec: Planning scan with bin packing, max size: 4194304 bytes, open cost is considered as scanning 4194304 bytes.
17/10/25 00:47:52 INFO SparkContext: Starting job: parquet at SampleJob1.scala:50
17/10/25 00:47:52 INFO DAGScheduler: Got job 5 (parquet at SampleJob1.scala:50) with 1 output partitions
17/10/25 00:47:52 INFO DAGScheduler: Final stage: ResultStage 5 (parquet at SampleJob1.scala:50)
17/10/25 00:47:52 INFO DAGScheduler: Parents of final stage: List()
17/10/25 00:47:52 INFO DAGScheduler: Missing parents: List()
17/10/25 00:47:52 INFO DAGScheduler: Submitting ResultStage 5 (MapPartitionsRDD[20] at parquet at SampleJob1.scala:50), which has no missing parents
17/10/25 00:47:52 INFO MemoryStore: Block broadcast_12 stored as values in memory (estimated size 81.6 KB, free 849.0 MB)
17/10/25 00:47:52 INFO MemoryStore: Block broadcast_12_piece0 stored as bytes in memory (estimated size 32.0 KB, free 848.9 MB)
17/10/25 00:47:52 INFO BlockManagerInfo: Added broadcast_12_piece0 in memory on 172.16.3.75:35721 (size: 32.0 KB, free: 857.9 MB)
17/10/25 00:47:52 INFO SparkContext: Created broadcast 12 from broadcast at DAGScheduler.scala:1006
17/10/25 00:47:52 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 5 (MapPartitionsRDD[20] at parquet at SampleJob1.scala:50) (first 15 tasks are for partitions Vector(0))
17/10/25 00:47:52 INFO TaskSchedulerImpl: Adding task set 5.0 with 1 tasks
17/10/25 00:47:52 INFO TaskSetManager: Starting task 0.0 in stage 5.0 (TID 5, localhost, executor driver, partition 0, PROCESS_LOCAL, 5311 bytes)
17/10/25 00:47:52 INFO Executor: Running task 0.0 in stage 5.0 (TID 5)
17/10/25 00:47:52 INFO SQLHadoopMapReduceCommitProtocol: Using user defined output committer class org.apache.parquet.hadoop.ParquetOutputCommitter
17/10/25 00:47:52 INFO SQLHadoopMapReduceCommitProtocol: Using output committer class org.apache.parquet.hadoop.ParquetOutputCommitter
17/10/25 00:47:52 INFO CodecConfig: Compression: SNAPPY
17/10/25 00:47:52 INFO CodecConfig: Compression: SNAPPY
17/10/25 00:47:52 INFO ParquetOutputFormat: Parquet block size to 134217728
17/10/25 00:47:52 INFO ParquetOutputFormat: Parquet page size to 1048576
17/10/25 00:47:52 INFO ParquetOutputFormat: Parquet dictionary page size to 1048576
17/10/25 00:47:52 INFO ParquetOutputFormat: Dictionary is on
17/10/25 00:47:52 INFO ParquetOutputFormat: Validation is off
17/10/25 00:47:52 INFO ParquetOutputFormat: Writer version is: PARQUET_1_0
17/10/25 00:47:52 INFO ParquetOutputFormat: Maximum row group padding size is 0 bytes
17/10/25 00:47:52 INFO ParquetOutputFormat: Page size checking is: estimated
17/10/25 00:47:52 INFO ParquetOutputFormat: Min row count for page size check is: 100
17/10/25 00:47:52 INFO ParquetOutputFormat: Max row count for page size check is: 10000
17/10/25 00:47:52 INFO ParquetWriteSupport: Initialized Parquet WriteSupport with Catalyst schema:
{
"type" : "struct",
"fields" : [ {
"name" : "page",
"type" : "string",
"nullable" : true,
"metadata" : { }
}, {
"name" : "domain",
"type" : "string",
"nullable" : true,
"metadata" : { }
}, {
"name" : "count_views",
"type" : "integer",
"nullable" : true,
"metadata" : { }
} ]
}
and corresponding Parquet message type:
message spark_schema {
optional binary page (UTF8);
optional binary domain (UTF8);
optional int32 count_views;
}
17/10/25 00:47:53 INFO CodecPool: Got brand-new compressor [.snappy]
17/10/25 00:47:53 INFO FileScanRDD: Reading File path: file:///home/awajda/Projects/absaoss/spline/sample/data/input/wikidata.csv, range: 0-342116, partition values: [empty row]
17/10/25 00:47:53 INFO CodeGenerator: Code generated in 18.441753 ms
17/10/25 00:47:53 INFO InternalParquetRecordWriter: Flushing mem columnStore to file. allocated memory: 199
17/10/25 00:47:53 INFO FileOutputCommitter: Saved output of task 'attempt_20171025004752_0005_m_000000_0' to file:/home/awajda/Projects/absaoss/spline/sample/data/results/job1_results/_temporary/0/task_20171025004752_0005_m_000000
17/10/25 00:47:53 INFO SparkHadoopMapRedUtil: attempt_20171025004752_0005_m_000000_0: Committed
17/10/25 00:47:53 INFO Executor: Finished task 0.0 in stage 5.0 (TID 5). 1775 bytes result sent to driver
17/10/25 00:47:53 INFO TaskSetManager: Finished task 0.0 in stage 5.0 (TID 5) in 933 ms on localhost (executor driver) (1/1)
17/10/25 00:47:53 INFO TaskSchedulerImpl: Removed TaskSet 5.0, whose tasks have all completed, from pool
17/10/25 00:47:53 INFO DAGScheduler: ResultStage 5 (parquet at SampleJob1.scala:50) finished in 0.936 s
17/10/25 00:47:53 INFO DAGScheduler: Job 5 finished: parquet at SampleJob1.scala:50, took 0.971026 s
17/10/25 00:47:53 INFO FileFormatWriter: Job null committed.
17/10/25 00:47:54 INFO cluster: Cluster created with settings {hosts=[localhost:27017], mode=SINGLE, requiredClusterType=UNKNOWN, serverSelectionTimeout='30000 ms', maxWaitQueueSize=500}
17/10/25 00:47:54 INFO cluster: No server chosen by ReadPreferenceServerSelector{readPreference=primary} from cluster description ClusterDescription{type=UNKNOWN, connectionMode=SINGLE, all=[ServerDescription{address=localhost:27017, type=UNKNOWN, state=CONNECTING}]}. Waiting for 30000 ms before timing out
17/10/25 00:47:54 INFO connection: Opened connection [connectionId{localValue:1, serverValue:6}] to localhost:27017
17/10/25 00:47:54 INFO cluster: Monitor thread successfully connected to server with description ServerDescription{address=localhost:27017, type=REPLICA_SET_PRIMARY, state=CONNECTED, ok=true, version=ServerVersion{versionList=[3, 4, 9]}, minWireVersion=0, maxWireVersion=5, maxDocumentSize=16777216, roundTripTimeNanos=1360987, setName='rs0', canonicalAddress=lenovo-lnx:27017, hosts=[lenovo-lnx:27017], passives=[], arbiters=[], primary='lenovo-lnx:27017', tagSet=TagSet{[]}, electionId=59eef0fcacf1f64f314d52c1, setVersion=1}
17/10/25 00:47:54 INFO connection: Opened connection [connectionId{localValue:2, serverValue:7}] to localhost:27017
17/10/25 00:47:54 INFO BSONSalatContext$$anon$1: registerGlobalKeyOverride: context=BSON Salat Context will globally remap key='id' to '_id'
17/10/25 00:47:55 INFO BlockManagerInfo: Removed broadcast_11_piece0 on 172.16.3.75:35721 in memory (size: 20.7 KB, free: 857.9 MB)
17/10/25 00:47:55 INFO BlockManagerInfo: Removed broadcast_10_piece0 on 172.16.3.75:35721 in memory (size: 284.0 B, free: 857.9 MB)
17/10/25 00:47:55 INFO BlockManagerInfo: Removed broadcast_12_piece0 on 172.16.3.75:35721 in memory (size: 32.0 KB, free: 857.9 MB)
17/10/25 00:47:55 INFO cluster: Cluster created with settings {hosts=[localhost:27017], mode=SINGLE, requiredClusterType=UNKNOWN, serverSelectionTimeout='30000 ms', maxWaitQueueSize=500}
17/10/25 00:47:55 INFO cluster: No server chosen by WritableServerSelector from cluster description ClusterDescription{type=UNKNOWN, connectionMode=SINGLE, all=[ServerDescription{address=localhost:27017, type=UNKNOWN, state=CONNECTING}]}. Waiting for 30000 ms before timing out
17/10/25 00:47:55 INFO connection: Opened connection [connectionId{localValue:3, serverValue:8}] to localhost:27017
17/10/25 00:47:55 INFO cluster: Monitor thread successfully connected to server with description ServerDescription{address=localhost:27017, type=REPLICA_SET_PRIMARY, state=CONNECTED, ok=true, version=ServerVersion{versionList=[3, 4, 9]}, minWireVersion=0, maxWireVersion=5, maxDocumentSize=16777216, roundTripTimeNanos=1120194, setName='rs0', canonicalAddress=lenovo-lnx:27017, hosts=[lenovo-lnx:27017], passives=[], arbiters=[], primary='lenovo-lnx:27017', tagSet=TagSet{[]}, electionId=59eef0fcacf1f64f314d52c1, setVersion=1}
17/10/25 00:47:55 INFO connection: Opened connection [connectionId{localValue:4, serverValue:9}] to localhost:27017
17/10/25 00:48:55 INFO SparkContext: Invoking stop() from shutdown hook
17/10/25 00:48:55 INFO SparkUI: Stopped Spark web UI at http://172.16.3.75:4040
17/10/25 00:48:55 INFO MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
17/10/25 00:48:56 INFO MemoryStore: MemoryStore cleared
17/10/25 00:48:56 INFO BlockManager: BlockManager stopped
17/10/25 00:48:56 INFO BlockManagerMaster: BlockManagerMaster stopped
17/10/25 00:48:56 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
17/10/25 00:48:56 INFO SparkContext: Successfully stopped SparkContext
17/10/25 00:48:56 INFO ShutdownHookManager: Shutdown hook called
17/10/25 00:48:56 INFO ShutdownHookManager: Deleting directory /tmp/spark-692b734a-4aa6-47e2-ab81-f6b16521a3df
Process finished with exit code 0