解决spark streaming长时间运行日志不断增长问题
log4j.properties 配置如下（限制单个日志文件大小并滚动备份，避免日志无限增长）：
# Root logger: WARN level, routed to console (stdout) and rolling file (A1).
log4j.rootLogger=WARN,stdout,A1

# Console appender: WARN and above, goes to the container's console stream.
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Threshold=WARN
log4j.appender.stdout.encoding=UTF-8
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=[%d{yyyy-MM-dd HH:mm:ss,SSS}] %m %n[%d{yyyy-MM-dd HH:mm:ss,SSS}] %p | %F:%L | %M%n%n

# Rolling file appender: caps log growth for long-running streaming jobs.
# 10MB per file with at most 9 backups => roughly 100MB total per container.
# Writes to the YARN container log dir so logs stay visible in the YARN UI.
log4j.appender.A1=org.apache.log4j.RollingFileAppender
log4j.appender.A1.BufferedIO=true
log4j.appender.A1.BufferSize=8192
log4j.appender.A1.File=${spark.yarn.app.container.log.dir}/stderr
log4j.appender.A1.MaxFileSize=10MB
log4j.appender.A1.MaxBackupIndex=9
log4j.appender.A1.encoding=UTF-8
log4j.appender.A1.Append=true
log4j.appender.A1.Threshold=ERROR
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=[%d{yyyy-MM-dd HH:mm:ss,SSS}] %m %n[%d{yyyy-MM-dd HH:mm:ss,SSS}] %p | %F:%L | %M%n%n
提交脚本 stream_CbDimStreamDriver.sh 内容如下：
#!/bin/bash
# Submit the CbDimStreamDriver Spark Streaming job to YARN in cluster mode.
# Ships a custom log4j.properties (via --files) so driver/executor logs roll
# over instead of growing without bound during long runs.

echo "================= spark job:CbDimStreamDriver start!!!========================"

# NOTE(fix): the original passed --conf spark.executor.extraJavaOptions twice
# (once with the GC flag, once with the log4j flag); the second --conf
# silently overrode the first, dropping -XX:+UseConcMarkSweepGC.
# Both JVM options must be combined into a single quoted value.
spark-submit \
  --master yarn \
  --deploy-mode cluster \
  --name stream-rm-cb-dim \
  --queue bf_yarn_pool.production \
  --class com.baofu.rm.streaming.CbDimStreamDriver \
  --num-executors 32 \
  --driver-memory 3G \
  --executor-memory 4G \
  --executor-cores 1 \
  --conf spark.dynamicAllocation.enabled=false \
  --conf spark.streaming.backpressure.enabled=true \
  --conf spark.streaming.kafka.maxRatePerPartition=1000 \
  --conf spark.eventLog.enabled=false \
  --conf spark.driver.extraJavaOptions=-Dlog4j.configuration=file:log4j.properties \
  --conf "spark.executor.extraJavaOptions=-XX:+UseConcMarkSweepGC -Dlog4j.configuration=file:log4j.properties" \
  --files ./log4j.properties \
  /home/bf_app_spark/spark-jobs/streams/fxJob/cbdim/rm-streaming-analysis-pro.jar

# spark-submit in yarn-cluster mode returns non-zero if the app fails.
rc=$?
if [[ $rc -ne 0 ]]; then
  echo "spark task: $0 failed,please check......" >&2
  exit 1
fi

echo "end run spark: $(date "+%Y-%m-%d %H:%M:%S")"
echo "================== spark job:CbDimStreamDriver end!!!===================="
后台启动命令：
# Launch the submit script detached from the terminal.
# FIX: the original command lacked the trailing '&', so nohup still ran in
# the foreground and blocked the shell; '&' is required for a true
# fire-and-forget background launch.
nohup sh ./stream_CbDimStreamDriver.sh > /dev/null 2>&1 &
参考: http://mkuthan.github.io/blog/2016/09/30/spark-streaming-on-yarn/