分享知识,分享快乐

0%

解决spark streaming长时间运行日志不断增长问题

解决spark streaming长时间运行日志不断增长问题

log4j.properties 配置如下:
# log4j 1.x configuration shipped with the Spark job (passed via --files and
# -Dlog4j.configuration). Root logger is at WARN and feeds two appenders:
# "stdout" (console) and "A1" (size-capped rolling file).
log4j.rootLogger=WARN,stdout,A1

# Console appender: WARN and above, UTF-8 encoded.
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Threshold=WARN
log4j.appender.stdout.encoding=UTF-8
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
# Each event is printed as two timestamped lines followed by a blank line:
# first the message, then level | source file:line | method.
# NOTE(review): %F, %L and %M require caller location lookup, which the log4j
# PatternLayout docs describe as slow - confirm this is acceptable here.
log4j.appender.stdout.layout.ConversionPattern=[%d{yyyy-MM-dd HH:mm:ss,SSS}] %m %n[%d{yyyy-MM-dd HH:mm:ss,SSS}] %p | %F:%L | %M%n%n

# Rolling file appender: keeps the log file bounded so a long-running
# streaming job does not grow it without limit.
log4j.appender.A1=org.apache.log4j.RollingFileAppender
# Buffered writes with an 8192-byte buffer.
# NOTE(review): buffered output may lag behind or be lost on a hard kill - confirm.
log4j.appender.A1.BufferedIO=true
log4j.appender.A1.BufferSize=8192
# Target is a file named "stderr" inside the directory given by the
# spark.yarn.app.container.log.dir system property.
# NOTE(review): presumably chosen so the YARN container log view picks it up - confirm.
log4j.appender.A1.File=${spark.yarn.app.container.log.dir}/stderr
# Roll over at 10MB and keep at most 9 rolled backups.
log4j.appender.A1.MaxFileSize=10MB
log4j.appender.A1.MaxBackupIndex=9
log4j.appender.A1.encoding=UTF-8
# Append to an existing file instead of truncating it.
log4j.appender.A1.Append=true
# Only ERROR and above reach the file, although the root logger is at WARN.
log4j.appender.A1.Threshold=ERROR
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
# Same two-line layout as the console appender.
log4j.appender.A1.layout.ConversionPattern=[%d{yyyy-MM-dd HH:mm:ss,SSS}] %m %n[%d{yyyy-MM-dd HH:mm:ss,SSS}] %p | %F:%L | %M%n%n
提交脚本 stream_CbDimStreamDriver.sh 如下:
#!/bin/bash
#
# Submits the CbDimStreamDriver Spark Streaming job to YARN in cluster mode.
#
# Requirements:
#   - spark-submit must be on PATH.
#   - log4j.properties must exist in the current working directory; it is
#     shipped with --files and referenced by the driver and executor JVMs
#     through -Dlog4j.configuration=file:log4j.properties.
#
# Exit status: 0 when spark-submit succeeds, 1 when it returns non-zero.

echo "================= spark job:CbDimStreamDriver start!!!========================"

# spark.executor.extraJavaOptions must be given exactly once: it is a single
# configuration key, so passing it in two separate --conf flags leaves only one
# of the values in effect. The GC flag and the log4j setting are therefore
# combined into one quoted value.
spark-submit \
  --master yarn \
  --deploy-mode cluster \
  --name stream-rm-cb-dim \
  --queue bf_yarn_pool.production \
  --class com.baofu.rm.streaming.CbDimStreamDriver \
  --num-executors 32 \
  --driver-memory 3G \
  --executor-memory 4G \
  --executor-cores 1 \
  --conf spark.dynamicAllocation.enabled=false \
  --conf spark.streaming.backpressure.enabled=true \
  --conf spark.streaming.kafka.maxRatePerPartition=1000 \
  --conf spark.eventLog.enabled=false \
  --conf spark.driver.extraJavaOptions=-Dlog4j.configuration=file:log4j.properties \
  --conf "spark.executor.extraJavaOptions=-XX:+UseConcMarkSweepGC -Dlog4j.configuration=file:log4j.properties" \
  --files ./log4j.properties \
  /home/bf_app_spark/spark-jobs/streams/fxJob/cbdim/rm-streaming-analysis-pro.jar

# Capture the status immediately, before any other command overwrites $?.
rc=$?
if (( rc != 0 )); then
  echo "spark task: $0 failed,please check......"
  exit 1
fi

echo "end run spark: $(date "+%Y-%m-%d %H:%M:%S")"
echo "================== spark job:CbDimStreamDriver end!!!===================="
启动命令:
# Run the submit script immune to hangups, discarding all output. It is invoked
# with bash (not sh) because the script has a bash shebang and uses [[ ]], which
# a plain POSIX sh such as dash does not support.
nohup bash ./stream_CbDimStreamDriver.sh > /dev/null 2>&1

参考: http://mkuthan.github.io/blog/2016/09/30/spark-streaming-on-yarn/