From 0e8746d895dedf0d8e286d8168d11f67d36cb233 Mon Sep 17 00:00:00 2001 From: Steve Fung Date: Thu, 20 Aug 2015 17:07:50 -0700 Subject: [PATCH] crash_reporter: Fix crash_sender - Remove all the ChromeOS specific logic. - Fix paths to correct Android paths. - Add periodic_scheduler, and add crash_sender to init. Bug: 23231196 Bug: 23233267 Change-Id: I12de28bfbe5d5b08831eda9b28c6d7a669c22290 --- crash_reporter/Android.mk | 10 ++ crash_reporter/crash_collector.cc | 7 +- crash_reporter/crash_sender | 143 ++++++++++---------------- crash_reporter/init.crash_reporter.rc | 11 ++ crash_reporter/periodic_scheduler | 81 +++++++++++++++ 5 files changed, 161 insertions(+), 91 deletions(-) create mode 100755 crash_reporter/periodic_scheduler diff --git a/crash_reporter/Android.mk b/crash_reporter/Android.mk index 6b98af4bd..467432a19 100644 --- a/crash_reporter/Android.mk +++ b/crash_reporter/Android.mk @@ -81,6 +81,7 @@ include $(CLEAR_VARS) LOCAL_MODULE := crash_sender LOCAL_MODULE_CLASS := EXECUTABLES LOCAL_MODULE_PATH := $(TARGET_OUT_EXECUTABLES) +LOCAL_REQUIRED_MODULES := curl periodic_scheduler LOCAL_SRC_FILES := crash_sender include $(BUILD_PREBUILT) @@ -113,6 +114,15 @@ LOCAL_MODULE_PATH := $(PRODUCT_OUT)/system/etc LOCAL_SRC_FILES := crash_reporter_logs.conf include $(BUILD_PREBUILT) +# Periodic Scheduler. +# ======================================================== +include $(CLEAR_VARS) +LOCAL_MODULE := periodic_scheduler +LOCAL_MODULE_CLASS := EXECUTABLES +LOCAL_MODULE_PATH := $(TARGET_OUT_EXECUTABLES) +LOCAL_SRC_FILES := periodic_scheduler +include $(BUILD_PREBUILT) + # Crash reporter tests. 
# ======================================================== include $(CLEAR_VARS) diff --git a/crash_reporter/crash_collector.cc b/crash_reporter/crash_collector.cc index 77755f470..b81a936cf 100644 --- a/crash_reporter/crash_collector.cc +++ b/crash_reporter/crash_collector.cc @@ -42,12 +42,13 @@ namespace { const char kCollectChromeFile[] = "/mnt/stateful_partition/etc/collect_chrome_crashes"; -const char kCrashTestInProgressPath[] = "/tmp/crash-test-in-progress"; +const char kCrashTestInProgressPath[] = + "/data/misc/crash_reporter/tmp/crash-test-in-progress"; const char kDefaultLogConfig[] = "/etc/crash_reporter_logs.conf"; const char kDefaultUserName[] = "chronos"; -const char kLeaveCoreFile[] = "/root/.leave_core"; +const char kLeaveCoreFile[] = "/data/misc/crash_reporter/.leave_core"; const char kLsbRelease[] = "/etc/lsb-release"; -const char kShellPath[] = "/bin/sh"; +const char kShellPath[] = "/system/bin/sh"; const char kSystemCrashPath[] = "/data/misc/crash_reporter/crash"; const char kUploadVarPrefix[] = "upload_var_"; const char kUploadFilePrefix[] = "upload_file_"; diff --git a/crash_reporter/crash_sender b/crash_reporter/crash_sender index fa2f8fc71..7f9062afb 100755 --- a/crash_reporter/crash_sender +++ b/crash_reporter/crash_sender @@ -1,4 +1,4 @@ -#!/bin/sh +#!/system/bin/sh # Copyright (C) 2010 The Android Open Source Project # @@ -17,20 +17,20 @@ set -e # Default product ID in crash report (used if GOOGLE_CRASH_* is undefined). -CHROMEOS_PRODUCT=ChromeOS +BRILLO_PRODUCT=Brillo + +# Base directory that contains any crash reporter state files. +CRASH_STATE_DIR="/data/misc/crash_reporter" # File whose existence implies crash reports may be sent, and whose # contents includes our machine's anonymized guid. -CONSENT_ID="/home/chronos/Consent To Send Stats" +CONSENT_ID="/data/misc/metrics/enabled" # Crash sender lock in case the sender is already running. 
-CRASH_SENDER_LOCK="/var/lock/crash_sender" +CRASH_SENDER_LOCK="${CRASH_STATE_DIR}/lock/crash_sender" # Path to file that indicates a crash test is currently running. -CRASH_TEST_IN_PROGRESS_FILE="/tmp/crash-test-in-progress" - -# Path to find which is required for computing the crash rate. -FIND="/usr/bin/find" +CRASH_TEST_IN_PROGRESS_FILE="${CRASH_STATE_DIR}/tmp/crash-test-in-progress" # Set this to 1 in the environment to allow uploading crash reports # for unofficial versions. @@ -40,20 +40,17 @@ FORCE_OFFICIAL=${FORCE_OFFICIAL:-0} HWCLASS_PATH="/sys/devices/platform/chromeos_acpi/HWID" # Path to file that indicates this is a developer image. -LEAVE_CORE_FILE="/root/.leave_core" +LEAVE_CORE_FILE="${CRASH_STATE_DIR}/.leave_core" # Path to list_proxies. -LIST_PROXIES="/usr/bin/list_proxies" +LIST_PROXIES="list_proxies" # Maximum crashes to send per day. MAX_CRASH_RATE=${MAX_CRASH_RATE:-32} -# Path to metrics_client. -METRICS_CLIENT="/usr/bin/metrics_client" - # File whose existence mocks crash sending. If empty we pretend the # crash sending was successful, otherwise unsuccessful. -MOCK_CRASH_SENDING="/tmp/mock-crash-sending" +MOCK_CRASH_SENDING="${CRASH_STATE_DIR}/tmp/mock-crash-sending" # Set this to 1 in the environment to pretend to have booted in developer # mode. This is used by autotests. @@ -64,40 +61,39 @@ OVERRIDE_PAUSE_SENDING=${OVERRIDE_PAUSE_SENDING:-0} # File whose existence causes crash sending to be delayed (for testing). # Must be stateful to enable testing kernel crashes. -PAUSE_CRASH_SENDING="/var/lib/crash_sender_paused" +PAUSE_CRASH_SENDING="${CRASH_STATE_DIR}/lock/crash_sender_paused" # URL to send official build crash reports to. REPORT_UPLOAD_PROD_URL="https://clients2.google.com/cr/report" # Path to a directory of restricted certificates which includes # a certificate for ${REPORT_UPLOAD_PROD_URL}. 
-RESTRICTED_CERTIFICATES_PATH="/usr/share/chromeos-ca-certificates" +RESTRICTED_CERTIFICATES_PATH="/system/etc/security/cacerts" # File whose existence implies we're running and not to start again. -RUN_FILE="/var/run/crash_sender.pid" +RUN_FILE="${CRASH_STATE_DIR}/run/crash_sender.pid" # Maximum time to sleep between sends. SECONDS_SEND_SPREAD=${SECONDS_SEND_SPREAD:-600} # Set this to 1 to allow uploading of device coredumps. -DEVCOREDUMP_UPLOAD_FLAG_FILE=\ -"/var/lib/crash_reporter/device_coredump_upload_allowed" +DEVCOREDUMP_UPLOAD_FLAG_FILE="${CRASH_STATE_DIR}/device_coredump_upload_allowed" # The syslog tag for all logging we emit. TAG="$(basename $0)[$$]" # Directory to store timestamp files indicating the uploads in the past 24 # hours. -TIMESTAMPS_DIR="/var/lib/crash_sender" +TIMESTAMPS_DIR="${CRASH_STATE_DIR}/crash_sender" # Temp directory for this process. TMP_DIR="" -# Chrome's crash report log file. -CHROME_CRASH_LOG="/var/log/chrome/Crash Reports/uploads.log" +# Crash report log file. +CRASH_LOG="${CRASH_STATE_DIR}/log/uploads.log" lecho() { - logger -t "${TAG}" "$@" + log -t "${TAG}" "$@" } # Returns true if mock is enabled. @@ -117,6 +113,9 @@ cleanup() { rm -rf "${TMP_DIR}" fi rm -f "${RUN_FILE}" + if [ -n "${CRASH_SENDER_LOCK}" ]; then + rm -rf "${CRASH_SENDER_LOCK}" + fi crash_done } @@ -130,7 +129,7 @@ crash_done() { is_official_image() { [ ${FORCE_OFFICIAL} -ne 0 ] && return 0 - grep ^CHROMEOS_RELEASE_DESCRIPTION /etc/lsb-release | grep -q Official + getprop ro.product.description | grep -q Official } # Returns 0 if the a crash test is currently running. NOTE: Mirrors @@ -167,7 +166,11 @@ is_developer_mode() { # If we're testing crash reporter itself, we don't want to special-case # for developer mode. is_crash_test_in_progress && return 1 - crossystem "devsw_boot?1" # exit status will be accurate + if [ "$(getprop ro.build.type)" = "eng" ]; then + return 0 + else + return 1 + fi } # Return 0 if the uploading of device coredumps is allowed. 
@@ -188,7 +191,7 @@ generate_uniform_random() { check_rate() { mkdir -p ${TIMESTAMPS_DIR} # Only consider minidumps written in the past 24 hours by removing all older. - ${FIND} "${TIMESTAMPS_DIR}" -mindepth 1 -mmin +$((24 * 60)) \ + find "${TIMESTAMPS_DIR}" -mindepth 1 -mtime +1 \ -exec rm -- '{}' ';' local sends_in_24hrs=$(echo "${TIMESTAMPS_DIR}"/* | wc -w) lecho "Current send rate: ${sends_in_24hrs}sends/24hrs" @@ -198,7 +201,7 @@ check_rate() { "max ${MAX_CRASH_RATE}send/24hrs" return 1 fi - mktemp "${TIMESTAMPS_DIR}"/XXXX > /dev/null + mktemp "${TIMESTAMPS_DIR}"/XXXXXX > /dev/null return 0 } @@ -252,27 +255,18 @@ get_key_value() { get_keys() { local file="$1" regex="$2" - awk -F'[[:space:]=]' -vregex="${regex}" \ - 'match($1, regex) { print $1 }' "${file}" -} - -# Return the board name. -get_board() { - get_key_value "/etc/lsb-release" "CHROMEOS_RELEASE_BOARD" + cut -d '=' -f1 "${file}" | grep --color=never "${regex}" } # Return the channel name (sans "-channel" suffix). get_channel() { - get_key_value "/etc/lsb-release" "CHROMEOS_RELEASE_TRACK" | - sed 's:-channel$::' + getprop ro.product.channel | sed 's:-channel$::' } # Return the hardware class or "undefined". 
get_hardware_class() { if [ -r "${HWCLASS_PATH}" ]; then cat "${HWCLASS_PATH}" - elif crossystem hwid > /dev/null 2>&1; then - echo "$(crossystem hwid)" else echo "undefined" fi @@ -284,13 +278,12 @@ send_crash() { local kind="$(get_kind "${meta_path}")" local exec_name="$(get_key_value "${meta_path}" "exec_name")" local url="${REPORT_UPLOAD_PROD_URL}" - local chromeos_version="$(get_key_value "${meta_path}" "ver")" - local board="$(get_board)" + local brillo_version="$(get_key_value "${meta_path}" "ver")" local hwclass="$(get_hardware_class)" local write_payload_size="$(get_key_value "${meta_path}" "payload_size")" local log="$(get_key_value "${meta_path}" "log")" local sig="$(get_key_value "${meta_path}" "sig")" - local send_payload_size="$(stat --printf=%s "${report_payload}" 2>/dev/null)" + local send_payload_size="$(stat -c "%s" "${report_payload}" 2>/dev/null)" local product="$(get_key_value "${meta_path}" "upload_var_prod")" local version="$(get_key_value "${meta_path}" "upload_var_ver")" local upload_prefix="$(get_key_value "${meta_path}" "upload_prefix")" @@ -358,10 +351,10 @@ send_crash() { # If ID or VERSION_ID is undefined, we use the default product name # and CHROMEOS_RELEASE_VERSION from /etc/lsb-release. if [ "${product}" = "undefined" ]; then - product="${CHROMEOS_PRODUCT}" + product="${BRILLO_PRODUCT}" fi if [ "${version}" = "undefined" ]; then - version="${chromeos_version}" + version="${brillo_version}" fi local image_type @@ -376,11 +369,7 @@ send_crash() { fi local boot_mode - if ! crossystem "cros_debug" > /dev/null 2>&1; then - # Sanity-check failed that makes sure crossystem exists. 
- lecho "Cannot determine boot mode due to error running crossystem command" - boot_mode="missing-crossystem" - elif is_developer_mode; then + if is_developer_mode; then boot_mode="dev" fi @@ -392,7 +381,7 @@ send_crash() { [ "${error_type}" = "undefined" ] && error_type= lecho "Sending crash:" - if [ "${product}" != "${CHROMEOS_PRODUCT}" ]; then + if [ "${product}" != "${BRILLO_PRODUCT}" ]; then lecho " Sending crash report on behalf of ${product}" fi lecho " Metadata: ${meta_path} (${kind})" @@ -403,7 +392,6 @@ send_crash() { if is_mock; then lecho " Product: ${product}" lecho " URL: ${url}" - lecho " Board: ${board}" lecho " HWClass: ${hwclass}" lecho " write_payload_size: ${write_payload_size}" lecho " send_payload_size: ${send_payload_size}" @@ -451,7 +439,6 @@ send_crash() { --capath "${RESTRICTED_CERTIFICATES_PATH}" --ciphers HIGH \ -F "prod=${product}" \ -F "ver=${version}" \ - -F "board=${board}" \ -F "hwclass=${hwclass}" \ -F "exec_name=${exec_name}" \ ${image_type:+-F "image_type=${image_type}"} \ @@ -477,15 +464,11 @@ send_crash() { fi ;; *) - if is_official_image; then - product_name="ChromeOS" - else - product_name="ChromiumOS" - fi + product_name="Brillo" ;; esac printf '%s,%s,%s\n' \ - "${timestamp}" "${id}" "${product_name}" >> "${CHROME_CRASH_LOG}" + "${timestamp}" "${id}" "${product_name}" >> "${CRASH_LOG}" lecho "Crash report receipt ID ${id}" else lecho "Crash sending failed with exit code ${curl_result}: " \ @@ -512,6 +495,7 @@ remove_report() { # 3G connection (see crosbug.com/3304 for discussion). send_crashes() { local dir="$1" + lecho "Sending crashes for ${dir}" if [ ! -d "${dir}" ]; then return @@ -519,8 +503,8 @@ send_crashes() { # Consider any old files which still have no corresponding meta file # as orphaned, and remove them. - for old_file in $(${FIND} "${dir}" -mindepth 1 \ - -mmin +$((24 * 60)) -type f); do + for old_file in $(find "${dir}" -mindepth 1 \ + -mtime +1 -type f); do if [ ! 
-e "$(get_base "${old_file}").meta" ]; then lecho "Removing old orphaned file: ${old_file}." rm -f -- "${old_file}" @@ -548,8 +532,8 @@ send_crashes() { if ! is_complete_metadata "${meta_path}"; then # This report is incomplete, so if it's old, just remove it. - local old_meta=$(${FIND} "${dir}" -mindepth 1 -name \ - $(basename "${meta_path}") -mmin +$((24 * 60)) -type f) + local old_meta=$(find "${dir}" -mindepth 1 -name \ + $(basename "${meta_path}") -mtime +1 -type f) if [ -n "${old_meta}" ]; then lecho "Removing old incomplete metadata." remove_report "${meta_path}" @@ -571,19 +555,10 @@ send_crashes() { continue fi - # Don't send crash reports from previous sessions while we're in guest mode - # to avoid the impression that crash reporting was enabled, which it isn't. - # (Don't exit right now because subsequent reports may be candidates for - # deletion.) - if ${METRICS_CLIENT} -g; then - lecho "Guest mode has been entered. Delaying crash sending until exited." - continue - fi - # Remove existing crashes in case user consent has not (yet) been given or # has been revoked. This must come after the guest mode check because - # ${METRICS_CLIENT} always returns "not consented" in guest mode. - if ! ${METRICS_CLIENT} -c; then + # metrics_client always returns "not consented" in guest mode. + if ! metrics_client -c; then lecho "Crash reporting is disabled. Removing crash." remove_report "${meta_path}" continue @@ -602,7 +577,7 @@ send_crashes() { # reports is spread out randomly by up to SECONDS_SEND_SPREAD. Thus, for # the sleep call the greater of the two delays is used. 
local now=$(date +%s) - local holdoff_time=$(($(stat --format=%Y "${meta_path}") + 30 - ${now})) + local holdoff_time=$(($(stat -c "%Y" "${meta_path}") + 30 - ${now})) local spread_time=$(generate_uniform_random "${SECONDS_SEND_SPREAD}") local sleep_time if [ ${spread_time} -gt ${holdoff_time} ]; then @@ -673,8 +648,6 @@ parseargs() { } main() { - trap cleanup EXIT INT TERM - parseargs "$@" if [ -e "${PAUSE_CRASH_SENDING}" ] && \ @@ -693,31 +666,25 @@ main() { # (like with autotests) that we're still running. echo $$ > "${RUN_FILE}" - for dependency in "${FIND}" "${METRICS_CLIENT}" \ - "${RESTRICTED_CERTIFICATES_PATH}"; do + for dependency in "${RESTRICTED_CERTIFICATES_PATH}"; do if [ ! -x "${dependency}" ]; then lecho "Fatal: Crash sending disabled: ${dependency} not found." exit 1 fi done - TMP_DIR="$(mktemp -d /tmp/crash_sender.XXXXXX)" + TMP_DIR="$(mktemp -d "${CRASH_STATE_DIR}/tmp/crash_sender.XXXXXX")" # Send system-wide crashes - send_crashes "/var/spool/crash" - - # Send user-specific crashes - local d - for d in /home/chronos/crash /home/chronos/u-*/crash; do - send_crashes "${d}" - done + send_crashes "${CRASH_STATE_DIR}/crash" } -( -if ! flock -n 9; then +trap cleanup EXIT INT TERM + +#TODO(http://b/23937249): Change the locking logic back to using flock. +if ! mkdir "${CRASH_SENDER_LOCK}" 2>/dev/null; then lecho "Already running; quitting." crash_done exit 1 fi main "$@" -) 9>"${CRASH_SENDER_LOCK}" diff --git a/crash_reporter/init.crash_reporter.rc b/crash_reporter/init.crash_reporter.rc index 6882b771d..db9bb6fa1 100644 --- a/crash_reporter/init.crash_reporter.rc +++ b/crash_reporter/init.crash_reporter.rc @@ -10,9 +10,20 @@ on boot # number to prevent infinitely recursing on crash handling. write /proc/sys/kernel/core_pipe_limit 4 + # Remove any previous orphaned locks. + rmdir /data/misc/crash_reporter/lock/crash_sender + # Create crash directories. 
mkdir /data/misc/crash_reporter 0700 root root + mkdir /data/misc/crash_reporter/lock 0700 root root + mkdir /data/misc/crash_reporter/log 0700 root root + mkdir /data/misc/crash_reporter/run 0700 root root + mkdir /data/misc/crash_reporter/tmp 0700 root root service crash_reporter /system/bin/crash_reporter --init class late_start oneshot + +service crash_sender /system/bin/periodic_scheduler 3600 14400 crash_sender \ + /system/bin/crash_sender + class late_start diff --git a/crash_reporter/periodic_scheduler b/crash_reporter/periodic_scheduler new file mode 100755 index 000000000..7fdb5c9d0 --- /dev/null +++ b/crash_reporter/periodic_scheduler @@ -0,0 +1,81 @@ +#!/system/bin/sh + +# Copyright (C) 2014 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Run tasks periodically. +# Usage: $0 <delay_seconds> <timeout_seconds> <task_name> <task_binary> +# +# Executes task <task_name> by running <task_binary> every <delay_seconds>. + +set -e -u + +SCRIPT_NAME="$(basename "$0")" +#CHECK_DELAY=300 # Check every 5 minutes. +CHECK_DELAY=15 # Seconds between checks (KILL_DELAY is added to each sleep). +KILL_DELAY=10 # How long to let the job clean up after a timeout. +# Let the unittests override. +: ${SPOOL_DIR:=/data/misc/crash_reporter/spool/cron-lite} + +loginfo() { + log -p i -t "${SCRIPT_NAME}" "$@" +} + +trap "loginfo 'exiting'" EXIT + +check_and_fix_spool_paths() { + # Avoid weird spool paths if possible. + rm -f "$(dirname "${SPOOL_DIR}")" "${SPOOL_DIR}" 2>/dev/null || : + mkdir -p "${SPOOL_DIR}" + if [ ! -O "${SPOOL_DIR}" -o ! 
-d "${SPOOL_DIR}" ]; then + loginfo "Spool directory is damaged. Aborting!" + exit 1 + fi +} + +main() { + local delay="$1" + local timeout="$2" + local name="$3" + local spool_file="${SPOOL_DIR}/${name}" + shift 3 + + [ -z "${delay}" ] && exit 1 + [ -z "${timeout}" ] && exit 1 + [ -z "${name}" ] && exit 1 + [ $# -eq 0 ] && exit 1 + check_and_fix_spool_paths + + while true; do + # Allow the sleep to be killed manually without terminating the handler. + # Send stderr to /dev/null to suppress the shell's "Terminated" message. + sleep $(( CHECK_DELAY + KILL_DELAY )) 2>/dev/null || true + + [ ! -e "${spool_file}" ] && touch "${spool_file}" + + local last_rotation="$(stat -c "%Y" "${spool_file}" 2>/dev/null || echo 0)" + local now="$(date +%s)" + local time_diff=$((now - last_rotation)) + + if [ ${time_diff} -gt ${delay} ]; then + rm "${spool_file}" || true + touch "${spool_file}" + loginfo "${name}: running $*" + timeout -k ${KILL_DELAY} ${timeout} "$@" || true + loginfo "${name}: job completed" + fi + done +} + +main "$@"