#!/usr/bin/bash
#
# Copyright (C) 2021-2026 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

# Description: A shell script that collects logs from an OCP cluster.
# It requires access to a valid KUBECONFIG env variable and a log directory.

# Trace every command to stderr so the job log shows exactly what was run.
set -x

# Resolve the oc client once. `command -v` is a shell builtin, so this also
# works on minimal images where the external `which` tool is not installed.
# The script carries on when oc is missing (each oc call then fails on its
# own), but the reason is stated up front.
oc_path=$(command -v oc) ||
  printf "Warning: 'oc' was not found in PATH.\n" >&2

# Print the command-line synopsis of this script on stdout.
usage() {
  local script_name
  script_name="$(basename "$0")"
  printf "Usage: %s <log_dir>\nA log directory is required.\n" "${script_name}"
}

# Validate the command line: exactly one argument, the log directory.
if [[ $# -ne 1 ]]; then
  printf "Error: expected exactly one argument (the log directory), got %d.\n" "$#" >&2
  usage
  exit 1
fi

log_dir="${1}"

# Every command below redirects its output into this directory, so make sure
# it exists up front. This also rejects an empty argument, which would
# otherwise make the script write to paths such as "/version.txt".
if ! mkdir -p -- "${log_dir}"; then
  printf "Error: cannot create log directory '%s'.\n" "${log_dir}" >&2
  exit 1
fi

# Get the output of various oc commands, one file per command.
# "${oc_path}" is quoted so that a client path containing spaces is not
# word-split, and so that an empty value fails with "command not found"
# instead of running the first argument (e.g. `version` or `get`) as a command.
"${oc_path}" version > "${log_dir}/version.txt"
"${oc_path}" get clusterversion/version > "${log_dir}/clusterversion.txt"
"${oc_path}" get clusteroperator > "${log_dir}/clusteroperator.txt"
# Reduce each subscription to the operator name, its current CSV and catalog.
"${oc_path}" get subs -A -o json |
  jq '[.items[] | {operator_name: .spec.name, version: .status.currentCSV, catalog: .spec.source }]' \
  > "${log_dir}/operators.json"
"${oc_path}" get network cluster -o yaml > "${log_dir}/clusternetwork.yaml"
"${oc_path}" get nodes -o wide > "${log_dir}/nodes.txt"
"${oc_path}" get nodes -o yaml > "${log_dir}/all-nodes.yaml"
"${oc_path}" get pods --all-namespaces -o wide > "${log_dir}/pods.txt"

# Get events with created object time.
# The jq filter prints a header row followed by one tab-separated row per
# event (newlines inside a message are replaced by ";" so that every event
# stays on one line); column then aligns the fields into a table.
# "${oc_path}" is quoted so that an empty or space-containing client path
# cannot be word-split into a different command.
"${oc_path}" get events --all-namespaces --sort-by='.metadata.creationTimestamp' -o json |
  jq -r '
    "NAMESPACE\tCREATED\tLAST SEEN\tTYPE\tREASON\tOBJECT\tMESSAGE",
    (.items[] | [
      (.involvedObject.namespace | tostring),
      (.metadata.creationTimestamp | tostring),
      (.lastTimestamp | tostring),
      (.type | tostring),
      (.reason | tostring),
      ((.involvedObject.kind // "") + "/" + (.involvedObject.name // "")),
      (.message | tostring | gsub("\n"; ";"))
    ] | @tsv)' |
  column -t -s $'\t' \
  > "${log_dir}/events.txt"

# State shared by the must-gather section below:
#   any_mirror_used       - set to "true" once an image is rewritten to a mirror
#   MUST_GATHER_ORIG_OPTS - accumulated --image options with the original images
#   MUST_GATHER_OPTS      - accumulated --image options after mirror rewriting
any_mirror_used=false
MUST_GATHER_ORIG_OPTS=
MUST_GATHER_OPTS=

if [ "$DO_MUST_GATHER" = true ]; then
  # Get image sources if available
  # Only rewrite registry if there is an IDMS/ICSP entry whose source
  # matches the image we want to pull (e.g. registry.redhat.io/openshift4).
  # Blindly using the first IDMS entry breaks connected clusters that have
  # unrelated IDMS entries (e.g. for OADP operators).

  #######################################
  # Print the first mirror configured for a source repository, if any.
  # Items that have no mirror list at all are skipped instead of aborting
  # the whole jq filter, so one incomplete object cannot hide a valid mirror.
  # Globals:   oc_path (read)
  # Arguments: $1 - resource kind to query (e.g. imagedigestmirrorsets)
  #            $2 - name of the mirror list under .spec
  #            $3 - source repository that must match exactly
  # Outputs:   the mirror on stdout; nothing when there is no match or the
  #            resource cannot be read (oc errors are discarded on purpose,
  #            the resource kind may not exist on this cluster)
  #######################################
  find_image_mirror() {
    local resource=$1 field=$2 repo=$3
    "${oc_path}" get "${resource}" -o json 2>/dev/null |
      jq -r --arg source "${repo}" --arg field "${field}" \
        '[.items[] | (.spec[$field] // [])[] |
          select(.source == $source) |
          .mirrors[0] // empty] | first // ""'
  }

  # DCI_MUST_GATHER_IMAGES is a whitespace-separated list, hence unquoted.
  for image in ${DCI_MUST_GATHER_IMAGES:-"registry.redhat.io/openshift4/ose-must-gather"}
  do
    MUST_GATHER_ORIG_OPTS="${MUST_GATHER_ORIG_OPTS} --image=${image}"
    # Extract the source registry+namespace (e.g. "registry.redhat.io/openshift4")
    image_source="${image%/*}"
    # ImageDigestMirrorSet entries take precedence; ImageContentSourcePolicy
    # is only consulted when no IDMS entry matches.
    mirror=$(find_image_mirror imagedigestmirrorsets imageDigestMirrors "${image_source}")
    if [[ -z "${mirror}" ]]; then
      mirror=$(find_image_mirror imagecontentsourcepolicy repositoryDigestMirrors "${image_source}")
    fi
    if [[ -n "${mirror}" ]]; then
      # Keep the image name (last path component) and swap the repository.
      image="${mirror}/${image##*/}"
      any_mirror_used=true
    fi
    MUST_GATHER_OPTS="${MUST_GATHER_OPTS} --image=${image}"
  done

  must_gather_dir="${log_dir}/must_gather"
  mkdir -p "${must_gather_dir}"

  # Run must-gather with the (possibly mirror-rewritten) images first. If
  # that fails and at least one image was rewritten, retry from scratch with
  # the original image names. Failures are reported on stderr but do not stop
  # the script, so whatever was collected is still trimmed and archived.
  #
  # MUST_GATHER_OPTS / MUST_GATHER_ORIG_OPTS hold several space-separated
  # --image options, so they are expanded unquoted on purpose.
  # shellcheck disable=SC2086
  if ! "${oc_path}" adm must-gather \
    --dest-dir="${must_gather_dir}" \
    ${MUST_GATHER_OPTS} \
    >/dev/null; then
    if [[ "${any_mirror_used}" == true ]]; then
      echo "Mirror-based must-gather failed, falling back to original images..."
      # Start from an empty directory so partial output of the failed run
      # does not get mixed with the retry.
      rm -Rf "${must_gather_dir}"
      mkdir -p "${must_gather_dir}"
      if ! "${oc_path}" adm must-gather \
        --dest-dir="${must_gather_dir}" \
        ${MUST_GATHER_ORIG_OPTS} \
        >/dev/null; then
        printf "Warning: must-gather also failed with the original images.\n" >&2
      fi
    else
      printf "Warning: must-gather failed.\n" >&2
    fi
  fi

  # Trim log files to only keep entries since the job start time.
  #
  # The work is split into small helpers, defined here next to their only
  # caller; none of them runs unless SINCE_TIME is set.

  #######################################
  # Rewrite a log file in place, dropping everything before the first entry
  # at or after the cutoff. Only lines that really start with an RFC3339
  # timestamp can mark the cut point, so stray text that merely sorts after
  # the cutoff (e.g. "panic: ...") does not end the trimming too early.
  # Arguments: $1 - log file
  #            $2 - cutoff as YYYY-MM-DDTHH:MM:SS
  # Returns:   non-zero if the file could not be rewritten
  #######################################
  trim_rfc3339_log() {
    local file=$1 cutoff=$2
    if awk -v cutoff="${cutoff}" '
        !found && /^[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]T/ && substr($0, 1, 19) >= cutoff { found = 1 }
        found { print }
      ' "${file}" > "${file}.tmp"; then
      mv "${file}.tmp" "${file}"
    else
      # Do not leave a half-written temporary file behind.
      rm -f "${file}.tmp"
      return 1
    fi
  }

  #######################################
  # Rewrite a syslog-style log file ("Jun 04 16:18:22.772978 host ...") in
  # place, keeping the entries at or after the cutoff. Lines that do not
  # start with a month name are treated as continuations and follow the
  # decision taken for the preceding timestamped line.
  # Arguments: $1 - log file
  #            $2 - cutoff as "MM DD HH:MM:SS" (no year is available)
  # Returns:   non-zero if the file could not be rewritten
  #######################################
  trim_syslog_log() {
    local file=$1 cutoff=$2
    if awk -v cutoff="${cutoff}" '
        BEGIN {
          split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", m)
          for (i in m) mon[m[i]] = sprintf("%02d", i)
        }
        ($1 in mon) { keep = ((mon[$1] " " $2 " " substr($3, 1, 8)) >= cutoff) }
        keep { print }
      ' "${file}" > "${file}.tmp"; then
      mv "${file}.tmp" "${file}"
    else
      rm -f "${file}.tmp"
      return 1
    fi
  }

  #######################################
  # Trim every log below a directory to the entries since a given time.
  # Arguments: $1 - directory to process
  #            $2 - start time, RFC3339 (e.g. 2024-06-04T16:18:22.5Z)
  #######################################
  trim_must_gather_logs() {
    local dir=$1 since=$2
    local cutoff cutoff_syslog cutoff_rotated
    local logfile first_line rot_ts
    local rfc3339_re='^[0-9]{4}-[0-9]{2}-[0-9]{2}T'
    local syslog_re='^[A-Z][a-z]{2} [0-9]{2} [0-9]{2}:'

    # Strip sub-second precision and timezone to get a comparable prefix
    # (YYYY-MM-DDTHH:MM:SS). The final truncation also drops a numeric UTC
    # offset such as "+00:00", which the two suffix removals do not handle.
    cutoff="${since%%.*}"
    cutoff="${cutoff%%Z}"
    cutoff="${cutoff:0:19}"
    # "MM DD HH:MM:SS" for syslog-format logs
    cutoff_syslog="${cutoff:5:2} ${cutoff:8:2} ${cutoff#*T}"
    # "YYYYMMDD-HHMMSS" as used in rotated log file names
    cutoff_rotated="${cutoff:0:4}${cutoff:5:2}${cutoff:8:2}-${cutoff:11:2}${cutoff:14:2}${cutoff:17:2}"

    # Filter log files by timestamp; the format is detected from the first
    # line and files in any other format are left untouched.
    find "${dir}" -name '*.log' -type f -print0 |
      while IFS= read -r -d '' logfile; do
        first_line=$(head -n 1 "${logfile}")
        if [[ "${first_line}" =~ ${rfc3339_re} ]]; then
          # RFC3339 format (container logs)
          trim_rfc3339_log "${logfile}" "${cutoff}"
        elif [[ "${first_line}" =~ ${syslog_re} ]]; then
          trim_syslog_log "${logfile}" "${cutoff_syslog}"
        fi
      done

    # Handle rotated container logs (filename pattern: N.log.YYYYMMDD-HHMMSS[.gz])
    # The timestamp is when the file stopped receiving entries.
    find "${dir}" -path '*/logs/rotated/*' -type f -print0 |
      while IFS= read -r -d '' logfile; do
        [[ "${logfile##*/}" =~ [0-9]{8}-[0-9]{6} ]] || continue
        rot_ts="${BASH_REMATCH[0]}"
        if [ "${rot_ts}" \< "${cutoff_rotated}" ]; then
          # Rotated before the cutoff: nothing in it can be recent enough.
          rm -f -- "${logfile}"
        elif [[ "${logfile}" != *.gz ]]; then
          # Compressed files are kept whole; plain ones are trimmed, but only
          # when they actually carry RFC3339 timestamps.
          first_line=$(head -n 1 "${logfile}")
          if [[ "${first_line}" =~ ${rfc3339_re} ]]; then
            trim_rfc3339_log "${logfile}" "${cutoff}"
          fi
        fi
      done
  }

  if [ -n "${SINCE_TIME}" ]; then
    trim_must_gather_logs "${must_gather_dir}" "${SINCE_TIME}"
  fi

  # Compress must-gather
  # Prefer pigz (parallel gzip) but fall back to plain gzip when it is not
  # installed, so that a missing tool does not cost us the archive.
  if command -v pigz >/dev/null 2>&1; then
    compress_program='pigz --best --recursive'
  else
    compress_program='gzip --best'
  fi

  # Only delete the uncompressed tree once the archive has been written;
  # removing it after a failed tar would lose everything that was collected.
  if tar \
    --use-compress-program="${compress_program}" \
    --directory "${log_dir}" \
    --create \
    --file "${must_gather_dir}.tar.gz" must_gather/; then
    rm -Rf "${must_gather_dir}"
  else
    printf "Warning: failed to compress '%s'; leaving it uncompressed.\n" \
      "${must_gather_dir}" >&2
    # A partial archive would be misleading next to the intact directory.
    rm -f "${must_gather_dir}.tar.gz"
  fi
fi
