#! /usr/bin/python3
# -*- coding: utf-8 -*-
#
# Copyright 2005 Lars Wirzenius (liw@iki.fi)
# Copyright 2009-2019 Holger Levsen (holger@layer-acht.org)
# Copyright © 2011-2018 Andreas Beckmann (anbe@debian.org)
# Copyright 2013 David Steele (dsteele@gmail.com)
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <https://www.gnu.org/licenses/>


"""Create HTML reports of piuparts log files

Lars Wirzenius <liw@iki.fi>
"""


import fcntl
import hashlib
import logging
import os
import pickle
import random
import re
import shutil
import string
import sys
import time
from collections import deque

import yaml

# if python-rpy2 ain't installed, we don't draw fancy graphs
try:
    from rpy2 import robjects
    from rpy2.robjects.packages import importr
except ImportError:
    pass

from urllib.error import HTTPError, URLError

import piupartslib.conf
import piupartslib.packagesdb
import piupartslib.pkgsummary as pkgsummary
from piupartslib.conf import MissingSection
from piupartslib.dwke import (
    BUG_EXT,
    KPR_EXT,
    LOG_EXT,
    FailureManager,
    clean_cache_files,
    create_problem_list,
    get_file_dict,
    get_pkg,
    make_kprs,
    replace_ext,
)

# Paths of the piuparts configuration file and the distro configuration file.
CONFIG_FILE = "/etc/piuparts/piuparts.conf"
DISTRO_CONFIG_FILE = "/etc/piuparts/distros.conf"
# Log directory names; NOTE(review): "KPR" presumably means "known problem
# report" (cf. KPR_EXT / make_kprs from piupartslib.dwke) -- confirm.
KPR_DIRS = ("pass", "bugged", "affected", "fail")
# File name extension of template files (see linktarget_by_template).
TPL_EXT = ".tpl"


# Substituted for $piuparts_version in the generated HTML pages.
PIUPARTS_VERSION = "1.6.0"

# Common page prologue: document head, site banner and the left-hand
# navigation table.  The text ends inside an open <td class="containercell">,
# which HTML_FOOTER closes again.
# Placeholders (string.Template syntax): $content_md5, $piuparts_version,
# $page_title, $doc_root, $section_navigation and $time.
HTML_HEADER = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<!-- $content_md5 -->
 <html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  <!-- Generated by piuparts-report $piuparts_version -->
  <title>
   $page_title
  </title>
  <link type="text/css" rel="stylesheet" href="$doc_root/style.css">
  <link rel="shortcut icon" href="$doc_root/favicon.ico">
 </head>

 <body>
 <div id="header">
   <h1 class="header">
    <a href="https://www.debian.org/">
     <img src="$doc_root/images/openlogo-nd-50.png" border="0" hspace="0" vspace="0" alt=""></a>
    <a href="https://www.debian.org/">
     <img src="$doc_root/images/debian.png" border="0" hspace="0" vspace="0" alt="Debian Project"></a>
    Quality Assurance
   </h1>
   <div id="obeytoyourfriend">Policy is your friend. Trust the Policy. Love the Policy. Obey the Policy.</div>
 </div>
 <hr>
<div id="main">
<table class="containertable">
 <tr class="containerrow" valign="top">
  <td class="containercell">
   <table class="lefttable">
    <tr class="titlerow">
     <td class="titlecell">
      General information
     </td>
    </tr>
    <tr class="normalrow">
     <td class="contentcell">
      <a href="$doc_root/">About</a>
     </td>
    </tr>
    <tr class="normalrow">
     <td class="contentcell">
      <a href="$doc_root/news.html">News</a>
     </td>
    </tr>
    <tr class="normalrow">
     <td class="contentcell">
      <a href="https://wiki.debian.org/piuparts/FAQ" target="_blank">FAQ</a>
     </td>
    </tr>
    <tr class="normalrow">
     <td class="contentcell">
      <a href="mailto:piuparts-devel@alioth-lists.debian.net" target="_blank">Contact us</a>
     </td>
    </tr>
    <tr class="titlerow">
     <td class="titlecell">
      Documentation
     </td>
    </tr>
    <tr class="normalrow">
     <td class="contentcell">
      <a href="$doc_root/bug_howto.html">How to file bugs</a><br />
      using <a href="$doc_root/templates/mail/">templates</a>
     </td>
    </tr>
    <tr class="normalrow">
     <td class="contentcell">
      <a href="https://www.debian.org/doc/debian-policy/" target="_blank">Debian policy</a>
     </td>
    </tr>
    <tr class="normalrow">
     <td class="contentcell">
      piuparts.d.o configuration:<br>
      <a href="https://salsa.debian.org/debian/piuparts/tree/develop/instances" target="_blank">piuparts.conf</a>,<br>
      <a href="https://salsa.debian.org/debian/piuparts/blob/develop/conf/distros.conf"
         target="_blank">distros.conf</a>,<br>
      <a href="https://salsa.debian.org/debian/piuparts/tree/develop/custom-scripts" target="_blank">scripts</a> and
      <a href="https://piuparts.debian.org/logs/" target="_blank">logs</a>
     </td>
    </tr>
    <tr class="normalrow">
     <td class="contentcell">
      <a href="$doc_root/doc/README.html" target="_blank">README</a>
     </td>
    </tr>
    <tr class="normalrow">
     <td class="contentcell">
      <a href="$doc_root/doc/README_server.html" target="_blank">README_server</a>
     </td>
    </tr>
    <tr class="normalrow">
     <td class="contentcell">
      <a href="$doc_root/doc/piuparts.1.html" target="_blank">piuparts manpage</a>
     </td>
    </tr>
    <tr class="titlerow">
     <td class="alerttitlecell">
      Summaries
     </td>
    </tr>
    <tr>
     <td class="contentcell">
      <a href="
         https://bugs.debian.org/cgi-bin/pkgreport.cgi?tag=piuparts;users=debian-qa@lists.debian.org&amp;archive=both"
       target="_blank">Bugs filed</a>
     </td>
    </tr>
    <tr class="normalrow">
     <td class="contentcell">
      <a href="https://piuparts.debian.org/overview.html" target="_blank">Suites overview</a>
     </td>
    </tr>
    $section_navigation
    <tr class="titlerow">
     <td class="titlecell">
      src: piuparts
     </td>
    </tr>
    <tr class="normalrow">
     <td class="contentcell">
      <a href="https://salsa.debian.org/debian/piuparts.git" target="_blank">Source</a>
     </td>
    </tr>
    <tr class="normalrow">
     <td class="contentcell">
      piuparts.d.o <a href="https://bugs.debian.org/src:piuparts.debian.org" target="_blank">bugs</a>
     </td>
    </tr>
    <tr class="normalrow">
     <td class="contentcell">
      piuparts <a href="https://bugs.debian.org/src:piuparts" target="_blank">bugs</a> /
               <a href="https://salsa.debian.org/debian/piuparts/blob/develop/TODO" target="_blank">ToDo</a>
     </td>
    </tr>
    <tr class="titlerow">
     <td class="titlecell">
      Other Debian QA efforts
     </td>
    </tr>
    <tr class="normalrow">
     <td class="contentcell">
      <a href="https://wiki.debian.org/qa.debian.org" target="_blank">Debian QA Group</a>
     </td>
    </tr>
    <tr class="normalrow">
     <td class="contentcell">
      <a href="https://qa.debian.org/dose/" target="_blank">Dose tools (former: EDOS)</a>
     </td>
    </tr>
    <tr class="normalrow">
     <td class="contentcell">
      <a href="https://lintian.debian.org" target="_blank">Lintian</a>
     </td>
    </tr>
    <tr class="normalrow">
     <td class="contentcell">
      <a href="https://tracker.debian.org" target="_blank">Debian Package Tracker</a>
     </td>
    <tr class="normalrow">
     <td class="contentcell">
      <a href="https://udd.debian.org" target="_blank">Ultimate Debian Database</a>
     </td>
    </tr>
    <tr class="normalrow">
     <td class="contentcell">
      <a href="https://jenkins.debian.net" target="_blank">jenkins.debian.net</a>
     </td>
    </tr>
    <tr class="normalrow">
     <td class="contentcell">
      <a href="http://ci.debian.net" target="_blank">ci.debian.net</a>
     </td>
    </tr>
    <tr class="titlerow">
     <td class="titlecell">
      Last update
     </td>
    </tr>
    <tr class="normalrow">
     <td class="lastcell">
      $time
     </td>
    </tr>
   </table>
  </td>
  <td class="containercell">
"""


# Common page epilogue: closes the content cell and container table opened by
# HTML_HEADER and appends the footer.  Placeholder: $doc_root.
HTML_FOOTER = """
  </td>
 </tr>
</table>
</div>
 <hr>
 <div id="footer">
  <div>
   <a href="https://tracker.debian.org/pkg/piuparts" target="_blank">piuparts</a> is GPL2
   <a href="https://packages.debian.org/changelogs/pool/main/p/piuparts/current/copyright" target="_blank">licenced</a>
   and was originally written by <a href="mailto:liw@iki.fi">Lars Wirzenius</a> and today is maintained by
   <a href="mailto:anbe@debian.org">Andreas Beckmann</a> and
   <a href="mailto:holger@layer-acht.org">Holger Levsen</a> and

   <a href="mailto:piuparts-devel@alioth-lists.debian.net">others</a> using
   <a href="https://salsa.debian.org/debian/piuparts.git" target="_blank">piuparts.git</a>.
   Ditto for this website.
   Weather icons are from the
   <a href="http://tango.freedesktop.org/Tango_Icon_Library" target="_blank">Tango Icon Library</a>.
   <a href="http://validator.w3.org/check?uri=referer">
    <img border="0" src="$doc_root/images/valid-html401.png"
         alt="Valid HTML 4.01!" height="15" width="80" align="middle">
   </a>
   <a href="http://jigsaw.w3.org/css-validator/check/referer">
    <img border="0" src="$doc_root/images/w3c-valid-css.png"
         alt="Valid CSS!"  height="15" width="80" align="middle">
   </a>
  </div>
 </div>
</body>
</html>
"""


# Body of a log list page.
# Placeholders: $title_style, $title, $section, $preface, $count, $logrows.
LOG_LIST_BODY_TEMPLATE = """
   <table class="righttable">
    <tr class="titlerow">
     <td class="$title_style" colspan="2">
      $title in $section
     </td>
    </tr>
    <tr class="normalrow">
     <td class="contentcell2" colspan="2">
      $preface
      The list has $count packages.
     </td>
    </tr>
$logrows
   </table>
"""


# Body of a page listing the packages in one state.
# Placeholders: $state, $section, $aside, $list.
STATE_BODY_TEMPLATE = """
   <table class="righttable">
    <tr class="titlerow">
     <td class="alerttitlecell">
      Packages in state "$state" in $section $aside
     </td>
    </tr>
    <tr class="normalrow">
     <td class="contentcell2">
      <ul>
$list
      </ul>
     </td>
    </tr>
   </table>
"""


# Body of a section statistics page.
# Placeholders: $section, $description, $tablerows, $packagesurl.
SECTION_INDEX_BODY_TEMPLATE = """
   <table class="righttable">
    <tr class="titlerow">
     <td class="titlecell" colspan="3">
      $section statistics
     </td>
    </tr>
    <tr class="normalrow">
     <td class="contentcell2" colspan="3">
      $description
     </td>
    </tr>
    <tr class="titlerow">
     <td class="alerttitlecell" colspan="3">
      Binary packages per state
     </td>
    </tr>
$tablerows
    <tr class="titlerow">
     <td class="titlecell" colspan="3">
      URL to Packages file
     </td>
    </tr>
     <tr class="normalrow">
     <td class="contentcell2" colspan="3">
      <code>$packagesurl</code>
     </td>
    </tr>
   </table>
"""

# Body of a per-maintainer page.
# Placeholders: $maintainer, $distrolinks, $rows.
MAINTAINER_BODY_TEMPLATE = """
   <table class="righttable">
    <tr class="titlerow">
     <td class="titlecell" colspan="6">
      $maintainer
     </td>
    </tr>
$distrolinks
$rows
   </table>
"""

# Minimal body: a table wrapped around $rows.
BASIC_BODY_TEMPLATE = """
   <table class="righttable">
$rows
   </table>
"""

# Table rows of a problem page.
# Placeholders: $HEADER, $SECTION, $HELPTEXT, $COMMAND, $PACKAGE_LIST, $COUNT.
PROB_TPL = """\
<tr class="titlerow"><td class="titlecell">$HEADER in $SECTION, sorted by reverse dependency count.</td></tr>
<tr class="normalrow"><td class="contentcell2">
$HELPTEXT
<p>The commandline to find these logs is: <pre>
COMMAND='$COMMAND'
</pre></p>
</td></tr>
<tr class="titlerow"><td class="alerttitlecell">Please file bugs!</td></tr>
<tr class="normalrow"><td class="contentcell2" colspan="3">
<ul>
$PACKAGE_LIST</ul>
<p>Affected packages in $SECTION: $COUNT</p></td></tr>
"""

# One <li> entry for a package on a problem page.
# Placeholders: $RDEPS, $LOG, $ARCH, $SECTION, $SSUBDIR, $SPKG, $PACKAGE, $BUG.
PKG_ERROR_TPL = """<li>$RDEPS - <a href=\"$LOG\">$LOG</a>$ARCH
    (<a href=\"https://piuparts.debian.org/$SECTION/source/$SSUBDIR/$SPKG.html\" target=\"_blank\">piuparts.d.o</a>)
    (<a href=\"https://tracker.debian.org/pkg/$SPKG\" target=\"_blank\">tracker.d.o</a>)
    (<a href=\"https://bugs.debian.org/$PACKAGE?dist=unstable\" target=\"_blank\">BTS</a>)
$BUG</li>
"""


# Page title for each log directory name.
title_by_dir = {
    "pass": "PASSED piuparts logs",
    "fail": "Failed UNREPORTED piuparts logs",
    "bugged": "Failed REPORTED piuparts logs",
    "affected": "Failed AFFECTED piuparts logs",
    "reserved": "RESERVED packages",
    "untestable": "UNTESTABLE packages",
}


# Longer description for each log directory name.
desc_by_dir = {
    "pass": "Log files for packages that have PASSED testing.",
    "fail": (
        "Log files for packages that have FAILED testing. "
        "Bugs have not yet been reported."
    ),
    "bugged": (
        "Log files for packages that have FAILED testing. "
        "Bugs have been reported, but not yet fixed."
    ),
    "affected": (
        "Log files for packages that have dependencies FAILED testing. "
        "Bugs have been reported, but not yet fixed."
    ),
    "reserved": (
        "Packages that are RESERVED for testing on a node in a "
        "distributed piuparts network."
    ),
    "untestable": (
        "Log files for packages that have are UNTESTABLE with "
        "piuparts at the current time."
    ),
}

# Package state name for each log directory name.
state_by_dir = {
    "pass": "successfully-tested",
    "fail": "failed-testing",
    "bugged": "failed-testing",
    "affected": "failed-testing",
    "reserved": "waiting-to-be-tested",
    "untestable": "cannot-be-tested",
}

# Ordered list of (template file name, link text) pairs.
#
# It would be better to use XX_name.tpl and take the link target from the
# template itself (it is a substring of that template's <title>);
# maintaining this list by hand is error-prone and tiresome.
#
# Every file name must end in ".tpl" (TPL_EXT); the two "file_moved_usr_*"
# entries used to be spelled ",tpl" and therefore could never match.
#
# NOTE(review): "unknown_inadequate_issue.tpl" appears twice with different
# link texts; both entries are kept as they were -- confirm which is intended.
linktarget_by_template = [
    ("initdscript_lsb_header_issue.tpl", "but logfile contains update-rc.d issues"),
    ("command_not_found_issue.tpl", "but logfile contains 'command not found'"),
    (
        "debsums_mismatch_issue.tpl",
        "but logfile contains modified conffiles or other shipped files",
    ),
    (
        "alternatives_after_purge_issue.tpl",
        "but logfile contains forgotten alternatives",
    ),
    (
        "owned_files_after_purge_issue.tpl",
        "but logfile contains owned files existing after purge",
    ),
    (
        "unowned_files_after_purge_issue.tpl",
        "but logfile contains unowned files after purge",
    ),
    ("maintainer_script_issue.tpl", "but logfile contains maintainer script failures"),
    ("db_setup_issue.tpl", "but logfile contains failure to setup a database"),
    (
        "problems_and_no_force_issue.tpl",
        "but logfile reports that not enough force was used",
    ),
    (
        "installs_over_symlink_issue.tpl",
        "but package installs something over existing symlinks",
    ),
    ("broken_symlinks_issue.tpl", "but logfile contains 'broken symlinks'"),
    ("unknown_inadequate_issue.tpl", "but logfile contains unknown inadequate issues"),
    (
        "boring_obsolete_conffile_file_inadequate_issue.tpl",
        "...and logfile contains tag from adequate 'obsolete-conffile-file'",
    ),
    (
        "boring_broken_symlink_file_inadequate_issue.tpl",
        "...and logfile contains tag from adequate 'broken-symlink-file'",
    ),
    (
        "bin_or_sbin_binary_requires_usr_lib_library_inadequate_issue.tpl",
        "but adequate found a binary in /bin or /sbin that requires a /usr/lib library",
    ),
    (
        "incompatible_licenses_inadequate_issue.tpl",
        "but adequate found a license incompatibility",
    ),
    (
        "broken_binfmt_detector_inadequate_issue.tpl",
        "but adequate did not find the detector registered with update_binfmts",
    ),
    (
        "broken_binfmt_interpreter_inadequate_issue.tpl",
        "but adequate did not find the interpreter registered with update_binfmts",
    ),
    (
        "missing_alternative_inadequate_issue.tpl",
        "but adequate found a missing alternative",
    ),
    (
        "missing_copyright_file_inadequate_issue.tpl",
        "but adequate couldn't find a copyright file",
    ),
    (
        "missing_pkgconfig-dependency_issue.tpl",
        "but adequate found a missing pkgconfig dependency",
    ),
    (
        "program_name_collision_inadequate_issue.tpl",
        "but adequate found a program name collision",
    ),
    (
        "py_file_not_bytecompiled_inadequate_issue.tpl",
        "but adequate found a .py file that is not byte-compiled",
    ),
    (
        "pyshared_file_not_bytecompiled_inadequate_issue.tpl",
        "but adequate found a .py file in /usr/share/pyshared that is not byte-compiled",
    ),
    (
        "ldd_inadequate_issue.tpl",
        "but adequate failed to run ldd on a binary or library",
    ),
    (
        "library_not_found_inadequate_issue.tpl",
        "but adequate couldn't find a required library",
    ),
    ("undefined_symbol_inadequate_issue.tpl", "but adequate found an undefined symbol"),
    (
        "symbol-size-mismatch_inadequate_issue.tpl",
        "but adequate found that a symbol has changed size since the package was built",
    ),
    (
        "missing-symbol-version-information_inadequate_issue.tpl",
        "but adequate found that a library is missing symbol version information",
    ),
    ("unknown_inadequate_issue.tpl", "but an unknown adequate issue was found"),
    ("inadequate_exit_issue.tpl", "but adequate exited inadequately"),
    (
        "packages_have_been_kept_back_issue.tpl",
        "but logfile contains 'packages have been kept back'",
    ),
    ("needs_rebuild_issue.tpl", "but logfile recommends to rebuild some packages"),
    ("module_build_error_issue.tpl", "but logfile contains dkms module build failures"),
    ("obsolete_conffiles_issue.tpl", "but logfile reports obsolete conffiles"),
    ("missing_md5sums_issue.tpl", "but logfile reports missing md5sums"),
    ("unowned_lib_symlink_issue.tpl", "but logfile reports unowned lib symlinks"),
    (
        "piuparts-depends-dummy_issue.tpl",
        "but logfile reports piuparts-depends-dummy.deb could not be installed",
    ),
    ("used_exception_issue.tpl", "but package used a piuparts exception"),
    ("test_was_skipped_issue.tpl", "but package test was skipped"),
    ("dependency_error.tpl", "due to unsatisfied dependencies"),
    (
        "packages_have_been_kept_back_error.tpl",
        "...and logfile also contains 'packages have been kept back'",
    ),
    ("command_not_found_error.tpl", "due to a 'command not found' error"),
    ("files_in_usr_local_error.tpl", "due to files in /usr/local"),
    (
        "overwrite_other_packages_files_error.tpl",
        "due to overwriting other packages files",
    ),
    ("debsums_mismatch_error.tpl", "due to modifying conffiles or other shipped files"),
    ("alternatives_after_purge_error.tpl", "due to forgotten alternatives after purge"),
    ("owned_files_by_many_packages_error.tpl", "due to owned files by many packages"),
    ("owned_files_after_purge_error.tpl", "due to owned files existing after purge"),
    ("unowned_files_after_purge_error.tpl", "due to unowned files after purge"),
    (
        "modified_files_after_purge_error.tpl",
        "due to files having been modified after purge",
    ),
    (
        "disappeared_files_after_purge_error.tpl",
        "due to files having disappeared after purge",
    ),
    ("diversion_error.tpl", "due to diversions being modified after purge"),
    ("processes_running_error.tpl", "due to leaving processes running behind"),
    ("resource_violation_error.tpl", "due to resource violation"),
    ("conffile_prompt_error.tpl", "due to prompting due to modified conffiles"),
    ("db_setup_error.tpl", "due to failing to setup a database"),
    ("insserv_error.tpl", "due to a problem with insserv"),
    ("problems_and_no_force_error.tpl", "due to not enough force being used"),
    ("trigger_cycle_error.tpl", "due to dpkg encountered trigger problems"),
    (
        "immediate_configuration_error.tpl",
        "due to apt could not perform immediate configuration",
    ),
    ("pre_depends_error.tpl", "due to a problem with pre-depends"),
    (
        "pre_installation_script_error.tpl",
        "due to pre-installation maintainer script failed",
    ),
    (
        "post_installation_script_error.tpl",
        "due to post-installation maintainer script failed",
    ),
    ("pre_removal_script_error.tpl", "due to pre-removal maintainer script failed"),
    ("post_removal_script_error.tpl", "due to post-removal maintainer script failed"),
    ("unknown_purge_error.tpl", "due to purge failed due to an unknown reason"),
    ("cron_error_after_removal_error.tpl", "due to errors from cronjob after removal"),
    (
        "logrotate_error_after_removal_error.tpl",
        "due to errors from logrotate after removal",
    ),
    (
        "installs_over_symlink_error.tpl",
        "...and package installs something over existing symlinks",
    ),
    ("broken_symlinks_error.tpl", "...and logfile also contains 'broken symlinks'"),
    (
        "module_build_error_error.tpl",
        "...and logfile contains dkms module build failures",
    ),
    ("obsolete_conffiles_error.tpl", "...and logfile reports obsolete conffiles"),
    ("missing_md5sums_error.tpl", "...and logfile reports missing md5sums"),
    ("unowned_lib_symlink_error.tpl", "...and logfile reports unowned lib symlinks"),
    (
        "piuparts-depends-dummy_error.tpl",
        "...and logfile reports piuparts-depends-dummy.deb could not be installed",
    ),
    (
        "file_moved_usr_error.tpl",
        "...and logfile reports a file moved between /{bin|sbin|lib*} and /usr/{bin|sbin|lib*}",
    ),
    (
        "file_moved_usr_issue.tpl",
        "but logfile reports a file moved between /{bin|sbin|lib*} and /usr/{bin|sbin|lib*}",
    ),
    ("unclassified_failures.tpl", "due to unclassified failures"),
]


class Config(piupartslib.conf.Config):
    """Configuration reader for piuparts-report.

    Declares the option names used by this program together with their
    fallback values and passes them on to ``piupartslib.conf.Config``.
    """

    def __init__(self, section="report", defaults_section=None):
        # Remember which section this object describes; set before the base
        # class initialiser runs, as the base class may look at it.
        self.section = section

        # Option name -> value used when the configuration does not set it.
        option_defaults = {
            "sections": "report",
            "output-directory": "html",
            "master-directory": ".",
            "depends-sections": None,
            "description": "",
            "proxy": None,
            "mirror": None,
            "distro": None,
            "area": None,
            "arch": None,
            "upgrade-test-distros": None,
            "max-reserved": 1,
            "doc-root": "/",
            "known-problem-directory": "/usr/share/piuparts/known_problems",
            "exclude-known-problems": None,
            "json-sections": "default",
            "precedence": 1,
            "web-host": "piuparts.debian.org",
        }
        super().__init__(section, option_defaults, defaults_section=defaults_section)


def setup_logging(log_level, log_file_name):
    """Configure the root logger.

    Messages are always written to stderr; when ``log_file_name`` is truthy
    they are additionally written to that file.  Both destinations use the
    same "HH:MM:SS message" layout (previously the file handler was left
    without a formatter, so file output lacked the timestamp).

    log_level: level passed to ``Logger.setLevel`` of the root logger.
    log_file_name: path of an additional log file, or a false value for none.
    """
    logger = logging.getLogger()
    logger.setLevel(log_level)

    formatter = logging.Formatter(fmt="%(asctime)s %(message)s", datefmt="%H:%M:%S")

    handlers = [logging.StreamHandler(sys.stderr)]
    if log_file_name:
        handlers.append(logging.FileHandler(log_file_name))

    for handler in handlers:
        handler.setFormatter(formatter)
        logger.addHandler(handler)


def html_protect(vstr):
    """Return ``vstr`` with ``&``, ``<``, ``>``, ``"`` and ``'`` replaced by
    HTML character references."""
    # "&" has to be handled first, otherwise the ampersands introduced by the
    # other references would be escaped a second time.
    substitutions = (
        ("&", "&amp;"),
        ("<", "&lt;"),
        (">", "&gt;"),
        ('"', "&#34;"),
        ("'", "&#39;"),
    )
    for char, reference in substitutions:
        vstr = vstr.replace(char, reference)
    return vstr


def is_bad_state(state):
    """Return True if ``state`` is one of the package states counted as bad."""
    # States intentionally left out of the list below:
    #   "successfully-tested"
    #   "essential-required"  (obsolete)
    #   "waiting-to-be-tested"
    #   "waiting-for-dependency-to-be-tested"
    #   "foreign:*"
    return state in (
        "failed-testing",
        "cannot-be-tested",
        "dependency-failed-testing",
        "dependency-cannot-be-tested",
        "dependency-does-not-exist",
        "circular-dependency",  # obsolete
        "unknown",
        "unknown-preferred-alternative",  # obsolete
        "no-dependency-from-alternatives-exists",  # obsolete
        "outdated",
        "does-not-exist",
    )


def emphasize_reason(reason):
    """Wrap ``reason`` in ``<em>`` tags when it names a bad state; otherwise
    return it unchanged."""
    return "<em>" + reason + "</em>" if is_bad_state(reason) else reason


def source_subdir(source):
    """Return the subdirectory name for a source package name.

    Names starting with "lib" map to their first four characters, all other
    names to their first character.
    """
    prefix_length = 4 if source[:3] == "lib" else 1
    return source[:prefix_length]


def source_summary_url(web_host, doc_root, section, src_pkg):
    """Build the https URL of the summary page of source package ``src_pkg``
    in ``section``.

    ``doc_root`` is appended to ``web_host`` without a separator, so it has
    to supply its own leading slash (or be empty).
    """
    subdir = source_subdir(src_pkg)
    return "https://{}{}/{}/source/{}/{}.html".format(web_host, doc_root, section, subdir, src_pkg)


def maintainer_subdir(maintainer):
    """Return the first character of the lower-cased ``maintainer`` string
    (an empty string for empty input)."""
    # Lower-case first, slice afterwards: the order matters for characters
    # whose lower-case form is longer than one character.
    lowered = maintainer.lower()
    return lowered[:1]


def find_files_with_suffix(vdir, suffix):
    """Return the names of the regular files in ``vdir`` ending in ``suffix``.

    The names are ordered by modification time, oldest first (ties are
    ordered by name).  Entries that cannot be examined are silently skipped.
    """
    dated_names = []  # (mtime, name)
    for entry in os.listdir(vdir):
        if not entry.endswith(suffix):
            continue
        path = os.path.join(vdir, entry)
        try:
            if not os.path.isfile(path):
                continue
            dated_names.append((os.path.getmtime(path), entry))
        except OSError:
            # e.g. the file vanished between the two calls
            continue
    dated_names.sort()
    return [entry for _mtime, entry in dated_names]


def update_file(source, target):
    """Make ``target`` an up-to-date copy of ``source``.

    If ``target`` already exists with the same size as ``source`` and is not
    older than it, nothing is done.  Otherwise an existing ``target`` is
    removed and re-created, preferably as a hard link to ``source`` and, if
    linking fails, as a copy.

    This is best effort: a failing copy is logged, never raised.
    """
    if os.path.exists(target):
        try:
            source_stat = os.stat(source)
            target_stat = os.stat(target)
        except OSError:
            # cannot compare the two files: fall through and replace target
            pass
        else:
            if source_stat.st_size == target_stat.st_size and source_stat.st_mtime <= target_stat.st_mtime:
                return
        try:
            os.remove(target)
        except OSError:
            # the link/copy below will fail (and be logged) if this mattered
            pass
    try:
        os.link(source, target)
    except OSError:
        try:
            shutil.copyfile(source, target)
        except OSError as exc:
            # Do not unpack exc.args: OSError subclasses such as
            # shutil.SpecialFileError carry a single argument, and unpacking
            # it into (errno, strerror) raised ValueError.
            logging.error(
                "failed to copy %s to %s: I/O error(%s): %s",
                source,
                target,
                exc.errno,
                exc.strerror or exc,
            )


def copy_logs(logs_by_dir, output_dir):
    """Publish log files below ``output_dir``.

    ``logs_by_dir`` maps a directory name to the base names of the logs in
    it.  Each log ``<dir>/<name>`` (taken relative to the current working
    directory) is linked or copied to ``<output_dir>/<dir>/<name>``; missing
    destination directories are created.
    """
    for log_dir in logs_by_dir:
        destination_dir = os.path.join(output_dir, log_dir)
        if not os.path.exists(destination_dir):
            os.makedirs(destination_dir)
        for log_name in logs_by_dir[log_dir]:
            update_file(
                os.path.join(log_dir, log_name),
                os.path.join(destination_dir, log_name),
            )


def remove_old_logs(logs_by_dir, output_dir):
    """Delete published files that are no longer listed in ``logs_by_dir``.

    For every directory name in ``logs_by_dir``, each entry of
    ``<output_dir>/<dir>`` whose name is not among the listed base names is
    removed.  Directories missing below ``output_dir`` are ignored.
    """
    for log_dir in logs_by_dir:
        published_dir = os.path.join(output_dir, log_dir)

        # a set gives constant-time membership tests in the loop below
        wanted = set(logs_by_dir[log_dir])

        if not os.path.exists(published_dir):
            continue
        for entry in os.listdir(published_dir):
            if entry not in wanted:
                os.remove(os.path.join(published_dir, entry))


def create_file(filename, contents):
    """Write the string ``contents`` to ``filename``, replacing any previous
    content of the file."""
    with open(filename, "w") as output:
        output.write(contents)


def append_file(filename, contents):
    """Append the string ``contents`` to ``filename``, creating the file if
    it does not exist yet."""
    with open(filename, "a") as output:
        output.write(contents)


def read_file(filename):
    """Return the whole content of the text file ``filename`` as one string."""
    with open(filename, "r") as source:
        text = source.read()
    return text


def readlines_file(filename):
    """Return the lines of the text file ``filename`` as a list of strings,
    line endings included."""
    with open(filename, "r") as source:
        lines = list(source)
    return lines


def fileage(filename):
    """Return the number of seconds elapsed since ``filename`` was last
    modified."""
    modified = os.path.getmtime(filename)
    return time.time() - modified


def _page_header_has_md5(filename, content_md5):
    """Return True if one of the first 10 lines of ``filename`` is an HTML
    comment whose text starts with ``content_md5``."""
    with open(filename, "r") as f:
        for _ in range(10):
            line = f.readline()
            if not line:
                break
            if line.startswith("<!-- ") and line[5 : 5 + len(content_md5)] == content_md5:
                return True
    return False


def _defer_refresh(filename, md5cache):
    """Decide whether rewriting the unchanged page ``filename`` is postponed.

    Returns True while the file is younger than a random age between 1 and
    4 weeks.  The "unmodified" / "refreshed" counters of ``md5cache`` are
    updated when a cache is given.
    """
    if md5cache is not None:
        md5cache["unmodified"] += 1
    # defer updating the file between 1 and 4 weeks
    minage = 1 * 7 * 86400
    maxage = 4 * 7 * 86400
    if fileage(filename) < random.randint(minage, maxage):
        return True
    if md5cache is not None:
        md5cache["refreshed"] += 1
    return False


def write_template_html(filename, body, mapping=None, defer_if_unmodified=False, md5cache=None):
    """Render HTML_HEADER + ``body`` + HTML_FOOTER and write it to ``filename``.

    filename: path of the HTML file to write.
    body: template text placed between header and footer.
    mapping: placeholder values for ``string.Template.safe_substitute``; it
        is not modified.  ``content_md5``, ``piuparts_version`` and ``time``
        are always set by this function.
    defer_if_unmodified: when true and the existing file already holds the
        same content, the rewrite is skipped until the file has reached a
        random age between 1 and 4 weeks.
    md5cache: optional dict with the keys "old", "new", "written",
        "unmodified" and "refreshed"; "new" receives the content hash and
        the counters are updated.  When it is None (previously a TypeError in
        combination with ``defer_if_unmodified``), only the hash stored in
        the existing file's header is consulted.
    """
    htmlpage = string.Template(HTML_HEADER + body + HTML_FOOTER)
    mapping = {} if mapping is None else mapping.copy()

    # Hash the page with the volatile fields blanked, so that the hash
    # depends on the real content only.
    mapping.update(
        {
            "content_md5": "",
            "piuparts_version": "",
            "time": "",
        }
    )
    content_md5 = hashlib.md5(htmlpage.safe_substitute(mapping).encode()).hexdigest()

    if md5cache is not None:
        md5cache["new"][filename] = content_md5

    if defer_if_unmodified and os.path.exists(filename):
        old_md5s = md5cache["old"] if md5cache is not None else {}
        if filename in old_md5s:
            unchanged = old_md5s[filename] == content_md5
        else:
            # not in the cache: look for the hash in the old file's header
            unchanged = _page_header_has_md5(filename, content_md5)
        if unchanged and _defer_refresh(filename, md5cache):
            return

    mapping.update(
        {
            "content_md5": content_md5,
            "piuparts_version": PIUPARTS_VERSION,
            "time": time.strftime("%Y-%m-%d %H:%M %Z"),
        }
    )
    create_file(filename, htmlpage.safe_substitute(mapping))
    if md5cache is not None:
        md5cache["written"] += 1


def create_section_navigation(section_names, current_section, doc_root):
    """Return the HTML table rows of the suite navigation.

    The rows link to the pages of ``current_section`` (overview, maintainer
    index, source index, states graph), followed by one row per name in
    ``section_names``.  All links are prefixed with ``doc_root``.
    """
    current = (doc_root, current_section)
    rows = [
        '<tr class="titlerow"><td class="alerttitlecell">Suite: <a href=\'%s/%s\'>%s</a></td></tr>'
        % (doc_root, current_section, current_section),
        '<tr><td class="contentcell"><a href="%s/%s/maintainer/">by maintainer / uploader</a></td></tr>\n' % current,
        '<tr><td class="contentcell"><a href="%s/%s/source/">by source package</a></td></tr>\n' % current,
        '<tr><td class="contentcell">states <a href="%s/%s/states.png">graph</a></td></tr>\n' % current,
        '<tr class="titlerow"><td class="alerttitlecell">all tested suites</td></tr>',
    ]
    for section in section_names:
        escaped = html_protect(section)
        rows.append(
            '<tr class="normalrow"><td class="contentcell"><a href=\'%s/%s\'>%s</a></td></tr>\n'
            % (doc_root, escaped, escaped)
        )
    return "".join(rows)


def get_email_address(maintainer):
    """Extract the address from a "Name <user@host>" maintainer string.

    Returns the text between the angle brackets, or a string starting with
    "INVALID maintainer address: " when ``maintainer`` has no such part.
    """
    match = re.match(r"(.+)(<)(.+@.+)(>)", maintainer)
    if match is None:
        return "INVALID maintainer address: %s" % maintainer
    _name, _open, address, _close = match.groups()
    return address


def package2id(package_name):
    """Turn a package name into a string usable as an HTML ``id`` attribute
    by replacing every "+" with "_"."""
    # "+" is not a valid identifier char for id=... attributes
    return "_".join(package_name.split("+"))


def unique(stuff):
    """Return an order-preserving list of the first occurrence of each
    element of ``stuff``.

    The previous implementation only dropped duplicates that were directly
    adjacent (``[1, 2, 1]`` came back unchanged), which contradicted this
    contract; for input without non-adjacent duplicates the result is the
    same as before.
    """
    result = []
    seen = set()  # hashable items seen so far, for fast lookups
    for item in stuff:
        try:
            if item in seen:
                continue
            seen.add(item)
        except TypeError:
            # unhashable item (e.g. a list): fall back to a linear search
            if item in result:
                continue
        result.append(item)
    return result


class Busy(Exception):
    """Exception carrying the fixed message "section is locked by another
    process"; it takes no constructor arguments."""

    def __init__(self):
        super().__init__("section is locked by another process")


class Section:
    def __init__(self, section, master_directory, doc_root, packagedb_cache=None):
        """Load the configuration and package databases of one section.

        section is the name of the configuration section to report on,
        master_directory the directory holding one subdirectory per section
        (created here if missing) and doc_root the URL prefix used in
        generated links.  packagedb_cache is a dict of already loaded binary
        package databases keyed by section name; it is read and modified
        while loading.  When omitted, a fresh private dict is used.
        """
        # A literal {} default would be created once and silently shared (and
        # cleared) by every Section constructed without an explicit cache.
        if packagedb_cache is None:
            packagedb_cache = {}

        self._config = Config(section=section, defaults_section="global")
        self._config.read(CONFIG_FILE)
        self._distro_config = piupartslib.conf.DistroConfig(DISTRO_CONFIG_FILE, self._config["mirror"])
        logging.debug("-------------------------------------------")
        logging.debug("Running section " + self._config.section)

        self._section_directory = os.path.abspath(os.path.join(master_directory, self._config.section))
        if not os.path.exists(self._section_directory):
            logging.debug(
                "Warning: %s did not exist, now created. Did you ever let the slave work?" % self._section_directory
            )
            os.makedirs(self._section_directory)

        self._doc_root = doc_root

        logging.debug("Loading and parsing Packages file")
        self._packagedb_cache = packagedb_cache
        self._package_databases = {}
        self._load_package_database(section, master_directory)
        self._binary_db = self._package_databases[section]

        self._source_db = piupartslib.packagesdb.PackagesDB(prefix=self._section_directory)
        self._source_db.load_packages_urls(
            self._distro_config.get_sources_urls(self._config.get_distro(), self._config.get_area())
        )
        if self._config.get_distro() != self._config.get_final_distro():
            # take version numbers (or None) from final distro
            self._source_db.load_alternate_versions_from_packages_urls(
                self._distro_config.get_sources_urls(self._config.get_final_distro(), self._config.get_area())
            )

        # per-directory {package: version} lookup filled lazily by find_links_to_logs()
        self._log_name_cache = {}
        # bookkeeping dict handed to write_template_html() by _write_template_html()
        self._md5cache = {
            "old": {},
            "new": {},
            "written": 0,
            "unmodified": 0,
            "refreshed": 0,
        }

    def _load_package_database(self, section, master_directory):
        if section in self._package_databases:
            return
        elif section in self._packagedb_cache:
            self._package_databases[section] = self._packagedb_cache[section]
            return

        config = Config(section=section, defaults_section="global")
        config.read(CONFIG_FILE)
        if not config["depends-sections"]:
            # this is a base database eligible for caching
            # only cache the most recent base database
            self._packagedb_cache.clear()
        sectiondir = os.path.join(master_directory, section)
        db = piupartslib.packagesdb.PackagesDB(prefix=sectiondir)
        self._package_databases[section] = db
        if config["depends-sections"]:
            deps = config["depends-sections"].split()
            for dep in deps:
                self._load_package_database(dep, master_directory)
            db.set_dependency_databases([self._package_databases[dep] for dep in deps])
        else:
            # only cache the big base databases that don't have additional dependencies
            self._packagedb_cache[section] = db
        db.load_packages_urls(
            self._distro_config.get_packages_urls(config.get_distro(), config.get_area(), config.get_arch())
        )
        if config.get_distro() != config.get_final_distro():
            # take version numbers (or None) from final distro
            db.load_alternate_versions_from_packages_urls(
                self._distro_config.get_packages_urls(config.get_final_distro(), config.get_area(), config.get_arch())
            )

    def _write_template_html(self, filename, body, mapping={}, defer_if_unmodified=False):
        """Call write_template_html() with the section-wide values added.

        A copy of mapping is extended with the keys "section_navigation",
        "doc_root" and "section" (overriding same-named entries); the
        caller's dict is left untouched.  The section's md5 cache is passed
        along.
        """
        values = mapping.copy()
        values["section_navigation"] = self._section_navigation
        values["doc_root"] = self._doc_root
        values["section"] = html_protect(self._config.section)
        write_template_html(
            filename,
            body,
            values,
            defer_if_unmodified=defer_if_unmodified,
            md5cache=self._md5cache,
        )

    def write_log_list_page(self, filename, title, preface, logs):
        """Write an HTML page listing log files grouped by package.

        logs is an iterable of (pathname, package, version) tuples.  Each
        package gets one table row with a link per log; a link is tagged
        " [cruft]" when the binary's source package is known to the source
        database under a different version.
        """
        logs_by_package = {}
        for log_path, pkg_name, pkg_version in logs:
            logs_by_package.setdefault(pkg_name, []).append((log_path, pkg_version))

        rows = []
        for pkg_name in sorted(logs_by_package):
            anchors = []
            for log_path, pkg_version in logs_by_package[pkg_name]:
                bin_pkg = self._binary_db.get_package(pkg_name)
                outdated = self._source_db.has_package(bin_pkg.source()) and (
                    bin_pkg.source_version() != self._source_db.get_version(bin_pkg.source())
                )
                marker = " [cruft]" if outdated else ""
                anchors.append('<a href="%s">%s</a>%s' % (html_protect(log_path), html_protect(pkg_version), marker))
            rows.append(
                '<tr class="normalrow"><td class="contentcell2">%s</td><td class="contentcell2">%s</td></tr>'
                % (html_protect(pkg_name), ", ".join(anchors))
            )

        # pages whose preface mentions FAIL get the alert title style
        title_style = "alerttitlecell" if "FAIL" in preface else "titlecell"

        page_values = {
            "page_title": html_protect(title + " in " + self._config.section),
            "title": html_protect(title),
            "title_style": title_style,
            "preface": preface,
            "count": len(logs_by_package),
            "logrows": "".join(rows),
        }
        self._write_template_html(filename, LOG_LIST_BODY_TEMPLATE, page_values)

    def print_by_dir(self, output_directory, logs_by_dir):
        """Write one "<dir>.html" log list page per key of logs_by_dir.

        logs_by_dir maps a directory name to the basenames of its log files;
        every basename must look like "<package>_<version>.log".
        """
        suffix = ".log"
        for vdir, basenames in logs_by_dir.items():
            entries = []
            for logname in basenames:
                assert logname.endswith(".log")
                assert "_" in logname
                package, version = logname[: -len(suffix)].split("_")
                entries.append((os.path.join(vdir, logname), package, version))
            target = os.path.join(output_directory, vdir + ".html")
            self.write_log_list_page(target, title_by_dir[vdir], desc_by_dir[vdir], entries)

    def find_links_to_logs(self, package_name, dirs, logs_by_dir):
        links = []
        for vdir in dirs:
            # avoid linear search against log file names by caching in a dict
            #
            # this cache was added to avoid a very expensive linear search
            # against the arrays in logs_by_dir. Note that the use of this cache
            # assumes that the contents of logs_by_dir is invarient across calls
            # to find_links_to_logs()
            #
            if vdir not in self._log_name_cache:
                self._log_name_cache[vdir] = {}

                for basename in logs_by_dir[vdir]:
                    if basename.endswith(".log"):
                        package, version = basename[: -len(".log")].split("_")

                        self._log_name_cache[vdir][package] = version

            if vdir == "fail":
                style = ' class="needs-bugging"'
            else:
                style = ""

            if package_name in self._log_name_cache[vdir]:
                basename = package_name + "_" + self._log_name_cache[vdir][package_name] + ".log"

                links.append(
                    '<a href="%s/%s"%s>%s</a>'
                    % (
                        self._doc_root,
                        os.path.join(self._config.section, vdir, basename),
                        style,
                        html_protect(self._log_name_cache[vdir][package_name]),
                    )
                )

        return links

    def link_to_maintainer_summary(self, maintainer):
        """Return an HTML link to the summary page of the given maintainer.

        The page location is derived from the e-mail address extracted by
        get_email_address(); the link text is the escaped maintainer string.
        """
        address = get_email_address(maintainer)
        href = "%s/%s/maintainer/%s/%s.html" % (
            self._doc_root,
            self._config.section,
            maintainer_subdir(address),
            address,
        )
        return '<a href="%s">%s</a>' % (href, html_protect(maintainer))

    def link_to_uploaders(self, uploaders):
        link = ""
        for uploader in uploaders.split(","):
            link += self.link_to_maintainer_summary(uploader.strip()) + ", "
        return link[:-2]

    def link_to_source_summary(self, package_name):
        """Return an HTML link from a binary package to its source summary page.

        The link text is the escaped binary package name; the target is the
        page of the source package the binary database reports for it.
        """
        source_name = self._binary_db.get_package(package_name).source()
        page = source_subdir(source_name) + "/" + source_name + ".html"
        return '<a href="%s/%s/source/%s">%s</a>' % (
            self._doc_root,
            self._config.section,
            page,
            html_protect(package_name),
        )

    def link_to_state_page(self, section, package_name, link_target):
        if self._binary_db.has_package(package_name):
            state = self._binary_db.get_package_state(package_name)
            link = '<a href="%s/%s/%s">%s</a>' % (
                self._doc_root,
                section,
                "state-" + state + ".html" + "#" + package_name,
                link_target,
            )
        else:
            if link_target == package_name:
                link = html_protect(package_name)
            else:
                link = "unknown-package"

        return link

    def links_to_logs(self, package_name, state, logs_by_dir):
        link = "N/A"
        dirs = ""

        if state == "successfully-tested":
            dirs = ["pass"]
        elif state == "failed-testing":
            dirs = ["fail", "bugged", "affected"]
        elif state == "cannot-be-tested":
            dirs = ["untestable"]

        if dirs != "":
            links = self.find_links_to_logs(package_name, dirs, logs_by_dir)
            link = ", ".join(links)

        if "/bugged/" in link or "/affected/" in link:
            link += (
                ' - <a href="https://bugs.debian.org/cgi-bin/pkgreport.cgi?package='
                + package_name
                + '" target="_blank" class="bugged">&nbsp;bug filed&nbsp;</a>\n'
            )

        return link

    def write_counts_summary(self):
        """Record today's per-state package counts in counts.txt.

        counts.txt lives in the section directory; its first line is a header
        naming the states and each further line holds one day's counts.  A
        line for today is appended unless the last line already mentions
        today's date.  The file is then copied to the output directory.

        Returns the total number of binary packages over all states.
        """
        logging.debug("Writing counts.txt")
        current_day = "%s" % time.strftime("%Y%m%d")
        header_fields = ["date"]
        count_fields = [current_day]
        total = 0
        for state in self._binary_db.get_states():
            count = len(self._binary_db.get_pkg_names_in_state(state))
            header_fields.append("%s" % state)
            count_fields.append("%s" % count)
            logging.debug("%s: %s" % (state, count))
            total += count
        logging.debug("total: %s" % total)
        logging.debug("source: %s" % len(self._source_db.get_all_packages()))
        header = ", ".join(header_fields) + "\n"
        counts = ", ".join(count_fields) + "\n"

        countsfile = os.path.join(self._section_directory, "counts.txt")
        existing_lines = readlines_file(countsfile) if os.path.isfile(countsfile) else []
        if not existing_lines:
            # Missing or empty file: start over with the header line instead
            # of failing on the lookup of a last line that does not exist.
            logging.debug("writing new file: %s" % countsfile)
            create_file(countsfile, header)
            last_line = ""
        else:
            last_line = existing_lines[-1]
        if current_day not in last_line:
            append_file(countsfile, counts)
            logging.debug("appending line: %s" % counts.strip())

        shutil.copy(countsfile, self._output_directory)

        return total

    def create_maintainer_summaries(self, maintainers, source_data):
        """Write one summary page per maintainer address.

        maintainers maps an e-mail address to the list of its source package
        names; source_data maps a source package name to a
        (state, sourcerows, binaryrows) tuple whose state is one of "fail",
        "unknown" or "pass".  Each page starts with per-state overview rows
        linking to the packages further down, followed by the prepared rows
        of every package, and links to the same maintainer's page in the
        other sections.
        """
        logging.debug("Writing %d maintainer summaries" % len(maintainers))
        maintainer_dir = os.path.join(self._output_directory, "maintainer")
        if not os.path.exists(maintainer_dir):
            os.mkdir(maintainer_dir)
        states = ["fail", "unknown", "pass"]
        for maintainer in sorted(maintainers.keys()):
            sources = maintainers[maintainer]
            maintainer_subdir_path = os.path.join(maintainer_dir, maintainer_subdir(maintainer))
            if not os.path.exists(maintainer_subdir_path):
                os.mkdir(maintainer_subdir_path)

            # group this maintainer's sources, and their prepared rows, by state
            packages = {state: [] for state in states}
            package_rows = {state: "" for state in states}
            for source in sorted(sources):
                (state, sourcerows, binaryrows) = source_data[source]
                packages[state].append(source)
                package_rows[state] += sourcerows + binaryrows

            rows = ""
            for state in states:
                if len(packages[state]) > 0:
                    # The anchor written with the source rows is
                    # id=package2id(source), so the fragment must use the
                    # same mapping to work for names containing "+".
                    links = "".join(
                        '<a href="#%s">%s</a> ' % (package2id(package), package) for package in packages[state]
                    )
                else:
                    links = "&nbsp;"
                rows += (
                    '<tr class="normalrow">'
                    + '<td class="labelcell">%s:</td>' % state
                    + '<td class="contentcell2">%s</td>' % len(packages[state])
                    + '<td class="contentcell2" colspan="4">%s</td>' % links
                    + "</tr>\n"
                )

            distrolinks = (
                '<tr class="normalrow">'
                + '<td class="labelcell">other distributions: </td>'
                + '<td class="contentcell2" colspan="5">'
            )

            for section in self._section_names:
                if section != self._config.section:
                    distrolinks += (
                        '<a href="'
                        + self._doc_root
                        + "/"
                        + section
                        + "/maintainer/"
                        + maintainer_subdir(maintainer)
                        + "/"
                        + maintainer
                        + '.html">'
                        + html_protect(section)
                        + "</a> "
                    )
            distrolinks += "</td></tr>"

            self._write_template_html(
                os.path.join(maintainer_subdir_path, maintainer + ".html"),
                MAINTAINER_BODY_TEMPLATE,
                {
                    "page_title": html_protect("Status of " + maintainer + " packages in " + self._config.section),
                    "maintainer": html_protect(maintainer + " in " + self._config.section),
                    "distrolinks": distrolinks,
                    "rows": rows + "".join([package_rows[state] for state in states]),
                },
                defer_if_unmodified=True,
            )

    def create_source_summary(self, source, logs_by_dir):
        source_version = self._source_db.get_test_versions(source)
        binaries = self._source_db.get_control_header(source, "Binary")
        maintainer = self._source_db.get_control_header(source, "Maintainer")
        uploaders = self._source_db.get_control_header(source, "Uploaders")

        success = True
        failed = False
        binaryrows = ""
        for binary in sorted([x.strip() for x in binaries.split(",") if x.strip()]):
            if not self._binary_db.has_package(binary):
                # udebs or binary packages for other architectures
                # The latter is a FIXME which needs parsing the Packages files from other archs too
                binaryrows += (
                    '<tr class="normalrow">'
                    + '<td class="labelcell">Binary:</td>'
                    + '<td class="contentcell2">%s</td>' % binary
                    + '<td class="contentcell2" colspan="4">unknown package</td>'
                    + "</tr>\n"
                )
                continue

            state = self._binary_db.get_package_state(binary, recurse=False)

            if "waiting" not in state and "dependency" in state:
                state_style = "lightalertlabelcell"
            elif state == "failed-testing":
                state_style = "lightlabelcell"
            else:
                state_style = "labelcell"

            binary_version = self._binary_db.get_test_versions(binary)
            if self._binary_db.get_package(binary).source_version() != source_version:
                binary_version += " [cruft]"
            binary_arch = self._binary_db.get_control_header(binary, "Architecture")
            archsuffix = ""
            if binary_arch == "all":
                archsuffix = ":all"
            binaryrows += (
                '<tr class="normalrow">'
                + '<td class="labelcell">Binary:</td>'
                + '<td class="contentcell2">%s%s</td>' % (binary, archsuffix)
                + '<td class="%s">piuparts-result:</td>' % state_style
                + '<td class="contentcell2">%s %s</td>'
                % (
                    self.link_to_state_page(self._config.section, binary, state),
                    self.links_to_logs(binary, state, logs_by_dir),
                )
                + '<td class="labelcell">Version:</td>'
                + '<td class="contentcell2">%s</td>' % html_protect(binary_version)
                + "</tr>\n"
            )

            if state not in ("successfully-tested", "essential-required"):
                success = False
            if state in (
                "failed-testing",
                "dependency-does-not-exist",
                "cannot-be-tested",
            ):
                failed = True

        if binaryrows != "":
            source_state = "unknown"

            if success:
                source_state = '<img src="%s/images/sunny.png" alt="success">' % self._doc_root
            if failed:
                source_state = '<img src="%s/images/weather-severe-alert.png" alt="failed">' % self._doc_root

            sourcerows = (
                '<tr class="titlerow">'
                + '<td class="titlecell" colspan="6" id="%s">%s in %s</td>'
                % (package2id(source), source, self._config.section)
                + "</tr>\n"
            )

            sourcerows += (
                '<tr class="normalrow">'
                + '<td class="labelcell">Source:</td>'
                + '<td class="contentcell2">'
                + '<a href="https://tracker.debian.org/%s" target="_blank">%s</a>' % (source, html_protect(source))
                + "</td>"
                + '<td class="labelcell">piuparts summary:</td>'
                + '<td class="contentcell2">%s</td>' % source_state
                + '<td class="labelcell">Version:</td>'
                + '<td class="contentcell2">%s</td>' % html_protect(source_version)
                + "</tr>\n"
            )

            sourcerows += (
                '<tr class="normalrow">'
                + '<td class="labelcell">Maintainer:</td>'
                + '<td class="contentcell2" colspan="5">%s</td>' % self.link_to_maintainer_summary(maintainer)
                + "</tr>\n"
            )

            if uploaders:
                sourcerows += (
                    '<tr class="normalrow">'
                    + '<td class="labelcell">Uploaders:</td>'
                    + '<td class="contentcell2" colspan="5">%s</td>' % self.link_to_uploaders(uploaders)
                    + "</tr>\n"
                )

            source_summary_page_path = os.path.join(self._output_directory, "source", source_subdir(source))

            if not os.path.exists(source_summary_page_path):
                os.makedirs(source_summary_page_path)

            self._write_template_html(
                os.path.join(source_summary_page_path, (source + ".html")),
                BASIC_BODY_TEMPLATE,
                {
                    "page_title": html_protect("Status of source package " + source + " in " + self._config.section),
                    "rows": sourcerows + binaryrows,
                },
                defer_if_unmodified=True,
            )

            # return parsable values
            if success:
                source_state = "pass"
            if failed:
                source_state = "fail"
        else:
            source_state = "udeb"
            sourcerows = ""

        return sourcerows, binaryrows, source_state, maintainer, uploaders

    def create_package_summaries(self, logs_by_dir):
        """Write all source summary pages, the sources lists and maintainer pages.

        Every source package gets a summary via create_source_summary().
        Sources whose state is not "udeb" are listed in sources.txt and
        sources.yaml and registered under the e-mail address of their
        maintainer and of each uploader for the maintainer summaries.
        """
        src_names = sorted(self._source_db.get_all_package_names())
        logging.debug("Writing %d source summaries" % len(src_names))

        maintainers = {}
        rows_by_source = {}
        state_lines = []
        sources_data = []
        for source in src_names:
            sourcerows, binaryrows, source_state, maintainer, uploaders = self.create_source_summary(
                source, logs_by_dir
            )
            if source_state == "udeb":
                continue

            source_version = self._source_db.get_test_versions(source)
            sources_data.append({"source": source, "state": source_state, "version": source_version})
            state_lines.append("%s: %s\n" % (source, source_state))
            rows_by_source[source] = (source_state, sourcerows, binaryrows)

            for person in [maintainer] + uploaders.split(","):
                person = person.strip()
                if not person:
                    continue
                email = get_email_address(person)
                if "INVALID" in email:
                    # entries without a usable address get no summary page
                    continue
                maintainers.setdefault(email, []).append(source)

        create_file(os.path.join(self._output_directory, "sources.txt"), "".join(state_lines))
        create_file(
            os.path.join(self._output_directory, "sources.yaml"),
            yaml.dump(sources_data, default_flow_style=False),
        )

        self.create_maintainer_summaries(maintainers, rows_by_source)

    def make_section_stats_graph(self):
        """Render states.png from counts.txt via R and return an HTML table row.

        The graph is a bar plot of the per-state binary package counts read
        from the section's counts.txt, written to states.png in the output
        directory.  The returned row embeds the image and links to counts.txt.

        importr and robjects come from the optional rpy2 import at the top of
        the file; if that import failed these names are undefined and the
        call raises.
        """
        countsfile = os.path.join(self._section_directory, "counts.txt")
        pngfile = os.path.join(self._output_directory, "states.png")
        # open a PNG graphics device; everything plotted until dev_off() goes to pngfile
        grdevices = importr("grDevices")
        grdevices.png(
            file=pngfile,
            width=1600,
            height=900,
            pointsize=10,
            res=100,
            antialias="none",
        )
        r = robjects.r
        states = self._binary_db.get_states()
        active_states = self._binary_db.get_active_states()
        r("nstate = %d" % len(states))
        # legend names: states that are not active are shown in parentheses
        r("snames = c(%s)" % ",".join(['"%s"' % s if s in active_states else '"(%s)"' % s for s in states]))
        r('cnames = c("date",snames)')
        # read counts.txt, skipping its header line and using the date column as row names
        r('t <- (read.table("' + countsfile + '",sep=",",header=0,skip=1,col.names=cnames,row.names=1,fill=TRUE))')
        # here we define how many days we wants stats for (163=half a year)
        # r('v <- t[(nrow(t)-163):nrow(t),0:nstate]')
        # make graph since day 1
        r("v <- t[0:nrow(t),0:nstate]")
        # thanks to http://tango.freedesktop.org/Generic_Icon_Theme_Guidelines for those nice colors
        r("palsize = 14")
        r(
            'palette(c("#4e9a06", "#ef2929", "#d3d7cf", "#5c3566", "#c4a000", \
                     "#fce94f", "#a40000", "#888a85", "#2e3436", "#729fcf", \
                     "#3465a4", "#204a87", "#555753", "#ce5c00"))'
        )
        r(
            'barplot(t(v),col = 1:palsize, \
          main="Binary packages per state in '
            + self._config.section
            + '", \
          xlab="", ylab="Number of binary packages", space=0, border=NA)'
        )
        r('legend(x="bottom",legend=snames, ncol=2,fill=1:palsize,xjust=0.5,yjust=0,bty="n")')
        # close the device opened by grdevices.png() above
        grdevices.dev_off()

        # table row embedding the graph, linking to the full image and the raw counts
        stats_html = (
            '<tr class="normalrow"> '
            + '<td class="contentcell2" colspan="3">'
            + '<a href="%s">' % "states.png"
            + '<img src="%s" width="100%%" alt="Binary package states in %s">' % ("states.png", self._config.section)
            + "</a>"
            + '<br>(<a href="counts.txt">Source</a>)\n'
            + "</td></tr>\n"
        )

        return stats_html

    def prepare_known_problem_reports(self, failures):
        self._problem_reports = {}
        for problem in self._problem_list:
            tpl_text = dwke_update_tpl(
                self._config.section,
                problem,
                failures.filtered(problem.name),
                PKG_ERROR_TPL,
                PROB_TPL,
                self._binary_db,
                self._source_db,
            )
            if len(tpl_text):
                self._problem_reports[problem.name[:-5] + TPL_EXT] = tpl_text

    def create_and_link_to_analysises(self, state):
        """Write the analysis pages relevant for state and return links to them.

        Returns an HTML <ul> with one item per written page, each followed by
        counts of the log directories referenced in its rows, or "" when no
        page was written.
        """
        items = []
        for template, linktarget in linktarget_by_template:
            # successful logs only have issues and failed logs only have errors
            is_issue = template[-9:] == "issue.tpl"
            if state == "failed-testing":
                relevant = not is_issue
            elif state == "successfully-tested":
                relevant = is_issue
            else:
                relevant = False
            if not relevant or template not in self._problem_reports:
                continue

            logging.debug("Writing analysis page for %s" % template)
            rows = self._problem_reports[template]
            page_name = template[: -len(".tpl")] + ".html"

            self._write_template_html(
                os.path.join(self._output_directory, page_name),
                BASIC_BODY_TEMPLATE,
                {
                    "page_title": html_protect("Packages in state " + state + " " + linktarget),
                    "rows": rows,
                },
            )

            # count the logs per directory by the link prefixes found in the rows
            substats = ""
            if state == "failed-testing":
                tallies = []
                count_bugged = rows.count('"bugged/')
                count_affected = rows.count('"affected/')
                count_failed = rows.count('"fail/')
                if count_bugged > 0:
                    tallies.append("%s bugged" % count_bugged)
                if count_affected > 0:
                    tallies.append("%s affected" % count_affected)
                if count_failed > 0:
                    tallies.append('<span class="needs-bugging">%s failed</span>' % count_failed)
                if tallies:
                    substats = ": " + ", ".join(tallies)
            else:
                count_passed = rows.count('"pass/')
                if count_passed > 0:
                    substats = ": %s passed" % count_passed

            items.append("<li><a href=%s>%s</a>%s</li>\n" % (page_name, linktarget, substats))

        if not items:
            return ""
        return "<ul>\n" + "".join(items) + "</ul>"

    def write_section_index_page(self, dirs, total_packages):
        """Write the section's index.html with per-state statistics.

        One table row is produced for every active state that has packages
        or analysis pages, followed by the states graph (when it can be
        generated) and a row with total_packages.  dirs lists the log
        directories for which a "<dir> logs" link may be shown.
        """
        linkable_dirs = ("pass", "fail", "bugged", "affected", "untestable")
        row_template = (
            '<tr class="normalrow"><td class="contentcell2"><a href=\'state-%s.html\'>%s</a>%s</td>'
            + '<td class="contentcell2">%d</td><td class="contentcell2">%s</td></tr>\n'
        )

        row_parts = []
        for state in self._binary_db.get_active_states():
            dir_link = "".join(
                "<a href='%s.html'>%s</a> logs<br>" % (vdir, html_protect(vdir))
                for vdir in dirs
                if vdir in linkable_dirs and state_by_dir[vdir] == state
            )
            analysis = ""
            if state in ("successfully-tested", "failed-testing"):
                analysis = self.create_and_link_to_analysises(state)
            package_count = len(self._binary_db.get_pkg_names_in_state(state))
            if analysis or package_count:
                escaped_state = html_protect(state)
                row_parts.append(row_template % (escaped_state, escaped_state, analysis, package_count, dir_link))

        try:
            row_parts.append(self.make_section_stats_graph())
        except Exception:
            logging.debug("Error generating the graph images, probably python-rpy2 is not installed, disabling graphs.")

        row_parts.append(
            '<tr class="normalrow"> <td class="labelcell2">Total</td> <td class="labelcell2" colspan="2">%d</td></tr>\n'
            % total_packages
        )
        tablerows = "".join(row_parts)

        # default description, derived from the tested distribution(s)
        vendor = "Debian"
        distros = self._config.get_distros()
        if len(distros) > 1:
            description = "%s %s: package installation in %s" % (
                vendor,
                self._config.get_area(),
                self._config.get_start_distro(),
            )
            description += "".join(", dist-upgrade to %s" % distro for distro in distros[1:])
            description += ", removal, and purge test."
        else:
            description = "%s %s / %s: package installation, removal, and purge test." % (
                vendor,
                self._config.get_distro(),
                self._config.get_area(),
            )

        # a configured description replaces the default; a leading or
        # trailing "+" appends or prepends it instead
        configured = self._config["description"]
        if configured.startswith("+"):
            description = description + " " + configured[1:]
        elif configured.endswith("+"):
            description = configured[:-1] + " " + description
        elif configured:
            description = configured

        page_values = {
            "page_title": html_protect(self._config.section + " statistics"),
            "description": html_protect(description),
            "tablerows": tablerows,
            "packagesurl": "<br>".join([html_protect(url) for url in self._binary_db.get_urls()]),
        }
        self._write_template_html(
            os.path.join(self._output_directory, "index.html"),
            SECTION_INDEX_BODY_TEMPLATE,
            page_values,
        )

    def _show_providers(self, dep):
        providers = self._binary_db.get_providers(dep)
        vlist = ""
        if providers:
            vlist += "\n<ul>\n"
            for provider in providers:
                vlist += "<li>provider %s is %s</li>\n" % (
                    self.link_to_state_page(self._config.section, provider, provider),
                    emphasize_reason(html_protect(self._binary_db.get_package_state(provider))),
                )
            vlist += "</ul>\n"
        return vlist

    def write_state_pages(self):
        """Write one ``state-<state>.html`` page per active package state.

        Every page lists the packages that are currently in that state,
        each followed by its dependencies (first alternative, further
        alternatives and providers) and the state of each of them.  For
        error states the packages are ordered by descending blocked-package
        count and every entry also shows its ``rrdep_count`` and
        ``block_count`` values.
        """
        db = self._binary_db

        def by_name(pkg):
            return pkg["Package"]

        def by_blocked_then_name(pkg):
            # highest block count first, name as tie breaker
            return (-db.block_count(pkg["Package"]), pkg["Package"])

        def dependency_item(template, dep):
            # Opening <li> for one dependency followed by its provider
            # list; the caller emits the matching closing </li>.
            opening = template % (
                self.link_to_state_page(self._config.section, dep, dep),
                emphasize_reason(html_protect(db.get_package_state(dep, resolve_virtual=False))),
            )
            return opening + self._show_providers(dep)

        for state in db.get_active_states():
            logging.debug("Writing page for %s" % state)

            with_counts = state in db.get_error_states()
            if with_counts:
                aside = " (reverse deps, blocked pkgs)"
                sort_key = by_blocked_then_name
            else:
                aside = ""
                sort_key = by_name

            packages = sorted(
                [db.get_package(name) for name in db.get_pkg_names_in_state(state)],
                key=sort_key,
            )

            # HTML fragments of the package list, joined once at the end
            chunks = []
            for package in packages:
                name = package["Package"]
                chunks.append('<li id="%s">%s' % (package2id(name), self.link_to_source_summary(name)))
                if package["Architecture"] == "all":
                    chunks.append(":all")
                if package.source_version() != self._source_db.get_version(package.source()):
                    chunks.append(" [cruft]")
                if with_counts:
                    chunks.append(" (%d, %d)" % (db.rrdep_count(name), db.block_count(name)))
                chunks.append(" (%s)" % html_protect(package["Maintainer"]))

                all_deps = unique(package.all_dependencies())
                if all_deps:
                    chunks.append("\n<ul>\n")
                    for alternatives in all_deps:
                        chunks.append(dependency_item("<li>dependency %s is %s", alternatives[0]))
                        if len(alternatives) > 1:
                            # remaining alternatives go into a nested list
                            chunks.append("\n<ul>\n")
                            for alternative in alternatives[1:]:
                                chunks.append(dependency_item("<li>alternative dependency %s is %s", alternative))
                                chunks.append("</li>\n")
                            chunks.append("</ul>\n")
                        chunks.append("</li>\n")
                    chunks.append("</ul>\n")
                chunks.append("</li>\n")

            self._write_template_html(
                os.path.join(self._output_directory, "state-%s.html" % state),
                STATE_BODY_TEMPLATE,
                {
                    "page_title": html_protect("Packages in state " + state + " in " + self._config.section),
                    "state": html_protect(state),
                    "list": "".join(chunks),
                    "aside": aside,
                },
            )

    def archive_logfile(self, vdir, log):
        """Move ``vdir/log`` to ``archive/vdir/log``, relative to the cwd.

        The archive directory is created when it does not exist yet.  A
        failing rename is only reported at debug level and not propagated.
        """
        source = os.path.join(vdir, log)
        archivedir = os.path.join("archive", vdir)
        target = os.path.join(archivedir, log)

        if not os.path.exists(archivedir):
            os.makedirs(archivedir)

        try:
            os.rename(source, target)
        except OSError:
            logging.debug("OSError while archiving %s/%s" % (vdir, log))

    def cleanup_removed_packages(self, logs_by_dir):
        """Archive logs that no longer match a current package.

        *logs_by_dir* maps a directory name to a collection of log file
        names.  Every ``<package>_<version>.log`` entry outside the
        ``reserved`` directory is archived (and removed from *logs_by_dir*
        in place) when the package is gone from the binary database, when
        the logged version differs from ``get_test_versions()``, or when
        the package state is ``outdated``.
        """
        suffix = ".log"
        for vdir in logs_by_dir.keys() - {"reserved"}:
            logs = logs_by_dir[vdir]
            # iterate over a sorted copy, entries are removed from `logs`
            for log in sorted(logs):
                if not log.endswith(suffix):
                    continue
                package, version = log[: -len(suffix)].split("_")

                if not self._binary_db.has_package(package):
                    message = "Archiving %s/%s, package was removed" % (vdir, log)
                else:
                    current = self._binary_db.get_test_versions(package)
                    state = self._binary_db.get_package_state(package, resolve_virtual=False, recurse=False)
                    if version != current:
                        message = "Archiving %s/%s, package is outdated (%s)" % (vdir, log, current)
                    elif state == "outdated":
                        message = "Archiving %s/%s, package is superseded" % (vdir, log)
                    else:
                        # log is still current, keep it
                        continue

                logging.debug(message)
                self.archive_logfile(vdir, log)
                logs.remove(log)

    def generate_html(self):
        """Regenerate the complete HTML output of this section.

        The log directories (``pass``, ``fail``, ...) are looked up
        relative to the current working directory; results are written
        below ``self._output_directory``.  The steps run in a fixed order:
        load the md5 cache, collect and archive logs, copy logs, write the
        per-directory pages, counts, package summaries, known problem
        reports, the section index and the state pages, then store the new
        md5 cache and remove old logs from the output directory.
        """
        md5cachefile = os.path.join(self._output_directory, ".md5cache")
        # NOTE(review): unpickling is only acceptable because this file is
        # expected to be written exclusively by the pickle.dump() below.
        try:
            with open(md5cachefile, "rb") as f:
                self._md5cache["old"] = pickle.load(f, encoding="utf-8")
        except (IOError, EOFError, pickle.UnpicklingError):
            # A missing, truncated or corrupted cache is not fatal: it is
            # simply ignored and a fresh one is written at the end.
            pass

        logging.debug("Finding log files")
        dirs = ["pass", "fail", "bugged", "affected", "reserved", "untestable"]
        logs_by_dir = {}
        for vdir in dirs:
            logs_by_dir[vdir] = find_files_with_suffix(vdir, ".log")

        logging.debug("Archiving logs of obsolete packages")
        self.cleanup_removed_packages(logs_by_dir)

        logging.debug("Copying log files")
        copy_logs(logs_by_dir, self._output_directory)

        logging.debug("Writing per-dir HTML pages")
        self.print_by_dir(self._output_directory, logs_by_dir)

        total_packages = self.write_counts_summary()

        self.create_package_summaries(logs_by_dir)

        logging.debug("Preparing known problem reports")
        failures = dwke_get_failures(self._binary_db, self._problem_list)
        self.prepare_known_problem_reports(failures)

        logging.debug("Writing section index page")
        self.write_section_index_page(dirs, total_packages)

        logging.debug("Writing state pages")
        self.write_state_pages()

        logging.debug(
            "Wrote %d out of %d html files, refreshed %d out of %d unmodified files"
            % (
                self._md5cache["written"],
                len(self._md5cache["new"]),
                self._md5cache["refreshed"],
                self._md5cache["unmodified"],
            )
        )
        # persist the checksums collected during this run for the next one
        with open(md5cachefile, "wb") as f:
            pickle.dump(self._md5cache["new"], f)

        logging.debug("Removing old log files")
        remove_old_logs(logs_by_dir, self._output_directory)

    def generate_summary(self, web_host):
        """Write ``summary.json`` for this section into the output directory.

        An existing summary file is removed first.  The ``json-sections``
        setting lists the sections to report under; an empty value or a
        leading ``default`` selects ``self._config.get_std_distro()``, a
        leading ``none`` disables the summary (nothing is written).
        """
        summary_path = os.path.join(self._output_directory, "summary.json")

        # never leave a stale summary behind
        if os.path.isfile(summary_path):
            os.unlink(summary_path)

        reporting_sections = self._config["json-sections"].split()
        use_default = not reporting_sections or reporting_sections[0] == "default"
        if use_default:
            reporting_sections = [self._config.get_std_distro()]

        if reporting_sections[0] == "none":
            logging.debug("Skipping summary")
            return

        logging.debug("Generating summary")
        db = self._binary_db
        summary = pkgsummary.new_summary()

        for reporting_section in reporting_sections:
            for binpkg in db.get_all_packages():
                pkgname = binpkg["Package"]
                flag = pkgsummary.get_flag(db.get_package_state(pkgname))
                # the block count is only looked up for flag "F"
                block_cnt = db.block_count(pkgname) if flag == "F" else 0
                srcpkg = db.get_package(pkgname).source()
                url = source_summary_url(web_host, self._doc_root, self._config.section, srcpkg)

                pkgsummary.add_summary(summary, reporting_section, srcpkg, flag, block_cnt, url)

        pkgsummary.write_summary(summary, summary_path)

    def generate_output(self, output_directory, section_names, problem_list, web_host):
        """Generate the HTML pages and the JSON summary of this section.

        Nothing is done when the section's ``max-reserved`` setting is 0.
        Otherwise the output goes to ``<output_directory>/<section>``
        (created when missing).  Problems named in
        ``exclude-known-problems`` are dropped from *problem_list* for
        this section.  ``generate_html()`` runs with the section directory
        as working directory; the previous working directory is restored
        afterwards, also when ``generate_html()`` raises.
        """
        # skip output generation for disabled sections
        if int(self._config["max-reserved"]) == 0:
            return

        self._section_names = section_names
        self._section_navigation = create_section_navigation(self._section_names, self._config.section, self._doc_root)
        self._output_directory = os.path.abspath(os.path.join(output_directory, self._config.section))
        os.makedirs(self._output_directory, exist_ok=True)

        self._problem_list = problem_list
        if self._config["exclude-known-problems"]:
            excluded = self._config["exclude-known-problems"].split()
            self._problem_list = [p for p in problem_list if p.name not in excluded]

        oldcwd = os.getcwd()
        os.chdir(self._section_directory)
        try:
            self.generate_html()
        finally:
            # The caller may catch the exception and go on with the next
            # section, so the working directory must not stay changed.
            os.chdir(oldcwd)

        self.generate_summary(web_host)


def sections_by_precedence(sections):
    """Return *sections* sorted by their configured ``precedence`` value.

    Each section's configuration is read from ``CONFIG_FILE`` (with
    ``global`` as defaults section).  Sections with equal precedence keep
    their relative input order because the position is the second sort key.
    """
    ranking = {}
    for position, name in enumerate(sections):
        section_config = Config(section=name, defaults_section="global")
        section_config.read(CONFIG_FILE)
        ranking[name] = (section_config["precedence"], position)

    return sorted(sections, key=ranking.__getitem__)


def generate_global_summary(dir, sections):
    """Merge the per-section ``summary.json`` files into ``<dir>/summary.json``.

    Sections are visited in the order given by ``sections_by_precedence()``;
    sections without a summary file are skipped.
    """
    json_name = "summary.json"

    logging.debug("Generating global summary")

    merged = pkgsummary.new_summary()

    for section in sections_by_precedence(sections):
        sec_path = os.path.join(dir, section, json_name)
        if not os.path.isfile(sec_path):
            continue
        merged = pkgsummary.merge_summary(merged, pkgsummary.read_summary(sec_path))

    pkgsummary.write_summary(merged, os.path.join(dir, json_name))


# START detect_well_known_errors


def get_bug_text(logpath):
    """Return the content of the bug file belonging to *logpath*.

    The bug file path is *logpath* with its extension replaced by
    ``BUG_EXT``; an empty string is returned when that file does not exist.
    """
    bugpath = replace_ext(logpath, BUG_EXT)
    return read_file(bugpath) if os.path.exists(bugpath) else ""


def populate_tpl(tmpl, vals):
    """Return *tmpl* with each ``$KEY`` replaced by ``str(vals[KEY])``.

    Keys are applied one after another in the iteration order of *vals*,
    so text inserted for an earlier key is still visible to later keys.

    Keys and values are treated as literal text.  Values therefore may
    contain backslashes or group references such as ``\\1`` or ``\\d``
    (e.g. grep commands or log excerpts) without being mangled or raising
    ``re.error``, which a regex replacement template would do.
    """
    for key, value in vals.items():
        tmpl = tmpl.replace("$%s" % key, str(value))

    return tmpl


def dwke_update_tpl(section, problem, failures, ftpl, ptpl, pkgsdb, srcdb):
    """Render the report text for one known problem.

    Every entry of *failures* is rendered with the per-failure template
    *ftpl*; the result is embedded as ``$PACKAGE_LIST`` into the
    per-problem template *ptpl*.  In front of the first failure whose
    ``where`` is not ``fail`` a ``</ul><ul>`` separator is inserted once.
    Returns an empty string when no package text was produced.
    """
    rendered = []
    separator_pending = True
    for failure in failures:
        bin_pkg = get_pkg(failure.pkgspec)
        src_pkg = pkgsdb.get_package(bin_pkg).source()
        rdep_cnt = pkgsdb.rrdep_count(bin_pkg)

        # markers shown next to the package name
        markers = []
        if pkgsdb.get_control_header(bin_pkg, "Architecture") == "all":
            markers.append(" [all]")
        if pkgsdb.get_package(bin_pkg).source_version() != srcdb.get_version(src_pkg):
            markers.append(" [cruft]")

        if separator_pending and failure.where != "fail":
            separator_pending = False
            rendered.append("</ul><ul>\n")

        log = os.path.join(failure.where, failure.pkgspec + LOG_EXT)

        rendered.append(
            populate_tpl(
                ftpl,
                {
                    "LOG": log,
                    "PACKAGE": bin_pkg,
                    "ARCH": "".join(markers),
                    "BUG": get_bug_text(log),
                    "RDEPS": rdep_cnt,
                    "SPKG": src_pkg,
                    "SSUBDIR": source_subdir(src_pkg),
                    "SECTION": section,
                },
            )
        )

    pkg_text = "".join(rendered)
    if not pkg_text:
        return ""

    return populate_tpl(
        ptpl,
        {
            "HEADER": problem.HEADER,
            "SECTION": section,
            "HELPTEXT": problem.HELPTEXT,
            "COMMAND": problem.get_command(),
            "PACKAGE_LIST": pkg_text,
            "COUNT": len(failures),
        },
    )


def dwke_get_failures(pkgsdb, problem_list):
    """Update the known-problem cache files and return the failures.

    The log and KPR files found in ``KPR_DIRS`` are collected, cache files
    are cleaned and (after re-reading the KPR files) created via
    ``make_kprs()``.  The resulting ``FailureManager`` is sorted with
    ``sort_by_bugged_and_rdeps()`` and returned; the number of matches per
    problem is logged at info level.
    """
    logs = get_file_dict(KPR_DIRS, LOG_EXT)
    removed = clean_cache_files(logs, get_file_dict(KPR_DIRS, KPR_EXT))
    # the KPR files are scanned again after the cleanup
    added = make_kprs(logs, get_file_dict(KPR_DIRS, KPR_EXT), problem_list)

    manager = FailureManager(logs)
    manager.sort_by_bugged_and_rdeps(pkgsdb)

    logging.info("parsed logfiles: %d removed, %d added" % (removed, added))
    for problem in problem_list:
        hits = len(manager.filtered(problem.name))
        if not hits:
            continue
        logging.info("%7d %s" % (hits, problem.name))

    return manager


# END detect_well_known_errors


def make_bts_stats_graph(master_dir, out_dir):
    """Plot ``<master_dir>/bts_stats.txt`` to ``<out_dir>/images/bts_stats.png``.

    The plot is drawn by R through rpy2 (``importr`` / ``robjects``).  When
    the optional rpy2 import at the top of the file failed, those names are
    undefined and calling this function raises ``NameError``.
    """
    stats_path = os.path.join(master_dir, "bts_stats.txt")
    image_path = os.path.join(out_dir, "images", "bts_stats.png")

    grdevices = importr("grDevices")
    grdevices.png(file=image_path, width=1600, height=900, pointsize=10, res=100)

    run = robjects.r
    commands = (
        't <- (read.table("' + stats_path + '",sep=",",header=1,row.names=1))',
        # make graph since day 1
        "v <- t[c(4, 2, 3, 1)]",  # reorder columns
        # tango colors again:
        'palette(c("#a40000", "#ef2929", "#4e9a06", "#8ae234"))',
        'barplot(t(v),col = 1:4, \
        main="Bugs with usertag=piuparts and user=debian-qa@lists.debian.org", \
        xlab="", ylab="Total number of RC and non-RC bugs submitted and closed", space=0, border=NA)',
        'legend("right", legend=rev(colnames(v)), fill=rev(1:4), inset=0.05, bty="n")',
    )
    for command in commands:
        run(command)

    grdevices.dev_off()


def _normalize_doc_root(doc_root):
    """Return *doc_root* stripped, with a leading and without one trailing "/"."""
    doc_root = doc_root.strip()
    if not doc_root.startswith("/"):
        doc_root = "/" + doc_root
    if doc_root.endswith("/"):
        doc_root = doc_root[:-1]
    return doc_root


def _process_sections(
    process_section_names, section_names, master_directory, output_directory, doc_root, problem_list, web_host
):
    """Generate the output of every requested section, retrying busy ones later."""
    packagedb_cache = {}
    todo = deque((name, 0) for name in process_section_names)
    while todo:
        section_name, next_try = todo.popleft()
        now = time.time()
        if now < next_try:
            logging.info("Sleeping while section is busy")
            # waits for the remaining time (at least 30s) plus another 30s
            time.sleep(max(30, next_try - now) + 30)
        try:
            section_directory = os.path.join(master_directory, section_name)
            if not os.path.exists(section_directory):
                raise MissingSection("", section_name)
            with open(os.path.join(section_directory, "master.lock"), "w") as lock:
                try:
                    fcntl.flock(lock, fcntl.LOCK_EX | fcntl.LOCK_NB)
                except IOError:
                    raise Busy()

                # the section lock is held while the output is generated
                section = Section(
                    section_name,
                    master_directory,
                    doc_root,
                    packagedb_cache=packagedb_cache,
                )
                section.generate_output(output_directory, section_names, problem_list, web_host)
        except Busy:
            logging.info("Section is busy")
            # queue the section again, not to be retried before 300s passed
            todo.append((section_name, time.time() + 300))
        except MissingSection as e:
            logging.error("Configuration Error in section '%s': %s" % (section_name, e))
        except (HTTPError, URLError) as e:
            logging.error("Failed to fetch Packages files for section '%s' : %s" % (section_name, e))


def _write_static_pages(output_directory, section_navigation, doc_root):
    """Render the index, news and bug_howto pages from their .tpl files."""
    pages = (
        ("index", "About piuparts.debian.org"),
        ("news", "News about piuparts.debian.org"),
        ("bug_howto", "How to file bugs based on piuparts.d.o results"),
    )
    for page, page_title in pages:
        body = read_file(os.path.join(output_directory, page + ".tpl"))
        write_template_html(
            os.path.join(output_directory, page + ".html"),
            body,
            {
                "page_title": page_title,
                "section_navigation": section_navigation,
                "doc_root": doc_root,
            },
        )


def _overview_rows(section_names, doc_root):
    """Return the table rows of the overview page, four sections per row."""
    # Links are prefixed with doc_root so they also work when the site is
    # not served from "/"; for an empty doc_root the URLs are "/<section>/".
    cell = (
        '<td><a href="%s/%s/"><img src="%s/%s/states.png" width="99%%" alt="Binary package states in %s"></a></td>'
    )
    rows = "<tr>"
    for counter, section in enumerate(section_names, 1):
        rows += cell % (doc_root, section, doc_root, section, section)
        if counter % 4 == 0:
            rows += "</tr><tr>"
    rows += "</tr>"
    return rows


def main():
    """Generate the piuparts report.

    Reads the ``global`` section of ``CONFIG_FILE``.  Sections given on the
    command line are processed, otherwise all configured sections.  While
    holding ``report.lock`` in the master directory the per-section output,
    the static pages, the overview page, the bug statistics graph and the
    global summary are written.  If the master directory does not exist it
    is created and nothing else is done.  If another process holds the
    lock, the program exits (with a message only when stdout is a tty).
    """
    setup_logging(logging.DEBUG, None)
    global_config = Config(section="global")
    global_config.read(CONFIG_FILE)
    if global_config["proxy"]:
        os.environ["http_proxy"] = global_config["proxy"]
    section_names = global_config["sections"].split()
    process_section_names = sys.argv[1:] if len(sys.argv) > 1 else section_names
    master_directory = global_config["master-directory"]
    output_directory = global_config["output-directory"]
    web_host = global_config["web-host"]

    doc_root = _normalize_doc_root(global_config["doc-root"])
    problem_list = create_problem_list(global_config["known-problem-directory"])

    if not os.path.exists(master_directory):
        logging.debug("Warning: %s does not exist!?! Creating it for you now." % master_directory)
        os.makedirs(master_directory)
        return

    with open(os.path.join(master_directory, "report.lock"), "w") as lock:
        try:
            fcntl.flock(lock, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except IOError:
            if sys.stdout.isatty():
                sys.exit("another piuparts-report process is already running")
            sys.exit(0)

        create_file(
            os.path.join(output_directory, "sections.yaml"),
            yaml.dump(section_names, default_flow_style=False),
        )
        _process_sections(
            process_section_names,
            section_names,
            master_directory,
            output_directory,
            doc_root,
            problem_list,
            web_host,
        )

        # static pages
        logging.debug("Writing static pages")
        section_navigation = create_section_navigation(section_names, "sid", doc_root)
        _write_static_pages(output_directory, section_navigation, doc_root)

        # overview page
        write_template_html(
            os.path.join(output_directory, "overview.html"),
            BASIC_BODY_TEMPLATE,
            {
                "page_title": "Overview of suites tested by piuparts.debian.org",
                "section_navigation": section_navigation,
                "doc_root": doc_root,
                "rows": _overview_rows(section_names, doc_root),
            },
        )

        # create graph about bugs filed
        try:
            make_bts_stats_graph(master_directory, output_directory)
        except Exception:
            # best effort: the report is complete without the graph
            logging.debug("Error generating the graph images, probably python-rpy2 is not installed, disabling graphs.")

        generate_global_summary(output_directory, section_names)

        logging.debug("Done")


# Run the report generator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

# vi:set et ts=4 sw=4 :
