1#!/usr/bin/env python3
2
3# Copyright (C) 2014 by Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
4
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation; either version 2 of the License, or
8# (at your option) any later version.
9#
10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13# General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with this program; if not, write to the Free Software
17# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
19import sys
20import os
21import os.path
22import argparse
23import csv
24import collections
25import math
26
27try:
28    import matplotlib
29    matplotlib.use('Agg')
30    import matplotlib.font_manager as fm
31    import matplotlib.pyplot as plt
32except ImportError:
33    sys.stderr.write("You need python-matplotlib to generate the size graph\n")
34    exit(1)
35
36
class Config:
    """Settings shared by the whole script.

    main() overwrites biggest_first, iec and size_limit from the command
    line; colors is only ever read.
    """

    # When True, packages are sorted from largest to smallest.
    biggest_first = False
    # When True, sizes use IEC prefixes (powers of 1024) instead of SI ones.
    iec = False
    # Packages below this fraction of the total size are grouped as "Other".
    size_limit = 0.01
    # Colours handed to the pie chart.
    colors = [
        '#e60004',
        '#f28e00',
        '#ffed00',
        '#940084',
        '#2e1d86',
        '#0068b5',
        '#009836',
        '#97c000',
    ]
43
44
#
# Record one file in 'filesdict', provided it exists on disk and is not
# a symbolic link. Each entry of 'filesdict' has the relative path of
# the file as the key, and as the value a tuple containing the name of
# the package to which the file belongs and the size of the file.
#
# When the relative path ends in ".py", the same path with a trailing
# "c" (the compiled .pyc file) is handled first, under the same rules.
#
# filesdict: the dict to which the file is added
# relpath: relative path of the file, used as the key
# abspath: path at which the file is looked up on disk
# pkg: package to which the file belongs
#
def add_file(filesdict, relpath, abspath, pkg):
    if relpath.endswith(".py"):
        # a compiled .pyc file may have been installed next to the source
        add_file(filesdict, relpath + "c", abspath + "c", pkg)

    # missing files and symbolic links are both left out of the dict
    recordable = os.path.exists(abspath) and not os.path.islink(abspath)
    if recordable:
        filesdict[relpath] = (pkg, os.stat(abspath).st_size)
66
67
#
# Build the dict describing which package installed which file. Each
# key is the path of a file in the root filesystem, and the value is a
# tuple containing two elements: the name of the package to which this
# file belongs and the size of the file.
#
# The entries are read from build/packages-file-list.txt inside
# 'builddir', one "package,path" pair per line, and the files are
# looked up under the target/ directory of 'builddir'.
#
# builddir: path to the Buildroot output directory
#
def build_package_dict(builddir):
    listfile = os.path.join(builddir, "build", "packages-file-list.txt")
    targetdir = os.path.join(builddir, "target")
    result = {}
    with open(listfile) as entries:
        for entry in entries:
            # split on the first comma only: the rest is the file path
            pkg, listed = entry.split(",", 1)
            # drop the surrounding whitespace, then the first two characters
            # (assumes every listed path starts with './' - they are removed
            # unconditionally)
            relpath = listed.strip()[2:]
            add_file(result, relpath, os.path.join(targetdir, relpath), pkg)
    return result
86
87
#
# This function builds a dictionary that contains the name of a
# package as key, and the size of the files installed by this package
# as the value.
#
# Every file found while walking the target directory is counted once:
# symbolic links are skipped, and so are further hard links to a file
# that has already been counted. Files that are missing from
# 'filesdict' are reported on standard output and accounted to the
# "unknown" package.
#
# filesdict: dictionary with the name of the files as key, and as
# value a tuple containing the name of the package to which the files
# belongs, and the size of the file. As returned by
# build_package_dict.
#
# builddir: path to the Buildroot output directory
#
# Returns a collections.defaultdict(int) mapping package name to size.
#
def build_package_size(filesdict, builddir):
    pkgsize = collections.defaultdict(int)
    targetdir = os.path.join(builddir, "target")

    # An inode number is only unique within one device, so a file is
    # identified by its (device, inode) pair when detecting hard links.
    seenfiles = set()
    for root, _, files in os.walk(targetdir):
        for f in files:
            fpath = os.path.join(root, f)
            if os.path.islink(fpath):
                continue

            st = os.stat(fpath)
            fileid = (st.st_dev, st.st_ino)
            if fileid in seenfiles:
                # hard link to a file that was already counted
                continue
            seenfiles.add(fileid)

            frelpath = os.path.relpath(fpath, targetdir)
            if frelpath in filesdict:
                pkg = filesdict[frelpath][0]
            else:
                print("WARNING: %s is not part of any package" % frelpath)
                pkg = "unknown"

            pkgsize[pkg] += st.st_size

    return pkgsize
127
128
#
# Given a dict returned by build_package_size(), this function
# generates a pie chart of the size installed by each package and
# saves it to 'outputf'.
#
# Packages are taken in size order (largest first when
# Config.biggest_first is set). Those smaller than Config.size_limit
# times the total are summed into a single "Other" slice. The
# "unknown" package, unless it was folded into "Other", is drawn as
# "Unknown". Both special slices come after the named packages.
#
# pkgsize: dictionary with the name of the package as a key, and the
# size as the value, as returned by build_package_size.
#
# outputf: output file for the graph
#
def draw_graph(pkgsize, outputf):
    # Format a size in bytes with an SI or IEC prefix (per Config.iec).
    def size2string(sz):
        if Config.iec:
            divider = 1024.0
            prefixes = ['', 'Ki', 'Mi', 'Gi', 'Ti']
        else:
            divider = 1000.0
            prefixes = ['', 'k', 'M', 'G', 'T']
        # '>=' so that exactly one unit of the next prefix is scaled too
        # (1000 bytes is shown as '1.00 kB', not '1000 B')
        while sz >= divider and len(prefixes) > 1:
            prefixes = prefixes[1:]
            sz = sz/divider
        # precision is made so that there are always at least three meaningful
        # digits displayed (e.g. '3.14' and '10.4', not just '3' and '10')
        if sz >= 1000 or sz <= 0:
            # no decimals needed; this also keeps 0 away from log10(),
            # which raises ValueError for it
            precision = 0
        else:
            precision = int(2-math.floor(math.log10(sz)))
        return '{:.{prec}f} {}B'.format(sz, prefixes[0], prec=precision)

    total = sum(pkgsize.values())
    labels = []
    values = []
    other_value = 0
    unknown_value = 0
    for (p, sz) in sorted(pkgsize.items(), key=lambda x: x[1],
                          reverse=Config.biggest_first):
        if sz < (total * Config.size_limit):
            # too small for a slice of its own
            other_value += sz
        elif p == "unknown":
            # kept aside so that it is appended after the named packages
            unknown_value = sz
        else:
            labels.append("%s (%s)" % (p, size2string(sz)))
            values.append(sz)
    if unknown_value != 0:
        labels.append("Unknown (%s)" % (size2string(unknown_value)))
        values.append(unknown_value)
    if other_value != 0:
        labels.append("Other (%s)" % (size2string(other_value)))
        values.append(other_value)

    plt.figure()
    patches, texts, autotexts = plt.pie(values, labels=labels,
                                        autopct='%1.1f%%', shadow=True,
                                        colors=Config.colors)
    # Reduce text size
    proptease = fm.FontProperties()
    proptease.set_size('xx-small')
    plt.setp(autotexts, fontproperties=proptease)
    plt.setp(texts, fontproperties=proptease)

    plt.suptitle("Filesystem size per package", fontsize=18, y=.97)
    plt.title("Total filesystem size: %s" % (size2string(total)), fontsize=10,
              y=.96)
    plt.savefig(outputf)
189
190
#
# Generate a CSV file with statistics about the size of each file, its
# size contribution to the package and to the overall system.
#
# One row is written per entry of 'filesdict', after a header row.
# Percentages are formatted with one decimal; a percentage whose
# reference size (package or system total) is 0 is reported as 0.0.
#
# filesdict: dictionary with the name of the files as key, and as
# value a tuple containing the name of the package to which the files
# belongs, and the size of the file. As returned by
# build_package_dict.
#
# pkgsizes: dictionary with the name of the package as a key, and the
# size as the value, as returned by build_package_size. It is not
# modified; a package missing from it counts as size 0.
#
# outputf: output CSV file
#
def gen_files_csv(filesdict, pkgsizes, outputf):
    total = sum(pkgsizes.values())
    # newline='' leaves line endings to the csv module, as its
    # documentation requires for files handed to csv.writer
    with open(outputf, 'w', newline='') as csvfile:
        wr = csv.writer(csvfile, delimiter=',', quoting=csv.QUOTE_MINIMAL)
        wr.writerow(["File name",
                     "Package name",
                     "File size",
                     "Package size",
                     "File size in package (%)",
                     "File size in system (%)"])
        for f, (pkgname, filesize) in filesdict.items():
            # .get() rather than indexing: indexing a defaultdict would
            # insert the missing package into the caller's dictionary
            pkgsize = pkgsizes.get(pkgname, 0)

            # guard both divisions against a zero reference size
            if pkgsize == 0:
                percent_pkg = 0
            else:
                percent_pkg = float(filesize) / pkgsize * 100

            if total == 0:
                percent_total = 0
            else:
                percent_total = float(filesize) / total * 100

            wr.writerow([f, pkgname, filesize, pkgsize,
                         "%.1f" % percent_pkg,
                         "%.1f" % percent_total])
230
231
#
# Generate a CSV file with statistics about the size of each package,
# and their size contribution to the overall system.
#
# One row is written per package, after a header row. The percentage
# is formatted with one decimal, and is reported as 0.0 when the sum
# of all package sizes is 0.
#
# pkgsizes: dictionary with the name of the package as a key, and the
# size as the value, as returned by build_package_size.
#
# outputf: output CSV file
#
def gen_packages_csv(pkgsizes, outputf):
    total = sum(pkgsizes.values())
    # newline='' leaves line endings to the csv module, as its
    # documentation requires for files handed to csv.writer
    with open(outputf, 'w', newline='') as csvfile:
        wr = csv.writer(csvfile, delimiter=',', quoting=csv.QUOTE_MINIMAL)
        wr.writerow(["Package name", "Package size",
                     "Package size in system (%)"])
        for (pkg, size) in pkgsizes.items():
            # a zero total would otherwise raise ZeroDivisionError
            percent = float(size) / total * 100 if total else 0
            wr.writerow([pkg, size, "%.1f" % percent])
249
250
#
# Custom argparse action shared by --iec, --binary, --si and --decimal.
# The option takes no value: the destination is set to True when the
# option was spelled --iec or --binary, and to False otherwise.
#
class PrefixAction(argparse.Action):
    def __init__(self, option_strings, dest, **kwargs):
        # "type" and "nargs" are fixed below, so callers may not pass them
        rejected = [name for name in ("type", "nargs") if name in kwargs]
        if rejected:
            raise ValueError('"{}" not allowed'.format(rejected[0]))
        super().__init__(option_strings, dest, nargs=0, type=bool, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        # the stored value depends only on which spelling was used
        wants_iec = option_string == "--iec" or option_string == "--binary"
        setattr(namespace, self.dest, wants_iec)
264
265
#
# Command-line entry point: parse the options, store the display
# settings in Config, collect the per-file and per-package sizes from
# the build directory, then write each output that was requested.
#
def main():
    argp = argparse.ArgumentParser(description='Draw size statistics graphs')

    argp.add_argument("--builddir", '-i', metavar="BUILDDIR", required=True,
                      help="Buildroot output directory")
    argp.add_argument("--graph", '-g', metavar="GRAPH",
                      help="Graph output file (.pdf or .png extension)")
    argp.add_argument("--file-size-csv", '-f', metavar="FILE_SIZE_CSV",
                      help="CSV output file with file size statistics")
    argp.add_argument("--package-size-csv", '-p', metavar="PKG_SIZE_CSV",
                      help="CSV output file with package size statistics")
    argp.add_argument("--biggest-first", action='store_true',
                      help="Sort packages in decreasing size order, "
                           "rather than in increasing size order")
    argp.add_argument("--iec", "--binary", "--si", "--decimal",
                      action=PrefixAction,
                      help="Use IEC (binary, powers of 1024) or SI (decimal, "
                           "powers of 1000, the default) prefixes")
    argp.add_argument("--size-limit", "-l", type=float,
                      help='Under this size ratio, files are accounted to '
                           'the generic "Other" package. Default: 0.01 (1%%)')
    opts = argp.parse_args()

    Config.biggest_first = opts.biggest_first
    Config.iec = opts.iec
    # the built-in default is kept unless a limit was given explicitly
    limit = opts.size_limit
    if limit is not None:
        if limit < 0.0 or limit > 1.0:
            argp.error("--size-limit must be in [0.0..1.0]")
        Config.size_limit = limit

    # Which package installed which file, then the size of each package
    files_by_path = build_package_dict(opts.builddir)
    size_by_pkg = build_package_size(files_by_path, opts.builddir)

    # Each output is produced only when its option was given
    if opts.graph:
        draw_graph(size_by_pkg, opts.graph)
    if opts.file_size_csv:
        gen_files_csv(files_by_path, size_by_pkg, opts.file_size_csv)
    if opts.package_size_csv:
        gen_packages_csv(size_by_pkg, opts.package_size_csv)


# Only run when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
312