1#!/usr/bin/env python3 2 3# Copyright (C) 2014 by Thomas Petazzoni <thomas.petazzoni@free-electrons.com> 4 5# This program is free software; you can redistribute it and/or modify 6# it under the terms of the GNU General Public License as published by 7# the Free Software Foundation; either version 2 of the License, or 8# (at your option) any later version. 9# 10# This program is distributed in the hope that it will be useful, 11# but WITHOUT ANY WARRANTY; without even the implied warranty of 12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13# General Public License for more details. 14# 15# You should have received a copy of the GNU General Public License 16# along with this program; if not, write to the Free Software 17# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 18 19import sys 20import os 21import os.path 22import argparse 23import csv 24import collections 25import math 26 27try: 28 import matplotlib 29 matplotlib.use('Agg') 30 import matplotlib.font_manager as fm 31 import matplotlib.pyplot as plt 32except ImportError: 33 sys.stderr.write("You need python-matplotlib to generate the size graph\n") 34 exit(1) 35 36 37class Config: 38 biggest_first = False 39 iec = False 40 size_limit = 0.01 41 colors = ['#e60004', '#f28e00', '#ffed00', '#940084', 42 '#2e1d86', '#0068b5', '#009836', '#97c000'] 43 44 45# 46# This function adds a new file to 'filesdict', after checking its 47# size. The 'filesdict' contain the relative path of the file as the 48# key, and as the value a tuple containing the name of the package to 49# which the file belongs and the size of the file. 
#
# filesdict: the dict to which the file is added
# relpath: relative path of the file
# abspath: absolute path to the file
# pkg: package to which the file belongs
#
def add_file(filesdict, relpath, abspath, pkg):
    """Record 'relpath' in 'filesdict' as a (pkg, size) tuple.

    Symbolic links and paths that cannot be stat'ed (typically files
    that do not exist in the target directory) are silently ignored.
    For a '.py' file, the sibling '.pyc' file is recorded as well when
    it exists, and attributed to the same package.
    """
    if relpath.endswith(".py"):
        # also check for compiled .pyc file
        add_file(filesdict, relpath + "c", abspath + "c", pkg)
    if os.path.islink(abspath):
        return
    # Stat directly instead of testing for existence first: a single
    # system call, and no window in which the file can vanish.
    try:
        size = os.stat(abspath).st_size
    except OSError:
        return
    filesdict[relpath] = (pkg, size)


#
# This function returns a dict where each key is the path of a file in
# the root filesystem, and the value is a tuple containing two
# elements: the name of the package to which this file belongs and the
# size of the file.
#
# builddir: path to the Buildroot output directory
#
def build_package_dict(builddir):
    """Parse build/packages-file-list.txt into a {relpath: (pkg, size)} dict.

    Each line of the list has the form '<package>,./<path>'. Blank
    lines are skipped; a non-blank line without a comma raises
    ValueError.
    """
    filesdict = {}
    targetdir = os.path.join(builddir, "target")
    listpath = os.path.join(builddir, "build", "packages-file-list.txt")
    with open(listpath) as f:
        for line in f:
            if not line.strip():
                continue
            pkg, sep, fpath = line.partition(",")
            if not sep:
                raise ValueError("malformed line in %s: %r" % (listpath, line))
            fpath = fpath.strip()
            # remove the initial './' in each file path, but only when it
            # is really there so that other paths are not truncated
            if fpath.startswith("./"):
                fpath = fpath[2:]
            add_file(filesdict, fpath, os.path.join(targetdir, fpath), pkg)
    return filesdict


#
# This function builds a dictionary that contains the name of a
# package as key, and the size of the files installed by this package
# as the value.
#
# filesdict: dictionary with the name of the files as key, and as
# value a tuple containing the name of the package to which the file
# belongs, and the size of the file. As returned by
# build_package_dict.
97# 98# builddir: path to the Buildroot output directory 99# 100def build_package_size(filesdict, builddir): 101 pkgsize = collections.defaultdict(int) 102 103 seeninodes = set() 104 for root, _, files in os.walk(os.path.join(builddir, "target")): 105 for f in files: 106 fpath = os.path.join(root, f) 107 if os.path.islink(fpath): 108 continue 109 110 st = os.stat(fpath) 111 if st.st_ino in seeninodes: 112 # hard link 113 continue 114 else: 115 seeninodes.add(st.st_ino) 116 117 frelpath = os.path.relpath(fpath, os.path.join(builddir, "target")) 118 if frelpath not in filesdict: 119 print("WARNING: %s is not part of any package" % frelpath) 120 pkg = "unknown" 121 else: 122 pkg = filesdict[frelpath][0] 123 124 pkgsize[pkg] += st.st_size 125 126 return pkgsize 127 128 129# 130# Given a dict returned by build_package_size(), this function 131# generates a pie chart of the size installed by each package. 132# 133# pkgsize: dictionary with the name of the package as a key, and the 134# size as the value, as returned by build_package_size. 135# 136# outputf: output file for the graph 137# 138def draw_graph(pkgsize, outputf): 139 def size2string(sz): 140 if Config.iec: 141 divider = 1024.0 142 prefixes = ['', 'Ki', 'Mi', 'Gi', 'Ti'] 143 else: 144 divider = 1000.0 145 prefixes = ['', 'k', 'M', 'G', 'T'] 146 while sz > divider and len(prefixes) > 1: 147 prefixes = prefixes[1:] 148 sz = sz/divider 149 # precision is made so that there are always at least three meaningful 150 # digits displayed (e.g. 
'3.14' and '10.4', not just '3' and '10') 151 precision = int(2-math.floor(math.log10(sz))) if sz < 1000 else 0 152 return '{:.{prec}f} {}B'.format(sz, prefixes[0], prec=precision) 153 154 total = sum(pkgsize.values()) 155 labels = [] 156 values = [] 157 other_value = 0 158 unknown_value = 0 159 for (p, sz) in sorted(pkgsize.items(), key=lambda x: x[1], 160 reverse=Config.biggest_first): 161 if sz < (total * Config.size_limit): 162 other_value += sz 163 elif p == "unknown": 164 unknown_value = sz 165 else: 166 labels.append("%s (%s)" % (p, size2string(sz))) 167 values.append(sz) 168 if unknown_value != 0: 169 labels.append("Unknown (%s)" % (size2string(unknown_value))) 170 values.append(unknown_value) 171 if other_value != 0: 172 labels.append("Other (%s)" % (size2string(other_value))) 173 values.append(other_value) 174 175 plt.figure() 176 patches, texts, autotexts = plt.pie(values, labels=labels, 177 autopct='%1.1f%%', shadow=True, 178 colors=Config.colors) 179 # Reduce text size 180 proptease = fm.FontProperties() 181 proptease.set_size('xx-small') 182 plt.setp(autotexts, fontproperties=proptease) 183 plt.setp(texts, fontproperties=proptease) 184 185 plt.suptitle("Filesystem size per package", fontsize=18, y=.97) 186 plt.title("Total filesystem size: %s" % (size2string(total)), fontsize=10, 187 y=.96) 188 plt.savefig(outputf) 189 190 191# 192# Generate a CSV file with statistics about the size of each file, its 193# size contribution to the package and to the overall system. 194# 195# filesdict: dictionary with the name of the files as key, and as 196# value a tuple containing the name of the package to which the files 197# belongs, and the size of the file. As returned by 198# build_package_dict. 199# 200# pkgsize: dictionary with the name of the package as a key, and the 201# size as the value, as returned by build_package_size. 
#
# outputf: output CSV file
#
def gen_files_csv(filesdict, pkgsizes, outputf):
    """Write one CSV row per file with its size and relative weights.

    Columns: file name, package name, file size, package size, share of
    the file in its package (%) and share of the file in the whole
    system (%). A share is reported as 0.0 when its denominator is 0.
    'pkgsizes' is only read, never modified.
    """
    total = sum(pkgsizes.values())

    def percent(part, whole):
        # An empty package or an empty system would divide by zero.
        if whole == 0:
            return 0
        return float(part) / whole * 100

    # newline='' lets the csv module control line endings itself
    with open(outputf, 'w', newline='') as csvfile:
        wr = csv.writer(csvfile, delimiter=',', quoting=csv.QUOTE_MINIMAL)
        wr.writerow(["File name",
                     "Package name",
                     "File size",
                     "Package size",
                     "File size in package (%)",
                     "File size in system (%)"])
        for f, (pkgname, filesize) in filesdict.items():
            # .get() rather than indexing: 'pkgsizes' is usually a
            # defaultdict and indexing would insert the missing package,
            # which would then show up in later reports.
            pkgsize = pkgsizes.get(pkgname, 0)
            wr.writerow([f, pkgname, filesize, pkgsize,
                         "%.1f" % percent(filesize, pkgsize),
                         "%.1f" % percent(filesize, total)])


#
# Generate a CSV file with statistics about the size of each package,
# and their size contribution to the overall system.
#
# pkgsizes: dictionary with the name of the package as a key, and the
# size as the value, as returned by build_package_size.
#
# outputf: output CSV file
#
def gen_packages_csv(pkgsizes, outputf):
    """Write one CSV row per package with its size and share of the system.

    The share is reported as 0.0 when the total size is 0.
    """
    total = sum(pkgsizes.values())
    # newline='' lets the csv module control line endings itself
    with open(outputf, 'w', newline='') as csvfile:
        wr = csv.writer(csvfile, delimiter=',', quoting=csv.QUOTE_MINIMAL)
        wr.writerow(["Package name", "Package size",
                     "Package size in system (%)"])
        for (pkg, size) in pkgsizes.items():
            # An empty system would divide by zero.
            share = float(size) / total * 100 if total else 0
            wr.writerow([pkg, size, "%.1f" % share])


#
# Our special action for --iec, --binary, --si, --decimal
#
class PrefixAction(argparse.Action):
    """Store True for --iec/--binary and False for any other alias."""

    def __init__(self, option_strings, dest, **kwargs):
        # The option never takes a value, so the caller may not choose
        # 'type' or 'nargs'.
        for key in ["type", "nargs"]:
            if key in kwargs:
                raise ValueError('"{}" not allowed'.format(key))
        super(PrefixAction, self).__init__(option_strings, dest, nargs=0,
                                           type=bool, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        setattr(namespace, self.dest, option_string in ("--iec", "--binary"))


def main():
    """Parse the command line and generate the requested outputs."""
    parser = argparse.ArgumentParser(description='Draw size statistics graphs')

    parser.add_argument("--builddir", '-i', metavar="BUILDDIR", required=True,
                        help="Buildroot output directory")
    parser.add_argument("--graph", '-g', metavar="GRAPH",
                        help="Graph output file (.pdf or .png extension)")
    parser.add_argument("--file-size-csv", '-f', metavar="FILE_SIZE_CSV",
                        help="CSV output file with file size statistics")
    parser.add_argument("--package-size-csv", '-p', metavar="PKG_SIZE_CSV",
                        help="CSV output file with package size statistics")
    parser.add_argument("--biggest-first", action='store_true',
                        help="Sort packages in decreasing size order, " +
                        "rather than in increasing size order")
    # default=False so that Config.iec is always a real boolean, even
    # when none of the four aliases is given
    parser.add_argument("--iec", "--binary", "--si", "--decimal",
                        action=PrefixAction, default=False,
                        help="Use IEC (binary, powers of 1024) or SI (decimal, "
                             "powers of 1000, the default) prefixes")
    parser.add_argument("--size-limit", "-l", type=float,
                        help='Under this size ratio, files are accounted to ' +
                             'the generic "Other" package. Default: 0.01 (1%%)')
    args = parser.parse_args()

    Config.biggest_first = args.biggest_first
    Config.iec = args.iec
    if args.size_limit is not None:
        # Written as a positive range test so that NaN is rejected too.
        if not 0.0 <= args.size_limit <= 1.0:
            parser.error("--size-limit must be in [0.0..1.0]")
        Config.size_limit = args.size_limit

    # Find out which package installed what files
    pkgdict = build_package_dict(args.builddir)

    # Collect the size installed by each package
    pkgsize = build_package_size(pkgdict, args.builddir)

    if args.graph:
        draw_graph(pkgsize, args.graph)
    if args.file_size_csv:
        gen_files_csv(pkgdict, pkgsize, args.file_size_csv)
    if args.package_size_csv:
        gen_packages_csv(pkgsize, args.package_size_csv)


if __name__ == "__main__":
    main()