#!/usr/bin/perl -w
#
# cdepend -- Find which functions depend on which.
#
# Copyright (C) 2003, Eli Billauer
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# A copy of the license can be found in a file named "licence.txt", at the
# root directory of this project.
#

# Important notice: cdepend is based on the Eobj environment, which
# can be downloaded from your favourite CPAN site. In order to spare
# some of the mess, a local copy of Eobj comes with cdepend, so there is
# no need to download Eobj separately to *run* cdepend.
# But this bundle lacks a lot of doc. So go for the original Eobj if
# you need to learn more about Eobj.

use Eobj;
use strict;

$main::VERSION = '0.02';

# Split the invocation path into its directory part and the bare name of
# the executable. The name is used in the usage message, the directory is
# where the class source files are looked for.
my ($execpath, $execname) = ($0 =~ /^(.*?)[\/\\]*([^\/\\]+)$/);

# An empty directory part means one of two things: Either $0 had no
# directory at all (e.g. "perl cdepend ..."), or the script lives in the
# root directory. Only in the latter case is "/file" the right path, so
# fall back to the current directory otherwise.
$execpath = '.'
  if ((defined $execpath) && ($execpath eq '') && ($0 !~ /^[\/\\]/));

inherit('parser', "$execpath/cdepend-x86parser.pl", 'root');
inherit('variable', "$execpath/cdepend-variable.pl", 'root');

init;

# Parse input parameters

# usage([$message])
#
# Terminates the script with the command line synopsis. If a (true)
# message is given, it is put in front of the synopsis as is, so it should
# end with a newline or some other separator. Never returns.
sub usage {
  my ($prefix) = @_;
  $prefix = "" unless ($prefix);

  my $synopsis = "Usage: $execname [-qrXLS] -v [-d outdir] [-o outfile] [-l{library},...] [-s start-function] asmfile,...\n";

  die($prefix . $synopsis);
}

# Settings controlled by the command line, with their defaults

my $startfunction = "main"; # undef (after -S) means no starting function
my $startchosen = 0;        # Becomes true once -s has been given
my $outdir = undef;
my $outfile = undef;
my @libs = ();              # The -l flags, kept in their "-lname" form
my $recursive = 0;
my $explicitonly = 0;
my $libraryonly = 0;
my $quiet = 0;
my $tail;                   # Flag characters following the one handled now

my @P = @ARGV;              # Arguments not consumed yet

# One handler per flag character. A handler of a flag that takes an
# argument shifts it off @P, except for -l, whose argument is the rest
# of the current word ($tail).

my %flaghash = ('q' => sub { $quiet = 1; },
                'r' => sub { $recursive = 1; },
                'S' => sub { undef $startfunction; },
                'X' => sub { blow("-X flag contradicts previous -L flag\n")
                               if ($libraryonly);
                             $explicitonly = 1; },
                'L' => sub { blow("-L flag contradicts previous -X flag\n")
                               if ($explicitonly);
                             $libraryonly = 1; },
                'd' => sub { usage("Double choice of outdir,\n")
                               if (defined $outdir);
                             $outdir = shift @P;
                             usage("Faulty outdir,\n")
                               if (( ! defined $outdir) || ($outdir =~ /^-/));
                           },
                'o' => sub { usage("Double choice of outfile,\n")
                               if (defined $outfile);
                             $outfile = shift @P;
                             usage("Faulty outfile,\n")
                               if (( ! defined $outfile) || ($outfile =~ /^-/));
                           },
                # $startfunction has a default value, so whether it is
                # defined says nothing about -s having been used already.
                # Hence the separate $startchosen flag.
                's' => sub { usage("Double choice of start-function,\n")
                               if ($startchosen);
                             $startfunction = shift @P;
                             usage("Faulty start-function,\n")
                               if (( ! defined $startfunction) || ($startfunction =~ /^-/));
                             $startchosen = 1;
                           },
                'l' => sub { usage("-l option should be followed by library name immeditately\n")
                               if (length($tail) == 0);
                             push @libs, "-l$tail";
                             $tail = ""; # The rest of the word was the library name
                           },
                'v' => sub { print STDERR "cdepend version $main::VERSION\n";
                             exit(0);
                           }
               );
		
# Consume the leading arguments that begin with "-". Each of these may hold
# several single-character flags. The first argument without the "-" prefix
# ends the flag section and is taken as the beginning of the file list.
ARGUMENT: while (@P) {
  last ARGUMENT if ($P[0] !~ /^-/);

  # Strip the "-" prefix, leaving the flag characters in $tail
  ($tail) = (shift(@P) =~ /^-(.*)$/);

  while (length($tail) > 0) {
    # Chop off the first character. The handler called below may alter
    # @P and $tail, when the flag takes an argument.
    (my $flag, $tail) = ($tail =~ /^(.)(.*)$/);

    my $handler = $flaghash{$flag};
    unless (defined $handler) {
      usage("Unrecognized flag -$flag\n");
    }
    $handler->();
  }
}

# Whatever the flag parser left in @P is the list of assembly files
my @files = @P;

# Flags are not allowed after the first file name; pick the first offender
my ($badfile) = grep { /^-/ } @files;

if (defined $badfile) {
  usage("Bad flag $badfile (appears after asmfile/s )\n");
}

if (! @files) {
  usage("No assembly files given\n");
}

# Create the directory for the per-symbol report files, if one was asked
# for. Anything already existing under that name is an error.
if (defined $outdir) {
  blow("A file or directory of name $outdir already exists\n")
    if (-e $outdir);

  # Note the "or": With "||" the alternative binds to the 0777 argument,
  # which is always true, so a failing mkdir would pass silently.
  mkdir($outdir, 0777) or blow("Failed to create directory $outdir\n");
}

#my $files = `find /home/eli/discretix/cdepend-testcase/ -name \\*.S`;
#my $files = `find /home/eli/delme/linux/drivers -name \\*.S`;
#my $files = `find S -name \\*.S`;#| head -10`;
#my @files = split("\n", $files);

#
# End of user interface games. Now to real work.
#

# A single parser object handles all assembly files, one after the other
my $p = parser->new(name => 'MainParser');

for my $asmfile (@files) {
  # Whatever parse() returns is shown as is on stderr, unless -q was given
  my $complaints = $p->parse($asmfile);
  next if ($quiet);
  print STDERR $complaints;
}

# Look up the object of the starting function by its name. With no
# starting function (-S flag), $startobj remains undefined.
my $objfinder = globalobj->get('objfinder');
my $startobj = undef;

if (defined $startfunction) {
  $startobj = $objfinder->{$startfunction};

  unless (defined $startobj) {
    blow("Could not find starting function ${startfunction}()\n");
  }
}

# %libhash maps symbol names to the library files defining them
my %libhash = ();
setvarhash(\%libhash, @libs);

# Create a reference, which will hold either STDOUT or the file we want to
# output to.

my $stdout;

if (defined $outfile) {
  # Three-argument open: The file name is taken literally, so characters
  # like ">", "&" or "|" in it can't alter the open mode.
  open($stdout, '>', $outfile) || blow("Failed to open output file $outfile\n");
} else {
  $stdout = \*STDOUT;
}

# Recurse down to all used functions
if (defined $startobj) {
  $startobj->uselist;
}

print $stdout "\nUndeclared variables:\n\n";

my $unreach = 0; # Set when at least one symbol got the (*) mark

SYMBOL: foreach my $sym (globalobj->get('symbols')) {
  my $varname = $sym->get('varname');

  next SYMBOL if ($sym->get('declared'));

  # A symbol found in one of the libraries is attributed to that library
  # rather than listed as undeclared.
  my $lib = $libhash{$varname};
  if (defined $lib) {
    $sym->set('library', $lib);
    next SYMBOL;
  }

  # Mark symbols lacking 'is_used' when a starting function is in effect
  if ((defined $startobj) && (! $sym->get('is_used'))) {
    print $stdout "(*) ";
    $unreach = 1;
  }
  print $stdout $sym->nicename."\n";
}

if ($unreach) {
  print $stdout "\nSymbols marked with (*) are unreachable from starting function ${startfunction}()\n";
}

print $stdout "\nUnused variables:\n\n";

# With a starting function, a symbol is listed when its 'is_used' property
# is false. Without one, the 'usedby' property is what counts.
my $usageprop = (defined $startobj) ? 'is_used' : 'usedby';

foreach my $sym (globalobj->get('symbols')) {
  next if ($sym->get($usageprop));
  print $stdout $sym->nicename."\n";
}

# Write one report file per symbol into the output directory, named after
# the symbol. The -X and -L flags narrow the selection down to symbols
# without or with a 'library' property, respectively.
if (defined $outdir) {
  foreach my $sym (globalobj->get('symbols')) {
    next if ($explicitonly && ($sym->get('library')));
    next if ($libraryonly && (! $sym->get('library')));
    my $name = $sym->get('varname');
    my $path = "$outdir/$name";

    # Three-argument open with a lexical handle: The symbol name is taken
    # literally as part of the file name, whatever characters it holds.
    open(my $report, '>', $path) || blow("Failed to open $path for output\n");
    print {$report} $sym->report($recursive);

    # Buffered write errors show up only when the file is closed
    close($report) || blow("Failed to write $path\n");
  }
}

close $stdout;

# setvarhash(\%libvars, @compilerargs)
#
# Adds symbol name => library file pairs to the hash referenced by the
# first argument, for the symbols defined in the dynamic libraries that a
# trivial C program is linked against. The remaining arguments are passed
# to gcc as they are (the script calls this with its -l flags).
#
# Entries already defined in the hash are left untouched, so when several
# libraries define the same symbol, the first one listed by ldd wins.
#
# Dies if the temporary files can't be written, compiled or deleted. The
# temporary files are created in the current directory.
sub setvarhash {
  my $libvars = shift;
  my @compilerargs = @_;

  # This is lame, but it works: We compile an empty C program, and fetch
  # the basic dynamic link libraries from it with ldd.

  my $minic = "tmp_empty_c_application";

  open(my $src, '>', "$minic.c")
    or die("Failed to open temporary file $minic.c for write\n");
  print {$src} "int main() {}\n";
  close($src);

  unless (system("gcc", "$minic.c", @compilerargs, "-o", $minic) == 0) {
    unlink("$minic.c"); # Best effort: Don't leave the source file behind
    die("Failed to compile $minic.c\n");
  }

  my $ldd = `ldd $minic`;

  # "or", not "||": The latter binds to the file name, which is always
  # true, and hence the failure to delete was never detected.
  unlink("$minic.c") or die("Failed to delete $minic.c\n");
  unlink($minic) or die("Failed to delete $minic\n");

  # The library files are whatever follows the "=>" arrows in ldd's output
  my @libfiles = ($ldd =~ /=>[ \t]*([^ \t]+)/g);

  foreach my $libfile (@libfiles) {
    # Entries that aren't real files just yield an empty symbol list, as
    # the error messages of nm are discarded.
    my $ans = `nm --defined-only --no-sort -g $libfile 2>/dev/null`;

    # The symbol name is the last word of each line
    foreach my $v ($ans =~ /([^ \t]+)[ \t]*$/mg) {
      next if (defined $libvars->{$v});
      $libvars->{$v} = $libfile;
    }
  }

  return;
}
__END__

=head1 NAME

cdepend - Find which C function depends on which

=head1 SYNOPSIS

  cdepend [OPTIONS] ASMFILES

=head1 DESCRIPTION

cdepend is a utility, which analyzes a C software project, and generates reports
that tell how each function depends on the others.

This is useful for finding functions that are never used, finding what
library functions the package is accessing, and also for getting a picture of
how changing some function may affect the entire project.

cdepend generates one main report, and, if required,  a separate report for
each symbol (variable or function) encountered.

The main report is a list of unused functions and variables. "Unused"
usually means it can't be reached if the execution starts at C<main()>
(which is usually the case). It's also possible to change the entry point
to some other function, or not define any entry point at all. In the latter
case, any symbol that is referred to anywhere, will be considered "used".

cdepend scans the assembly code generated by a partial compilation of the
C source. This usually requires the C code to be recompiled especially with
some minor changes in the gcc flags (see below). This recompilation
must be done by the user before applying cdepend, with exactly the same flags
as the real-target compilation, plus C<-S> and C<-gstabs+>.

Library files are scanned for symbols as well. Their internal dependencies
are not analyzed, but their use is marked.

=head1 OPTIONS

=over 4

=item -v

Display version and exit.

=item -q

Be quiet. Don't report warnings when something looks fishy.

=item -d DIR

Create directory DIR and write a separate report file for each function
or variable encountered. The DIR directory must not exist prior to
execution of cdepend.

=item -r

Create recursive reports (effective when -d option is present). All indirect
usages of functions and variables are given in the reports. This option will
create heavy output when used on large software projects, but is useful to
get an overall view of the total dependency of a certain function.

=item -X

Create report files for explicit functions and variables only. In other words,
don't create reports for symbols found in libraries. 
(effective when -d option is present)

=item -L

Create report files for library functions and variables only. Useful to check
what library functions are used by whom.
(effective when -d option is present)

=item -o FILE

Write brief report to FILE rather than standard output.

=item -l[library]

Whenever a C<-l[library]> flag is given to gcc during compilation, it should
be given to cdepend as well. This allows resolving symbol names that come
from libraries. The syntax is exactly the same.
Note that if the -l[library] argument is faulty, an error message produced
by gcc will be displayed.

=item -s function_name

Use function_name() as the starting function instead of C<main()>. Any
function or variable that can't be reached from this function (directly or
indirectly) will be considered unused.

=item -S

Don't use any function as a starting function. Only functions and variables
that are not used from anywhere, will be considered unused.

=back

=head1 HOW TO USE

There are three stages in using cdepend:

=over 4

=item 1.

Compile all relevant source files with the -S and -gstabs+ flags set. This will
result in assembly files as output, instead of object code.

=item 2.

Run cdepend jointly on all assembly files.

=item 3.

Read the report files, and figure out what they say about your project.

=back

The trickiest part is the compilation. Basically, if a normal compilation
would be

  gcc mycode.c -o mycode

or

  gcc -c mycode.c -o mycode.o

you should instead compile (in both cases) with

  gcc -gstabs+ -S mycode.c -o mycode.S

The C<-c> flag doesn't make any difference here. You may also call the output
file whatever you want. The C<.S> suffix will help you avoid confusion (from
yourself and C<make>).

All files in the project should be compiled this way. Then, if the C<.S> suffix
was used, you may run cdepend with:

  cdepend [options] `find [project-dir] -name \*.S`

=head1 OUTPUT

cdepend generates two kinds of output: A summary of unused and undeclared
functions and variables, and, when the -d option is used, a file for each
symbol encountered.

The summary output is rather straightforward.

The per-symbol report files will hold the function names as their file names.
Each file starts with where the function was declared (file and line
number), and continues with a list of functions that the function calls
(or refers to otherwise). The line numbers in the source code, where the
function calls take place are given as well. This is the "used" list. 

After this we have a list of functions B<using> the function in question.
The filenames and line numbers are given for each instance.

This is called "direct" usage. If the C<-r> flag is given, the "indirect"
connections are given, by recursively finding all functions that call
our function or are called by it. The calling chain is displayed in
each depth level: Assume that function A calls B,  B calls C, and C calls D.
If we look at A's private report file, we'll find B in the "used directly"
section. C and D will B<both> appear in the "used indirectly" section.

Indirectly using and used functions are displayed with the complete calling
chain between them and the function in question. Arrows show which function
called which.

Variables and functions are treated equally. A function which refers to a
global variable is handled like a function calling another function.
A variable which is initialized with pointers to functions or other
variables is no different than a function using these variables.

Library functions are usually displayed within brackets, for example
C<(malloc)>.

=head1 HOW IT ALL WORKS

Roughly, cdepend scans the assembly files for definitions of symbols,
uses of symbols and debugger information. Since functions are compiled into
assembly code that begins with declaring function name as the label, all
assembly code until the next function label is assumed to be a result of
that function.

Every use of symbols is recorded, and the respective function is marked
as dependent on the variable or function behind the symbol. There is
no real distinction between using a symbol as a function or a variable,
since pointers to functions may be used as plain variables.

cdepend recognizes functions by using debugger information, which comes
along with function declarations. A label called or jumped to will also
be considered a function.
Library functions may not
be recognized as functions unless they are called or jumped to (which is
almost always the case).

The source file's names and line numbers are taken from the debugger
information.

Library files are scanned for their symbols by using binutils' C<nm>.
Symbols found in library files differ only in the fact that they are not
marked as undeclared, if the symbol's declaration is absent in the assembly
files.

Which library file to scan is determined by compiling an empty C<main()>
function with C<gcc>, passing the relevant C<-l> options to the compiler.
The list of library files is then obtained by using binutils' C<ldd>.

=head1 CAVEATS

Since cdepend only looks at assembly files, the results may be misleading in
the following ways:

=over 4

=item *

A function may appear to be unnecessary because it wasn't used on a certain
compilation (due to #ifdef's). cdepend has no way to know about code that
wasn't compiled due to compile-time directives.

=item *

Some functions may "disappear" as a result of being compiled inline.
This may become even more confusing if the -O3 option is used, in which
case the compiler feels free to compile any function inline, whenever
beneficial.

=back

=head1 BUGS

cdepend currently works only on Linux-i386.

If cdepend spits a lot of errors and does nothing, odds are that the
assembler parser didn't like what C<gcc> fed it with. You may be luckier on
another computer, or you may try to send me the assembly file in question.
Hopefully, it's an easy fix.

cdepend is highly based upon heuristics. It may rely on local features of the
compiler and the operating system. If it works, goodie. If it doesn't, there
is no need to be too surprised.

The output should be considered as hints, not facts. Remember that there is
no substitute for your own brain, which should be the main tool in making
decisions, not some clutter from this utility.

Having said this, it is usually quite easy to track down and fix bugs, as the
script is written. Should you encounter something wrong, please send a
description of the bug and the shortest possible test that demonstrates it
to the author's address below. Please supply the output of C<perl -V>,
C<gcc -v> and C<uname -a> as well.

=head1 AUTHOR

Eli Billauer, E<lt>eli_billauer@users.sf.netE<gt>

=head1 SEE ALSO

L<gcc>, L<ldd>, L<nm> and the author's home page: L<http://www.billauer.co.il/>

=cut
