rework the INDEX file generation. The new approach speeds up things by ... [trunk]
author dmcmahill <dmcmahill@pkgsrc.org>
date Wed, 23 Jul 2003 09:41:23 +0000
branch trunk
changeset 45205 014d81bdeefc
parent 45204 6413de45c3f6
child 45206 9004e35db62e
rework the INDEX file generation. The new approach speeds up things by several orders of magnitude and 'make index' now takes 30 minutes or so instead of several days on my test machine. The approach now is to take one pass through every package and extract some key information including the explicitly listed dependencies. After the data is extracted, the dependencies are flattened in one step which avoids the extremely inefficient recursive make that was previously used.
Makefile
mk/bsd.pkg.mk
mk/scripts/genindex.awk
--- a/Makefile	Tue Jul 22 23:44:46 2003 +0000
+++ b/Makefile	Wed Jul 23 09:41:23 2003 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile,v 1.55 2003/06/30 22:05:11 hubertf Exp $
+# $NetBSD: Makefile,v 1.56 2003/07/23 09:41:23 dmcmahill Exp $
 #
 
 .include "mk/bsd.prefs.mk"
@@ -101,14 +101,54 @@
 _PKGSRCDIR=${.CURDIR}
 .endif
 
-index:
-	@${RM} -f ${.CURDIR}/INDEX
-	@${MAKE} ${.CURDIR}/INDEX
+.PHONY: index
+index: ${.CURDIR}/INDEX
 
+# Generate INDEX in two phases.  Phase one runs "print-summary-data" in
+# every package directory named by a SUBDIR line in */Makefile and appends
+# the output to the scratch file DEPENDSDB; a package for which that fails
+# is reported on stderr and left out of the database.  Phase two feeds
+# DEPENDSDB to genindex.awk and then removes the scratch file.
+# Shell variables are written $$name so that make hands "$name" to the shell.
 ${.CURDIR}/INDEX:
-	@${ECHO} -n "Generating INDEX - please wait.."
-	@${MAKE} describe ECHO_MSG="${ECHO} > /dev/null" > ${.CURDIR}/INDEX
-	@${ECHO} " Done."
+	@${RM} -f ${.CURDIR}/DEPENDSDB
+	@${ECHO_MSG} "Extracting complete dependency database.  This may take a while..."
+	@DB=${.CURDIR}/DEPENDSDB ; \
+	PKGSRCDIR=${.CURDIR} ; \
+	npkg=1; \
+	${RM} -fr $$DB ; \
+	list=`${GREP} '^[[:space:]]*'SUBDIR */Makefile | sed 's,/Makefile.*=[[:space:]]*,/,'` ; \
+	for pkgdir in $$list ; do \
+		if [ ! -d $$pkgdir ]; then  \
+			echo " " ; \
+			echo "WARNING:  the package directory $$pkgdir is listed in" > /dev/stderr ; \
+			echo $$pkgdir | sed 's;/.*;/Makefile;g' > /dev/stderr ; \
+			echo "but the directory does not exist.  Please fix this!" > /dev/stderr ; \
+		else \
+			cd $$pkgdir ; \
+			l=`${MAKE} print-summary-data`  ; \
+			if [ $$? != 0 ]; then \
+				echo "WARNING (printdepends):  the package in $$pkgdir had problem with" \
+					> /dev/stderr ; \
+				echo "    ${MAKE} print-summary-data" > /dev/stderr ; \
+				echo "    database information for this package" > /dev/stderr ; \
+				echo "    will be dropped." > /dev/stderr ; \
+				${MAKE} print-summary-data  > /dev/stderr 2>&1 ; \
+			else \
+				echo "$$l" >> $$DB ; \
+			fi ; \
+		fi ; \
+		echo -n "." ; \
+		if [ `${EXPR} $$npkg % 100 = 0` -eq 1 ]; then \
+			echo " " ; \
+			echo "$$npkg" ; \
+		fi ; \
+		npkg=`${EXPR} $$npkg + 1` ; \
+		cd $$PKGSRCDIR  ; \
+	done
+	@${RM} -f ${.CURDIR}/INDEX
+	@${AWK} -f ./mk/scripts/genindex.awk PKGSRCDIR=${.CURDIR} SORT=${SORT} ${.CURDIR}/DEPENDSDB
+	@${RM} -f ${.CURDIR}/DEPENDSDB
 
 print-index:	${.CURDIR}/INDEX
 	@${AWK} -F\| '{ printf("Port:\t%s\nPath:\t%s\nInfo:\t%s\nMaint:\t%s\nIndex:\t%s\nB-deps:\t%s\nR-deps:\t%s\nArch:\t%s\n\n", $$1, $$2, $$4, $$6, $$7, $$8, $$9, $$10); }' < ${.CURDIR}/INDEX
--- a/mk/bsd.pkg.mk	Tue Jul 22 23:44:46 2003 +0000
+++ b/mk/bsd.pkg.mk	Wed Jul 23 09:41:23 2003 +0000
@@ -1,4 +1,4 @@
-#	$NetBSD: bsd.pkg.mk,v 1.1220 2003/07/22 13:48:48 agc Exp $
+#	$NetBSD: bsd.pkg.mk,v 1.1221 2003/07/23 09:41:26 dmcmahill Exp $
 #
 # This file is in the public domain.
 #
@@ -3996,8 +3996,27 @@
 	@${ECHO} wildcard ${PKGPATH} ${PKGWILDCARD:Q}
 	@${ECHO} comment ${PKGPATH} ${COMMENT:Q}
 	@${ECHO} license ${PKGPATH} ${LICENSE:Q}
-	@${ECHO} onlyfor ${PKGPATH} ${ONLY_FOR_ARCHS}
-	@${ECHO} notfor ${PKGPATH} ${NOT_FOR_OPSYS}
+# onlyfor/notfor always carry a value: "any" stands in for an empty list.
+# Each command starts with @ so that make does not echo the shell text onto
+# stdout in between the records.
+	@if [ "${ONLY_FOR_ARCHS}" = "" ]; then				\
+		${ECHO} "onlyfor ${PKGPATH} any";			\
+	else								\
+		${ECHO} "onlyfor ${PKGPATH} ${ONLY_FOR_ARCHS}";		\
+	fi;
+	@if [ "${NOT_FOR_OPSYS}" = "" ]; then				\
+		${ECHO} "notfor ${PKGPATH} any";			\
+	else								\
+		${ECHO} "notfor ${PKGPATH} not ${NOT_FOR_OPSYS}";	\
+	fi;
+	@${ECHO} "maintainer ${PKGPATH} ${MAINTAINER}"
+	@${ECHO} "categories ${PKGPATH} ${CATEGORIES}"
+	@if [ -f ${DESCR_SRC} ]; then					\
+		${ECHO}  "descr ${PKGPATH} ${DESCR_SRC}";		\
+	else								\
+		${ECHO}  "descr ${PKGPATH} /dev/null";			\
+	fi
+	@${ECHO} "prefix ${PKGPATH} ${PREFIX}"
 .endif
 
 .if !target(show-license)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mk/scripts/genindex.awk	Wed Jul 23 09:41:23 2003 +0000
@@ -0,0 +1,389 @@
+#!/usr/bin/awk -f
+# $NetBSD: genindex.awk,v 1.1 2003/07/23 09:41:29 dmcmahill Exp $
+#
+# Copyright (c) 2002, 2003 The NetBSD Foundation, Inc.
+# All rights reserved.
+#
+# This code is derived from software contributed to The NetBSD Foundation
+# by Dan McMahill.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. All advertising materials mentioning features or use of this software
+#    must display the following acknowledgement:
+#        This product includes software developed by the NetBSD
+#        Foundation, Inc. and its contributors.
+# 4. Neither the name of The NetBSD Foundation nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+# Global variables
+#-----------------
+# The following associative arrays are used for storing the dependency
+# information and other information for the packages
+#
+# topdepends[]  : index=pkgdir (math/scilab)
+#                 List of explicitly listed dependencies by name.
+#                 E.g.  "xless-[0-9]* pvm-3.4.3"
+#
+# alldepends[]  : index=pkgdir (math/scilab)
+#                 Flattened dependency list by name.
+#         
+#
+
+
+BEGIN {
+	# announce the parse phase; tracing stays off unless debug is non-zero
+	printf("Reading database file\n"); debug = 0;
+}
+
+# "depends" / "build_depends" records: one package directory followed by
+# any number of pattern:directory pairs, for example
+#depends math/scilab xless-[0-9]*:../../x11/xless pvm-3.4.3:../../parallel/pvm3
+
+/^(build_)?depends / {
+# Records the directly listed dependencies of one package:
+#   pat2dir[pattern]      = directory of the package satisfying pattern
+#   topdepends[pkg]       += pattern   (depends records)
+#   topbuilddepends[pkg]  += pattern   (build_depends records)
+# NOTE(review): older examples showed /usr/pkgsrc/math/scilab in field 2, but
+# the category fallback below only works for category/package -- confirm.
+#
+	deptype=$1;
+# field 2 is used verbatim; the fulldir2pkgdir() conversion is not applied
+	pkg = $2;
+	if (pkg in topdepends) {}
+	else {topdepends[pkg] = "";}
+	if (pkg in topbuilddepends) {}
+	else {topbuilddepends[pkg] = "";}
+	
+	for (i = 3; i <= NF; i++) {
+		split($i, a,":");
+		pkgpat = a[1];
+		pkgdir = a[2];
+		sub(/[\.\/]*/, "", pkgdir);	# strip the leading run of dots and slashes ("../../")
+		if (pkgdir !~ /\//) {
+			pkgcat = pkg;
+			gsub(/\/.*/, "", pkgcat);	# keep only the part of pkg before its first slash
+			pkgdir=pkgcat "/" pkgdir;
+			if (debug)
+				printf("Corrected missing category directory to get \"%s\"\n",
+				       pkgdir);
+		}
+		if (debug){
+			printf("package in directory %s %s on:\n",
+			       pkg, deptype);
+			printf("\tpkgpat = %s\n", pkgpat);
+			printf("\tpkgdir = %s\n", pkgdir);
+		}
+		
+		
+#
+# store the package directory in an associative array with the wildcard
+# pattern as the index since find_all_depends() looks it up later
+#
+		pat2dir[pkgpat] = pkgdir;
+		
+		if (deptype == "depends") {
+			topdepends[pkg] = topdepends[pkg] " " pkgpat " " ;
+			if (debug) {
+			  printf("Appending %s to topdepends[%s] (%s)\n",
+				 pkgpat, pkg, topdepends[pkg]);
+			}
+		}
+		else {
+			if (debug) {
+			  printf("Appending %s to topbuilddepends[%s] (%s)\n",
+				 pkgpat, pkg, topbuilddepends[pkg]);
+			}
+			topbuilddepends[pkg] = topbuilddepends[pkg] " " pkgpat " " ;
+		}
+	}
+	
+	next;
+}
+
+/^categories /{	# categories <pkgdir> <text...>  ->  categories[pkgdir]
+  dir = $2;
+	gsub(/^categories[ \t]*/, "");	# drops the keyword from $0; the fields are re-split
+	tmp = substr($0, length($1) + 1);	# what follows the new first field (the pkgdir)
+	gsub(/^[ \t]*/, "", tmp);
+	categories[dir] = tmp;
+	next;
+}
+
+/^comment /{	# same scheme as the categories rule  ->  comment[pkgdir]
+	dir = $2;
+	gsub(/^comment[ \t]*/, "");
+	tmp = substr($0, length($1) + 1);
+	gsub(/^[ \t]*/, "", tmp);
+	comment[dir] = tmp;
+	next;
+}
+
+/^descr /{	# descr <pkgdir> <file>  ->  descr[pkgdir]
+	descr[$2] = $3;
+	next;
+}
+
+/^index / {
+#
+# read lines like:
+#index math/scilab scilab-2.6nb3
+# and store them in two associative arrays: pkgname2dir[] maps the
+# package name (field 3) to its directory (field 2) and pkgdir2name[]
+# maps the directory back to the name.
+#
+# Field 2 is stored exactly as given; the fulldir2pkgdir() conversion
+# that would reduce a full path to "math/scilab" is not applied here.
+#
+	pkgname2dir[$3] = $2;
+	pkgdir2name[$2] = $3;
+	next;
+}
+
+/^license /{	# license <pkgdir> <word>  ->  license[pkgdir] (third field only)
+	license[$2] = $3;
+	next;
+}
+
+/^maintainer /{	# maintainer <pkgdir> <word>  ->  maintainer[pkgdir] (third field only)
+	maintainer[$2] = $3;
+	next;
+}
+
+/^notfor /{	# same scheme as the categories rule  ->  notfor[pkgdir]
+        dir = $2;
+	gsub(/^notfor[ \t]*/, "");
+	tmp = substr($0, length($1) + 1);
+	gsub(/^[ \t]*/, "", tmp);
+	notfor[dir] = tmp;
+	next;
+}
+
+/^onlyfor /{	# same scheme as the categories rule  ->  onlyfor[pkgdir]
+	dir = $2;
+	gsub(/^onlyfor[ \t]*/, "");
+	tmp = substr($0, length($1) + 1);
+	gsub(/^[ \t]*/, "", tmp);
+	onlyfor[dir] = tmp;
+	next;
+}
+
+/^prefix /{	# prefix <pkgdir> <dir>  ->  prefix[pkgdir]
+	prefix[$2] = $3;
+	next;
+}
+
+/^wildcard /{	# wildcard <pkgdir> <pattern>  ->  wildcard[pkgdir]; last rule, no next
+	wildcard[$2] = $3;
+}
+
+# END: flatten the dependencies twice (run-time first, then build-time)
+# and write one |-separated line per package through SORT into INDEX.
+#
+
+END {
+	if( SORT == "" ) { SORT = "sort"; }
+        indexf = SORT " > INDEX";	# INDEX lines are piped through this command
+	if ( dependsfile == "" ) dependsfile = "/dev/null";
+	if ( builddependsfile == "" ) builddependsfile = "/dev/null";
+	
+	printf("Flattening dependencies\n");
+	printf("") > dependsfile;
+	for (toppkg in topdepends){
+		if (debug) printf("calling find_all_depends(%s, run)\n", toppkg);
+		find_all_depends(toppkg, "run");
+		if (debug) printf("%s depends on: %s, topdepends on %s\n",
+				  toppkg, alldepends[toppkg],
+				  topdepends[toppkg]);
+		printf("%s depends on: %s\n",
+		       toppkg, alldepends[toppkg]) >> dependsfile;
+		flatdepends[toppkg] = alldepends[toppkg];	# keep the run-time list; alldepends[] is reused below
+	}
+	close(dependsfile);
+	
+	
+# clear out the flattened depends list and repeat for the build depends
+	for( pkg in alldepends) {
+		delete alldepends[pkg];
+	}
+	
+	printf("Flattening build dependencies\n");
+	printf("") > builddependsfile;
+	for (toppkg in topbuilddepends){
+		find_all_depends(toppkg, "build");
+		printf("%s build_depends on: %s\n",
+		       toppkg, alldepends[toppkg]) >> builddependsfile;
+	}
+	close(builddependsfile);
+	
+	printf("Generating INDEX file\n");
+	
+# Output format (one line per element of topdepends[]):
+#  package-name|package-path|installation-prefix|comment| \
+#  description-file|maintainer|categories|build deps|run deps|for arch| \
+#  not for opsys
+	
+	pkgcnt = 0;
+	for (toppkg in topdepends){
+		pkgcnt++;
+		pkgdir = PKGSRCDIR "/" toppkg;
+		printf("%s|", pkgdir2name[toppkg]) | indexf;
+		printf("%s|", pkgdir) | indexf;
+		printf("%s|", prefix[toppkg]) | indexf;
+		printf("%s|", comment[toppkg]) | indexf;
+		printf("%s|", descr[toppkg]) | indexf;
+		printf("%s|", maintainer[toppkg]) | indexf;
+		printf("%s|", categories[toppkg]) | indexf;
+		gsub(/^ /, "", alldepends[toppkg]);	# alldepends[] now holds the "build" pass result
+		gsub(/ $/, "", alldepends[toppkg]);
+		printf("%s|", alldepends[toppkg]) | indexf;
+		gsub(/^ /, "", flatdepends[toppkg]);	# "run" pass result saved earlier
+		gsub(/ $/, "", flatdepends[toppkg]);
+		printf("%s|", flatdepends[toppkg]) | indexf;
+		printf("%s|", onlyfor[toppkg]) | indexf;
+		printf("%s", notfor[toppkg]) | indexf;
+		printf("\n") | indexf;
+	}
+	close(indexf);
+	printf("Indexed %d packages\n", pkgcnt);
+	exit 0;
+}
+
+function find_all_depends(pkg, type, pkgreg, i, deps, depdir, topdep){
+# Return the flattened dependency list of the package directory pkg
+# (like math/scilab) as a blank-separated string of dependency patterns
+# and cache it in alldepends[pkg].
+#   type : "run" follows DEPENDS only, anything else follows DEPENDS and
+#          BUILD_DEPENDS.
+#   pkgreg, i, deps, depdir, topdep : locals (pkgreg is unused).
+
+# If the package is already in alldepends[] -- fully flattened, or still
+# being flattened further up the call chain, which is what ends a
+# dependency loop -- return what is recorded for it.
+	if (pkg in alldepends){
+		if (debug) printf("\t%s is allready depended.  Returning %s\n",
+				  pkg, alldepends[pkg]);
+		return(alldepends[pkg]);
+	}
+
+# Fetch the direct dependencies.  Test with "in" before subscripting:
+# a bare topdepends[pkg] reference would create an empty element for a
+# directory that has no database record, while the END block is looping
+# over that array and later writes one INDEX line per element.
+	topdep = "";
+	if (pkg in topdepends) topdep = topdepends[pkg];
+	if (type != "run" && (pkg in topbuilddepends)) {
+		topdep = topdep " " topbuilddepends[pkg];
+	}
+	if (topdep ~ "^[ \t]*$") {
+		alldepends[pkg] = " ";
+		if (debug) printf("\t%s has no depends(%s).  Returning %s\n",
+				  pkg, topdep, alldepends[pkg]);
+		return(alldepends[pkg]);
+	}
+   
+# recursively gather depends that each of the depends has
+	split(topdep, deps);
+	i = 1;
+	alldepends[pkg] = " ";
+	while ( i in deps ) {
+
+# figure out the directory name associated with the package name
+# in (wild card/dewey) version form
+		depdir = pat2dir[deps[i]];
+		if (debug) printf("\tadding dependency #%d on \"%s\" (%s)\n",
+				  i, deps[i], depdir);
+
+# Add deps[i] plus everything it depends on, unless deps[i] is already
+# listed because an earlier dependency brought it in.  Every entry of
+# the list has a blank on each side, so a fixed-string index() lookup is
+# exact and needs no regex escaping of the pattern.
+		if (index(alldepends[pkg], " " deps[i] " ") == 0){
+		  alldepends[pkg] = alldepends[pkg] " " deps[i] " " find_all_depends(depdir, type);
+		}
+		else {
+		  if (debug) printf("\t%s is already listed in %s\n",
+				    deps[i], alldepends[pkg]);
+		}
+		
+		i = i + 1;
+	} # while i
+	
+	if (debug) printf("\tcalling uniq() on alldepends[%s] = %s\n",
+			  pkg, alldepends[pkg]);
+	alldepends[pkg] = uniq(alldepends[pkg]);
+	if (debug) printf("\tuniq() output alldepends[%s] = %s\n",
+			  pkg, alldepends[pkg]);
+	return(alldepends[pkg]);	
+}
+
+#
+# Turn a dependency pattern into a regular expression that matches the
+# pattern text literally: every ERE metacharacter, the backslash
+# included, gets a backslash in front of it.  The result is wrapped in
+# blanks, which is how callers distinguish things like gnome from
+# gnome-libs inside a blank-separated list.
+#   reg : pattern text, e.g. "gtk+-1.2.*"
+#   returns the escaped text, e.g. " gtk\+-1\.2\.\* "
+#
+function reg2str(reg){
+# "\\\\&" reaches gsub() as \\& : one literal backslash followed by
+# the character that was matched.
+	gsub(/[\]\[(){}|^$\\.+*?]/, "\\\\&", reg);
+	return(" " reg " ");
+}
+
+# Reduce a full package path such as "/usr/pkgsrc/math/scilab" to its last
+# two components ("math/scilab"); a string that has no such two-component
+# tail is returned whole.
+function fulldir2pkgdir(d, i){
+	if (!match(d, /\/[^\/]+\/[^\/]+$/)) return substr(d, 1);
+	i = RSTART + 1;		# skip the slash that starts the match
+	return substr(d, i);
+}
+
+#
+# Remove duplicates from a blank-separated list, keeping the first
+# occurrence of each word and the original order.
+#   list : e.g. " a b  a c "
+#   returns one leading blank, then every unique word followed by a
+#   blank: " a b c " (just " " when the list has no words).
+#   deps, i, ulist, seen : locals
+#
+function uniq(list, deps, i, ulist, seen){
+# Words are compared as exact strings via seen[], so a word containing
+# regex metacharacters cannot be mis-matched.
+	split(list, deps);
+	ulist = " ";
+	for (i = 1; i in deps; i++){
+		if (deps[i] in seen) continue;
+		seen[deps[i]] = 1;
+		ulist = ulist deps[i] " ";
+	}
+	return(ulist);
+}
+
+
+