[Debtags-commits] [svn] r1883 - central-database/trunk/c-tdb

Erich Schubert erich at costa.debian.org
Wed Aug 30 23:58:46 UTC 2006


Author: erich
Date: Wed Aug 30 23:58:46 2006
New Revision: 1883

Modified:
   central-database/trunk/c-tdb/clean-data.cc
Log:
Create "magic" lists of all non-empty tags and all tagged packages, to serve as starting points for browsing.

Modified: central-database/trunk/c-tdb/clean-data.cc
==============================================================================
--- central-database/trunk/c-tdb/clean-data.cc	(original)
+++ central-database/trunk/c-tdb/clean-data.cc	Wed Aug 30 23:58:46 2006
@@ -28,6 +28,8 @@
 	VocDB dbvoc(O_RDONLY);
 	MapDB mappt(MAP_ID_PKG_TAG_DB, O_RDWR);
 	MapDB maptp(MAP_ID_TAG_PKG_DB, O_RDWR);
+	mappt.lock(); // note that these global locks are maybe not reliable
+	maptp.lock(); // note that these global locks are maybe not reliable
 
 	collectids pkgids;
 	int count = etdb_traverse< dbstr, dbid, collectids >(dbpkg.name_to_id, pkgids);
@@ -38,6 +40,7 @@
 
 	/* build pkg ids, that don't have data */
 	set<dbid> missing_pkgs;
+	set<dbid> untagged_pkgs;
 	for (set<dbid>::iterator i = pkgids.ids.begin(); i != pkgids.ids.end(); i++)
 		if (! dbpkg.id_to_data.has_key(*i))
 			missing_pkgs.insert(*i);
@@ -46,13 +49,13 @@
 
 	/* build tag ids, that aren't in vocabulary */
 	set<dbid> missing_vocs;
+	set<dbid> empty_vocs;
 	for (set<dbid>::iterator i = vocids.ids.begin(); i != vocids.ids.end(); i++)
 		if (! dbvoc.id_to_data.has_key(*i))
 			missing_vocs.insert(*i);
 	cout << missing_vocs.size() << " tags are known, but I don't have data for them." << endl;
 	IdList missing_vocs_list(missing_vocs);
 
-
 	/* clean packages -> tag database */
 	int purged_from_ptdb = 0;
 	int cleaned_in_ptdb = 0;
@@ -67,14 +70,18 @@
 				unsigned int len = list.size();
 				list.subtract(missing_vocs_list);
 				if (list.size() != len) {
-					cleaned_entires_in_ptdb += (len-list.size());
+					cleaned_entries_in_ptdb += (len-list.size());
 					mappt.set(*i, list);
 					cleaned_in_ptdb++;
 				}
 			}
+		} else {
+			if (missing_pkgs.count(*i) == 0) {
+				untagged_pkgs.insert(*i);
+			}
 		}
 	cout << "Dropped " << purged_from_ptdb << " pkg -> tags entries with no matching package information." << endl;
-	cout << "Cleaned " << cleaned_in_ptdb << " pkg -> tags entries from unknown tags." << endl;
+	cout << "Cleaned " << cleaned_in_ptdb << " pkg -> tags entries (" << cleaned_entries_in_ptdb << " lines) from unknown tags." << endl;
 
 	/* clean tag -> packages database */
 	int purged_from_tpdb = 0;
@@ -90,14 +97,31 @@
 				unsigned int len = list.size();
 				list.subtract(missing_pkgs_list);
 				if (list.size() != len) {
-					cleaned_entires_in_tpdb += (len-list.size());
+					cleaned_entries_in_tpdb += (len-list.size());
 					maptp.set(*i, list);
 					cleaned_in_tpdb++;
 				}
 			}
+		} else {
+			if (missing_vocs.count(*i) == 0)
+				empty_vocs.insert(*i);
 		}
 	cout << "Dropped " << purged_from_tpdb << " tag -> pkgs entries with no matching tag information." << endl;
-	cout << "Cleaned " << cleaned_in_tpdb << " tag -> pkgs entries from unknown packages." << endl;
+	cout << "Cleaned " << cleaned_in_tpdb << " tag -> pkgs entries (" << cleaned_entries_in_tpdb << " lines) from unknown packages." << endl;
+
+	/* update "all packages" and "all tags" magic */
+	IdList all_pkgs_list(pkgids.ids);
+	all_pkgs_list.subtract(missing_pkgs_list);
+	IdList untagged_pkgs_list(untagged_pkgs);
+	all_pkgs_list.subtract(untagged_pkgs_list);
+	maptp.set(dbid(0), all_pkgs_list);
+	IdList all_vocs_list(vocids.ids);
+	all_vocs_list.subtract(missing_vocs_list);
+	IdList empty_vocs_list(empty_vocs);
+	all_vocs_list.subtract(empty_vocs_list);
+	mappt.set(dbid(0), all_vocs_list);
 
+	mappt.unlock(); // note that these global locks are maybe not reliable
+	maptp.unlock(); // note that these global locks are maybe not reliable
 	return(0);
 }



More information about the Debtags-commits mailing list