[game-data-packager] 01/02: spider: check only one game when argv[1] is provided

Alexandre Detiste detiste-guest at moszumanska.debian.org
Tue Sep 29 13:29:55 UTC 2015


This is an automated email from the git hooks/post-receive script.

detiste-guest pushed a commit to branch master
in repository game-data-packager.

commit 0b664f85fd5fd9af670689d71e7bd923a40ea427
Author: Alexandre Detiste <alexandre.detiste at gmail.com>
Date:   Tue Sep 29 15:28:16 2015 +0200

    spider: check only one game when argv[1] is provided
---
 tools/spider.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/tools/spider.py b/tools/spider.py
index e06093c..a966135 100755
--- a/tools/spider.py
+++ b/tools/spider.py
@@ -19,6 +19,7 @@
 # in per-engine-wiki pages
 # we don't rescan games we already have
 
+import sys
 import time
 import urllib.request
 from bs4 import BeautifulSoup
@@ -26,6 +27,11 @@ from game_data_packager import load_games
 
 CSV = 'data/wikipedia.csv'
 
+try:
+     todo = sys.argv[1]
+except IndexError:
+     todo = '*'
+
 urls = dict()
 with open(CSV, 'r', encoding='utf8') as f:
     for line in f.readlines():
@@ -38,16 +44,18 @@ with open(CSV, 'r', encoding='utf8') as f:
 def is_wikipedia(href):
     return href and "wikipedia" in href
 
-for shortname, game in load_games().items():
+for shortname, game in load_games(None, game=todo).items():
     if not game.wiki:
         continue
     if shortname in urls:
         continue
 
+    print('processing %s ...' % shortname)
     url = game.wikibase + game.wiki
     html = urllib.request.urlopen(url)
     soup = BeautifulSoup(html, 'lxml')
     for tag in soup.find_all(href=is_wikipedia):
+        print('  ' + tag['href'])
         urls[shortname] = tag['href']
 
     #break

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-games/game-data-packager.git



More information about the Pkg-games-commits mailing list