forked from DanielKinsman/twfy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
weeklyupdate
executable file
·30 lines (21 loc) · 967 Bytes
/
weeklyupdate
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#!/usr/bin/env bash
#
# Jobs that need running weekly:
#   1. Fetch the latest English Wikipedia title list (all-titles-in-ns0)
#      into $RAWDATA/dumps and load it into the `titles` table.
#   2. Feed ./wikipedia-exceptions to mysql.
#   3. Compact the Xapian database via ./compactsearchdb.
#
# Every path below is relative, so run this from the directory that holds
# `consts`, `wikipedia-exceptions` and `compactsearchdb`.
#
# RAWDATA, DB_USER, DB_PASSWORD and DB_NAME are not set in this file; they
# are presumably provided by `consts` and/or ../conf/general -- TODO confirm.
#
# A failed step is reported on stderr and the remaining independent steps
# still run; the exit status is non-zero if any step failed.

# Uncomment to trace. The trace would include the mysql command line, and
# with it the database password.
#set -x

# Print a diagnostic on stderr, prefixed with the script name.
weeklyupdate_warn() {
  printf 'weeklyupdate: %s\n' "$*" >&2
}

for required_file in consts ../../shlib/deployfns; do
  if [[ ! -r "$required_file" ]]; then
    weeklyupdate_warn "cannot read $required_file (wrong working directory?)"
    exit 1
  fi
done

source consts
source ../../shlib/deployfns
read_conf "../conf/general"

# An empty RAWDATA would make the rm/mkdir below operate on /dumps, and an
# empty DB_USER or DB_NAME would shift the mysql arguments, so stop early.
for required_var in RAWDATA DB_USER DB_NAME; do
  if [[ -z "${!required_var:-}" ]]; then
    weeklyupdate_warn "$required_var is empty or unset"
    exit 1
  fi
done

# Wikimedia download host, contacted directly.
# Full database, should it ever be needed:
#   http://download.wikimedia.org/enwiki/latest/enwiki-latest-pages-articles.xml.bz2
readonly WIKIMEDIA_DUMPS_URL="http://download.wikimedia.org"
readonly TITLES_FILE="$RAWDATA/dumps/all-titles-in-ns0"

# Kept as an array so that credentials containing spaces or glob characters
# reach mysql as single arguments.
# NOTE(review): the password is passed on the command line and is therefore
# visible in the process list; consider a --defaults-extra-file instead.
MYSQL=(mysql -u "$DB_USER" --password="$DB_PASSWORD" "$DB_NAME")

#######################################
# Download and unpack the current enwiki title list to $TITLES_FILE.
# Globals:   RAWDATA, TITLES_FILE, WIKIMEDIA_DUMPS_URL (read)
# Outputs:   diagnostics on stderr
# Returns:   0 on success, 1 if any stage failed
#######################################
fetch_wikipedia_titles() {
  local index_html dump_date
  # "enwiki/" followed by an 8-digit dump date, not preceded by a word
  # character, so that a longer name merely ending in "enwiki" cannot match.
  local dump_re='(^|[^[:alnum:]_])enwiki/([0-9]{8})'

  if ! mkdir -p -- "$RAWDATA/dumps"; then
    weeklyupdate_warn "cannot create $RAWDATA/dumps"
    return 1
  fi

  # Start from a clean slate: gunzip will not replace an existing output
  # file unless forced.
  rm -f -- "$TITLES_FILE.gz" "$TITLES_FILE"

  # Get new wikipedia titles database
  if ! index_html=$(fetch -q -o - "$WIKIMEDIA_DUMPS_URL/backup-index.html"); then
    weeklyupdate_warn "cannot download the Wikimedia backup index"
    return 1
  fi
  if [[ ! "$index_html" =~ $dump_re ]]; then
    weeklyupdate_warn "no enwiki dump date found in the backup index"
    return 1
  fi
  dump_date=${BASH_REMATCH[2]}

  if ! fetch -q -o "$TITLES_FILE.gz" \
    "$WIKIMEDIA_DUMPS_URL/enwiki/$dump_date/enwiki-$dump_date-all-titles-in-ns0.gz"; then
    weeklyupdate_warn "cannot download the enwiki title list for $dump_date"
    return 1
  fi
  if ! gunzip -- "$TITLES_FILE.gz"; then
    weeklyupdate_warn "cannot unpack $TITLES_FILE.gz"
    return 1
  fi
}

#######################################
# Load $TITLES_FILE into the `titles` table; rows rejected as duplicates
# are skipped by the `ignore` keyword.
# Globals:   MYSQL, TITLES_FILE (read)
# Returns:   exit status of mysql
#######################################
load_wikipedia_titles() {
  if ! printf "load data infile '%s' ignore into table titles;\n" "$TITLES_FILE" \
    | "${MYSQL[@]}"; then
    weeklyupdate_warn "loading $TITLES_FILE into the titles table failed"
    return 1
  fi
}

#######################################
# Run the statements in ./wikipedia-exceptions through mysql.
# Globals:   MYSQL (read)
# Returns:   0 on success, 1 otherwise
#######################################
apply_wikipedia_exceptions() {
  if ! "${MYSQL[@]}" < wikipedia-exceptions; then
    weeklyupdate_warn "applying wikipedia-exceptions failed"
    return 1
  fi
}

exit_status=0

# Only attempt the load when a fresh title list was actually produced.
if fetch_wikipedia_titles; then
  load_wikipedia_titles || exit_status=1
else
  exit_status=1
fi
apply_wikipedia_exceptions || exit_status=1

# Compact Xapian database. Independent of the Wikipedia refresh, so it runs
# even when the steps above failed.
if ! ./compactsearchdb; then
  weeklyupdate_warn "compactsearchdb failed"
  exit_status=1
fi

exit "$exit_status"