Step By Step KN Build

This section contains instructions for running each step of the Knowledge Network Build Pipeline separately, if you wish to do so. It makes heavy use of the environment variables specified at the beginning.

Set environment variables

KNP_CHRONOS_URL='127.0.0.1:8888'
KNP_BUILD_NAME='hsap-all'
KNP_CODE_DIR="/kn_builder/code/"
KNP_WORKING_DIR='./'
KNP_STORAGE_DIR="$KNP_WORKING_DIR"
KNP_DB_DIR="$KNP_WORKING_DIR"
KNP_DATA_PATH='kn-data-'$KNP_BUILD_NAME
KNP_LOGS_PATH='kn-logs-'$KNP_BUILD_NAME
KNP_ENS_SPECIES='homo_sapiens'

KNP_EXPORT_DIR="$KNP_WORKING_DIR/kn-final-$KNP_BUILD_NAME"
KNP_MARATHON_URL='127.0.0.1:8080/v2/apps'

export KNP_MYSQL_HOST='127.0.0.1'
export KNP_MYSQL_PORT='3306'
export KNP_MYSQL_PASS='KnowEnG'
export KNP_MYSQL_USER='root'
export KNP_MYSQL_DB='KnowNet'
KNP_MYSQL_DIR=$KNP_DB_DIR'/kn-mysql-'$KNP_MYSQL_PORT'-'$KNP_BUILD_NAME
KNP_MYSQL_CONF='build_conf/'
KNP_MYSQL_MEM='10000'
KNP_MYSQL_CPU='0.5'
KNP_MYSQL_CONSTRAINT_URL='127.0.0.1'

export KNP_REDIS_HOST='127.0.0.1'
export KNP_REDIS_PORT='6379'
export KNP_REDIS_PASS='KnowEnG'
KNP_REDIS_DIR=$KNP_DB_DIR'/kn-redis-'$KNP_REDIS_PORT'-'$KNP_BUILD_NAME
KNP_REDIS_MEM='8000'
KNP_REDIS_CPU='0.5'
KNP_REDIS_CONSTRAINT_URL='127.0.0.1'
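
Before continuing, it can help to confirm the variables are set as expected. Note that only the MySQL and Redis settings are exported; the rest are visible to this shell only and are passed explicitly on the command lines below. A quick check:

set | grep '^KNP_'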

Copy pipeline code

cd "$KNP_CODE_DIR"
git clone https://github.com/KnowEnG/KN_Builder.git
cd KN_Builder/
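
Optionally, record the commit you are building from; this makes it easier to reproduce the build later (standard git command, no further assumptions):

git log -1 --oneline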

Clear any existing files

rm -r $KNP_STORAGE_DIR/$KNP_LOGS_PATH/*
rm -r $KNP_STORAGE_DIR/$KNP_DATA_PATH/*
rm -r $KNP_STORAGE_DIR/$KNP_BUCKET/*
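
If this is a fresh working directory these paths may not exist yet. A minimal sketch, assuming the pipeline expects the data and log directories to be present before jobs are scheduled, recreates them explicitly:

mkdir -p "$KNP_STORAGE_DIR/$KNP_DATA_PATH" "$KNP_STORAGE_DIR/$KNP_LOGS_PATH"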

MySQL setup

Start MySQL database if it is not running

python3 src/code/mysql_utilities.py \
    -myh $KNP_MYSQL_HOST -myp $KNP_MYSQL_PORT \
    -mym $KNP_MYSQL_MEM -myc $KNP_MYSQL_CPU \
    -myd $KNP_MYSQL_DIR -mycf $KNP_MYSQL_CONF \
    -myps $KNP_MYSQL_PASS -myu $KNP_MYSQL_USER -mycu $KNP_MYSQL_CONSTRAINT_URL \
    -m $KNP_MARATHON_URL -wd $KNP_WORKING_DIR \
    -sd $KNP_STORAGE_DIR -dp $KNP_DATA_PATH -lp $KNP_LOGS_PATH

Empty MySQL database if it is running

mysql -h $KNP_MYSQL_HOST -u $KNP_MYSQL_USER -p$KNP_MYSQL_PASS \
        -P $KNP_MYSQL_PORT --execute "DROP DATABASE $KNP_MYSQL_DB;"
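
To confirm the server is reachable and the old database is gone, list the remaining databases (standard mysql client usage):

mysql -h $KNP_MYSQL_HOST -u $KNP_MYSQL_USER -p$KNP_MYSQL_PASS \
        -P $KNP_MYSQL_PORT --execute "SHOW DATABASES;"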

Redis setup

Start Redis database if it is not running

python3 src/code/redis_utilities.py \
    -rh $KNP_REDIS_HOST -rp $KNP_REDIS_PORT \
    -rm $KNP_REDIS_MEM -rc $KNP_REDIS_CPU \
    -rd $KNP_REDIS_DIR -rps $KNP_REDIS_PASS -rcu $KNP_REDIS_CONSTRAINT_URL \
    -m $KNP_MARATHON_URL -wd $KNP_WORKING_DIR -lp $KNP_LOGS_PATH

Empty Redis database if it is running

redis-cli -h $KNP_REDIS_HOST -p $KNP_REDIS_PORT -a $KNP_REDIS_PASS FLUSHDB
redis-cli -h $KNP_REDIS_HOST -p $KNP_REDIS_PORT -a $KNP_REDIS_PASS BGREWRITEAOF
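
To verify the flush succeeded and the server is responsive (standard redis-cli commands; DBSIZE should report 0 keys afterwards):

redis-cli -h $KNP_REDIS_HOST -p $KNP_REDIS_PORT -a $KNP_REDIS_PASS PING
redis-cli -h $KNP_REDIS_HOST -p $KNP_REDIS_PORT -a $KNP_REDIS_PASS DBSIZE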

Clear the chronos queue

for c in $KNP_CHRONOS_URL ; do
    curl -L -X GET $c/scheduler/jobs | sed 's#,#\n#g' | sed 's#\[##g' | grep '"name"' | sed 's#{"name":"##g' | sed 's#"##g' > /tmp/t.txt
    for s in 'export-' 'import-' 'map-' 'table-' 'fetch-' 'check-' 'KN_starter'  ; do
        echo $s
        for i in `grep "$s" /tmp/t.txt  `; do
            CMD="curl -L -X DELETE $c/scheduler/job/$i";
            echo "$CMD";
            eval "$CMD";
        done;
    done;
done;
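
To confirm the queue is actually empty afterwards, query the same endpoint used above and check that no pipeline jobs remain in the returned JSON:

curl -L -X GET $KNP_CHRONOS_URL/scheduler/jobs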

Check the status of jobs

python3 src/code/job_status.py -c $KNP_CHRONOS_URL
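
The stages below submit Chronos jobs that run asynchronously, so this command typically needs to be re-run until everything has completed. A minimal polling sketch, if you just want to watch the summary rather than parse it:

while true; do
    python3 src/code/job_status.py -c $KNP_CHRONOS_URL
    sleep 300    # re-check every 5 minutes; interrupt with Ctrl-C when done
done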

Run setup pipeline (time: 2hr 30min)

python3 src/code/workflow_utilities.py CHECK -su \
    -myh $KNP_MYSQL_HOST -myp $KNP_MYSQL_PORT \
    -myps $KNP_MYSQL_PASS -myu $KNP_MYSQL_USER \
    -rh $KNP_REDIS_HOST -rp $KNP_REDIS_PORT \
    -wd $KNP_WORKING_DIR -dp $KNP_DATA_PATH -lp $KNP_LOGS_PATH \
    -c $KNP_CHRONOS_URL \
    -sd $KNP_STORAGE_DIR -es $KNP_ENS_SPECIES
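
As with any stage that schedules Chronos jobs, confirm the setup jobs have finished and scan the logs before starting the parse pipeline, e.g. by re-running:

python3 src/code/job_status.py -c $KNP_CHRONOS_URL

The same applies between the parse, import, and export stages below.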

Run parse pipeline (time: 2hr)

python3 src/code/workflow_utilities.py CHECK \
    -myh $KNP_MYSQL_HOST -myp $KNP_MYSQL_PORT \
    -myps $KNP_MYSQL_PASS -myu $KNP_MYSQL_USER \
    -rh $KNP_REDIS_HOST -rp $KNP_REDIS_PORT \
    -wd $KNP_WORKING_DIR -dp $KNP_DATA_PATH -lp $KNP_LOGS_PATH \
    -c $KNP_CHRONOS_URL \
    -sd $KNP_STORAGE_DIR

Run import pipeline (time: 2hr 45min)

python3 src/code/workflow_utilities.py IMPORT \
    -myh $KNP_MYSQL_HOST -myp $KNP_MYSQL_PORT \
    -myps $KNP_MYSQL_PASS -myu $KNP_MYSQL_USER \
    -rh $KNP_REDIS_HOST -rp $KNP_REDIS_PORT \
    -wd $KNP_WORKING_DIR -dp $KNP_DATA_PATH -lp $KNP_LOGS_PATH \
    -c $KNP_CHRONOS_URL \
    -sd $KNP_STORAGE_DIR

Run export pipeline (time: 45 mins)

src/code/export1.sh
src/code/export2.sh
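
As a quick sanity check, list the export output. The assumption that the export scripts write into $KNP_EXPORT_DIR (defined at the top of this section but not otherwise used here) is ours; adjust the path if your build places the results elsewhere:

ls -lh "$KNP_EXPORT_DIR"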

Check for errors

grep -ri -e failed -e error -e killed $KNP_LOGS_PATH/*
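
For a scripted build where a pass/fail gate is preferable to a visual scan, the same grep can be wrapped in a conditional:

if grep -riq -e failed -e error -e killed $KNP_LOGS_PATH/*; then
    echo "errors found in pipeline logs; fix before exporting" >&2
fi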

Export databases

mysqldump -h $KNP_MYSQL_HOST -u $KNP_MYSQL_USER -p$KNP_MYSQL_PASS -P $KNP_MYSQL_PORT $KNP_MYSQL_DB | gzip > $KNP_S3_DIR/mysql.gz
redis-cli -h $KNP_REDIS_HOST -p $KNP_REDIS_PORT -a $KNP_REDIS_PASS SAVE && mv $KNP_REDIS_DIR/dump.rdb $KNP_S3_DIR/dump.rdb
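
Before relying on the dumps, confirm both files exist and that the MySQL dump is a valid gzip archive (standard gzip and ls usage):

gzip -t $KNP_S3_DIR/mysql.gz && echo "mysql dump OK"
ls -lh $KNP_S3_DIR/mysql.gz $KNP_S3_DIR/dump.rdb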

Import databases

mysql -h $KNP_MYSQL_HOST -u $KNP_MYSQL_USER -p$KNP_MYSQL_PASS -P $KNP_MYSQL_PORT -e "CREATE DATABASE $KNP_MYSQL_DB;"
gzip -dc $KNP_S3_DIR/mysql.gz | mysql -h $KNP_MYSQL_HOST -u $KNP_MYSQL_USER -p$KNP_MYSQL_PASS -P $KNP_MYSQL_PORT $KNP_MYSQL_DB
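
After the restore, confirm the tables were recreated in the target database (standard mysql client usage):

mysql -h $KNP_MYSQL_HOST -u $KNP_MYSQL_USER -p$KNP_MYSQL_PASS \
        -P $KNP_MYSQL_PORT --execute "SHOW TABLES;" $KNP_MYSQL_DB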

Create report of results

cp -r $KNP_WORKING_DIR/$KNP_DATA_PATH/id_map $KNP_STORAGE_DIR/$KNP_DATA_PATH/id_map
src/code/reports/enumerate_files.sh $KNP_STORAGE_DIR/$KNP_DATA_PATH COUNTS $KNP_MYSQL_HOST \
    $KNP_REDIS_HOST $KNP_MYSQL_PORT $KNP_REDIS_PORT > tests/KN03-KClus-build.$KNP_DATA_PATH.pipe
git add -f tests/KN03-KClus-build.$KNP_DATA_PATH.pipe
git commit -m 'adding result report'
git push