wiki:PiwikImportScript

Version 3 (modified by chris, 3 years ago) (diff)

--

The following script is used to import the PuffinServer Nginx logs into Piwik; see PiwikServer#ServerLogAnalytics for details.

#!/bin/bash
#
# Import today's PuffinServer Nginx log into Piwik.
#
# Steps, performed only when today's dated log file exists:
#   1. Copy the dated log to $LOGFILE, stripping the ", 127.0.0.1" proxy
#      address that gets appended to the first field.
#   2. Feed the cleaned copy to Piwik's import_logs.py.
#   3. Delete the dated log, but only after a successful import, so that a
#      failed run never discards data that has not been imported yet.
#
# Exit status: 0 when there is nothing to do or the import succeeded,
# 1 when any step fails.

# date format used in log file names
DATE=$(date "+%Y-%m-%d") || exit 1
readonly DATE

# location of the latest log file
readonly NEW_LOG="/home/puffin/nginx/puffin-nginx-$DATE.log"

# location of the cleaned-up copy that is handed to the importer
readonly LOGFILE="/home/puffin/nginx/puffin-nginx.log"

# check the log file exists and then fix it
# and import it to piwik
if [[ -f "$NEW_LOG" ]]; then

  # fix the issue with the proxy ip address being added to the first field
  # (dots are escaped so that only the literal address 127.0.0.1 matches)
  if ! sed 's/, 127\.0\.0\.1//' "$NEW_LOG" > "$LOGFILE"; then
    printf 'error: could not write cleaned log to %s\n' "$LOGFILE" >&2
    exit 1
  fi

  # import the fixed log file
  if ! python /web/stats.transitionnetwork.org/piwik/misc/log-analytics/import_logs.py \
         --url=https://stats.transitionnetwork.org/ \
         --idsite=12 --enable-static --enable-bots --enable-http-errors --enable-http-redirect \
         --log-format-regex='"(?P<ip>\S+)" (?P<host>\S+) \[(?P<date>.*?) (?P<timezone>.*?)\] (?P<status>\S+) (?P<length>\S+) \S+ \S+ "(?P<referrer>.*?)" "(?P<user_agent>.*?)" \S+ "\S+"' \
         --recorders=8 "$LOGFILE"; then
    printf 'error: piwik import failed, keeping %s\n' "$NEW_LOG" >&2
    exit 1
  fi

  # delete the log file now that it has been imported
  rm -- "$NEW_LOG" || exit 1

fi