A forum for reverse engineering, OS internals and malware analysis 

Forum for announcements and questions about tools and software.
 #23557  by ViRii
 Fri Aug 08, 2014 7:02 pm
For those who use http://malc0de.com/database/ and/or http://vxvault.siri-urz.net/ViriList.php as a source of malware samples, the scripts below will automatically download as many samples as you need from them. All you need is Python 2.7.

By default, samples are saved to c:\malware, but this can be changed with "-d new location"; the user agent used to download samples, the number of threads, etc. can also be changed.
To list all available options:
Code: Select all
script.py -h
License:
- no warranty express or implied
- free to use if you don't use it to make money

Malc0de downloader:
Image
Warning: Downloaded files can harm your computer.
Code: Select all
import re
import time
import urllib2
import hashlib
import os
import random
import Queue
import threading
import argparse
 
print """
Malc0de.com samples downloader v3.3
               )\._.,--....,'``.       
  .b--.        /;   _.. \   _\  (`._ ,. 
 `=,-,-'~~~   `----(,_..'--(,_..'`-.;.'
http://virii.tk    http://twitter.com/ViRiiTk
"""
 
parser = argparse.ArgumentParser(description="Malc0de.com samples downloader v3.3")
 
parser.add_argument("nr_samples", type=int,
                    help= "Number of samples you want to download")
 
parser.add_argument("-t", "--nr_threads", metavar="threads", type=int, default=200, 
                    help= "Threads number (Default: 200)")
 
parser.add_argument("-a", "--agent", default="Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0",
                    help= "User Agent used to download samples")
 
parser.add_argument("-d", "--dldfolder", default = "C:\malware\\",
                    help= "Local folder to download samples (Default: C:\malware\\ )")
 
parser.add_argument("-i", "--info", default = "_files.txt",
                    help = "file to store info about downloaded samples (Default: _files.txt)")
 
parser.add_argument("-e", "--error", default = "_errors.txt",
                    help = "file to store errors (Default: _errors.txt)")
 
parser.add_argument("-u", "--malurl", default = "_mal_url.txt",
                    help = "file to store malware urls (Default: _mal_url.txt)")
args = parser.parse_args()
 
#create download folder if not exist
if not os.path.isdir(args.dldfolder):
    os.mkdir(args.dldfolder)
 
#limit the number of download samples
if args.nr_samples > 10000:
    print "You need very Very VERY many samples, 5k is enough for you"
    args.nr_samples = 4999   
 
if args.nr_threads >= args.nr_samples:
    args.nr_threads = args.nr_samples
     
print "Try to download latest %i samples" %(args.nr_samples)
print "Threads: %i" %(args.nr_threads) 
print "Malware samples will be downloaded to %s" %(args.dldfolder), "\n"
 
#construct user agents
dldagent = {'User-Agent' : args.agent}
useragent = { 'User-Agent' : 'Malc0de.com samples downloader v3.3 http://ViRii.Tk'}
 
#queue
q = Queue.Queue()
 
#generate random string
def get_random_word(a):
    """Return a string of `a` characters drawn at random from A-Z, a-z, 0-9.

    An `a` of zero or less yields the empty string.
    """
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
    return ''.join(random.choice(alphabet) for _ in range(a))
 
#md5 file
def md5Checksum(filePath):
    """Return the hexadecimal MD5 digest of the file at `filePath`.

    The file is read in 8192-byte chunks so a large sample is never held
    in memory as a whole.  Errors from opening or reading the file
    (IOError / OSError) propagate to the caller.
    """
    m = hashlib.md5()
    # 'with' releases the handle even when a read fails; the previous
    # version never closed it, leaking one descriptor per hashed file.
    with open(filePath, 'rb') as fh:
        while True:
            data = fh.read(8192)
            if not data:
                break
            m.update(data)
    return m.hexdigest()
 
# number of listing pages to visit: nr_samples / 50, rounded up
# (the code fetches one page per 50 requested samples)
counter = 0
pages = (args.nr_samples + 49) // 50
 
#find all malware address on curent page
def getmalware(pagina):
    """Put the sample addresses found in one listing page on the queue.

    pagina -- HTML text of a listing page.

    A row is a "<td>YYYY-MM-DD</td>" line followed by one more line; the
    address is the content of a tab-indented "<td>...</td>" cell in that
    row, with any "<br/>" removed.  Queuing stops (and the function
    returns) once the global `counter` reaches args.nr_samples.
    """
    global counter
    rows = re.findall("<td>[\d]{4}-[\d]{2}-[\d]{2}<\/td>\n.+\n", pagina)
    for row in rows:
        date_cell = re.search("<td>([\d]{4}-[\d]{2}-[\d]{2})<\/td>", row)
        url_cell = re.search("\t<td>(.+)<\/td>", row)
        if not (date_cell and url_cell):
            continue
        # enough samples queued already: ignore the rest of the page
        if counter >= args.nr_samples:
            return
        q.put(re.sub("<br\/>", "", url_cell.group(1)))
        counter += 1
# Browse the listing pages one by one and queue the sample addresses
# found on each of them (see getmalware).
print "Browsing pages:"
for i in range(1, pages + 1):
     
    adresa = "http://malc0de.com/database/?&page=" + str(i)
    print "Searching on:", adresa
    time.sleep(3) # pause between pages (seconds)
     
    try:
        # the listing itself is requested with the script's own user agent
        req = urllib2.Request(adresa, None, useragent)
        response = urllib2.urlopen(req)
        continut = response.read()
        getmalware(continut)    
    except Exception as e:
        # a page that fails is reported and skipped; browsing continues
        print e
        pass
     
def dld_mal(url_mal):
    #downloading malware samples
         
    #write address of this sample
    with open(args.dldfolder + args.malurl, "a") as handle:
        handle.write(url_mal + "\n")
        handle.close()
         
    #get file name    
    file_name = url_mal.split("/")[-1]
     
    #remove bad characters from file name
    if len(file_name)==0 or re.search("\?", file_name) or re.search("\&", file_name):
        file_name =  "No_name" + str(get_random_word(8))
         
    #try to download sample   
    try:
        #check if url start with "http://
        if url_mal[:7] != "http://":
            url_mal = "http://" + url_mal
         
        #construct url and set timeout
        url_construct = urllib2.Request(url_mal, None, dldagent)
        u = urllib2.urlopen(url_construct, timeout = 59) #timeout 1 min
         
        # every downloaded malware will have a uniq name: "Malware_sample" + "_" + 3 random characters
        f_name = args.dldfolder + str(file_name) +"_" + get_random_word(3) 
         
        #write to file
        f = open(f_name, 'wb')
        block_sz = 8192
        while True:
            buffer = u.read(block_sz)
            if not buffer:
                break
            f.write(buffer)
        f.close()
         
        #write info to _files.txt
        with open(args.dldfolder + args.info, "a") as handle:
            md5hash = md5Checksum(f_name)
            handle.write(str(md5Checksum(f_name)) +"\t" + str(file_name)+ "\t" + url_mal + "\n")
            handle.close
             
        print "\n" + "Am descarcat: " + file_name,
    except Exception as e:
        #adding error to _errors.txt
        with open(args.dldfolder + args.error, "a") as handle:
            handle.write(url_mal + "\t" + str(e) + "\n")
            handle.close()
        pass
 
print "Downloading:",
def worker():
    """Thread body: take addresses from the queue and download them.

    Never returns; the script runs it in daemon threads and waits for
    the work to finish with q.join().
    """
    while True:
        # Queue.get() blocks until an item is available.  The previous
        # "if not q.empty()" polling made every idle thread spin at full
        # CPU speed.
        item = q.get()
        try:
            dld_mal(item)
        except Exception as e:
            print(e)
        finally:
            # always acknowledge the item: a missing task_done() would
            # make q.join() wait forever
            q.task_done()
 
for i in range(args.nr_threads):
    t = threading.Thread(target=worker)
    t.daemon = True
    t.start()
 
q.join()
exit()
Vx Vault downloader:
Image
Warning: Downloaded files can harm your computer.
Code: Select all
import os
import urllib2
import hashlib
import argparse
import random
import re
import Queue
import threading
 
print """
       Vx Vault samples downloader    
      _.---.._             _.---...__
   .-'   /\   \          .'  /\     /
   `.   (  )   \        /   (  )   /
     `.  \/   .'\      /`.   \/  .'
       ``---''   )    (   ``---''
               .';.--.;`.
             .' /_...._\ `.
           .'   `.a  a.'   `.
          (        \/        )
           `.___..-'`-..___.'
              \   v1.3   /
               `-.____.-'
http://virii.tk    http://twitter.com/ViRiiTk
"""
 
parser = argparse.ArgumentParser(description="Vx Vault samples downloader v1.3")
 
parser.add_argument("nr_samples", type=int,
                    help= "How many samples you want to download")
 
parser.add_argument("-t", "--nr_threads", metavar="threads", type=int, default=200, 
                    help= "Threads number (Default: 200)")
parser.add_argument("-a", "--agent", default="Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0",
                    help= "User Agent used to download samples")
parser.add_argument("-d", "--dldfolder", default = "C:\malware\\",
                    help= "Local folder to download samples (Default: C:\malware\\ )")
parser.add_argument("-i", "--info", default = "_files.txt",
                    help = "file to store info about downloaded samples (Default: _files.txt)")
parser.add_argument("-e", "--error", default = "_errors.txt",
                    help = "file to store errors (Default: _errors.txt)")
parser.add_argument("-u", "--malurl", default = "_mal_url.txt",
                    help = "file to store malware urls (Default: _mal_url.txt)")
args = parser.parse_args()
 
#limit the number of download samples
if args.nr_samples > 10000:
    print "You need very Very VERY many samples, 5k is enough for you"
    args.nr_samples = 4999
 
#create download folder if not exist
if not os.path.isdir(args.dldfolder):
    os.mkdir(args.dldfolder)
     
print "Malware samples will be downloaded to %s" %(args.dldfolder)
print "Try to download latest %i samples" %(args.nr_samples)
print "Threads: %i" %(args.nr_threads), "\n"
 
#construct user agents
dldagent = {'User-Agent' : args.agent}
useragent = {'User-Agent' : "Samples downloader v1.3 http://ViRii.Tk"}
 
#generate random string
def get_random_word(a):
    """Return a string of `a` letters drawn at random from A-Z and a-z.

    An `a` of zero or less yields the empty string.
    """
    letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    return ''.join(random.choice(letters) for _ in range(a))
 
#md5 file
def md5Checksum(filePath):
    """Return the hexadecimal MD5 digest of the file at `filePath`.

    The file is read in 8192-byte chunks so a large sample is never held
    in memory as a whole.  Errors from opening or reading the file
    (IOError / OSError) propagate to the caller.
    """
    m = hashlib.md5()
    # 'with' releases the handle even when a read fails; the previous
    # version never closed it, leaking one descriptor per hashed file.
    with open(filePath, 'rb') as fh:
        while True:
            data = fh.read(8192)
            if not data:
                break
            m.update(data)
    return m.hexdigest()
 
q = Queue.Queue()
 
adresa = "http://vxvault.siri-urz.net/ViriList.php?s=0&m=" + str(args.nr_samples)
 
try:
    req = urllib2.Request(adresa, None, useragent)
    continut_pagina = urllib2.urlopen(req).read()
    #print continut_pagina
except Exception as e:
    exit(e)
 
# Find all malware addresses: scan the listing piece by piece (split on
# "\r") and queue the text of every link that points to ViriFiche.php.
pagina = continut_pagina.split("\r")
for i in pagina:
    match = re.search("href='ViriFiche\.php\?ID=[\d]+'>(.+)</a></TD>", i)
    if match:
        temp_mal_address = match.group(1)
        # Link texts containing "<digits>-<digits>" are skipped.
        # NOTE(review): presumably these are the date cells, which use
        # the same link pattern -- confirm against the live page.
        if not re.search("[\d]{1,2}-[\d]{1,2}", temp_mal_address):
             
            # add malware address to the download queue
            q.put(temp_mal_address)
             
             
#downloading malware samples
def dld_mal(url_mal):
     
    #write in "_mal_url.txt" address of this sample
    with open(args.dldfolder + args.malurl, "a") as handle:
        handle.write(url_mal + "\n")
        handle.close()
         
    #get file name    
    file_name = url_mal.split("/")[-1]
     
    #remove bad characters from file name
    if len(file_name)==0 or re.search("\?", file_name) or re.search("\&", file_name):
        file_name =  "No_name" + str(get_random_word(8))
         
    #try to download sample   
    try:
        #check if url start with "http://
        if url_mal[:7] != "http://":
            url_mal = "http://" + url_mal
         
        #construct url and set timeout
        url_construct = urllib2.Request(url_mal, None, dldagent)
        u = urllib2.urlopen(url_construct, timeout = 59) #timeout 1 min
         
        # every downloaded malware will have a uniq name: "Malware_sample" + "_" + 3 random characters
        f_name = args.dldfolder + str(file_name) +"_" + get_random_word(3) 
         
        #write to file
        f = open(f_name, 'wb')
        block_sz = 8192
        while True:
            buffer = u.read(block_sz)
            if not buffer:
                break
            f.write(buffer)
        f.close()
         
        #write info to _files.txt
        with open(args.dldfolder + args.info, "a") as handle:
            md5hash = md5Checksum(f_name)
            handle.write(str(md5Checksum(f_name)) +"\t" + str(file_name)+ "\t" + url_mal + "\n")
            handle.close
             
        print "\n" + "Am descarcat: " + file_name,
         
    except Exception as e:
        #adding error to _errors.txt
        with open(args.dldfolder + args.error, "a") as handle:
            handle.write(url_mal + "\t" + str(e) + "\n")
            handle.close()
        pass
 
#creating download threads
print "Downloading:",
 
def worker():
    """Thread body: take addresses from the queue and download them.

    Never returns; the script runs it in daemon threads and waits for
    the work to finish with q.join().
    """
    while True:
        # Queue.get() blocks until an item is available.  The previous
        # "if not q.empty()" polling made every idle thread spin at full
        # CPU speed.
        item = q.get()
        try:
            dld_mal(item)
        except Exception as e:
            print(e)
        finally:
            # always acknowledge the item: a missing task_done() would
            # make q.join() wait forever
            q.task_done()
 
for i in range(args.nr_threads):
    t = threading.Thread(target=worker)
    t.daemon = True
    t.start()
 
q.join()
exit()
Source:
http://virii.tk/malc0de-com-samples-downloader-v3-3/
http://virii.tk/vx-vault-samples-downloader-v1-3/
 #24314  by ViRii
 Tue Nov 11, 2014 5:18 pm
Malware sample downloader IV
Image
Code: Select all
#!/usr/bin/env python
 
##### License:
# - no warranty express or implied
# - free to use if you don't use-it to gain money
#
##### Warning:
# - downloaded files may harm your computer.
#
##### Config:
# If want to use proxy option:
#   - create a file named "banned_country.txt" and put there banned country name, one per line 
#   - create a file named "proxy.txt" and put there your proxyes, ip:port, one per line 
#
##### Usage examples:
# - download 100 samples:
# python this_scrypt.py 100
# - download 100 samples, using 55 threads:
# python this_scrypt.py 100 -t 55
# - download 100 samples using proxy
# python this_scrypt.py 100 -p proxy.txt
# 
 
import re
import urllib2
import hashlib
import os
import random
import Queue
import threading
import argparse
import time
 
print """
Malc0de.com Malware sample downloader IV 
               )\._.,--....,'``.       
  .b--.        /;   _.. \   _\  (`._ ,. 
 `=,-,-'~~~   `----(,_..'--(,_..'`-.;.'
http://virii.tk    http://twitter.com/ViRiiTk
"""
 
parser = argparse.ArgumentParser(description="Malc0de.com Malware sample downloader IV")
 
parser.add_argument("nr_samples", type=int,
                    help= "Number of samples you want to download")
 
parser.add_argument("-t", "--nr_threads", metavar="threads", type=int, default=200, 
                    help= "Threads number (Default: 200)")
 
parser.add_argument("-a", "--agent", default="Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
                    help= "User Agent used to download samples")
 
parser.add_argument("-d", "--dldfolder", default = "C:\malware\\",
                    help= "Local folder to download samples (Default: C:\malware\\ )")
 
parser.add_argument("-i", "--info", default = "_files.txt",
                    help = "file to store info about downloaded samples (Default: _files.txt)")
 
parser.add_argument("-e", "--error", default = "_errors.txt",
                    help = "file to store errors (Default: _errors.txt)")
 
parser.add_argument("-u", "--malurl", default = "_mal_url.txt",
                    help = "file to store malware urls (Default: _mal_url.txt)")
 
parser.add_argument("-p", "--proxy", 
                    help = """use proxy to get malware urls (proxy.txt)
                    Ex:
                        127.0.0.1:80
                        127.0.0.2:80
                        ...""")
args = parser.parse_args()
 
# user agents
dldagent  = {'User-Agent' : args.agent}
useragent = {'User-Agent' : 'Malc0de.com Malware sample downloader IV, more info on: http://ViRii.Tk'}
 
# create download folder if not exist
if not os.path.isdir(args.dldfolder):
    os.mkdir(args.dldfolder)
 
# remove sample nr errors
if args.nr_samples < 0:
    print "You want to download %i ?? I can't do that" %(args.nr_samples)
    exit()
 
# limit the number of download samples
if args.nr_samples > 10000:
    print "You need very Very VERY many samples, 5k is enough for you"
    args.nr_samples = 4999   
 
# remove useless threads
if args.nr_threads >= args.nr_samples:
    args.nr_threads = args.nr_samples
     
print "Try to download latest %i samples" %(args.nr_samples)
print "Threads: %i" %(args.nr_threads) 
print "Malware samples will be downloaded to %s" %(args.dldfolder), "\n"
 
# remove proxy from banned country
banned = []    
proxylist =[]
 
# exit if proxy option is selected and file not found
if args.proxy and not os.path.isfile(args.proxy):
    exit("Option proxy: %s not found" % (args.proxy))
     
if args.proxy and os.path.isfile(args.proxy):
     
    # load banned country list
    with open("banned_country.txt", "r") as handle:
        for country in handle.read().split("\n"):
            banned.append(country.strip())
                 
    # get proxy from proxy.txt        
    listaproxytemp = open(args.proxy, "r").read()
    listaproxytemp = re.findall("[\d]{1,3}\.[\d]{1,3}\.[\d]{1,3}.[\d]{1,3}:[\d]{2,5}", listaproxytemp)
    listaproxytemp = list(set(listaproxytemp))
        
    # test proxy 
    print "Testing proxy: %d" %(len(listaproxytemp))
    url = 'http://www.geoips.com/en/geolocation'
    for p in listaproxytemp:
        try:
            proxy = urllib2.ProxyHandler({'http': p})
            opener = urllib2.build_opener(proxy)
            tester = opener.open(url ,timeout = 2)
               
            country = re.search("\<strong\>Country:\<\/strong\>([a-z A-Z ]{1,30})",tester.read())
            if country:
                if str(country.group(1))[1:] not in banned:
                    print str(p) + "\t" + str(country.group(1))[1:]
                    proxylist.append(p)
        except :
            pass
        print "Alive proxy: %d" %(len(proxylist))
    with open ("good_proxy.txt" , "a") as good_p:
        good_p.write("--->" + time.strftime("%c") + "<---\n")
        for w_p in proxylist:
            good_p.write(w_p + "\n")
 
# exit if no working proxy was found
if args.proxy and (len(proxylist) == 0):
    exit("Working proxy: None")
     
# queue
q = Queue.Queue()
 
# generate random string
def get_random_word(a):
    """Return a string of `a` characters drawn at random from A-Z, a-z, 0-9.

    An `a` of zero or less yields the empty string.
    """
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
    return ''.join(random.choice(alphabet) for _ in range(a))
 
# md5 file
def md5Checksum(filePath):
    """Return the hexadecimal MD5 digest of the file at `filePath`.

    The file is read in 8192-byte chunks so a large sample is never held
    in memory as a whole.  Errors from opening or reading the file
    (IOError / OSError) propagate to the caller.
    """
    m = hashlib.md5()
    # 'with' releases the handle even when a read fails; the previous
    # version never closed it, leaking one descriptor per hashed file.
    with open(filePath, 'rb') as fh:
        while True:
            data = fh.read(8192)
            if not data:
                break
            m.update(data)
    return m.hexdigest()
 
# number of listing pages to visit: nr_samples / 50, rounded up
# (the code fetches one page per 50 requested samples)
counter = 0
pages = (args.nr_samples + 49) // 50
 
# find all malware address on curent page
def getmalware(pagina):
    """Put the sample addresses found in one listing page on the queue.

    pagina -- HTML text of a listing page.

    A row is a "<td>YYYY-MM-DD</td>" line followed by one more line; the
    address is the content of a tab-indented "<td>...</td>" cell in that
    row, with any "<br/>" removed.  Queuing stops (and the function
    returns) once the global `counter` reaches args.nr_samples.
    """
    global counter
    rows = re.findall("<td>[\d]{4}-[\d]{2}-[\d]{2}<\/td>\n.+\n", pagina)
    for row in rows:
        date_cell = re.search("<td>([\d]{4}-[\d]{2}-[\d]{2})<\/td>", row)
        url_cell = re.search("\t<td>(.+)<\/td>", row)
        if not (date_cell and url_cell):
            continue
        # enough samples queued already: ignore the rest of the page
        if counter >= args.nr_samples:
            return
        q.put(re.sub("<br\/>", "", url_cell.group(1)))
        counter += 1
                     
# Browse the listing pages one by one and queue the sample addresses
# found on each of them (see getmalware).
print "Browsing pages:"
for i in range(1, pages + 1):
    adresa = "http://malc0de.com/database/?&page=" + str(i)
    print "Searching on:", adresa,
 
    try:
        if len(proxylist) > 0:
             
            # choose a random working proxy for this page; install_opener
            # makes it the opener used by every later urllib2.urlopen call
            p = random.choice(proxylist)
            print p
            proxy = urllib2.ProxyHandler({'http': p})        
            opener = urllib2.build_opener(proxy)
            urllib2.install_opener(opener)
         
        # the listing itself is requested with the script's own user agent
        req = urllib2.Request(adresa, None, useragent)
         
        # access malc0de (give up on a page after 30 seconds)
        continut = urllib2.urlopen(req, timeout =  30).read()
         
        # extract sample urls
        getmalware(continut)
         
    except Exception as e:
        # a page that fails is reported and skipped; browsing continues
        print str(e) + "\t maybe your ip is banned or proxy(if use) not work"
        pass
     
# download malware samples
def dld_mal(url_mal):
     
    # write address of this sample
    with open(args.dldfolder + args.malurl, "a") as handle:
        handle.write(url_mal + "\n")
        handle.close()
    url_mal = re.sub(" ", "%20", url_mal)
     
    #get file name    
    file_name = url_mal.split("/")[-1]
     
    # remove bad characters from file name
    if len(file_name)==0 or re.search("\?", file_name) or re.search("\&", file_name):
        file_name =  "No_name" + str(get_random_word(8))
     
    # try to download sample   
    try:
        # check if url start with "http://
        if url_mal[:7] != "http://":
            url_mal = "http://" + url_mal
         
        if len(proxylist) >0 :
             
            # choose proxy
            p = random.choice(proxylist)
            proxy = urllib2.ProxyHandler({'http': p})        
            opener = urllib2.build_opener(proxy)
            urllib2.install_opener(opener)
         
        # set download useragent
        req = urllib2.Request(url_mal, None, dldagent)
        u = urllib2.urlopen(req, timeout =  137) #timeout
         
        # make every filename uniq: "Malware_original_filename" + "_" + 3 random characters
        f_name = args.dldfolder + str(file_name) +"_" + get_random_word(3) 
         
        # write to file
        f = open(f_name, 'wb')
        block_sz = 8192
        while True:
            buffer = u.read(block_sz)
            if not buffer:
                break
            f.write(buffer)
        f.close()
         
        # write info to _files.txt
        with open(args.dldfolder + args.info, "a") as handle:
            md5hash = md5Checksum(f_name)
            handle.write(str(md5Checksum(f_name)) +"\t" + str(file_name)+ "\t" + url_mal + "\n")
            handle.close
         
        print "\n" + "Am descarcat: " + file_name,
         
    except Exception as e:
        # adding error to _errors.txt
        with open(args.dldfolder + args.error, "a") as handle:
            handle.write(url_mal + "\t" + str(e) + "\n")
            handle.close()
        pass
 
# get malware address from queue and download files
print "Downloading:",
def worker():
    """Thread body: take addresses from the queue and download them.

    Never returns; the script runs it in daemon threads and waits for
    the work to finish with q.join().
    """
    while True:
        # Queue.get() blocks until an item is available.  The previous
        # "if not q.empty()" polling made every idle thread spin at full
        # CPU speed.
        item = q.get()
        try:
            dld_mal(item)
        except Exception as e:
            print(e)
        finally:
            # always acknowledge the item: when dld_mal raised, the old
            # code skipped task_done() and q.join() waited forever
            q.task_done()
 
# threads number limit            
for i in range(args.nr_threads):
    t = threading.Thread(target=worker)
    t.daemon = True
    t.start()
 
q.join()
exit()
If you have a problem, leave feedback here: http://virii.tk/malware-sample-downloader-iv/ or here: https://twitter.com/ViRiiTk
 #24380  by ViRii
 Tue Nov 18, 2014 12:25 pm
The script below will download all the samples, or as many as you want, listed at http://malwareurls.joxeankoret.com/normal.txt

Image
python 2.7
Code: Select all
#!/usr/bin/env python

##### Script License:
# - no warranty express or implied
# - free to use if you don't use-it to gain money
#
##### Warning:
# - downloaded files may harm your computer.
#
##### Usage examples:
# - download 100 samples:
# python this_scrypt.py 100
# - download 100 samples, using 50 threads:
# python this_scrypt.py 100 -t 50
# - download all samples listed on http://malwareurls.joxeankoret.com/normal.txt
# python this_script.py 0

import re
import urllib2
import hashlib
import os
import random
import Queue
import threading
import argparse

print r"""
JoxeanKoret.com Malware sample downloader 
        __.,,------.._
     ,'"   _      _   "`.
    /.__, ._  -=- _"`    Y
   (.____.-.`      ""`   j
    VvvvvvV`.Y,.    _.,-'       ,     ,     ,
        Y    ||,   '"\         ,/    ,/    ./
        |   ,'  ,     `-..,'_,'/___,'/   ,'/   ,
   ..  ,;,,',-'"\,'  ,  .     '     ' ""' '--,/    .. ..
 ,'. `.`---'     `, /  , Y -=-    ,'   ,   ,. .`-..||_|| ..
ff\\`. `._        /f ,'j j , ,' ,   , f ,  \=\ Y   || ||`||_..
l` \` `.`."`-..,-' j  /./ /, , / , / /l \   \=\l   || `' || ||...
 `  `   `-._ `-.,-/ ,' /`"/-/-/-/-"'''"`.`.  `'.\--`'--..`'_`' || ,
            "`-_,',  ,'  f    ,   /      `._    ``._     ,  `-.`'//         ,
          ,-"'' _.,-'    l_,-'_,,'          "`-._ . "`. /|     `.'\ ,       |
        ,',.,-'"          \=) ,`-.         ,    `-'._`.V |       \ // .. . /j
        |f\\               `._ )-."`.     /|         `.| |        `.`-||-\\/
        l` \`                 "`._   "`--' j          j' j          `-`---'
         `  `                     "`_,-','/       ,-'"  /
                                 ,'",__,-'       /,, ,-'
                                 Vvv'            VVv'
http://virii.tk                                    http://twitter.com/ViRiiTk
"""
# credits for alien: http://www.chris.com/ascii/index.php?art=creatures/aliens

parser = argparse.ArgumentParser(description="JoxeanKoret.com Malware sample downloader ")

parser.add_argument("nr_samples", type=int, default = 0,
                    help= "Number of samples you want to download, 0 = all")

parser.add_argument("-t", "--nr_threads", metavar="threads", type=int, default=200, 
                    help= "Threads number (Default: 200)")

parser.add_argument("-a", "--agent", default="Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
                    help= "User Agent used to download samples")

parser.add_argument("-d", "--dldfolder", default = "C:\malware\\",
                    help= "Local folder to download samples (Default: C:\malware\\ )")

parser.add_argument("-i", "--info", default = "_files.txt",
                    help = "file to store info about downloaded samples (Default: _files.txt)")

parser.add_argument("-e", "--error", default = "_errors.txt",
                    help = "file to store errors (Default: _errors.txt)")

parser.add_argument("-u", "--malurl", default = "_mal_url.txt",
                    help = "file to store malware urls (Default: _mal_url.txt)")

args = parser.parse_args()

useragent = {'User-Agent' : 'JoxeanKoret.com samples downloader, more info on: http://ViRii.Tk'}
dldagent  = {'User-Agent' : args.agent}

# create download folder if not exist
if not os.path.isdir(args.dldfolder):
    os.mkdir(args.dldfolder)

# remove sample nr errors
if args.nr_samples < 0:
    print "You want to download %i ?? I can't do that" %(args.nr_samples)
    exit()
    
# remove useless threads
if args.nr_threads >= args.nr_samples and args.nr_samples !=0 :
    args.nr_threads = args.nr_samples    
    
print "Try to download %s samples" %("all" if args.nr_samples == 0 else str(args.nr_samples))
print "Threads: %i" %(args.nr_threads) 
print "Malware samples will be downloaded to %s" %(args.dldfolder), "\n"

# queue
q = Queue.Queue()

# generate random string
def get_random_word(a):
    """Return a string of `a` characters drawn at random from A-Z, a-z, 0-9.

    An `a` of zero or less yields the empty string.
    """
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
    return ''.join(random.choice(alphabet) for _ in range(a))

# md5 file
def md5Checksum(filePath):
    """Return the hexadecimal MD5 digest of the file at `filePath`.

    The file is read in 8192-byte chunks so a large sample is never held
    in memory as a whole.  Errors from opening or reading the file
    (IOError / OSError) propagate to the caller.
    """
    m = hashlib.md5()
    # 'with' releases the handle even when a read fails; the previous
    # version never closed it, leaking one descriptor per hashed file.
    with open(filePath, 'rb') as fh:
        while True:
            data = fh.read(8192)
            if not data:
                break
            m.update(data)
    return m.hexdigest()


# get samples address
adresa = "http://malwareurls.joxeankoret.com/normal.txt"      
# set useragent

req = urllib2.Request(adresa, None, useragent)

# access  
continut = urllib2.urlopen(req, timeout =  60).read()
    
continut = continut.split("\n")

if args.nr_samples != 0:
    for malware in continut[18: 18 + args.nr_samples ]:
        q.put(malware)
else:
    for malware in continut[18:]:
        q.put(malware)

#print malware colected date
print continut[1]
#print total number of samples
print continut[2]

# download malware samples
def dld_mal(url_mal):
    
    # write address of this sample on _mal_url.txt
    with open(args.dldfolder + args.malurl, "a") as handle:
        handle.write(url_mal + "\n")
        handle.close()
    url_mal = re.sub(" ", "%20", url_mal)
    
    #get file name    
    file_name = url_mal.split("/")[-1]
    
    # remove bad characters from file name
    if len(file_name)==0 or re.search("\?", file_name) or re.search("\&", file_name) or len(file_name) >32 :
        file_name =  "No_name" + str(get_random_word(8))
        
    # try to download sample   
    try:

        # check if url start with "http://
        if url_mal[:7] != "http://":
            url_mal = "http://" + url_mal

        #construct url and set timeout
        url_construct = urllib2.Request(url_mal, None, dldagent)
        u = urllib2.urlopen(url_construct, timeout = 59) #timeout 1 min
        
        # make every filename uniq: "Malware_original_filename" + "_" + 3 random characters
        f_name = str(args.dldfolder) + str(file_name) +"_" + str(get_random_word(3)) 

        # write to file
        f = open(f_name, 'wb')
        block_sz = 8192
        while True:
            buffer = u.read(block_sz)
            if not buffer:
                break
            f.write(buffer)
        f.close()
        
        # write info to _files.txt
        with open(args.dldfolder + args.info, "a") as handle:
            md5hash = md5Checksum(f_name)
            handle.write(str(md5Checksum(f_name)) +"\t" + str(file_name)+ "\t" + url_mal + "\n")
            handle.close
        
        print "\n" + "Am descarcat: " + file_name,
        
    except Exception as e:
        # adding error to _errors.txt
        with open(args.dldfolder + args.error, "a") as handle:
            handle.write(url_mal + "\t" + str(e) + "\n")
            handle.close()
        pass

# get malware address from queue and download files
print "Downloading:\n",
def worker():
    """Thread body: take addresses from the queue and download them.

    Never returns; the script runs it in daemon threads and waits for
    the work to finish with q.join().
    """
    while True:
        # Queue.get() blocks until an item is available.  The previous
        # "if not q.empty()" polling made every idle thread spin at full
        # CPU speed.
        item = q.get()
        try:
            dld_mal(item)
        except Exception as e:
            print(e)
        finally:
            # always acknowledge the item: when dld_mal raised, the old
            # code skipped task_done() and q.join() waited forever
            q.task_done()

# threads number limit            
for i in range(args.nr_threads):
    t = threading.Thread(target=worker)
    t.daemon = True
    t.start()

q.join()
exit()

To report a problem or leave feedback, go to http://virii.tk/joxeankoret-com-bulk-malware-samples-download/ or contact me directly on Twitter: https://twitter.com/ViRiiTk
 #25299  by ViRii
 Fri Feb 20, 2015 11:34 am
http://minotauranalysis.com/ samples downloader
Minotaur site "WARNING: This site is in maintenance mode. The site may go down, be unstable, or data may be lost. Use with caution and at your own risk."

Image
Code: Select all
#!/usr/bin/env python

##### Script License:
# - no warranty express or implied
# - free to use if you don't use-it to gain money
#
##### Warning:
# - downloaded files may harm your computer.
#
##### Usage examples:
# python script.py -h

import re
import urllib2
import hashlib
import os
import random
import argparse

# Startup banner, printed unconditionally when the script runs (before the
# arguments are parsed).  The literal is a raw string so the backslashes in
# the ASCII art are emitted as-is instead of being read as escape sequences.
print r"""
Minotauranalysis.com Malware sample downloader                  _
                                                              _( (~\
       _ _                        /                          ( \> > \
   -/~/ / ~\                     :;                \       _  > /(~\/
  || | | /\ ;\                   |l      _____     |;     ( \/ /   /
  _\\)\)\)/ ;;;                  `8o __-~     ~\   d|      \   \  //
 ///(())(__/~;;\                  "88p;.  -. _\_;.oP        (_._/ /
(((__   __ \\   \                  `>,% (\  (\./)8"         ;:'  i
)))--`.'-- (( ;,8 \               ,;%%%:  ./V^^^V'          ;.   ;.
((\   |   /)) .,88  `: ..,,;;;;,-::::::'_::\   ||\         ;[8:   ;
 )|  ~-~  |(|(888; ..``'::::8888oooooo.  :\`^^^/,,~--._    |88::| |
  \ -===- /|  \8;; ``:.      oo.8888888888:`((( o.ooo8888Oo;:;:'  |
 |_~-___-~_|   `-\.   `        `o`88888888b` )) 888b88888P""'     ;
  ;~~~~;~~         "`--_`.       b`888888888;(.,"888b888"  ..::;-'
   ;      ;              ~"-....  b`8888888:::::.`8888. .:;;;''
      ;    ;                 `:::. `:::OOO:::::::.`OO' ;;;''
 :       ;                     `.      "``::::::''    .'
    ;                           `.   \_              /
  ;       ;                       +:   ~~--  `:'  -';
                                   `:         : .::/
      ;                            ;;+_  :::. :..;;;
http://virii.tk                            http://twitter.com/ViRiiTk
"""
#Credits for ascii art: http://www.retrojunkie.com/asciiart/myth/minotaur.htm
      
# Command-line interface.  Every option has a default, so the script can be
# started without any arguments.
parser = argparse.ArgumentParser(description="Minotauranalysis.com Malware sample downloader ")

parser.add_argument("-a", "--agent", default="Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
                    help= "User Agent used to download samples")

# Backslashes are escaped explicitly; the runtime values are the same as
# before, without relying on "\m" being an unrecognised escape sequence.
parser.add_argument("-d", "--dldfolder", default = "C:\\malware\\",
                    help= "Local folder to download samples (Default: C:\\malware\\ )")

parser.add_argument("-i", "--info", default = "_files.txt",
                    help = "file to store info about downloaded samples (Default: _files.txt)")

parser.add_argument("-e", "--error", default = "_errors.txt",
                    help = "file to store errors (Default: _errors.txt)")

parser.add_argument("-u", "--malurl", default = "_mal_url.txt",
                    help = "file to store malware urls (Default: _mal_url.txt)")

args = parser.parse_args()

# The rest of the script builds paths as args.dldfolder + <file name>, so the
# folder must end with a path separator.  Without this, "-d /tmp/mal" would
# write "/tmp/mal_files.txt" next to the folder instead of inside it.
args.dldfolder = os.path.join(args.dldfolder, "")

# useragent is sent when reading the sample list from the site,
# dldagent (user-configurable) when fetching the samples themselves
useragent = {'User-Agent' : 'Minotaur samples downloader, more info on: http://ViRii.Tk'}
dldagent  = {'User-Agent' : args.agent}

# create download folder if not exist (including missing parent folders)
if not os.path.isdir(args.dldfolder):
    os.makedirs(args.dldfolder)

# generate random string
def get_random_word(a):
    """Return a string of `a` characters picked at random from A-Z, a-z, 0-9.

    An `a` of zero (or less) gives the empty string.
    """
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
    # one random.choice() call per character, left to right
    picks = [random.choice(alphabet) for _ in range(a)]
    return ''.join(picks)

# md5 file
def md5Checksum(filePath):
    """Return the hexadecimal MD5 digest of the file at `filePath`.

    The file is opened in binary mode and hashed in 8192-byte chunks, so it
    is never held in memory as a whole.  Errors raised by open() or read()
    propagate to the caller.
    """
    m = hashlib.md5()
    # the with-block closes the handle even when a read fails; the previous
    # version never closed it, leaking one descriptor per hashed sample
    with open(filePath, 'rb') as fh:
        # iter() with a sentinel keeps reading until read() returns b'' (EOF)
        for data in iter(lambda: fh.read(8192), b''):
            m.update(data)
    return m.hexdigest()

# download malware samples
def dld_mal(url_mal):
    
    # write address of this sample on _mal_url.txt
    with open(args.dldfolder + args.malurl, "a") as handle:
        handle.write(url_mal + "\n")
        handle.close()
    url_mal = re.sub(" ", "%20", url_mal)
    
    #get file name    
    file_name = url_mal.split("/")[-1]
    
    # remove bad characters from file name
    if len(file_name)==0 or re.search("\?", file_name) or re.search("\&", file_name) or len(file_name) >32 :
        file_name =  "No_name" + str(get_random_word(8))
        
    # try to download sample   
    try:

        # check if url start with "http://
        if url_mal[:7] != "http://":
            url_mal = "http://" + url_mal

        #construct url and set timeout
        url_construct = urllib2.Request(url_mal, None, dldagent)
        u = urllib2.urlopen(url_construct, timeout = 59) #timeout 1 min
        
        # make every filename uniq: "Malware_original_filename" + "_" + 3 random characters
        f_name = str(args.dldfolder) + str(file_name) +"_" + str(get_random_word(3)) 

        # write to file
        f = open(f_name, 'wb')
        block_sz = 8192
        while True:
            buffer = u.read(block_sz)
            if not buffer:
                break
            f.write(buffer)
        f.close()
        
        # write info to _files.txt
        with open(args.dldfolder + args.info, "a") as handle:
            md5hash = md5Checksum(f_name)
            handle.write(str(md5Checksum(f_name)) +"\t" + str(file_name)+ "\t" + url_mal + "\n")
            handle.close
        
        print "\n" + "Am descarcat: " + file_name,
        
    except Exception as e:
        # adding error to _errors.txt
        with open(args.dldfolder + args.error, "a") as handle:
            handle.write(url_mal + "\t" + str(e) + "\n")
            handle.close()
        pass

# get samples address
adresa = "http://minotauranalysis.com/"      

# set useragent
req = urllib2.Request(adresa, None, useragent)

# access  
continut = urllib2.urlopen(req, timeout =  60).read()

#get list of malware urls
mal_list = re.findall("\<td style='word-break:break-all;\'\>(.+?)\<\/td\>\<td\>", continut)
print "Samples found: %i" %(len(mal_list))
print "Download to: %s" %(args.dldfolder)
for i in mal_list:
    try:
        dld_mal(i)
    except:
        pass
Feedback here: http://virii.tk/minotauranalysis-com-bu ... -download/ or here: https://twitter.com/ViRiiTk or just post here :mrgreen: