1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
| import csv import random import datetime import timeit import argparse
class Argparser:
    """Owns the command-line parser for the CSV generator script.

    The configured ``argparse.ArgumentParser`` is exposed as ``self.parser``;
    callers run ``self.parser.parse_args()`` themselves.
    """

    def __init__(self):
        """Create an empty parser and register every supported option."""
        self.parser = argparse.ArgumentParser()
        self.init_parser()

    def init_parser(self):
        """Register the script's options on ``self.parser``.

        No ``type=`` is passed, so each parsed value is either a string or
        ``None`` when the option is omitted.
        """
        # (short flag, long flag, help text), kept in registration order so
        # the generated --help output lists the options in the same sequence.
        option_table = (
            ("-i", "--integers",
             "how many integer value do you want to generate"),
            ("-ri", "--range-integer",
             "describe range of integer"),
            ("-vc", "--varchars",
             "how many varchar value do you want to generate"),
            ("-t", "--texts",
             "how many text value do you want to generate"),
            ("-dt", "--dates",
             "how many datetime value do you want to generate"),
            ("-d", "--doubles",
             "how many double value do you want to generate"),
            ("-r", "--rows",
             "describe number of rows"),
            ("-ta", "--textarrays",
             "how many text arrays..."),
            ("-o", "--output",
             "describe file name"),
        )
        for short_flag, long_flag, description in option_table:
            self.parser.add_argument(short_flag, long_flag, help=description)
class Generator:
    """Write a CSV file filled with randomly generated column values.

    Each row is laid out as: a 1-based row number, then the integer columns,
    varchar columns, text columns, text-array columns, datetime columns and
    finally the double columns.
    """

    def __init__(self, args):
        """Read column counts and the output path from parsed arguments.

        Args:
            args: Object with ``rows``, ``output``, ``integers``,
                ``varchars``, ``texts``, ``doubles``, ``dates`` and
                ``textarrays`` attributes. A falsy attribute (``None`` or an
                empty string) selects the built-in default.

        Raises:
            ValueError: If a supplied count cannot be converted by ``int()``.
        """
        def count(raw, default):
            # Falsy means "option not supplied"; anything else must parse.
            return int(raw) if raw else default

        self.rows = count(args.rows, 1000)
        self.filename = args.output if args.output else "out.csv"
        self.integers = count(args.integers, 5)
        self.varchars = count(args.varchars, 5)
        self.texts = count(args.texts, 5)
        self.doubles = count(args.doubles, 5)
        self.dates = count(args.dates, 1)
        self.textarrays = count(args.textarrays, 0)
        # NOTE(review): the --range-integer option is not read here; integer
        # values are always drawn from range(0, 10000) in generate().

        # Sentences used verbatim for text columns.
        self.text_pool = [
            'Amazon Elastic Compute Cloud (Amazon EC2)',
            'First you need to get set up to use Amazon EC2.',
            'You can provision Amazon EC2 resources such as instances and volumes.',
            'If you prefer to build applications using language-sp.',
        ]
        # Varchar columns pick single words from the first sentence.
        self.word_pool = self.text_pool[0].split()

    def generate(self):
        """Create ``self.filename`` and write ``self.rows`` random rows to it.

        The file is opened inside a ``with`` block so the handle is closed
        even when a write fails part-way through.
        """
        text_array_literal = '{"abc","def","ghi"}'
        with open(self.filename, 'w', encoding='utf-8', newline='') as out_file:
            writer = csv.writer(out_file)
            for row_number in range(1, self.rows + 1):
                # Random draws happen in the order ints, varchars, texts,
                # doubles, so a seeded run yields a reproducible sequence.
                ints = [random.randrange(0, 10000)
                        for _ in range(self.integers)]
                varchars = [random.choice(self.word_pool)
                            for _ in range(self.varchars)]
                texts = [random.choice(self.text_pool)
                         for _ in range(self.texts)]
                text_arrays = [text_array_literal] * self.textarrays
                dates = [
                    datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                    for _ in range(self.dates)
                ]
                doubles = [random.random() for _ in range(self.doubles)]
                writer.writerow(
                    [row_number] + ints + varchars + texts
                    + text_arrays + dates + doubles
                )
if __name__ == "__main__":
    # Script entry point: parse the command line, generate the CSV file and
    # report the wall-clock time spent (construction plus generation).
    cli = Argparser()
    args = cli.parser.parse_args()

    print("Generating file...")
    started_at = timeit.default_timer()
    generator = Generator(args)
    generator.generate()
    elapsed = timeit.default_timer() - started_at

    print(f"{generator.filename} is created. ")
    print(f"It took {elapsed} seconds to generate data.")
|