timeit

Програмиране с Python

Курс във Факултета по Математика и Информатика към СУ

Log observer

Краен срок
27.04.2016 23:59

Срокът за предаване на решения е отминал

Log observer

Всеки себеуважаващ се сайт "log-ва" разни неща. Например в колко часа, коя страница от сайта е била "поискана", от кое IP и колко време е отнело на сървъра да обработи заявката. Така по-лесно се откриват спамери, проблеми, забавяния по-сайта, областти, в които сайта може да се подобри и т.н. т.н.

Сайтовете на курсовете http://fmi.[ruby|py-bg|golang|clojure].[bg|net] не са изключение.

Ето ви една "малка" извадка от тези логове.

Напишете функциите:

def requests_per_day(log):
    #    return {
    #        'YYYY-MM-DD': N,
    #        'YYYY-MM-DD': N,
    #        .....
    #    }
    .....

Например:

{
    "2016-02-28": 2,
    "2016-04-21": 2,
    "2016-04-22": 1
}

Която връща речник оказващ на коя дата колко request-а са били направени.

Както и функцията:

def ips_set(log):
    # return {'XXX.YYY.ZZZ.TTT', .... }
    ....

Например:

{51.254.129.87, 212.25.142.226, 66.249.66.41}

която връща множеството от IP-тата достъпвали сайта.

Горните резултати, ще бъдат върнати от функциите при подаден стринг като този.

Решения

Теодор Тошков
  • Коректно
  • 2 успешни тест(а)
  • 0 неуспешни тест(а)
Теодор Тошков
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import re

date_pattern = r'\d{4}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[1-2]\d|3[0-1])'
ip_pattern = r'\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}' \
             r'(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\b'


def ips_set(log):
    return set(re.findall(ip_pattern, log))


def requests_per_day(log):
    all_dates = re.findall(date_pattern, log)
    result = {}
    for date in all_dates:
        if date in result:
            result[date] += 1
        else:
            result[date] = 1
    return result
..
----------------------------------------------------------------------
Ran 2 tests in 0.049s

OK
Илия Жечев
  • Некоректно
  • 0 успешни тест(а)
  • 2 неуспешни тест(а)
Илия Жечев
1
2
3
4
5
6
7
def requests_per_day(log):
    dates = re.findall(' at (\d{4}-\d{2}-\d{2}) ', log)
    return {date: dates.count(date) for date in dates}


def ips_set(log):
    return {ip[0] for ip in re.findall(' for ((\d{1,4}.){3}\d{1,4})', log)}
EE
======================================================================
ERROR: test_ips_set (test.TestLogObserver)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/data/rails/pyfmi-2016/releases/20160307095126/lib/language/python/runner.py", line 67, in thread
    raise result
NameError: name 're' is not defined

======================================================================
ERROR: test_requests_per_day (test.TestLogObserver)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/data/rails/pyfmi-2016/releases/20160307095126/lib/language/python/runner.py", line 67, in thread
    raise result
NameError: name 're' is not defined

----------------------------------------------------------------------
Ran 2 tests in 0.044s

FAILED (errors=2)
Тодор Димов
  • Коректно
  • 2 успешни тест(а)
  • 0 неуспешни тест(а)
Тодор Димов
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import re
from collections import defaultdict


def requests_per_day(log):
    regex_dates = r'\d{4,4}(-\d{2,2}){2,2}'
    matches = re.finditer(regex_dates, log)
    dates = defaultdict(int)
    for match in matches:
        start, end = match.span()
        dates[log[start:end]] += 1
    return dates


def ips_set(log):
    regex_IPs = r'\d{1,3}(\.\d{1,3}){3,3}'
    matches = re.finditer(regex_IPs, log)
    IPs = set()
    for match in matches:
        start, end = match.span()
        IPs.add(log[start:end])
    return IPs
..
----------------------------------------------------------------------
Ran 2 tests in 0.048s

OK
Георги Иванов
  • Коректно
  • 2 успешни тест(а)
  • 0 неуспешни тест(а)
Георги Иванов
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import re


def requests_per_day(log):
    matches = re.findall('[0-9]{4}-[0-9]{2}-[0-9]{2}', log)
    log_dict = {}
    for match in matches:
        if match in log_dict.keys():
            log_dict[match] += 1
        else:
            log_dict[match] = 1

    return log_dict


def ips_set(log):
    matches = re.findall('[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}', log)
    return set(matches)
..
----------------------------------------------------------------------
Ran 2 tests in 0.210s

OK
Данислав Киров
  • Коректно
  • 2 успешни тест(а)
  • 0 неуспешни тест(а)
Данислав Киров
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
import re


def requests_per_day(log):
    dates = re.findall('\d+-\d+-\d+', log)
    result = {}
    for date in dates:
        if date in result:
            result[date] += 1
        else:
            result[date] = 1
    return result


def ips_set(log):
    return set(re.findall('\d+\.\d+\.\d+\.\d+', log))
..
----------------------------------------------------------------------
Ran 2 tests in 0.047s

OK
Николай Желязков
  • Коректно
  • 2 успешни тест(а)
  • 0 неуспешни тест(а)
Николай Желязков
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import re
from collections import defaultdict


def requests_per_day(log):
    result = defaultdict(int)
    for line in log.splitlines():
        match = re.search('\d{4}-\d\d-\d\d', line)
        if match:
            result[match.group()] += 1
    return dict(result)


def ips_set(log):
    result = set()
    for line in log.splitlines():
        match = re.search('\d{1,3}\.\d{1,3}\.\d{1,3}.\d{1,3}', line)
        if match:
            result.add(match.group())
    return result
..
----------------------------------------------------------------------
Ran 2 tests in 0.049s

OK
Веселин Иванов
  • Коректно
  • 2 успешни тест(а)
  • 0 неуспешни тест(а)
Веселин Иванов
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import re


def requests_per_day(log):
    it = re.finditer(r'(GET|POST|HEAD).*'
                     r'(\d{4,4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|30|31))',
                     log)
    result = {}
    for match in it:
        date = match.group(2)
        if date in result:
            result[date] += 1
        else:
            result[date] = 1

    return result


def ips_set(log):
    it = re.finditer(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', log)
    return {match.group() for match in it}
..
----------------------------------------------------------------------
Ran 2 tests in 0.047s

OK
Светомир Стоименов
  • Некоректно
  • 1 успешни тест(а)
  • 1 неуспешни тест(а)
Светомир Стоименов
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import re
from collections import Counter


def ip_set(log):
    ip_pattern = r'\b(\d{1,3}\.){3}\d{1,3}\b'
    matches = re.finditer(ip_pattern, log)
    ip_set = set()
    for match in matches:
        start, end = match.span()
        ip_set.add(log[start:end])
    return ip_set


def requests_per_day(log):
    date_pattern = r'\b\d{1,4}-\d{1,2}-\d{1,2}\b'
    matches = re.finditer(date_pattern, log)
    requests_per_date = Counter()
    for match in matches:
        start, end = match.span()
        requests_per_date[log[start:end]] += 1
    return requests_per_date
E.
======================================================================
ERROR: test_ips_set (test.TestLogObserver)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/data/rails/pyfmi-2016/releases/20160307095126/lib/language/python/runner.py", line 67, in thread
    raise result
AttributeError: module 'solution' has no attribute 'ips_set'

----------------------------------------------------------------------
Ran 2 tests in 0.046s

FAILED (errors=1)
Николай Лазаров
  • Коректно
  • 2 успешни тест(а)
  • 0 неуспешни тест(а)
Николай Лазаров
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import re
from collections import defaultdict


def requests_per_day(log):
    requests = defaultdict(int)
    pattern = r'^Started .*? ([0-9]{4}-[0-9]{2}-[0-9]{2})'
    for date in re.findall(pattern, log, re.MULTILINE):
        requests[date] += 1
    return requests


def is_ip(ip):
    for part in ip.split('.'):
        if 0 > int(part) or int(part) > 255:
            return False

    return True


def ips_set(log):
    pattern = r'^Started .*? ([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})'
    return {ip for ip in re.findall(pattern, log, re.MULTILINE) if is_ip(ip)}
..
----------------------------------------------------------------------
Ran 2 tests in 0.047s

OK
Димитър Керезов
  • Коректно
  • 2 успешни тест(а)
  • 0 неуспешни тест(а)
Димитър Керезов
1
2
3
4
5
6
7
8
9
10
import re
from collections import Counter


def requests_per_day(log):
    return dict(Counter(re.findall('(\d{4}(?:-\d{2}){2})', log)))


def ips_set(log):
    return set(re.findall('for ((?:[0-9]{1,3}\.){3}[0-9]{1,3})', log))
..
----------------------------------------------------------------------
Ran 2 tests in 0.047s

OK
Илиан Стаменов
  • Коректно
  • 2 успешни тест(а)
  • 0 неуспешни тест(а)
Илиан Стаменов
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import re


def requests_per_day(log):
    request = re.compile('Started GET ".*?" for .*? at (.*?) .* .*')
    res = dict()
    for line in log.split("\n"):
        result = request.match(line)
        if result:
            res[result.group(1)] = res.get(result.group(1), 0) + 1
    return res


def ips_set(log):
    request = re.compile('Started GET ".*?" for (.*?) at .*? .* .*')
    ips = set()
    for line in log.split("\n"):
        result = request.match(line)
        if result:
            ips.add(result.group(1))
    return ips
..
----------------------------------------------------------------------
Ran 2 tests in 0.046s

OK
Христо Ралев
  • Коректно
  • 2 успешни тест(а)
  • 0 неуспешни тест(а)
Христо Ралев
1
2
3
4
5
6
7
8
9
10
11
12
import re

def requests_per_day(log):
        result = {}
        dates = re.findall(r'\d+-\d+-\d+', log)
        for date in dates:
                result[date]=len(re.findall(r''+date, log))
        return result
        

def ips_set(log):
        return set(re.findall(r'\d+\.\d+\.\d+\.\d+', log))
..
----------------------------------------------------------------------
Ran 2 tests in 0.049s

OK
Николай Бабулков
  • Коректно
  • 2 успешни тест(а)
  • 0 неуспешни тест(а)
Николай Бабулков
1
2
3
4
5
6
7
8
9
10
11
12
from collections import Counter
import re


def requests_per_day(log):
    date = r'\d{4}-[01]\d-[0123]\d'
    return dict(Counter(re.findall(date, log)))


def ips_set(log):
    ip = r'\d{1,3}(?:\.\d{1,3}){3}'
    return {ip for ip in re.findall(ip, log)}
..
----------------------------------------------------------------------
Ran 2 tests in 0.048s

OK
Александрина Ламбова
  • Коректно
  • 2 успешни тест(а)
  • 0 неуспешни тест(а)
Александрина Ламбова
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
import re

def ips_set(log):
    return set(re.findall(
        '[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}', log))


def requests_per_day(log):
    str = re.findall(
        '[0-9]{4}-[0-9]{2}-[0-9]{2}', log)
    d = dict.fromkeys(str, 0)
    for date in str:
        d[date] = d[date] + 1
    return d

..
----------------------------------------------------------------------
Ran 2 tests in 0.047s

OK
Десислава Цветкова
  • Коректно
  • 2 успешни тест(а)
  • 0 неуспешни тест(а)
Десислава Цветкова
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
import re

DATE_REGEX = r"Started (GET|POST|HEAD|POST|PUT|DELETE|TRACE|CONNECT) "\
    ".* at ([0-9]{4}\-[0-9]{2}\-[0-9]{2})"
IP_REGEX = r"for ([0-9]{,3}\.[0-9]{,3}\.[0-9]{,3}\.[0-9]{,3}) at"


def requests_per_day(log):
    days = re.findall(DATE_REGEX, log)
    days = [d[1] for d in days]
    return {day: days.count(day) for day in days}


def ips_set(log):
    return set(re.findall(IP_REGEX, log))
..
----------------------------------------------------------------------
Ran 2 tests in 0.051s

OK
Марина Георгиева
  • Коректно
  • 2 успешни тест(а)
  • 0 неуспешни тест(а)
Марина Георгиева
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
import re
from collections import Counter


def requests_per_day(log):
    pattern = r'\b(\d{4})-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1])\b'
    matches = re.finditer(pattern, log)
    dates = [match.group() for match in matches]
    return Counter(dates)


def ips_set(log):
    byte_pattern = r'([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])'
    pattern = r'\b(' + byte_pattern + r'\.){3}' + byte_pattern + r'\b'
    matches = re.finditer(pattern, log)
    return {match.group() for match in matches}
..
----------------------------------------------------------------------
Ran 2 tests in 0.051s

OK
Виктор Маринов
  • Некоректно
  • 1 успешни тест(а)
  • 1 неуспешни тест(а)
Виктор Маринов
1
2
3
4
5
6
7
8
9
10
11
12
import re


def request_per_day(log):
    pattern = r'\d{4}-\d{2}-\d{2}'
    return {date: len([d for d in re.findall(pattern, log) if d == date])
            for date in re.findall(pattern, log)}


def ips_set(log):
    pattern = r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}'
    return {ip for ip in re.findall(pattern, log)}
.E
======================================================================
ERROR: test_requests_per_day (test.TestLogObserver)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/data/rails/pyfmi-2016/releases/20160307095126/lib/language/python/runner.py", line 67, in thread
    raise result
AttributeError: module 'solution' has no attribute 'requests_per_day'

----------------------------------------------------------------------
Ran 2 tests in 0.046s

FAILED (errors=1)
Кристофър Митов
  • Коректно
  • 2 успешни тест(а)
  • 0 неуспешни тест(а)
Кристофър Митов
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
from collections import defaultdict
import re


def ips_set(log):
    ip_regex = r'(?:\d{1,3}\.){3}\d{1,3}'
    return {ip for ip in re.findall(ip_regex, log)}


def requests_per_day(log):
    result = defaultdict(int)

    date_regex = date_regex = r'\d{4}\-\d{1,2}-\d{1,2}'
    for date in re.findall(date_regex, log):
        result[date] += 1

    return dict(result)
..
----------------------------------------------------------------------
Ran 2 tests in 0.048s

OK
Хризантема Станчева
  • Коректно
  • 2 успешни тест(а)
  • 0 неуспешни тест(а)
Хризантема Станчева
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import re
from collections import defaultdict


def requests_per_day(log):
    pattern = r'\d{4}-\d{2}-\d{2}'
    log_dict = defaultdict(int)
    log_list = log.splitlines()
    for log_line in log_list:
        date = re.search(pattern, log_line)
        if date is not None:
            result = date.group()
            log_dict[result] += 1
    return log_dict


def ips_set(log):
    pattern = r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}'
    result = set()
    log_list = re.findall(pattern, log)
    for log_line in log_list:
        result.add(log_line)
    return result
..
----------------------------------------------------------------------
Ran 2 tests in 0.049s

OK
Стилиян Стоянов
  • Некоректно
  • 0 успешни тест(а)
  • 2 неуспешни тест(а)
Стилиян Стоянов
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import re

DATES = r'\b\d{4}-\d{2}-\d{2}\b'
IPS = r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b'


def requests_per_day(log):
    all_dates = search_reg_expr(log, DATES)
    return dict([(date, all_dates.count(date)) for date in set(all_dates)])


def ips_set(log):
    all_ips = search_reg_expr(log, IPS)
    return set(all_ips)


def search_reg_expr(log, reg_expr):
    log_lines = log.readlines()
    matches = []
    for line in log_lines:
        matches.extend(re.findall(reg_expr, line))
    return matches
EE
======================================================================
ERROR: test_ips_set (test.TestLogObserver)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/data/rails/pyfmi-2016/releases/20160307095126/lib/language/python/runner.py", line 67, in thread
    raise result
AttributeError: 'str' object has no attribute 'readlines'

======================================================================
ERROR: test_requests_per_day (test.TestLogObserver)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/data/rails/pyfmi-2016/releases/20160307095126/lib/language/python/runner.py", line 67, in thread
    raise result
AttributeError: 'str' object has no attribute 'readlines'

----------------------------------------------------------------------
Ran 2 tests in 0.044s

FAILED (errors=2)
Николай Мантаров
  • Коректно
  • 2 успешни тест(а)
  • 0 неуспешни тест(а)
Николай Мантаров
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
import re
from collections import Counter


def ips_set(log):
    return set(re.findall(r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b', log))


def requests_per_day(log):
    pattern = r'\b\d{4}-\d{1,2}-\d{1,2}\b'
    dates = re.findall(pattern, log)
    cnt = Counter()
    for date in dates:
        cnt[date] += 1
    return dict(cnt)
..
----------------------------------------------------------------------
Ran 2 tests in 0.048s

OK
Ивелина Христова
  • Коректно
  • 2 успешни тест(а)
  • 0 неуспешни тест(а)
Ивелина Христова
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import re

def requests_per_day(log):
    ips_by_date = {}
    ip_logs = re.findall('[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+ at [0-9]{4}-[0-9]{2}-[0-9]{2}', log, flags=re.IGNORECASE)
    for ip_log in ip_logs:
        date = ip_log.split(' at ')[1]
        
        if date in ips_by_date.keys():
            ips_by_date[date] = ips_by_date[date] + 1
        else:
            ips_by_date.update({date: 1})

    return ips_by_date

def ips_set(log):
    ips = re.findall('[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+', log, flags=re.IGNORECASE)
    ip_set = set(ips)
    return ip_set
..
----------------------------------------------------------------------
Ran 2 tests in 0.050s

OK
Емил Илиев
  • Коректно
  • 2 успешни тест(а)
  • 0 неуспешни тест(а)
Емил Илиев
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
import re


def requests_per_day(log):
    requests = re.findall(r'\d{4}-\d{2}-\d{2}', log)
    dates = {}
    for item in requests:
        if item in dates:
            dates[item] = dates[item] + 1
        else:
            dates[item] = 1
    return dates


def ips_set(log):
    return set(re.findall(r"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b", log))
..
----------------------------------------------------------------------
Ran 2 tests in 0.047s

OK
Георги Данков
  • Коректно
  • 2 успешни тест(а)
  • 0 неуспешни тест(а)
Георги Данков
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
import re
from collections import defaultdict


def requests_per_day(log):
    all_dates = re.findall(r'\d{4}-\d{1,2}-\d{1,2}', log)

    dates = defaultdict(int)
    for date in all_dates:
        dates[date] += 1

    return dates


def ips_set(log):
    return set(re.findall(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', log))
..
----------------------------------------------------------------------
Ran 2 tests in 0.047s

OK