«Теперь он и тебя сосчитал» или Наука о данных с нуля (Data Science from Scratch)

, Data Science, Cognitive Class. , , , - , .



, IT , Data Science , .







1. « — » —



, , Data Science , .



, , . , , :



  1. . ;
  2. Python ;
  3. Data Science, .
  4. , .


, , ?

Cognitive Class, Kaggle , , .



. , ? .



2. « — » —



«Data Science. » — , , , . 2015 . 2 IT , , Python. ( ) . , Python 2, , Python 3 ( GitHub), (, ).



, Anaconda / .



. , : « . , .» — . 100% , , 2 , , . , , , , .



, -, .

, « » - , , . «Linux from scratch», , - Linux « », , , ( ).



. , , , ( )



, : « , :»



3. « — Python» — .



, «» . , , , Data Science (, Coursera). , 2-3 , « » , - , - .



, — “DataSciencester”. , , . «» .



, , , , , «» , .



Python, , , , 2, , , ( ).



Python, 3 :



  1. . ;
  2. , , ;
  3. ( );


Ozon ( ), .



, GitHub, .



() , , .

Python 2 3, Jupyter notebook. , , Anaconda ( ). , Jupyter ( ). , , . Jupyter «» , ( Windows Linux)



, , , « » - (, API ).



.



, , ( GitHub). , .



# -*- coding: utf-8 -*-
# linear_algebra.py

import re, math, random # regexes, math functions, random numbers
import matplotlib.pyplot as plt # pyplot
from collections import defaultdict, Counter
from functools import partial, reduce

#
# functions for working with vectors
#

def vector_add(v, w):
    """return the element-by-element sum of v and w

    elements are paired with zip, so the result is only as long
    as the shorter of the two inputs"""
    total = []
    for left, right in zip(v, w):
        total.append(left + right)
    return total

def vector_subtract(v, w):
    """return the element-by-element difference v - w

    elements are paired with zip, so the result is only as long
    as the shorter of the two inputs"""
    difference = []
    for left, right in zip(v, w):
        difference.append(left - right)
    return difference

def vector_sum(vectors):
    """fold vector_add over all of the vectors, left to right,
    and return the accumulated vector"""
    accumulated = reduce(vector_add, vectors)
    return accumulated

def scalar_multiply(c, v):
    """return a new list holding c times each element of v"""
    scaled = []
    for element in v:
        scaled.append(c * element)
    return scaled

def vector_mean(vectors):
    """return the vector whose i-th element is the average of the
    i-th elements of all the input vectors"""
    count = len(vectors)
    # the weight is computed before the sum, matching the order in which
    # the arguments were originally evaluated
    weight = 1/count
    total = vector_sum(vectors)
    return scalar_multiply(weight, total)

def dot(v, w):
    """dot product: v_1 * w_1 + ... + v_n * w_n

    elements are paired with zip, so extra trailing elements of the
    longer input are ignored"""
    # keep the built-in sum so the accumulation matches the original
    products = (left * right for left, right in zip(v, w))
    return sum(products)

def sum_of_squares(v):
    """sum of the squared elements of v, i.e. the dot product
    of v with itself: v_1 * v_1 + ... + v_n * v_n"""
    squares_total = dot(v, v)
    return squares_total

def magnitude(v):
    """length of v: the square root of its sum of squares"""
    squares_total = sum_of_squares(v)
    return math.sqrt(squares_total)

def squared_distance(v, w):
    """sum of squares of the componentwise difference v - w"""
    difference = vector_subtract(v, w)
    return sum_of_squares(difference)

def distance(v, w):
    """square root of the squared distance between v and w"""
    squared = squared_distance(v, w)
    return math.sqrt(squared)

#
# functions for working with matrices
#

def shape(A):
    """return (number of rows, number of columns) of A

    the column count is the length of the first row, or 0 when
    A has no rows"""
    row_count = len(A)
    if not A:
        return row_count, 0
    return row_count, len(A[0])

def get_row(A, i):
    """return the i-th row of A (the row object itself, not a copy)"""
    row = A[i]
    return row

def get_column(A, j):
    """return a new list made of the j-th element of every row of A"""
    column = []
    for row in A:
        column.append(row[j])
    return column

def make_matrix(num_rows, num_cols, entry_fn):
    """build a num_rows x num_cols matrix (a list of row lists)
    in which the entry at (i, j) is entry_fn(i, j)"""
    matrix = []
    for row_index in range(num_rows):
        row = []
        for col_index in range(num_cols):
            row.append(entry_fn(row_index, col_index))
        matrix.append(row)
    return matrix

def is_diagonal(i, j):
    """entry function for an identity-style matrix:
    1 when i equals j (on the 'diagonal'), 0 otherwise"""
    if i == j:
        return 1
    return 0

# 5 x 5 matrix whose (i, j) entry is is_diagonal(i, j):
# 1 where the row and column indices are equal, 0 elsewhere
identity_matrix = make_matrix(5, 5, is_diagonal)

# 10 x 10 matrix of 0/1 flags over users 0..9: presumably a 1 at
# friendships[i][j] means users i and j are friends. As written, the
# matrix is symmetric and its diagonal is all zeros.
#
#          user 0  1  2  3  4  5  6  7  8  9
#
friendships = [[0, 1, 1, 0, 0, 0, 0, 0, 0, 0], # user 0
               [1, 0, 1, 1, 0, 0, 0, 0, 0, 0], # user 1
               [1, 1, 0, 1, 0, 0, 0, 0, 0, 0], # user 2
               [0, 1, 1, 0, 1, 0, 0, 0, 0, 0], # user 3
               [0, 0, 0, 1, 0, 1, 0, 0, 0, 0], # user 4
               [0, 0, 0, 0, 1, 0, 1, 1, 0, 0], # user 5
               [0, 0, 0, 0, 0, 1, 0, 0, 1, 0], # user 6
               [0, 0, 0, 0, 0, 1, 0, 0, 1, 0], # user 7
               [0, 0, 0, 0, 0, 0, 1, 1, 0, 1], # user 8
               [0, 0, 0, 0, 0, 0, 0, 0, 1, 0]] # user 9

      
      





, , , , .



(, ) - , ( ), , , , , , , , , «». .



4. « !» — .



, ?



Data Science Cognitive class (), .



, , , , , ( 2 ), , , . «» «» CC ( ).



, - ? , . - , , . , , , , ( ). , « », .



? , . , , , - « », , - , , , , .



Data Science , , , .



550 300 , , . , , - kaggle, .



All Articles