, IT , Data Science , .

1. « — » —
, , Data Science , .
, , . , , :
- . ;
- Python ;
- Data Science, .
- , .
, , ?
Cognitive Class, Kaggle , , .
. , ? .
2. « — » —
«Data Science. » — , , , . 2015 . 2 IT , , Python. ( ) . , Python 2, , Python 3 ( GitHub), (, ).
, Anaconda / .
. , : « . , .» — . 100% , , 2 , , . , , , , .
, -, .
, « » - , , . «Linux from scratch», , - Linux « », , , ( ).
. , , , ( )
, : « , :»
3. « — Python» — .
, «» . , , , Data Science (, Coursera). , 2-3 , « » , - , - .
, — “DataSciencester”. , , . «» .
, , , , , «» , .
Python, , , , 2, , , ( ).
Python, 3 :
- . ;
- , , ;
- ( );
Ozon ( ), .
, GitHub, .
() , , .
Python 2 3, Jupyter notebook. , , Anaconda ( ). , Jupyter ( ). , , . Jupyter «» , ( Windows Linux)
, , , « » - (, API ).
.
, , ( GitHub). , .
# -*- coding: utf-8 -*-
# linear_algebra.py
import re, math, random # regexes, math functions, random numbers
import matplotlib.pyplot as plt # pyplot
from collections import defaultdict, Counter
from functools import partial, reduce
#
# functions for working with vectors
#
def vector_add(v, w):
    """Add two vectors componentwise.

    Returns a new list whose i-th element is ``v[i] + w[i]``.  Elements are
    paired with zip(), so when the inputs differ in length the result is
    only as long as the shorter one.
    """
    return [v_i + w_i for v_i, w_i in zip(v, w)]
def vector_subtract(v, w):
    """Subtract two vectors componentwise.

    Returns a new list whose i-th element is ``v[i] - w[i]``.  Elements are
    paired with zip(), so when the inputs differ in length the result is
    only as long as the shorter one.
    """
    return [v_i - w_i for v_i, w_i in zip(v, w)]
def vector_sum(vectors):
    """Sum a sequence of vectors componentwise.

    Folds vector_add over ``vectors`` from left to right.  reduce() is
    called without an initial value, so a single-vector input is returned
    as is and an empty input raises TypeError.
    """
    return reduce(vector_add, vectors)
def scalar_multiply(c, v):
    """Multiply every element of vector ``v`` by the scalar ``c``.

    Returns a new list; ``v`` itself is not modified.
    """
    return [c * v_i for v_i in v]
def vector_mean(vectors):
    """Compute the componentwise mean of the input vectors.

    The i-th element of the result is the mean of the i-th elements of the
    vectors in ``vectors``.  Raises ZeroDivisionError when ``vectors`` is
    empty, because the sum is scaled by ``1 / len(vectors)``.
    """
    n = len(vectors)
    return scalar_multiply(1 / n, vector_sum(vectors))
def dot(v, w):
    """Return the dot product v_1 * w_1 + ... + v_n * w_n.

    Elements are paired with zip(), so extra elements of the longer input
    are ignored.  Two empty vectors give 0.
    """
    return sum(v_i * w_i for v_i, w_i in zip(v, w))
def sum_of_squares(v):
    """Return v_1 * v_1 + ... + v_n * v_n, i.e. the dot product of v with itself."""
    return dot(v, v)
def magnitude(v):
    """Return the length of vector ``v``: the square root of its sum of squares."""
    return math.sqrt(sum_of_squares(v))
def squared_distance(v, w):
    """Return the sum of squares of the componentwise difference ``v - w``."""
    return sum_of_squares(vector_subtract(v, w))
def distance(v, w):
    """Return the square root of squared_distance(v, w)."""
    return math.sqrt(squared_distance(v, w))
#
# functions for working with matrices
#
def shape(A):
    """Return ``(num_rows, num_cols)`` for matrix ``A`` given as a list of rows.

    The column count is taken from the first row only; an empty matrix
    yields ``(0, 0)``.
    """
    num_rows = len(A)
    # Guard the A[0] lookup so an empty matrix does not raise IndexError.
    num_cols = len(A[0]) if A else 0
    return num_rows, num_cols
def get_row(A, i):
    """Return row ``i`` of matrix ``A`` (the row object itself, not a copy)."""
    return A[i]
def get_column(A, j):
    """Return column ``j`` of matrix ``A`` as a new list, one element per row."""
    return [A_i[j] for A_i in A]
def make_matrix(num_rows, num_cols, entry_fn):
    """Build a num_rows x num_cols matrix as a list of row lists.

    The (i, j)-th entry is ``entry_fn(i, j)``, where ``i`` is the row index
    and ``j`` the column index, both starting at 0.
    """
    return [[entry_fn(i, j) for j in range(num_cols)]
            for i in range(num_rows)]
def is_diagonal(i, j):
    """Return 1 when ``i == j`` (a diagonal position), 0 everywhere else."""
    return 1 if i == j else 0
# 5 x 5 matrix with 1's on the diagonal and 0's elsewhere.
identity_matrix = make_matrix(num_rows=5, num_cols=5, entry_fn=is_diagonal)
# Friendship table for users 0-9, stored as a 10 x 10 matrix of 0/1 flags:
# row i, column j holds 1 when user i is listed as a friend of user j.
#
# user (column index):
#    0  1  2  3  4  5  6  7  8  9
friendships = [
    [0, 1, 1, 0, 0, 0, 0, 0, 0, 0],  # user 0
    [1, 0, 1, 1, 0, 0, 0, 0, 0, 0],  # user 1
    [1, 1, 0, 1, 0, 0, 0, 0, 0, 0],  # user 2
    [0, 1, 1, 0, 1, 0, 0, 0, 0, 0],  # user 3
    [0, 0, 0, 1, 0, 1, 0, 0, 0, 0],  # user 4
    [0, 0, 0, 0, 1, 0, 1, 1, 0, 0],  # user 5
    [0, 0, 0, 0, 0, 1, 0, 0, 1, 0],  # user 6
    [0, 0, 0, 0, 0, 1, 0, 0, 1, 0],  # user 7
    [0, 0, 0, 0, 0, 0, 1, 1, 0, 1],  # user 8
    [0, 0, 0, 0, 0, 0, 0, 0, 1, 0],  # user 9
]
, , , , .
(, ) - , ( ), ,
4. « !» — .
, ?
Data Science Cognitive class (), .
, , , , , ( 2 ), , , . «» «» CC ( ).
, - ? , . - , , . , , , , ( ). , « », .
? , . , , , - « », , - , , , , .
Data Science , , , .
550 300 , , . , , - kaggle, .