Coursera) Neural Networks and Deep Learning - Shallow Neural Networks - Planar data classification with one hidden layer (2)
Simpler than I expected, but I really should go over the derivative calculations one more time. It all still feels new.

Exercise 5 - compute_cost

# GRADED FUNCTION: compute_cost

def compute_cost(A2, Y):
    """
    Computes the cross-entropy cost given in equation (13)

    Arguments:
    A2 -- The sigmoid output of the second activation, of shape (1, number of examples)
    Y -- "true" labels vector of shape (1, number of examples)

    Returns:
    cost -- cross-entropy cost given equation (13)
    """

    m = Y.shape[1]  # number of examples

    # Compute the cross-entropy cost
    # (≈ 2 lines of code)
    # logprobs = ...
    # cost = ...
    # YOUR CODE STARTS HERE
    logprobs = np.multiply(np.log(A2), Y) + np.multiply(np.log(1 - A2), 1 - Y)
    cost = - np.sum(logprobs) / m
    # YOUR CODE ENDS HERE

    cost = float(np.squeeze(cost))  # makes sure cost is the dimension we expect.
                                    # E.g., turns [[17]] into 17

    return cost

Exercise 6 - backward_propagation

# GRADED FUNCTION: backward_propagation

def backward_propagation(parameters, cache, X, Y):
    """
    Implement the backward propagation using the instructions above.

    Arguments:
    parameters -- python dictionary containing our parameters
    cache -- a dictionary containing "Z1", "A1", "Z2" and "A2".
    X -- input data of shape (2, number of examples)
    Y -- "true" labels vector of shape (1, number of examples)

    Returns:
    grads -- python dictionary containing your gradients with respect to different parameters
    """
    m = X.shape[1]

    # First, retrieve W1 and W2 from the dictionary "parameters".
    #(≈ 2 lines of code)
    # W1 = ...
    # W2 = ...
    # YOUR CODE STARTS HERE
    W1 = parameters["W1"]
    W2 = parameters["W2"]
    # YOUR CODE ENDS HERE

    # Retrieve also A1 and A2 from dictionary "cache".
    #(≈ 2 lines of code)
    # A1 = ...
    # A2 = ...
    # YOUR CODE STARTS HERE
    A1 = cache["A1"]
    A2 = cache["A2"]
    # YOUR CODE ENDS HERE

    # Backward propagation: calculate dW1, db1, dW2, db2.
    #(≈ 6 lines of code, corresponding to 6 equations on slide above)
    # dZ2 = ...
    # dW2 = ...
    # db2 = ...
    # dZ1 = ...
    # dW1 = ...
    # db1 = ...
    # YOUR CODE STARTS HERE
    dZ2 = A2 - Y
    dW2 = np.dot(dZ2, A1.T) / m
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m
    dZ1 = np.dot(W2.T, dZ2) * (1 - np.power(A1, 2))  # tanh'(Z1) = 1 - tanh(Z1)^2 = 1 - A1^2
    dW1 = np.dot(dZ1, X.T) / m
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m
    # YOUR CODE ENDS HERE

    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}

    return grads
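Since the derivative steps are exactly the part I want to review, a finite-difference gradient check is a nice way to convince myself the equations above are right (in particular that dZ1 uses 1 - A1^2 because the hidden activation is tanh). This is just my own sketch, not part of the assignment: check_dW1 and eps are names I made up, and it assumes the notebook's forward_propagation, compute_cost, and backward_propagation are already defined.

import copy
import numpy as np

def check_dW1(parameters, cache, X, Y, eps=1e-6):
    # Analytic gradient of the cost with respect to W1, from backward_propagation.
    # Note: cache must come from forward_propagation(X, parameters) with these same parameters.
    dW1 = backward_propagation(parameters, cache, X, Y)["dW1"]

    # Numerical gradient: nudge each entry of W1 by +/- eps and take central differences of the cost
    num_dW1 = np.zeros_like(parameters["W1"])
    for i in range(num_dW1.shape[0]):
        for j in range(num_dW1.shape[1]):
            p_plus, p_minus = copy.deepcopy(parameters), copy.deepcopy(parameters)
            p_plus["W1"][i, j] += eps
            p_minus["W1"][i, j] -= eps
            cost_plus = compute_cost(forward_propagation(X, p_plus)[0], Y)
            cost_minus = compute_cost(forward_propagation(X, p_minus)[0], Y)
            num_dW1[i, j] = (cost_plus - cost_minus) / (2 * eps)

    # Relative difference should be tiny (roughly < 1e-7) if the backprop equations are correct
    return np.linalg.norm(dW1 - num_dW1) / (np.linalg.norm(dW1) + np.linalg.norm(num_dW1))

# usage: A2, cache = forward_propagation(X, parameters); print(check_dW1(parameters, cache, X, Y))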
Exercise 7 - update_parameters

# GRADED FUNCTION: update_parameters

def update_parameters(parameters, grads, learning_rate = 1.2):
    """
    Updates parameters using the gradient descent update rule given above

    Arguments:
    parameters -- python dictionary containing your parameters
    grads -- python dictionary containing your gradients

    Returns:
    parameters -- python dictionary containing your updated parameters
    """
    # Retrieve a copy of each parameter from the dictionary "parameters". Use copy.deepcopy(...) for W1 and W2
    #(≈ 4 lines of code)
    # W1 = ...
    # b1 = ...
    # W2 = ...
    # b2 = ...
    # YOUR CODE STARTS HERE
    parameters_cp = copy.deepcopy(parameters)
    W1 = parameters_cp["W1"]
    b1 = parameters_cp["b1"]
    W2 = parameters_cp["W2"]
    b2 = parameters_cp["b2"]
    # YOUR CODE ENDS HERE

    # Retrieve each gradient from the dictionary "grads"
    #(≈ 4 lines of code)
    # dW1 = ...
    # db1 = ...
    # dW2 = ...
    # db2 = ...
    # YOUR CODE STARTS HERE
    dW1 = grads["dW1"]
    db1 = grads["db1"]
    dW2 = grads["dW2"]
    db2 = grads["db2"]
    # YOUR CODE ENDS HERE

    # Update rule for each parameter
    #(≈ 4 lines of code)
    # W1 = ...
    # b1 = ...
    # W2 = ...
    # b2 = ...
    # YOUR CODE STARTS HERE
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2
    # YOUR CODE ENDS HERE

    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}

    return parameters

Exercise 8 - nn_model

# GRADED FUNCTION: nn_model

def nn_model(X, Y, n_h, num_iterations = 10000, print_cost=False):
    """
    Arguments:
    X -- dataset of shape (2, number of examples)
    Y -- labels of shape (1, number of examples)
    n_h -- size of the hidden layer
    num_iterations -- Number of iterations in gradient descent loop
    print_cost -- if True, print the cost every 1000 iterations

    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """

    np.random.seed(3)
    n_x = layer_sizes(X, Y)[0]
    n_y = layer_sizes(X, Y)[2]

    # Initialize parameters
    #(≈ 1 line of code)
    # parameters = ...
    # YOUR CODE STARTS HERE
    parameters = initialize_parameters(n_x, n_h, n_y)
    # YOUR CODE ENDS HERE

    # Loop (gradient descent)
    for i in range(0, num_iterations):

        #(≈ 4 lines of code)
        # Forward propagation. Inputs: "X, parameters". Outputs: "A2, cache".
        # A2, cache = ...
        # Cost function. Inputs: "A2, Y". Outputs: "cost".
        # cost = ...
        # Backpropagation. Inputs: "parameters, cache, X, Y". Outputs: "grads".
        # grads = ...
        # Gradient descent parameter update. Inputs: "parameters, grads". Outputs: "parameters".
        # parameters = ...
        # YOUR CODE STARTS HERE
        A2, cache = forward_propagation(X, parameters)
        cost = compute_cost(A2, Y)
        grads = backward_propagation(parameters, cache, X, Y)
        parameters = update_parameters(parameters, grads)
        # YOUR CODE ENDS HERE

        # Print the cost every 1000 iterations
        if print_cost and i % 1000 == 0:
            print ("Cost after iteration %i: %f" %(i, cost))

    return parameters

Exercise 9 - predict

# GRADED FUNCTION: predict

def predict(parameters, X):
    """
    Using the learned parameters, predicts a class for each example in X

    Arguments:
    parameters -- python dictionary containing your parameters
    X -- input data of size (n_x, m)

    Returns
    predictions -- vector of predictions of our model (red: 0 / blue: 1)
    """

    # Computes probabilities using forward propagation, and classifies to 0/1 using 0.5 as the threshold.
    #(≈ 2 lines of code)
    # YOUR CODE STARTS HERE
    A2, cache = forward_propagation(X, parameters)
    predictions = np.round(A2)
    # YOUR CODE ENDS HERE

    return predictions
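To see how the pieces fit together end to end, here is a minimal usage sketch: train the model with nn_model, run predict, and compute accuracy. It assumes X and Y are the (2, m) and (1, m) planar-dataset arrays loaded earlier in the notebook; n_h=4 and the accuracy line are just illustrative choices, not necessarily the assignment's exact cells.

# Train a model with a 4-unit hidden layer and evaluate it on the training set
parameters = nn_model(X, Y, n_h=4, num_iterations=10000, print_cost=True)
predictions = predict(parameters, X)

# predictions and Y are both (1, m) arrays of 0/1 labels, so the mean of the matches is the accuracy
accuracy = float(np.mean(predictions == Y)) * 100
print("Accuracy: %.1f%%" % accuracy)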