# Principal component analysis (PCA) used to fit a plane to a 3D point cloud.
# Example adapted from:
# https://www.bing.com/search?qs=UT&pq=numpy+pca+&sk=CSYN1&sc=6-10&q=numpy+pca+example&cvid=f8480d538ba0461fa6590f521609d926&gs_lcrp=EgRlZGdlKgYIARAAGEAyBggAEEUYOTIGCAEQABhAMgYIAhAAGEAyBggDEAAYQDIGCAQQABhAMgYIBRAAGEDSAQgzMTkxajBqNKgCCLACAQ&FORM=ANAB01&DAF0=1&PC=U531
from typing import NamedTuple

import numpy as np


class PCAResult(NamedTuple):
    """Intermediate and final quantities of the PCA decomposition."""

    mean: np.ndarray                 # per-feature mean (centroid of the cloud)
    centered: np.ndarray             # input samples with the mean subtracted
    cov_matrix: np.ndarray           # covariance matrix of the features
    eigenvalues: np.ndarray          # eigenvalues as returned by the solver
    eigenvectors: np.ndarray         # matching eigenvectors, one per column
    sorted_eigenvalues: np.ndarray   # eigenvalues in descending order
    sorted_eigenvectors: np.ndarray  # eigenvectors reordered accordingly


def compute_pca(points) -> PCAResult:
    """Run a covariance-based PCA on *points*.

    Args:
        points: array-like of shape (n_samples, n_features), one sample per
            row, with at least two samples.

    Returns:
        A ``PCAResult`` holding the mean, the centred data, the covariance
        matrix and the eigen-decomposition (raw and sorted by decreasing
        eigenvalue).

    Raises:
        ValueError: if *points* is not 2-D or has fewer than two samples
            (the sample covariance is undefined in that case).
    """
    points = np.asarray(points, dtype=float)
    if points.ndim != 2 or points.shape[0] < 2:
        raise ValueError(
            "points must be a 2-D array with at least two samples (rows)"
        )

    # Step 1: centre the data (mean = 0). The variance is NOT rescaled.
    mean = np.mean(points, axis=0)
    centered = points - mean

    # Step 2: covariance matrix of the features (columns are variables).
    cov_matrix = np.cov(centered, rowvar=False)

    # Step 3: eigen-decomposition. The covariance matrix is symmetric, so
    # eigh is used: it guarantees real eigenvalues and orthonormal
    # eigenvectors, which the general-purpose eig does not.
    eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

    # Step 4: sort eigenpairs by decreasing eigenvalue.
    order = np.argsort(eigenvalues)[::-1]
    return PCAResult(
        mean=mean,
        centered=centered,
        cov_matrix=cov_matrix,
        eigenvalues=eigenvalues,
        eigenvectors=eigenvectors,
        sorted_eigenvalues=eigenvalues[order],
        sorted_eigenvectors=eigenvectors[:, order],
    )


def plane_from_pca(mean, sorted_eigenvectors):
    """Return ``(normal, d)`` of the best-fit plane ``normal . p + d = 0``.

    The normal is the eigenvector of the smallest eigenvalue, i.e. the last
    column of *sorted_eigenvectors* (sorted by decreasing eigenvalue). The
    plane passes through *mean*, which fixes the offset ``d``.
    """
    normal = np.asarray(sorted_eigenvectors)[:, -1]
    d = -np.dot(normal, mean)
    return normal, d


def main() -> None:
    """Run the PCA demo on sample data, print each step and plot the fit."""
    # Imported here so the numeric helpers above stay importable on systems
    # without matplotlib or a display.
    import matplotlib.pyplot as plt
    # Registers the '3d' projection on older matplotlib releases.
    from mpl_toolkits import mplot3d  # noqa: F401

    # Sample data: 5 samples with 3 features.
    # NOTE(review): the 15 in the first row is far from the other z values;
    # possibly meant to be 1.5 - confirm with the author.
    data = np.array([[2.5, 2.4, 15],
                     [0.5, 0.7, 0.8],
                     [2.2, 2.9, 1.9],
                     [1.9, 2.2, 1.7],
                     [3.1, 3.0, 2.5]])

    result = compute_pca(data)
    mean = result.mean

    # Step 5: select the top k eigenvectors (here we choose k=2).
    k = 2
    eigenvector_subset = result.sorted_eigenvectors[:, :k]

    # Step 6: project the centred data onto the selected axes.
    pca_data = np.dot(result.centered, eigenvector_subset)

    print("Original Data:\n", data)
    print("Mean:\n", mean)
    print("Standardized Data:\n", result.centered)
    print("Covariance Matrix:\n", result.cov_matrix)
    print("Eigenvalues:\n", result.eigenvalues)
    print("Eigenvectors:\n", result.eigenvectors)
    print("Sorted Eigenvalues:\n", result.sorted_eigenvalues)
    print("Sorted Eigenvectors:\n", result.sorted_eigenvectors)
    print("PCA Transformed Data:\n", pca_data)

    fig = plt.figure()
    ax = fig.add_subplot(projection="3d")

    # https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.scatter.html
    # Draw the point cloud.
    ax.scatter(data[:, 0], data[:, 1], data[:, 2], marker=".")
    # Draw the centroid of the point cloud.
    ax.scatter(mean[0], mean[1], mean[2], marker="*")

    # Plane parameters; the plane passes through the centroid.
    normal, d = plane_from_pca(mean, result.sorted_eigenvectors)
    a, b, c = normal

    # Draw the plane normal from the centroid. The arrow length is the
    # largest absolute deviation so it stays visible next to the cloud.
    scale = np.max(np.abs(result.centered))
    tip = mean + scale * normal
    ax.plot([mean[0], tip[0]], [mean[1], tip[1]], [mean[2], tip[2]])
    ax.axis('equal')

    print(f"a:{a} b:{b} c:{c} d:{d}")

    ax.set_title('3d Scatter plot geeks for geeks')
    plt.show()


if __name__ == "__main__":
    main()