% Example of Empirical Orthogonal Function analysis % % John Wilkin % 12 April, 2004 % % Load a data set of monthly sea surface temperatures for the Pacific Ocean % from the "Reynolds SST" product from the Climate Analysis Center. % % These data were acquired from the Live Access Server at the National % Virtual Ocean Data System (NVODS): % http://ferret.pmel.noaa.gov/NVODS/servlets/dataset echo off % echo on to type commands to command window more off % option to control pausing of script to view results % pauseit = 0.5; % 0 to run without pausing. % 1 to pause until keystroke % any other number pauses for that number of seconds % Load the data subset saved as a Matlab structure in cac.mat load cac % Break out variables in the structure to individual variables in the % workspace lat = cac.lat; lon = cac.lon; sst = cac.sst; land = cac.land; year = cac.year; % Get the lengths of the 3 dimensions of SST [nlats,nlons,ntimes] = size(sst); % The Climate Analysis Center (CAC) data include both land and sea % temperatures. Since we don't expect temperature variability to % necessarily be strongly correlated to the adjacent sea temperature, we % need to exclude land values from the data set. This can be done by % setting all land values to zero, or explicitly excluding them from the % data set. % Create a land/sea mask of ones/zeros using the land index vector % (I created the land points vector interactively using 'ginput' and 'inside') mask = ones([nlats nlons]); mask(land) = 0; % mask=0 is land, mask=1 is water mask = repmat(mask,[1 1 ntimes]); sst = mask.*sst; % This next piece of code is only needed if you choose to explicitly remove % the land points from the data matrix by setting 'rmland=1' % % Save a vector of index values for the full lon/lat domain so that we can % later copy the EOF loadings back into a full matrix that includes the % land points for plotting water = 1:(nlats*nlons); water(land) = []; % Now, when we have a matrix A of size(A) = [nlats nlons], then A(water) % are only the water points, and A(land) are only the land points. % Simple coastline database for plotting coast = load('coastdat'); % Plot an example map figure(1) clf cax = [15 30]; set(gcf,'pos',[10 502 487 184]) for i=1:5:ntimes % flip through several images to see roughly what the data look like pcolorjw(lon,lat,sst(:,:,i)); caxis(cax) colorbar title(['SST at decimal year ' num2str(year(i))]) hold on; han=plot(coast.coastdata_x,coast.coastdata_y,'k-'); hold off drawnow end pause_it % Plot a longitude-time Hovmuller diagram figure(2) set(gcf,'pos',[655 172 358 510]) clf lati = 7; if lat(lati)<0 H = '^oS'; else H = '^oN'; end pcolorjw(lon,year,squeeze(sst(lati,:,:))') set(gca,'tickdir','out') titlestr = ['SST vs longitude and time at latitude ' ... num2str(abs(lat(lati))) H]; title(titlestr) pause_it % Reshape to form a 2-dimensional data matrix % row index corresponds to position % column index corresponds to time D = reshape(sst,[nlats*nlons ntimes]); disp(['Size of D is ' num2str(size(D))]) % Optionally remove the land points from the data matrix rmland = 0; if rmland error('Never quite got this going ... need to fix sstmean') disp('Removing land points...') D(land,:) = []; disp(['Size of D is now ' num2str(size(D))]) end % Possibly remove deterministic signals such as mean annual cycle if pauseit == 1 reply = input('Do you want to remove the mean annual cycle (y/n)? ','s'); else reply = 'y'; end if strcmp(reply,'y') rmann = 1; else rmann = 0; end if rmann disp('Removing mean annual cycle') % Remove annual harmonic Eann = [ones(size(year(:))) cos(2*pi*year(:)) sin(2*pi*year(:))]; % Could remove both annual harmonic and semiannual harmonics with the % following design matrix % Eann = [ones(size(year(:))) cos(2*pi*year(:)) sin(2*pi*year(:)) ... % cos(pi*year(:)) sin(pi*year(:))]; Dann = zeros(size(D)); for m=1:size(D,1) d = D(m,:); coeff = Eann\d(:); dfit = Eann*coeff; Dann(m,:) = dfit'; end sstmean = reshape(Dann,[nlats nlons ntimes]); D = D-Dann; sstanom = reshape(D,[nlats nlons ntimes]); % replot Hovmueller pcolorjw(lon,year,squeeze(sstanom(lati,:,:))') set(gca,'tickdir','out') title(titlestr) disp(' ');disp('Re-plotted Hovmuller with annual cycle removed'); figure(gcf) pause_it else % Must at least remove time mean of each time series (each row of D) % Some texts refer to this as "time centering" the data sstmean = repmat(mean(D,2),[1 ntimes]); D = D-sstmean; sstanom = reshape(D,[nlats nlons ntimes]); end % ---------------------------------------------------------------------- % Compute EOFs using Singular Value Decomposition % ---------------------------------------------------------------------- % Compute the SVD of the data matrix timer = cputime; [U,S,V] = svd(D,0); disp(['SVD took ' num2str(cputime-timer) ' cpu seconds']) pause_it % Left singular vectors are the columns of U and are the EOF loadings or % mode patterns % % Singular values are the diagonal elements of S and correspond to the % sqrt(lambda), so take S.^2 to get variance of each EOF L = S*S'; percent_variance = 100*diag(L)/trace(L); K = 5; percent_variance(1:K) figure(3) clf set(gcf,'pos',[582 108 292 225]) plot(0:5,[0; cumsum(percent_variance(1:5))],'d--') title('Cumulative percent variance explained by EOF modes') % Right singular vectors are the columns of V. % Construct the amplitude time series from the product S*V' then each % row of A is the amplitude times series corresponding to each column of U A = S*V'; % If land points were removed then rebuild a matrix that has all spatial % points so we can plot the EOF patterns if rmland UU = NaN*ones([nlats*nlons size(U,2)]); UU(water,:) = U; EOF = UU; else EOF = U; end % plot the EOF patterns (a.k.a. loadings or mode structures) figure(4) clf for k=1:3 figure(4) set(gcf,'pos',[11 69 518 613]) subplot(3,1,k) pcolorjw(lon,lat,reshape(EOF(:,k),[nlats nlons])) amerc colorbar title(['EOF ' int2str(k)]) end pause_it % plot the EOF amplitude time series figure(5) clf for k=1:3 figure(5) set(gcf,'pos',[ 486 71 535 614]) subplot(3,1,k) plot(year,A(k,:)) set(gca,'xlim',[min(year) max(year)]) grid on title(['EOF ' int2str(k)]) end pause_it % ---------------------------------------------------------------------- % Compute EOFs from eigenvalues of the covariance matrix % ---------------------------------------------------------------------- C = D*D'; timer = cputime; [E,Le] = eig(C); disp(['EIG took ' num2str(cputime-timer) ' cpu seconds']) pause_it % eig results come in order of ascending eigenvalue, so flipud 100*flipud(diag(Le(end-K:end,end-K:end)))/trace(Le) % Plot EOF 1 figure(6) clf set(gcf,'pos',[34 516 431 162]) pcolorjw(lon,lat,reshape(E(:,end),[nlats nlons])) title('EOF 1 computed from covariance matirx using eig function ') colorbar amerc pause_it % ---------------------------------------------------------------------- % Check sum of hierarchy of EOFs against original data % ---------------------------------------------------------------------- % i = 50; %% show = 'full'; show = 'anom'; figure(7) for K = 1:3 sst0 = U(:,1:K)*A(1:K,:); sst0 = reshape(sst0,[nlats nlons ntimes]); switch show case 'full' sst0 = sst0 + sstmean; sstdata = sst; cax = [15 30]; case 'anom' sstdata = sstanom; cax = 3*[-1 1]; end disp(['Range of data is ' mat2str(range(sstdata),4)]) disp(['Range of EOF approximation is ' mat2str(range(sst0),4)]) disp(' ') set(gcf,'pos',[16 70 494 612]) clf i = 217; subplot(311) pcolorjw(lon,lat,sstdata(:,:,i)) caxis(cax) colorbar title(['Data at decimal year ' num2str(year(i))]) hold on; han=plot(coast.coastdata_x,coast.coastdata_y,'k-'); hold off subplot(312) pcolorjw(lon,lat,sst0(:,:,i)) caxis(cax) colorbar plural = ' mode'; if K>1, plural=' modes'; end title(['SST for ' int2str(K) plural]) text(147,19,[int2str(K) plural],'fontsize',12,'fontweight','bold') hold on; han=plot(coast.coastdata_x,coast.coastdata_y,'k-'); hold off subplot(313) pcolorjw(lon,lat,sstdata(:,:,i)-sst0(:,:,i)) colorbar title(['SST(orig)-\Sigma_{i=1}^' int2str(K) 'EOFs']) hold on; han=plot(coast.coastdata_x,coast.coastdata_y,'k-'); hold off xlab1 = ... ['variance of residual = ' ... num2str(100*(var(sstdata(:)-sst0(:)))/var(sstanom(:)),3) ' %']; xlab2 = ... ['100 x \Sigma_{i=K+1}^M \lambda_i =' ... num2str(sum(percent_variance((K+1):end)),4) ' %']; xlabel([xlab1 ' ' xlab2]) pause_it end disp(' ');disp(' DONE ') if 0 figure(8) [cartoon,cmap] = imread('../cartoons/45240_m.gif'); imagesc(cartoon); colormap(cmap) label = '[U,S,V]=svd(D);'; for i=1:length(label) han = text(119,124,label(1:i)); set(han,'color','r','fontangle','italic','fontsize',26,... 'fontweight','bold','backgroundcolor','w') pause(0.05) end end