function [W, elapse] = constructW(fea,options) % Usage: % W = constructW(fea,options) % % fea: Rows of vectors of data points. Each row is x_i % options: Struct value in Matlab. The fields in options that can be set: % Metric - Choices are: % 'Euclidean' - Will use the Euclidean distance of two data % points to evaluate the "closeness" between % them. [Default One] % 'Cosine' - Will use the cosine value of two vectors % to evaluate the "closeness" between them. % A popular similarity measure used in % Information Retrieval. % % NeighborMode - Indicates how to construct the graph. Choices % are: [Default 'KNN'] % 'KNN' - k = 0 % Complete graph % k > 0 % Put an edge between two nodes if and % only if they are among the k nearst % neighbors of each other. You are % required to provide the parameter k in % the options. Default k=5. % 'Supervised' - k = 0 % Put an edge between two nodes if and % only if they belong to same class. % k > 0 % Put an edge between two nodes if % they belong to same class and they % are among the k nearst neighbors of % each other. % Default: k=0 % You are required to provide the label % information gnd in the options. % % WeightMode - Indicates how to assign weights for each edge % in the graph. Choices are: % 'Binary' - 0-1 weighting. Every edge receiveds weight % of 1. [Default One] % 'HeatKernel' - If nodes i and j are connected, put weight % W_ij = exp(-norm(x_i - x_j)/2t^2). This % weight mode can only be used under % 'Euclidean' metric and you are required to % provide the parameter t. % 'Cosine' - If nodes i and j are connected, put weight % cosine(x_i,x_j). Can only be used under % 'Cosine' metric. % % k - The parameter needed under 'KNN' NeighborMode. % Default will be 5. % gnd - The parameter needed under 'Supervised' % NeighborMode. Colunm vector of the label % information for each data point. % bLDA - 0 or 1. Only effective under 'Supervised' % NeighborMode. If 1, the graph will be constructed % to make LPP exactly same as LDA. Default will be % 0. % t - The parameter needed under 'HeatKernel' % WeightMode. Default will be 1 % bNormalized - 0 or 1. Only effective under 'Cosine' metric. % Indicates whether the fea are already be % normalized to 1. Default will be 0 % bSelfConnected - 0 or 1. Indicates whether W(i,i) == 1. Default 1 % if 'Supervised' NeighborMode & bLDA == 1, % bSelfConnected will always be 1. Default 1. % % % Examples: % % fea = rand(50,15); % options = []; % options.Metric = 'Euclidean'; % options.NeighborMode = 'KNN'; % options.k = 5; % options.WeightMode = 'HeatKernel'; % options.t = 1; % W = constructW(fea,options); % % % fea = rand(50,15); % gnd = [ones(10,1);ones(15,1)*2;ones(10,1)*3;ones(15,1)*4]; % options = []; % options.Metric = 'Euclidean'; % options.NeighborMode = 'Supervised'; % options.gnd = gnd; % options.WeightMode = 'HeatKernel'; % options.t = 1; % W = constructW(fea,options); % % % fea = rand(50,15); % gnd = [ones(10,1);ones(15,1)*2;ones(10,1)*3;ones(15,1)*4]; % options = []; % options.Metric = 'Euclidean'; % options.NeighborMode = 'Supervised'; % options.gnd = gnd; % options.bLDA = 1; % W = constructW(fea,options); % % % For more details about the different ways to construct the W, please % refer: % Deng Cai, Xiaofei He and Jiawei Han, "Document Clustering Using % Locality Preserving Indexing" IEEE TKDE, Dec. 2005. % % % Written by Deng Cai (dengcai2 AT cs.uiuc.edu), April/2004, Feb/2006, % May/2007 % if (~exist('options','var')) options = []; else if ~strcmpi(class(options),'struct') error('parameter error!'); end end %================================================= if ~isfield(options,'Metric') options.Metric = 'Cosine'; end switch lower(options.Metric) case {lower('Euclidean')} ; case {lower('Cosine')} if ~isfield(options,'bNormalized') options.bNormalized = 0; end otherwise error('Metric does not exist!'); end %================================================= if ~isfield(options,'NeighborMode') options.NeighborMode = 'KNN'; end switch lower(options.NeighborMode) case {lower('KNN')} %For simplicity, we include the data point itself in the kNN if ~isfield(options,'k') options.k = 5; end case {lower('Supervised')} if ~isfield(options,'bLDA') options.bLDA = 0; end if options.bLDA options.bSelfConnected = 1; end if ~isfield(options,'k') options.k = 0; end if ~isfield(options,'gnd') error('Label(gnd) should be provided under ''Supervised'' NeighborMode!'); end if length(options.gnd) ~= size(fea,1) error('gnd doesn''t match with fea!'); end otherwise error('NeighborMode does not exist!'); end %================================================= if ~isfield(options,'WeightMode') options.WeightMode = 'Binary'; end switch lower(options.WeightMode) case {lower('Binary')} ; case {lower('HeatKernel')} if ~strcmpi(options.Metric,'Euclidean') warning('''HeatKernel'' WeightMode should be used under ''Euclidean'' Metric!'); options.Metric = 'Euclidean'; end if ~isfield(options,'t') options.t = 1; end case {lower('Cosine')} if ~strcmpi(options.Metric,'Cosine') warning('''Cosine'' WeightMode should be used under ''Cosine'' Metric!'); options.Metric = 'Cosine'; end if ~isfield(options,'bNormalized') options.bNormalized = 0; end otherwise error('WeightMode does not exist!'); end %================================================= if ~isfield(options,'bSelfConnected') options.bSelfConnected = 1; end %================================================= tmp_T = cputime; [nSmp, nFea] = size(fea); if strcmpi(options.NeighborMode,'Supervised') Label = unique(options.gnd); nLabel = length(Label); if options.bLDA G = zeros(nSmp,nSmp); for idx=1:nLabel classIdx = find(options.gnd==Label(idx)); G(classIdx,classIdx) = 1/length(classIdx); end W = sparse(G); elapse = cputime - tmp_T; return; end switch lower(options.WeightMode) case {lower('Binary')} if options.k > 0 G = zeros(nSmp*(options.k+1),3); idNow = 0; for i=1:nLabel classIdx = find(options.gnd==Label(i)); D = EuDist2(fea(classIdx,:),[],0); [dump idx] = sort(D,2); % sort each row clear D dump; idx = idx(:,1:options.k+1); nSmpClass = length(classIdx)*(options.k+1); G(idNow+1:nSmpClass+idNow,1) = repmat(classIdx,[options.k+1,1]); G(idNow+1:nSmpClass+idNow,2) = classIdx(idx(:)); G(idNow+1:nSmpClass+idNow,3) = 1; idNow = idNow+nSmpClass; clear idx end G = sparse(G(:,1),G(:,2),G(:,3),nSmp,nSmp); G = max(G,G'); else G = zeros(nSmp,nSmp); for i=1:nLabel classIdx = find(options.gnd==Label(i)); G(classIdx,classIdx) = 1; end end if ~options.bSelfConnected for i=1:size(G,1) G(i,i) = 0; end end W = sparse(G); case {lower('HeatKernel')} if options.k > 0 G = zeros(nSmp*(options.k+1),3); idNow = 0; for i=1:nLabel classIdx = find(options.gnd==Label(i)); D = EuDist2(fea(classIdx,:),[],0); [dump idx] = sort(D,2); % sort each row clear D; idx = idx(:,1:options.k+1); dump = dump(:,1:options.k+1); dump = exp(-dump/(2*options.t^2)); nSmpClass = length(classIdx)*(options.k+1); G(idNow+1:nSmpClass+idNow,1) = repmat(classIdx,[options.k+1,1]); G(idNow+1:nSmpClass+idNow,2) = classIdx(idx(:)); G(idNow+1:nSmpClass+idNow,3) = dump(:); idNow = idNow+nSmpClass; clear dump idx end G = sparse(G(:,1),G(:,2),G(:,3),nSmp,nSmp); else G = zeros(nSmp,nSmp); for i=1:nLabel classIdx = find(options.gnd==Label(i)); D = EuDist2(fea(classIdx,:),[],0); D = exp(-D/(2*options.t^2)); G(classIdx,classIdx) = D; end end if ~options.bSelfConnected for i=1:size(G,1) G(i,i) = 0; end end W = sparse(max(G,G')); case {lower('Cosine')} if ~options.bNormalized % feaNorm = sum(fea.^2,2).^.5; % fea = fea ./ repmat(max(1e-10,feaNorm),1,size(fea,2)); [nSmp, nFea] = size(fea); if issparse(fea) fea2 = fea'; feaNorm = sum(fea2.^2,1).^.5; for i = 1:nSmp fea2(:,i) = fea2(:,i) ./ max(1e-10,feaNorm(i)); end fea = fea2'; clear fea2; else feaNorm = sum(fea.^2,2).^.5; for i = 1:nSmp fea(i,:) = fea(i,:) ./ max(1e-12,feaNorm(i)); end end end if options.k > 0 G = zeros(nSmp*(options.k+1),3); idNow = 0; for i=1:nLabel classIdx = find(options.gnd==Label(i)); D = fea(classIdx,:)*fea(classIdx,:)'; [dump idx] = sort(-D,2); % sort each row clear D; idx = idx(:,1:options.k+1); dump = -dump(:,1:options.k+1); nSmpClass = length(classIdx)*(options.k+1); G(idNow+1:nSmpClass+idNow,1) = repmat(classIdx,[options.k+1,1]); G(idNow+1:nSmpClass+idNow,2) = classIdx(idx(:)); G(idNow+1:nSmpClass+idNow,3) = dump(:); idNow = idNow+nSmpClass; clear dump idx end G = sparse(G(:,1),G(:,2),G(:,3),nSmp,nSmp); else G = zeros(nSmp,nSmp); for i=1:nLabel classIdx = find(options.gnd==Label(i)); G(classIdx,classIdx) = fea(classIdx,:)*fea(classIdx,:)'; end end if ~options.bSelfConnected for i=1:size(G,1) G(i,i) = 0; end end W = sparse(max(G,G')); otherwise error('WeightMode does not exist!'); end elapse = cputime - tmp_T; return; end if strcmpi(options.NeighborMode,'KNN') & (options.k > 0) if strcmpi(options.Metric,'Euclidean') nBlock = 4000; G = zeros(nSmp*(options.k+1),3); for i = 1:ceil(nSmp/nBlock) if i == ceil(nSmp/nBlock) smpIdx = (i-1)*nBlock+1:nSmp; dist = EuDist2(fea(smpIdx,:),fea,0); dist = full(dist); [dump idx] = sort(dist,2); % sort each row idx = idx(:,1:options.k+1); dump = dump(:,1:options.k+1); dump = exp(-dump/(2*options.t^2)); G((i-1)*nBlock*(options.k+1)+1:nSmp*(options.k+1),1) = repmat(smpIdx',[options.k+1,1]); G((i-1)*nBlock*(options.k+1)+1:nSmp*(options.k+1),2) = idx(:); G((i-1)*nBlock*(options.k+1)+1:nSmp*(options.k+1),3) = dump(:); else smpIdx = (i-1)*nBlock+1:i*nBlock; dist = EuDist2(fea(smpIdx,:),fea,0); dist = full(dist); [dump idx] = sort(dist,2); % sort each row idx = idx(:,1:options.k+1); dump = dump(:,1:options.k+1); dump = exp(-dump/(2*options.t^2)); G((i-1)*nBlock*(options.k+1)+1:i*nBlock*(options.k+1),1) = repmat(smpIdx',[options.k+1,1]); G((i-1)*nBlock*(options.k+1)+1:i*nBlock*(options.k+1),2) = idx(:); G((i-1)*nBlock*(options.k+1)+1:i*nBlock*(options.k+1),3) = dump(:); end end W = sparse(G(:,1),G(:,2),G(:,3),nSmp,nSmp); else if ~options.bNormalized [nSmp, nFea] = size(fea); if issparse(fea) fea2 = fea'; feaNorm = sum(fea2.^2,1).^.5; for i = 1:nSmp fea2(:,i) = fea2(:,i) ./ max(1e-10,feaNorm(i)); end fea = fea2'; clear fea2; else feaNorm = sum(fea.^2,2).^.5; for i = 1:nSmp fea(i,:) = fea(i,:) ./ max(1e-12,feaNorm(i)); end end end nBlock = 4000; G = zeros(nSmp*(options.k+1),3); for i = 1:ceil(nSmp/nBlock) if i == ceil(nSmp/nBlock) smpIdx = (i-1)*nBlock+1:nSmp; dist = fea(smpIdx,:)*fea'; dist = full(dist); [dump idx] = sort(-dist,2); % sort each row idx = idx(:,1:options.k+1); dump = -dump(:,1:options.k+1); G((i-1)*nBlock*(options.k+1)+1:nSmp*(options.k+1),1) = repmat(smpIdx',[options.k+1,1]); G((i-1)*nBlock*(options.k+1)+1:nSmp*(options.k+1),2) = idx(:); G((i-1)*nBlock*(options.k+1)+1:nSmp*(options.k+1),3) = dump(:); else smpIdx = (i-1)*nBlock+1:i*nBlock; dist = fea(smpIdx,:)*fea'; dist = full(dist); [dump idx] = sort(-dist,2); % sort each row idx = idx(:,1:options.k+1); dump = -dump(:,1:options.k+1); G((i-1)*nBlock*(options.k+1)+1:i*nBlock*(options.k+1),1) = repmat(smpIdx',[options.k+1,1]); G((i-1)*nBlock*(options.k+1)+1:i*nBlock*(options.k+1),2) = idx(:); G((i-1)*nBlock*(options.k+1)+1:i*nBlock*(options.k+1),3) = dump(:); end end W = sparse(G(:,1),G(:,2),G(:,3),nSmp,nSmp); end if strcmpi(options.WeightMode,'Binary') W(find(W)) = 1; end if ~options.bSelfConnected for i=1:size(W,1) W(i,i) = 0; end end W = max(W,W'); elapse = cputime - tmp_T; return; end % strcmpi(options.NeighborMode,'KNN') & (options.k == 0) % Complete Graph if strcmpi(options.Metric,'Euclidean') W = EuDist2(fea,[],0); W = exp(-W/(2*options.t^2)); else if ~options.bNormalized % feaNorm = sum(fea.^2,2).^.5; % fea = fea ./ repmat(max(1e-10,feaNorm),1,size(fea,2)); [nSmp, nFea] = size(fea); if issparse(fea) fea2 = fea'; feaNorm = sum(fea2.^2,1).^.5; for i = 1:nSmp fea2(:,i) = fea2(:,i) ./ max(1e-10,feaNorm(i)); end fea = fea2'; clear fea2; else feaNorm = sum(fea.^2,2).^.5; for i = 1:nSmp fea(i,:) = fea(i,:) ./ max(1e-12,feaNorm(i)); end end end W = full(fea*fea'); end if ~options.bSelfConnected for i=1:size(W,1) W(i,i) = 0; end end W = max(W,W'); elapse = cputime - tmp_T;