From 768e16dad19e98e5a6090bd09a7662da6079eae0 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Sat, 8 Oct 2022 19:22:44 +0800 Subject: [PATCH] Use weighted algorithm for string matching when finding files in repo (#21370) This PR is for: * https://github.com/go-gitea/gitea/issues/20231 Now, when a user searches `word`, they always see `/{word}.txt` before `/{w}e-g{o}t-{r}esult.{d}at` Demo: When searching "a", "a.ext" comes first. Then when searching "at", the longer matched "template" comes first.
![image](https://user-images.githubusercontent.com/2114189/194588738-3644d891-956f-40e4-b79b-b97d34265456.png) ![image](https://user-images.githubusercontent.com/2114189/194588797-9b124670-4e1e-4510-a170-780295ed89b8.png)
This PR also makes the frontend tests could import feature JS files by introducing `jestSetup.js` Co-authored-by: delvh Co-authored-by: silverwind --- jest.config.js | 5 +- web_src/js/features/repo-findfile.js | 97 +++++++++++++++++------ web_src/js/features/repo-findfile.test.js | 34 ++++++++ web_src/js/testUtils/jestSetup.js | 5 ++ web_src/js/utils.js | 30 ------- web_src/js/utils.test.js | 18 +---- 6 files changed, 118 insertions(+), 71 deletions(-) create mode 100644 web_src/js/features/repo-findfile.test.js create mode 100644 web_src/js/testUtils/jestSetup.js diff --git a/jest.config.js b/jest.config.js index 34d47acc43..2767c1b7ae 100644 --- a/jest.config.js +++ b/jest.config.js @@ -1,3 +1,4 @@ +// to run tests with ES6 module, node must run with "--experimental-vm-modules", or see Makefile's "test-frontend" for reference export default { rootDir: 'web_src', setupFilesAfterEnv: ['jest-extended/all'], @@ -7,6 +8,8 @@ export default { transform: { '\\.svg$': '/js/testUtils/jestRawLoader.js', }, + setupFiles: [ + './js/testUtils/jestSetup.js', // prepare global variables used by our code (eg: window.config) + ], verbose: false, }; - diff --git a/web_src/js/features/repo-findfile.js b/web_src/js/features/repo-findfile.js index 700a7fe693..750b906cef 100644 --- a/web_src/js/features/repo-findfile.js +++ b/web_src/js/features/repo-findfile.js @@ -1,45 +1,96 @@ import $ from 'jquery'; import {svg} from '../svg.js'; -import {strSubMatch} from '../utils.js'; const {csrf} = window.config; const threshold = 50; let files = []; let $repoFindFileInput, $repoFindFileTableBody, $repoFindFileNoResult; + +// return the case-insensitive sub-match result as an array: [unmatched, matched, unmatched, matched, ...] +// res[even] is unmatched, res[odd] is matched, see unit tests for examples +// argument subLower must be a lower-cased string. +export function strSubMatch(full, subLower) { + const res = ['']; + let i = 0, j = 0; + const fullLower = full.toLowerCase(); + while (i < subLower.length && j < fullLower.length) { + if (subLower[i] === fullLower[j]) { + if (res.length % 2 !== 0) res.push(''); + res[res.length - 1] += full[j]; + j++; + i++; + } else { + if (res.length % 2 === 0) res.push(''); + res[res.length - 1] += full[j]; + j++; + } + } + if (i !== subLower.length) { + // if the sub string doesn't match the full, only return the full as unmatched. + return [full]; + } + if (j < full.length) { + // append remaining chars from full to result as unmatched + if (res.length % 2 === 0) res.push(''); + res[res.length - 1] += full.substring(j); + } + return res; +} + +export function calcMatchedWeight(matchResult) { + let weight = 0; + for (let i = 0; i < matchResult.length; i++) { + if (i % 2 === 1) { // matches are on odd indices, see strSubMatch + // use a function f(x+x) > f(x) + f(x) to make the longer matched string has higher weight. + weight += matchResult[i].length * matchResult[i].length; + } + } + return weight; +} + +export function filterRepoFilesWeighted(files, filter) { + let filterResult = []; + if (filter) { + const filterLower = filter.toLowerCase(); + // TODO: for large repo, this loop could be slow, maybe there could be one more limit: + // ... && filterResult.length < threshold * 20, wait for more feedbacks + for (let i = 0; i < files.length; i++) { + const res = strSubMatch(files[i], filterLower); + if (res.length > 1) { // length==1 means unmatched, >1 means having matched sub strings + filterResult.push({matchResult: res, matchWeight: calcMatchedWeight(res)}); + } + } + filterResult.sort((a, b) => b.matchWeight - a.matchWeight); + filterResult = filterResult.slice(0, threshold); + } else { + for (let i = 0; i < files.length && i < threshold; i++) { + filterResult.push({matchResult: [files[i]], matchWeight: 0}); + } + } + return filterResult; +} + function filterRepoFiles(filter) { const treeLink = $repoFindFileInput.attr('data-url-tree-link'); $repoFindFileTableBody.empty(); - const fileRes = []; - if (filter) { - for (let i = 0; i < files.length && fileRes.length < threshold; i++) { - const subMatch = strSubMatch(files[i], filter); - if (subMatch.length > 1) { - fileRes.push(subMatch); - } - } - } else { - for (let i = 0; i < files.length && i < threshold; i++) { - fileRes.push([files[i]]); - } - } - + const filterResult = filterRepoFilesWeighted(files, filter); const tmplRow = ``; - $repoFindFileNoResult.toggle(fileRes.length === 0); - for (const matchRes of fileRes) { + $repoFindFileNoResult.toggle(filterResult.length === 0); + for (const r of filterResult) { const $row = $(tmplRow); const $a = $row.find('a'); - $a.attr('href', `${treeLink}/${matchRes.join('')}`); + $a.attr('href', `${treeLink}/${r.matchResult.join('')}`); const $octiconFile = $(svg('octicon-file')).addClass('mr-3'); $a.append($octiconFile); - // if the target file path is "abc/xyz", to search "bx", then the matchRes is ['a', 'b', 'c/', 'x', 'yz'] - // the matchRes[odd] is matched and highlighted to red. - for (let j = 0; j < matchRes.length; j++) { - if (!matchRes[j]) continue; - const $span = $('').text(matchRes[j]); + // if the target file path is "abc/xyz", to search "bx", then the matchResult is ['a', 'b', 'c/', 'x', 'yz'] + // the matchResult[odd] is matched and highlighted to red. + for (let j = 0; j < r.matchResult.length; j++) { + if (!r.matchResult[j]) continue; + const $span = $('').text(r.matchResult[j]); if (j % 2 === 1) $span.addClass('ui text red'); $a.append($span); } diff --git a/web_src/js/features/repo-findfile.test.js b/web_src/js/features/repo-findfile.test.js new file mode 100644 index 0000000000..2d96ed4463 --- /dev/null +++ b/web_src/js/features/repo-findfile.test.js @@ -0,0 +1,34 @@ +import {strSubMatch, calcMatchedWeight, filterRepoFilesWeighted} from './repo-findfile.js'; + +describe('Repo Find Files', () => { + test('strSubMatch', () => { + expect(strSubMatch('abc', '')).toEqual(['abc']); + expect(strSubMatch('abc', 'a')).toEqual(['', 'a', 'bc']); + expect(strSubMatch('abc', 'b')).toEqual(['a', 'b', 'c']); + expect(strSubMatch('abc', 'c')).toEqual(['ab', 'c']); + expect(strSubMatch('abc', 'ac')).toEqual(['', 'a', 'b', 'c']); + expect(strSubMatch('abc', 'z')).toEqual(['abc']); + expect(strSubMatch('abc', 'az')).toEqual(['abc']); + + expect(strSubMatch('ABc', 'ac')).toEqual(['', 'A', 'B', 'c']); + expect(strSubMatch('abC', 'ac')).toEqual(['', 'a', 'b', 'C']); + + expect(strSubMatch('aabbcc', 'abc')).toEqual(['', 'a', 'a', 'b', 'b', 'c', 'c']); + expect(strSubMatch('the/directory', 'hedir')).toEqual(['t', 'he', '/', 'dir', 'ectory']); + }); + + test('calcMatchedWeight', () => { + expect(calcMatchedWeight(['a', 'b', 'c', 'd']) < calcMatchedWeight(['a', 'bc', 'c'])).toBeTruthy(); + }); + + test('filterRepoFilesWeighted', () => { + // the first matched result should always be the "word.txt" + let res = filterRepoFilesWeighted(['word.txt', 'we-got-result.dat'], 'word'); + expect(res).toHaveLength(2); + expect(res[0].matchResult).toEqual(['', 'word', '.txt']); + + res = filterRepoFilesWeighted(['we-got-result.dat', 'word.txt'], 'word'); + expect(res).toHaveLength(2); + expect(res[0].matchResult).toEqual(['', 'word', '.txt']); + }); +}); diff --git a/web_src/js/testUtils/jestSetup.js b/web_src/js/testUtils/jestSetup.js new file mode 100644 index 0000000000..779be9f0c7 --- /dev/null +++ b/web_src/js/testUtils/jestSetup.js @@ -0,0 +1,5 @@ +window.config = { + csrfToken: 'jest-test-csrf-token-123456', + pageData: {}, + i18n: {}, +}; diff --git a/web_src/js/utils.js b/web_src/js/utils.js index 8e8dc01be1..4020b7a7f4 100644 --- a/web_src/js/utils.js +++ b/web_src/js/utils.js @@ -59,36 +59,6 @@ export function parseIssueHref(href) { return {owner, repo, type, index}; } -// return the sub-match result as an array: [unmatched, matched, unmatched, matched, ...] -// res[even] is unmatched, res[odd] is matched, see unit tests for examples -export function strSubMatch(full, sub) { - const res = ['']; - let i = 0, j = 0; - const subLower = sub.toLowerCase(), fullLower = full.toLowerCase(); - while (i < subLower.length && j < fullLower.length) { - if (subLower[i] === fullLower[j]) { - if (res.length % 2 !== 0) res.push(''); - res[res.length - 1] += full[j]; - j++; - i++; - } else { - if (res.length % 2 === 0) res.push(''); - res[res.length - 1] += full[j]; - j++; - } - } - if (i !== sub.length) { - // if the sub string doesn't match the full, only return the full as unmatched. - return [full]; - } - if (j < full.length) { - // append remaining chars from full to result as unmatched - if (res.length % 2 === 0) res.push(''); - res[res.length - 1] += full.substring(j); - } - return res; -} - // pretty-print a number using locale-specific separators, e.g. 1200 -> 1,200 export function prettyNumber(num, locale = 'en-US') { if (typeof num !== 'number') return ''; diff --git a/web_src/js/utils.test.js b/web_src/js/utils.test.js index 5c17c162af..762f29f6fe 100644 --- a/web_src/js/utils.test.js +++ b/web_src/js/utils.test.js @@ -1,5 +1,5 @@ import { - basename, extname, isObject, uniq, stripTags, joinPaths, parseIssueHref, strSubMatch, + basename, extname, isObject, uniq, stripTags, joinPaths, parseIssueHref, prettyNumber, parseUrl, } from './utils.js'; @@ -86,22 +86,6 @@ test('parseIssueHref', () => { expect(parseIssueHref('')).toEqual({owner: undefined, repo: undefined, type: undefined, index: undefined}); }); -test('strSubMatch', () => { - expect(strSubMatch('abc', '')).toEqual(['abc']); - expect(strSubMatch('abc', 'a')).toEqual(['', 'a', 'bc']); - expect(strSubMatch('abc', 'b')).toEqual(['a', 'b', 'c']); - expect(strSubMatch('abc', 'c')).toEqual(['ab', 'c']); - expect(strSubMatch('abc', 'ac')).toEqual(['', 'a', 'b', 'c']); - expect(strSubMatch('abc', 'z')).toEqual(['abc']); - expect(strSubMatch('abc', 'az')).toEqual(['abc']); - - expect(strSubMatch('abc', 'aC')).toEqual(['', 'a', 'b', 'c']); - expect(strSubMatch('abC', 'ac')).toEqual(['', 'a', 'b', 'C']); - - expect(strSubMatch('aabbcc', 'abc')).toEqual(['', 'a', 'a', 'b', 'b', 'c', 'c']); - expect(strSubMatch('the/directory', 'hedir')).toEqual(['t', 'he', '/', 'dir', 'ectory']); -}); - test('prettyNumber', () => { expect(prettyNumber()).toEqual(''); expect(prettyNumber(null)).toEqual('');