top of page

CCC 2020 S3: Searching for Strings

Updated: Nov 5, 2020

The problem can be solved only by c++.

The key thing is the generated a hash table to identify these distinct string.

We can use string prefix and the following formula to keep a hash table.

hsh[i] = hsh[i-1]*base + t[i-1]; pw[i] = pw[i-1]*base;

Then:

h(s) = hsh[r] - hsh[l-1]*pw[r-l+1]


The above is a fast hash function to check the distinct string. Then using a sliding window to generate a string one by one, and check if it is unique.

The result is all distinct string number.


#include <vector>
#include <unordered_set>
#include <string>
#include <iostream>
using namespace std;

long long hsh[200010], pw[200010], base = 131;
int fs[26], ft[26];
unordered_set<long long> st;

bool check(){
    for (int i = 0; i < 26; i++){
        if (fs[i] != ft[i]) return false;
    }
    return true;
}

long long getSubHash(int l, int r){
    return hsh[r] - hsh[l-1]*pw[r-l+1];
}

int main() {

    string s, t;
    cin >> s >> t;
    int n = s.length(), m = t.length();
    for (int i = 1; i <= n; i++){
        fs[s[i-1]-'a']++;
    }
    pw[0] = 1;
    // Generate hash which can be calculated with sliding window
    // Kind of like prefix sum array
    for (int i = 1; i <= m; i++){
        hsh[i] = hsh[i-1]*base + t[i-1];
        pw[i] = pw[i-1]*base;
    }
    for (int i = 1; i <= m; i++){
        // Sliding window
        ft[t[i-1]-'a']++;
        if (i < n) continue;
        if (i > n) ft[t[i-n-1]-'a']--;
        // Rolling hash
        if (check()) st.insert(getSubHash(i-n+1, i));
    }
    cout << st.size() << endl;
}

The following code is a python that uses python built-in function hash(), it can get 8 points:


N = input()
H = input()
nlen = len(N)
nlists = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
rset = set()
isone = True
nlists[ord(N[0]) -ord('a')] += 1
nlists[ord(H[0]) - ord('a')] -= 1
for x in range(1,len(N)):
    nlists[ord(N[x]) -ord('a')] += 1
    if x < len(H):
        nlists[ord(H[x]) - ord('a')] -= 1
    if isone and N[x] != N[x-1]:
        isone = False

def issolution():
    for x in nlists:
        if x!=0:
            return False
    return True

i = len(N)-1

while i < len(H):
    if issolution():
        str1 = H[i + 1 - nlen:i+1]
        rset.add(hash(str1))
        if isone:
            break
    i += 1
    if i < len(H):
        m1 = ord(H[i]) - ord('a')
        nlists[m1] -= 1
        m1 = ord(H[i - nlen]) -ord('a')
        nlists[m1] += 1

print(len(rset))

Recent Posts

See All

Comments


bottom of page