70 TLE,请求常数优化支援

P3793 由乃救爷爷

B_1168 @ 2020-04-23 06:03:58

本萌新写了一个分块+ST表的程序,经过多次尝试,仍然只有70分:下载了#2测试点(n,m=20,000,000)进行测试,本机运行时间长达16328ms,在洛谷上能通过的最大测试点(#8)则用了4.46s/184.29MB,请各路大佬们看看有没有什么方法可以压缩本蒟蒻的代码,不胜感激!

//(以上略去一大批优化指令)

#include<bits/stdc++.h>

//#include<Windows.h>

#define maxn 20000001
#define maxs 4500
using namespace std;

int n,m,s,len,a[maxn],be[maxn],dp[maxs][14],rm[maxn],lm[maxn];

unsigned long long ans=0;

//dp for Sparse-table based O(1) cross-chunk queries
//rm and lm for the prefix and suffix max for the specific chunk
//be for the chunk where the chunk was located in

namespace GenHelper
{
    unsigned z1,z2,z3,z4,b;
    unsigned rand_()
    {
    b=((z1<<6)^z1)>>13;
    z1=((z1&4294967294U)<<18)^b;
    b=((z2<<2)^z2)>>27;
    z2=((z2&4294967288U)<<2)^b;
    b=((z3<<13)^z3)>>21;
    z3=((z3&4294967280U)<<7)^b;
    b=((z4<<3)^z4)>>12;
    z4=((z4&4294967168U)<<13)^b;
    return (z1^z2^z3^z4);
    }
}
void srand(unsigned x)
{using namespace GenHelper;
z1=x; z2=(~x)^0x233333333U; z3=x^0x1234598766U; z4=(~x)+51;}
int read()
{
    using namespace GenHelper;
    int a=rand_()&32767;
    int b=rand_()&32767;
    return a*32768+b;
}

long long query(long long from,long long to){
    int cnt=0;

    if(to<from) swap(to,from);

    int ll=be[from],rr=be[to],temp;

//  printf("%d %d %d %d ",from,to,ll,rr);

    if(ll==rr){
        for(int i=from;i<=to;i++) cnt=max(cnt,a[i]);
    }

    if(rr-ll==1){
        cnt=max(cnt,max(lm[from],rm[to]));
    }

    if(rr-ll>=2){
        cnt=max(cnt,max(lm[from],rm[to]));
        rr--,ll++;
        temp=log(rr-ll+1)/log(2);
        cnt=max(cnt,max(dp[ll][temp],dp[rr-(1<<temp)+1][temp]));
    }

/*  for(long long i=from;i<=min(to,be[from]*len);i++) cnt=min(cnt,a[i]);
    if(be[from]!=be[to]){
        for(long long i=(be[to]-1)*len+1;i<=to;i++) cnt=min(cnt,a[i]);
    }
    for(long long i=be[from]+1;i<=be[to]-1;i++) cnt=min(cnt,val[i]);*/
    return cnt;
}

int main(){
//  DWORD start_time = GetTickCount();
    scanf("%d%d%d",&n,&m,&s);
    srand(s);
    len=sqrt(n);
    for(long long i=1;i<=n;i++){
        be[i]=(i-1)/len+1;
//      printf("%d ",be[i]);
    }
//  printf("\n");

    for(long long i=1;i<=n;i++){
        a[i]=read();
        dp[be[i]][0]=max(dp[be[i]][0],a[i]);
    }

    for(int i=1;i<=n;i++) {
        if(be[i]==be[i-1]) rm[i]=max(a[i],rm[i-1]);
        else rm[i]=a[i];
//      printf("%d ",rm[i]);
    }
//  printf("\n");

    for(int i=n;i>=1;i--){
        if(be[i]==be[i+1]) lm[i]=max(a[i],lm[i+1]);
        else lm[i]=a[i];
    }
/*  for(int i=1;i<=n;i++) printf("%d ",lm[i]);
//  printf("\n");

//  for(int i=1;i<=be[n];i++) printf("%d ",val[i]);
//  printf("\n");

    for(int i=1;i<=be[n];i++) dp[i][0]=val[i];*/
    for(int j=1;(1<<j)<=be[n];j++)for(int i=1;i+(1<<j)<=be[n]+1;i++)dp[i][j]=max(dp[i][j-1],dp[i+(1<<j-1)][j-1]);

    for(;m;m--){
        int l,r,lglr;
        l=read()%n+1,r=read()%n+1;
        ans+=query(l,r); 
    }
    printf("%lld\n",ans);
//    DWORD end_time = GetTickCount();
//    cout << "The run time is:" << (end_time - start_time) << "ms!" << endl;
}

by B_1168 @ 2020-04-23 06:14:33

修正一下,不知道为什么,跑了几次之后,本机用时被压到了9500ms上下……然而洛谷时限5000ms qwq


by Rorschachindark @ 2020-04-23 07:57:53

笛卡尔树不香么?


by bovine__kebi @ 2020-04-23 08:01:24

笛卡尔树可以O(n)求RMQ


by B_1168 @ 2020-04-23 08:25:22

@Walking_Dead 太蒻了,不会写(捂脸),只会写这种常数巨大的分块+ST+前后缀……


|