萌新刚学OI,求助语法基础问题

学术版

QZJ666 @ 2024-11-29 19:29:29

#include<bits/stdc++.h>
typedef long long ll;
using namespace std;
const ll mod=998244353;
int n;
ll k;
struct node{
    ll x,y;
}arr[2005];
ll ksm(ll a,ll m,ll p){
    a=(a+p)%p;
    ll ans=1;
    while(m){
        if(m&1)ans=ans*a%p;
        a=a*a%p;
        m>>=1;
    }
    return ans;
}
int main(){
    scanf("%d%lld",&n,&k);
    ll ans=0;
    for(int i=1;i<=n;i++)scanf("%lld%lld",&arr[i].x,&arr[i].y);
    for(int i=1;i<=n;i++){
        ll add=arr[i].y;
        for(int j=1;j<=n;j++){
            if(i==j)continue;
            add=add*(k-arr[j].x+mod)%mod*ksm(arr[i].x-arr[j].x,mod-2,mod)%mod;
        }
        ans=(ans+add)%mod;
    }
    cout<<ans;
    return 0;
}

请问为什么这份代码在洛谷上开了 C++14 和 O2 的情况下会跑将近 2s ,但是如果在 ll ksm(ll a,ll m,ll p) 前面加了 inline 就可以500ms 以内通过呢?


by QZJ666 @ 2024-11-29 19:30:04

不是说开了 O2 的情况下 inline 没啥用吗


by xiaoke2021 @ 2024-11-29 19:32:48

@QZJ666 inline 可以内联函数,对于重复调用的函数可以提高效率,但用太多不太好,甚至可能玄学厌氧


by xiaoke2021 @ 2024-11-29 19:33:21

@QZJ666 有用的


by yukimianyan @ 2024-11-29 19:45:32

应该是取模优化的问题,原来的代码编译器无法进行取模优化


by yukimianyan @ 2024-11-29 19:46:16

@QZJ666 没加 inline 之前编译出来有一堆 idiv,加了之后就没了,看来是加了 inline 编译器发现了这个取模可以优化


by MYLHF @ 2024-11-29 19:49:19

@QZJ666 是这样的,S T3因为inline卡了40min(


by MYLHF @ 2024-11-29 19:50:22

@QZJ666


by Eznibuil @ 2024-11-29 21:38:44

@QZJ666 洛谷不知道怎么编译的,我这里本地使用洛谷的编译指令跑出来的汇编代码几乎是一致的,除了没有 inline 的那份多出来了一段没有用过的 ksm 函数(但这段内容并不在程序中调用,因此几乎没有影响),结果如下:

enzi@Anonymous:~$ g++-9 -S a.cpp -std=c++14 -O2 -Wall -fno-asm -lm -march=native
a.cpp: In function ‘int main()’:
a.cpp:22:7: warning: ignoring return value of ‘int scanf(const char*, ...)’, declared with attribute warn_unused_result [-Wunused-result]
   22 |  scanf("%d%lld",&n,&k);
      |  ~~~~~^~~~~~~~~~~~~~~~
a.cpp:24:28: warning: ignoring return value of ‘int scanf(const char*, ...)’, declared with attribute warn_unused_result [-Wunused-result]
   24 |  for(int i=1;i<=n;i++)scanf("%lld%lld",&arr[i].x,&arr[i].y);
      |                       ~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
enzi@Anonymous:~$ g++-9 -S b.cpp -std=c++14 -O2 -Wall -fno-asm -lm -march=native
b.cpp: In function ‘int main()’:
b.cpp:22:7: warning: ignoring return value of ‘int scanf(const char*, ...)’, declared with attribute warn_unused_result [-Wunused-result]
   22 |  scanf("%d%lld",&n,&k);
      |  ~~~~~^~~~~~~~~~~~~~~~
b.cpp:24:28: warning: ignoring return value of ‘int scanf(const char*, ...)’, declared with attribute warn_unused_result [-Wunused-result]
   24 |  for(int i=1;i<=n;i++)scanf("%lld%lld",&arr[i].x,&arr[i].y);
      |                       ~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
enzi@Anonymous:~$ diff a.s b.s
1c1
<       .file   "a.cpp"
---
>       .file   "b.cpp"
3,41d2
<       .p2align 4
<       .globl  _Z3ksmxxx
<       .type   _Z3ksmxxx, @function
< _Z3ksmxxx:
< .LFB8991:
<       .cfi_startproc
<       endbr64
<       leaq    (%rdi,%rdx), %rax
<       movq    %rdx, %r8
<       movl    $1, %r9d
<       cqto
<       idivq   %r8
<       movq    %rdx, %rcx
<       testq   %rsi, %rsi
<       je      .L1
<       .p2align 4,,10
<       .p2align 3
< .L4:
<       testb   $1, %sil
<       je      .L3
<       movq    %r9, %rax
<       imulq   %rcx, %rax
<       cqto
<       idivq   %r8
<       movq    %rdx, %r9
< .L3:
<       imulq   %rcx, %rcx
<       movq    %rcx, %rax
<       cqto
<       idivq   %r8
<       sarq    %rsi
<       movq    %rdx, %rcx
<       jne     .L4
< .L1:
<       movq    %r9, %rax
<       ret
<       .cfi_endproc
< .LFE8991:
<       .size   _Z3ksmxxx, .-_Z3ksmxxx
86c47
<       jle     .L14
---
>       jle     .L4
89c50
< .L17:
---
> .L7:
99c60
<       jge     .L17
---
>       jge     .L7
101c62
<       jle     .L14
---
>       jle     .L4
115c76
< .L22:
---
> .L12:
121c82
< .L21:
---
> .L11:
123c84
<       je      .L18
---
>       je      .L8
159c120
< .L20:
---
> .L10:
161c122
<       je      .L19
---
>       je      .L9
174c135
< .L19:
---
> .L9:
185c146
<       jne     .L20
---
>       jne     .L10
197c158
< .L18:
---
> .L8:
200c161
<       jne     .L21
---
>       jne     .L11
216,217c177,178
<       jne     .L22
< .L13:
---
>       jne     .L12
> .L3:
238c199
< .L14:
---
> .L4:
241c202
<       jmp     .L13
---
>       jmp     .L3

|