分析
https://www.luogu.org/blog/DRA/solution-p4841
代码(似乎附赠了一个全家桶呢)
// Polynomial "toolkit" (FFT / FWT / NTT / inverse / ln / exp / pow / division)
// used here to count labelled connected graphs on n vertices modulo 1004535809.
//
// Each preprocessor directive must sit on its own line: a directive extends to
// the end of the physical line, so several directives on one line do not compile.
#pragma GCC optimize(2)
#pragma GCC optimize(3)
#pragma GCC optimize("Ofast")
#pragma GCC optimize("inline")
#pragma GCC optimize("-fgcse")
#pragma GCC optimize("-fgcse-lm")
#pragma GCC optimize("-fipa-sra")
#pragma GCC optimize("-ftree-pre")
#pragma GCC optimize("-ftree-vrp")
#pragma GCC optimize("-fpeephole2")
#pragma GCC optimize("-ffast-math")
#pragma GCC optimize("-fsched-spec")
#pragma GCC optimize("unroll-loops")
#pragma GCC optimize("-falign-jumps")
#pragma GCC optimize("-falign-loops")
#pragma GCC optimize("-falign-labels")
#pragma GCC optimize("-fdevirtualize")
#pragma GCC optimize("-fcaller-saves")
#pragma GCC optimize("-fcrossjumping")
#pragma GCC optimize("-fthread-jumps")
#pragma GCC optimize("-funroll-loops")
#pragma GCC optimize("-fwhole-program")
#pragma GCC optimize("-freorder-blocks")
#pragma GCC optimize("-fschedule-insns")
#pragma GCC optimize("inline-functions")
#pragma GCC optimize("-ftree-tail-merge")
#pragma GCC optimize("-fschedule-insns2")
#pragma GCC optimize("-fstrict-aliasing")
#pragma GCC optimize("-fstrict-overflow")
#pragma GCC optimize("-falign-functions")
#pragma GCC optimize("-fcse-skip-blocks")
#pragma GCC optimize("-fcse-follow-jumps")
#pragma GCC optimize("-fsched-interblock")
#pragma GCC optimize("-fpartial-inlining")
#pragma GCC optimize("no-stack-protector")
#pragma GCC optimize("-freorder-functions")
#pragma GCC optimize("-findirect-inlining")
#pragma GCC optimize("-frerun-cse-after-loop")
#pragma GCC optimize("inline-small-functions")
#pragma GCC optimize("-finline-small-functions")
#pragma GCC optimize("-ftree-switch-conversion")
#pragma GCC optimize("-foptimize-sibling-calls")
#pragma GCC optimize("-fexpensive-optimizations")
#pragma GCC optimize("-funsafe-loop-optimizations")
#pragma GCC optimize("inline-functions-called-once")
#pragma GCC optimize("-fdelete-null-pointer-checks")

#include <algorithm>
#include <cctype>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <utility>

// All integer arithmetic is 64-bit so that products of two residues fit.
// (A typedef replaces the former `#define int long long`, which redefined a keyword.)
typedef long long ll;

const ll g = 3;             // generator used as the NTT root base for `mod`
const ll mod = 1004535809;  // NTT modulus
const ll MAXN = 400100;     // capacity of every integer work array
// Largest n main() accepts: the ln/exp steps need a power of two >= 2*(n+1)
// and read index N of an array, so that power of two must stay below MAXN.
const ll MAX_VERTICES = 131071;

// G is the modular inverse of g; main() must set it before any inverse NTT runs.
// len is the bit count of the current transform size; r[] is the matching
// bit-reversal permutation. a, b, c, a1, b1, ib, ans, ans2 are shared scratch
// and result buffers used by the routines below.
ll G, a[MAXN], b[MAXN], c[MAXN], len, r[MAXN], a1[MAXN], b1[MAXN], ib[MAXN], ans[MAXN], ans2[MAXN];

// Returns x^p modulo `mod` by binary exponentiation. p must be non-negative.
ll pw(ll x, ll p) {
    ll res = 1;
    while (p) {
        if (p & 1) res = res * x % mod;
        x = x * x % mod;
        p >>= 1;
    }
    return res;
}

const double PI = std::acos(-1.0);

// Minimal complex number for the floating-point FFT.
struct node {
    double x, y;
};

node A[2000100], B[2000100];

// Builds a node from its real and imaginary parts.
static node make_node(double re, double im) {
    node t;
    t.x = re;
    t.y = im;
    return t;
}

node operator+(const node x, const node y) { return make_node(x.x + y.x, x.y + y.y); }
node operator-(const node x, const node y) { return make_node(x.x - y.x, x.y - y.y); }
node operator*(const node x, const node y) {
    return make_node(x.x * y.x - x.y * y.y, x.x * y.y + x.y * y.x);
}

// Fills r[0..N) with the bit-reversal permutation for the current global `len`.
// r[0] is always 0, so it is written directly; that also avoids a shift by
// (len - 1) when len is 0.
static void fill_bit_reversal(ll N) {
    if (N > 0) r[0] = 0;
    for (ll i = 1; i < N; i++) r[i] = (r[i >> 1] >> 1) | ((i & 1) << (len - 1));
}

// In-place iterative FFT of a[0..n). f = 1 is the forward transform, f = -1 the
// inverse. n must be a power of two and r[] must already hold the bit-reversal
// table for n. On the inverse only the real parts are divided by n.
void fft(node a[], ll f, ll n) {
    for (ll i = 0; i < n; i++)
        if (i < r[i]) std::swap(a[i], a[r[i]]);
    for (ll i = 1; i < n; i <<= 1) {
        const node wn = make_node(std::cos(PI / i), f * std::sin(PI / i));
        for (ll j = 0; j < n; j += (i << 1)) {
            node w = make_node(1, 0);
            for (ll k = 0; k < i; k++, w = w * wn) {
                const node p = a[j + k];
                const node q = a[i + j + k] * w;
                a[j + k] = p + q;
                a[i + j + k] = p - q;
            }
        }
    }
    if (f == -1)
        for (ll i = 0; i < n; i++) a[i].x = a[i].x / n;
}

// Multiplies A and B with the floating-point FFT and stores the rounded
// coefficients in the global ans[]. The transform size is the smallest power of
// two strictly greater than n. Both inputs are overwritten.
// NOTE(review): r[] and ans[] only have MAXN entries, so that size must stay below MAXN.
void get_fft_mul(node A[], node B[], ll n) {
    ll N;
    len = 0;  // was missing: a stale `len` produced a wrong bit-reversal table
    for (N = 1; N <= n; N <<= 1) len++;
    fill_bit_reversal(N);
    fft(A, 1, N);
    fft(B, 1, N);
    for (ll i = 0; i < N; i++) A[i] = A[i] * B[i];
    fft(A, -1, N);
    for (ll i = 0; i < N; i++) ans[i] = (ll)(A[i].x + 0.5);
}

// In-place OR-convolution transform of a[0..n) modulo `mod`;
// f = 1 applies it, any other value undoes it. n must be a power of two.
void fwt_or(ll a[], ll f, ll n) {
    for (ll i = 1; i < n; i <<= 1)
        for (ll j = 0; j < n; j += (i << 1))
            for (ll k = 0; k < i; k++) {
                if (f == 1)
                    a[i + j + k] = (a[j + k] + a[i + j + k]) % mod;
                else
                    a[i + j + k] = (a[i + j + k] + mod - a[j + k]) % mod;
            }
}

// In-place AND-convolution transform of a[0..n) modulo `mod`;
// f = 1 applies it, any other value undoes it. n must be a power of two.
void fwt_and(ll a[], ll f, ll n) {
    for (ll i = 1; i < n; i <<= 1)
        for (ll j = 0; j < n; j += (i << 1))
            for (ll k = 0; k < i; k++) {
                if (f == 1)
                    a[j + k] = (a[j + k] + a[i + j + k]) % mod;
                else
                    a[j + k] = (a[j + k] + mod - a[i + j + k]) % mod;
            }
}

// In-place XOR-convolution (Walsh-Hadamard) transform of a[0..n) modulo `mod`;
// f = -1 requests the inverse. n must be a power of two.
void fwt_xor(ll a[], ll f, ll n) {
    for (ll i = 1; i < n; i <<= 1)
        for (ll j = 0; j < n; j += (i << 1))
            for (ll k = 0; k < i; k++) {
                const ll x = a[j + k], y = a[i + j + k];
                a[j + k] = (x + y) % mod;
                a[i + j + k] = (x + mod - y) % mod;
            }
    if (f == -1) {
        // Running the butterflies twice multiplies every entry by n, so the
        // inverse must divide by n. The old code divided by 2 only once, which
        // is wrong whenever n > 2.
        const ll inv_n = pw(n % mod, mod - 2);
        for (ll i = 0; i < n; i++) a[i] = a[i] * inv_n % mod;
    }
}

// In-place number-theoretic transform of a[0..n) modulo `mod`.
// opt = 1 is the forward transform, opt = -1 the inverse (including the 1/n
// scaling). n must be a power of two, r[] must hold the bit-reversal table for
// n, and G must already be initialised.
void ntt(ll a[], ll opt, ll n) {
    for (ll i = 0; i < n; i++)
        if (i < r[i]) std::swap(a[i], a[r[i]]);
    for (ll i = 1; i < n; i <<= 1) {
        const ll base = (opt == 1 ? g : G);
        const ll wn = pw(base, (mod - 1) / (i << 1));
        for (ll j = 0; j < n; j += (i << 1)) {
            ll w = 1;
            for (ll k = 0; k < i; k++, w = w * wn % mod) {
                const ll p = a[j + k];
                const ll q = a[i + j + k] * w % mod;
                a[j + k] = (p + q) % mod;
                a[i + j + k] = (p - q + mod) % mod;
            }
        }
    }
    if (opt == -1) {
        // The inverse of n is only needed here, so it is no longer computed on
        // forward transforms.
        const ll inv_n = pw(n, mod - 2);
        for (ll i = 0; i < n; i++) a[i] = a[i] * inv_n % mod;
    }
}

// Divide-and-conquer convolution skeleton over the index range [le, ri].
// The problem-specific steps are left as commented-out templates; as written,
// the routine transforms zeroed buffers and adds the (zero) result into
// b[mid+1..ri].
void cdq_fft(ll le, ll ri) {
    if (le == ri) {
        // ..... (leaf handling left blank in the template)
        return;
    }
    const ll m = ri - le;
    const ll mid = (le + ri) >> 1;
    cdq_fft(le, mid);
    // `len` is reset AFTER the recursive call: the call overwrites the global,
    // so resetting it beforehand (as the old code did) left a stale bit count.
    ll n;
    len = 0;
    for (n = 1; n <= 2 * (m + 1); n <<= 1) len++;
    for (ll i = 0; i < n; i++) a1[i] = b1[i] = 0;
    fill_bit_reversal(n);
    // for(i=0;i<=mid-le;i++)b1[i]=b[i+le];
    // for(i=0;i<=m;i++)a1[i]=a[i];
    ntt(a1, 1, n);
    ntt(b1, 1, n);
    // for(i=0;i<n;i++)b1[i]=a1[i]*b1[i]%mod;
    ntt(b1, -1, n);
    for (ll i = mid + 1; i <= ri; i++) b[i] = (b[i] + b1[i - le]) % mod;
    cdq_fft(mid + 1, ri);
}

// Writes into b[0..x) the inverse of the power series a modulo z^x, using
// Newton iteration b <- b * (2 - a * b). Uses the global c[] as scratch and
// overwrites len / r[].
// Precondition: every entry of b that the transforms touch (up to the smallest
// power of two >= 2x) is zero on entry, and a[0] is invertible.
void get_inv(ll x, ll a[], ll b[]) {
    if (x <= 0) return;  // nothing to compute; also stops the recursion cleanly
    if (x == 1) {
        b[0] = pw(a[0], mod - 2);
        return;
    }
    get_inv((x + 1) >> 1, a, b);
    ll n;
    len = 0;
    for (n = 1; n < (x << 1); n <<= 1) len++;
    fill_bit_reversal(n);
    for (ll i = 0; i < x; i++) c[i] = a[i];
    for (ll i = x; i < n; i++) c[i] = 0;
    ntt(b, 1, n);
    ntt(c, 1, n);
    for (ll i = 0; i < n; i++) b[i] = (2 - c[i] * b[i] % mod + mod) % mod * b[i] % mod;
    ntt(b, -1, n);
    for (ll i = x; i < n; i++) b[i] = 0;  // keep only the first x coefficients
}

// Formal derivative: b[i] = (i + 1) * a[i + 1] for i in [0, n - 1), b[n - 1] = 0.
// The old loop also read a[n] and then discarded the result; that read is gone.
void get_dao(ll n, ll a[], ll b[]) {
    if (n <= 0) return;
    for (ll i = 0; i + 1 < n; i++) b[i] = a[i + 1] * (i + 1) % mod;
    b[n - 1] = 0;
}

// Formal integral with zero constant term: b[i] = a[i - 1] / i for i in
// [1, n), b[0] = 0. Runs from high to low index so a and b may be the same array.
void get_ji(ll n, ll a[], ll b[]) {
    for (ll i = n - 1; i > 0; i--) b[i] = a[i - 1] * pw(i, mod - 2) % mod;
    b[0] = 0;
}

// Cyclic product of a and b of length n via NTT; the result is stored in a.
// b is left in its transformed (NTT-domain) state. len / r[] must match n.
void get_mul(ll n, ll a[], ll b[]) {
    ntt(a, 1, n);
    ntt(b, 1, n);
    for (ll i = 0; i < n; i++) a[i] = a[i] * b[i] % mod;
    ntt(a, -1, n);
}

// Writes the logarithm of the power series a into b as integral(a' / a).
// Only b[0..n) is meaningful; entries from n up to the transform size hold
// leftover high-order terms. Uses the globals a1[] and c[] as scratch (so `a`
// must not be a1) and leaves a1 zeroed.
// Precondition: a[0] == 1 and a is zero from index n up to the transform size.
void get_ln(ll n, ll a[], ll b[]) {
    for (ll i = 0; i < (n << 1); i++) b[i] = 0;
    get_inv(n, a, b);
    ll N;
    len = 0;
    for (N = 1; N < (n << 1); N <<= 1) len++;
    fill_bit_reversal(N);
    get_dao(N, a, a1);
    get_mul(N, a1, b);
    get_ji(N, a1, b);
    for (ll i = 0; i < N; i++) a1[i] = 0;
}

// Writes into b[0..n) the exponential of the power series a modulo z^n, using
// Newton iteration b <- b * (1 + a - ln b). Uses the global b1[] (and, through
// get_ln, a1[] and c[]) as scratch.
// Precondition: b is zero on entry, which the n == 1 base case relies on.
void get_exp(ll n, ll a[], ll b[]) {
    if (n <= 0) return;  // stops the recursion cleanly for an empty request
    if (n == 1) {
        b[0] = 1;
        return;
    }
    get_exp((n + 1) >> 1, a, b);
    get_ln(n, b, b1);
    b1[0] = (a[0] + 1 - b1[0] + mod) % mod;
    for (ll i = 1; i < n; i++) b1[i] = (a[i] - b1[i] + mod) % mod;
    ll N;
    len = 0;
    for (N = 1; N < (n << 1); N <<= 1) len++;
    fill_bit_reversal(N);
    for (ll i = n; i < N; i++) b[i] = b1[i] = 0;  // drop get_ln's high-order leftovers
    ntt(b, 1, N);
    ntt(b1, 1, N);
    for (ll i = 0; i < N; i++) b[i] = b[i] * b1[i] % mod;
    ntt(b, -1, N);
    for (ll i = n; i < N; i++) b[i] = b1[i] = 0;
}

// Reads a non-negative decimal integer from stdin, reduced modulo `mod` digit
// by digit so arbitrarily long inputs work. Returns 0 if end of input is
// reached before any digit. The character is kept in an int so EOF is
// detectable; the old `char` version spun forever at end of input.
ll read_pw() {
    ll x = 0;
    int ch = std::getchar();
    while (ch != EOF && !std::isdigit(ch)) ch = std::getchar();
    while (ch != EOF && std::isdigit(ch)) {
        x = (x * 10 + (ch - '0')) % mod;
        ch = std::getchar();
    }
    return x;
}

// Computes a^k modulo z^n as exp(k * ln a). b receives k * ln a and the result
// is written to the global ans[]. Requires a[0] == 1 and ans[] zeroed on entry.
void get_pw(ll n, ll a[], ll b[], ll k) {
    get_ln(n, a, b);
    for (ll i = 0; i < n; i++) b[i] = b[i] * k % mod;
    get_exp(n, b, ans);
}

// Polynomial division with remainder: a1 has n coefficients, b1 has m
// coefficients (n >= m). Prints the n - m + 1 quotient coefficients on one line
// and the m - 1 remainder coefficients on the next; they are also left in
// ans[] and ans2[]. Works on the reversed polynomials with the globals a, b and ib.
// NOTE(review): a[n..N) and ib[m..N) are assumed to be zero on entry — confirm
// against callers (nothing in this file calls get_div).
void get_div(ll n, ll m, ll a1[], ll b1[]) {
    for (ll i = 0; i < n; i++) a[i] = a1[n - 1 - i];
    for (ll i = 0; i < m; i++) {
        b[i] = b1[m - 1 - i];
        ib[i] = 0;
    }
    for (ll i = n - m + 2; i < m; i++) b[i] = 0;
    get_inv(n - m + 1, b, ib);
    ll N;
    len = 0;
    for (N = 1; N < (n << 1); N <<= 1) len++;
    fill_bit_reversal(N);
    get_mul(N, a, ib);
    const ll quotient_len = n - m + 1;
    for (ll i = 0; i < quotient_len; i++) {
        ans[i] = a[quotient_len - 1 - i];
        std::printf("%lld ", ans[i]);
    }
    std::puts("");
    // Remainder = a1 - quotient * b1. The product is formed on a zero-padded
    // copy of b1 in b[]: the old code ran the NTT directly on the caller's b1,
    // which clobbered it and required it to hold N entries. The stale tail of
    // ans[] is cleared so it cannot leak into the product.
    for (ll i = quotient_len; i < N; i++) ans[i] = 0;
    for (ll i = 0; i < m; i++) b[i] = b1[i];
    for (ll i = m; i < N; i++) b[i] = 0;
    get_mul(N, ans, b);
    for (ll i = 0; i < m - 1; i++) {
        ans2[i] = (a1[i] - ans[i] + mod) % mod;
        std::printf("%lld ", ans2[i]);
    }
    std::puts("");
}

// fac[i] = i! and inv[i] = 1 / i! modulo `mod`, filled for i in [0, n] by main().
// Sized like the other work arrays; the former 100100 entries were overrun for
// large n.
ll fac[MAXN], inv[MAXN];

// Reads n and prints the number of labelled connected graphs on n vertices
// modulo `mod`. With C(z) = sum 2^(i(i-1)/2) z^i / i! (all graphs) the
// connected-graph series F satisfies z F' = z C' / C. The result is then pushed
// through get_pw with k = 1 (exp(ln(.))), which leaves it unchanged but
// exercises the ln/exp routines.
int main() {
    G = pw(g, mod - 2);
    ll n;
    if (std::scanf("%lld", &n) != 1 || n < 0 || n > MAX_VERTICES) return 1;
    // k = read_pw();
    const ll k = 1;

    fac[0] = 1;
    for (ll i = 1; i <= n; i++) fac[i] = fac[i - 1] * i % mod;
    inv[n] = pw(fac[n], mod - 2);
    for (ll i = n - 1; i >= 0; i--) inv[i] = inv[i + 1] * (i + 1) % mod;

    for (ll i = 0; i <= n; i++) a[i] = pw(2, i * (i - 1) / 2) * inv[i] % mod;
    get_inv(n + 1, a, b);  // b = 1 / C modulo z^(n+1)

    // Both factors have up to n + 1 coefficients, so the cyclic product needs
    // 2 * (n + 1) slots. The old size (just above n + 1) let high terms wrap
    // around into coefficients 0..n-2.
    ll N;
    len = 0;
    for (N = 1; N < ((n + 1) << 1); N <<= 1) len++;
    fill_bit_reversal(N);
    for (ll i = 0; i < N; i++) a[i] = 0;
    for (ll i = 1; i <= n; i++) a[i] = pw(2, i * (i - 1) / 2) * inv[i - 1] % mod;  // z * C'
    get_mul(N, a, b);
    a[0] = 1;
    // Turn z F' (coefficient f_i / (i-1)!) into f_i / i!. Terms above n are
    // dropped explicitly rather than by reading fac/inv beyond what was filled.
    for (ll i = 1; i < N; i++) a[i] = (i <= n) ? a[i] * fac[i - 1] % mod * inv[i] % mod : 0;
    for (ll i = N; i < MAXN; i++) a[i] = 0;
    std::memset(b, 0, sizeof(b));
    get_pw(n + 1, a, b, k);
    std::printf("%lld\n", ans[n] * fac[n] % mod);
    return 0;
}