test/yosupo/inv_of_Poly.test.cpp

View this file on GitHub
Last update: 2024-10-01 17:03:34+09:00
Problem: https://judge.yosupo.jp/problem/inv_of_polynomials

Depends on

Code

// competitive-verifier: PROBLEM https://judge.yosupo.jp/problem/inv_of_polynomials
// competitive-verifier: TLE 1
// competitive-verifier: MLE 512
#include <iostream>
#include "src/Math/ModInt.hpp"
#include "src/FFT/Polynomial.hpp"
#include "src/FFT/extgcd.hpp"
using namespace std;
signed main() {
 cin.tie(0);
 ios::sync_with_stdio(0);
 using Mint= ModInt<998244353>;
 int N, M;
 cin >> N >> M;
 Polynomial<Mint> f(N), g(M), x, y;
 for (int i= 0; i < N; i++) cin >> f[i];
 for (int i= 0; i < M; i++) cin >> g[i];
 auto d= extgcd(f, g, x, y);
 if (d.deg() != 0) {
  cout << -1 << '\n';
 } else if (x.deg() == -1) {
  cout << 0 << '\n';
 } else {
  cout << x.size() << '\n';
  x/= d[0];
  for (int i= 0, ed= x.size(); i < ed; i++) cout << x[i] << " \n"[i == ed - 1];
 }
 return 0;
}

#line 1 "test/yosupo/inv_of_Poly.test.cpp"
// competitive-verifier: PROBLEM https://judge.yosupo.jp/problem/inv_of_polynomials
// competitive-verifier: TLE 1
// competitive-verifier: MLE 512
#include <iostream>
#line 2 "src/Math/mod_inv.hpp"
#include <utility>
#include <type_traits>
#include <cassert>
template <class Uint> constexpr inline Uint mod_inv(Uint a, Uint mod) {
 std::make_signed_t<Uint> x= 1, y= 0, z= 0;
 for (Uint q= 0, b= mod, c= 0; b;) z= x, x= y, y= z - y * (q= a / b), c= a, a= b, b= c - b * q;
 return assert(a == 1), x < 0 ? mod - (-x) % mod : x % mod;
}
#line 2 "src/Internal/Remainder.hpp"
namespace math_internal {
using namespace std;
using u8= unsigned char;
using u32= unsigned;
using i64= long long;
using u64= unsigned long long;
using u128= __uint128_t;
struct MP_Na {  // mod < 2^32
 u32 mod;
 constexpr MP_Na(): mod(0) {}
 constexpr MP_Na(u32 m): mod(m) {}
 constexpr inline u32 mul(u32 l, u32 r) const { return u64(l) * r % mod; }
 constexpr inline u32 set(u32 n) const { return n; }
 constexpr inline u32 get(u32 n) const { return n; }
 constexpr inline u32 norm(u32 n) const { return n; }
 constexpr inline u32 plus(u64 l, u32 r) const { return l+= r, l < mod ? l : l - mod; }
 constexpr inline u32 diff(u64 l, u32 r) const { return l-= r, l >> 63 ? l + mod : l; }
};
template <class u_t, class du_t, u8 B> struct MP_Mo {  // mod < 2^32, mod < 2^62
 u_t mod;
 constexpr MP_Mo(): mod(0), iv(0), r2(0) {}
 constexpr MP_Mo(u_t m): mod(m), iv(inv(m)), r2(-du_t(mod) % mod) {}
 constexpr inline u_t mul(u_t l, u_t r) const { return reduce(du_t(l) * r); }
 constexpr inline u_t set(u_t n) const { return mul(n, r2); }
 constexpr inline u_t get(u_t n) const { return n= reduce(n), n >= mod ? n - mod : n; }
 constexpr inline u_t norm(u_t n) const { return n >= mod ? n - mod : n; }
 constexpr inline u_t plus(u_t l, u_t r) const { return l+= r, l < (mod << 1) ? l : l - (mod << 1); }
 constexpr inline u_t diff(u_t l, u_t r) const { return l-= r, l >> (B - 1) ? l + (mod << 1) : l; }
private:
 u_t iv, r2;
 static constexpr u_t inv(u_t n, int e= 6, u_t x= 1) { return e ? inv(n, e - 1, x * (2 - x * n)) : x; }
 constexpr inline u_t reduce(const du_t &w) const { return u_t(w >> B) + mod - ((du_t(u_t(w) * iv) * mod) >> B); }
};
using MP_Mo32= MP_Mo<u32, u64, 32>;
using MP_Mo64= MP_Mo<u64, u128, 64>;
struct MP_Br {  // 2^20 < mod <= 2^41
 u64 mod;
 constexpr MP_Br(): mod(0), x(0) {}
 constexpr MP_Br(u64 m): mod(m), x((u128(1) << 84) / m) {}
 constexpr inline u64 mul(u64 l, u64 r) const { return rem(u128(l) * r); }
 static constexpr inline u64 set(u64 n) { return n; }
 constexpr inline u64 get(u64 n) const { return n >= mod ? n - mod : n; }
 constexpr inline u64 norm(u64 n) const { return n >= mod ? n - mod : n; }
 constexpr inline u64 plus(u64 l, u64 r) const { return l+= r, l < (mod << 1) ? l : l - (mod << 1); }
 constexpr inline u64 diff(u64 l, u64 r) const { return l-= r, l >> 63 ? l + (mod << 1) : l; }
private:
 u64 x;
 constexpr inline u128 quo(const u128 &n) const { return (n * x) >> 84; }
 constexpr inline u64 rem(const u128 &n) const { return n - quo(n) * mod; }
};
template <class du_t, u8 B> struct MP_D2B1 {  // mod < 2^63, mod < 2^64
 u64 mod;
 constexpr MP_D2B1(): mod(0), s(0), d(0), v(0) {}
 constexpr MP_D2B1(u64 m): mod(m), s(__builtin_clzll(m)), d(m << s), v(u128(-1) / d) {}
 constexpr inline u64 mul(u64 l, u64 r) const { return rem((u128(l) * r) << s) >> s; }
 constexpr inline u64 set(u64 n) const { return n; }
 constexpr inline u64 get(u64 n) const { return n; }
 constexpr inline u64 norm(u64 n) const { return n; }
 constexpr inline u64 plus(du_t l, u64 r) const { return l+= r, l < mod ? l : l - mod; }
 constexpr inline u64 diff(du_t l, u64 r) const { return l-= r, l >> B ? l + mod : l; }
private:
 u8 s;
 u64 d, v;
 constexpr inline u64 rem(const u128 &u) const {
  u128 q= (u >> 64) * v + u;
  u64 r= u64(u) - (q >> 64) * d - d;
  if (r > u64(q)) r+= d;
  if (r >= d) r-= d;
  return r;
 }
};
using MP_D2B1_1= MP_D2B1<u64, 63>;
using MP_D2B1_2= MP_D2B1<u128, 127>;
template <class u_t, class MP> constexpr u_t pow(u_t x, u64 k, const MP &md) {
 for (u_t ret= md.set(1);; x= md.mul(x, x))
  if (k & 1 ? ret= md.mul(ret, x) : 0; !(k>>= 1)) return ret;
}
}
#line 3 "src/Internal/modint_traits.hpp"
namespace math_internal {
struct m_b {};
struct s_b: m_b {};
}
template <class mod_t> constexpr bool is_modint_v= std::is_base_of_v<math_internal::m_b, mod_t>;
template <class mod_t> constexpr bool is_staticmodint_v= std::is_base_of_v<math_internal::s_b, mod_t>;
#line 6 "src/Math/ModInt.hpp"
namespace math_internal {
template <class MP, u64 MOD> struct SB: s_b {
protected:
 static constexpr MP md= MP(MOD);
};
template <class U, class B> struct MInt: public B {
 using Uint= U;
 static constexpr inline auto mod() { return B::md.mod; }
 constexpr MInt(): x(0) {}
 template <class T, typename= enable_if_t<is_modint_v<T> && !is_same_v<T, MInt>>> constexpr MInt(T v): x(B::md.set(v.val() % B::md.mod)) {}
 constexpr MInt(__int128_t n): x(B::md.set((n < 0 ? ((n= (-n) % B::md.mod) ? B::md.mod - n : n) : n % B::md.mod))) {}
 constexpr MInt operator-() const { return MInt() - *this; }
#define FUNC(name, op) \
 constexpr MInt name const { \
  MInt ret; \
  return ret.x= op, ret; \
 }
 FUNC(operator+(const MInt & r), B::md.plus(x, r.x))
 FUNC(operator-(const MInt & r), B::md.diff(x, r.x))
 FUNC(operator*(const MInt & r), B::md.mul(x, r.x))
 FUNC(pow(u64 k), math_internal::pow(x, k, B::md))
#undef FUNC
 constexpr MInt operator/(const MInt &r) const { return *this * r.inv(); }
 constexpr MInt &operator+=(const MInt &r) { return *this= *this + r; }
 constexpr MInt &operator-=(const MInt &r) { return *this= *this - r; }
 constexpr MInt &operator*=(const MInt &r) { return *this= *this * r; }
 constexpr MInt &operator/=(const MInt &r) { return *this= *this / r; }
 constexpr bool operator==(const MInt &r) const { return B::md.norm(x) == B::md.norm(r.x); }
 constexpr bool operator!=(const MInt &r) const { return !(*this == r); }
 constexpr bool operator<(const MInt &r) const { return B::md.norm(x) < B::md.norm(r.x); }
 constexpr inline MInt inv() const { return mod_inv<U>(val(), B::md.mod); }
 constexpr inline Uint val() const { return B::md.get(x); }
 friend ostream &operator<<(ostream &os, const MInt &r) { return os << r.val(); }
 friend istream &operator>>(istream &is, MInt &r) {
  i64 v;
  return is >> v, r= MInt(v), is;
 }
private:
 Uint x;
};
template <u64 MOD> using MP_B= conditional_t < (MOD < (1 << 30)) & MOD, MP_Mo32, conditional_t < MOD < (1ull << 32), MP_Na, conditional_t<(MOD < (1ull << 62)) & MOD, MP_Mo64, conditional_t<MOD<(1ull << 41), MP_Br, conditional_t<MOD<(1ull << 63), MP_D2B1_1, MP_D2B1_2>>>>>;
template <u64 MOD> using ModInt= MInt < conditional_t<MOD<(1 << 30), u32, u64>, SB<MP_B<MOD>, MOD>>;
}
using math_internal::ModInt;
#line 2 "src/FFT/fps_inv.hpp"
#include <vector>
#include <algorithm>
#line 2 "src/FFT/NTT.hpp"
#include <array>
#include <limits>
#line 3 "src/NumberTheory/is_prime.hpp"
namespace math_internal {
template <class Uint, class MP, u32... args> constexpr bool miller_rabin(Uint n) {
 const MP md(n);
 const Uint s= __builtin_ctzll(n - 1), d= n >> s, one= md.set(1), n1= md.norm(md.set(n - 1));
 for (u32 a: (u32[]){args...})
  if (Uint b= a % n; b)
   if (Uint p= md.norm(pow(md.set(b), d, md)); p != one)
    for (int i= s; p != n1; p= md.norm(md.mul(p, p)))
     if (!(--i)) return 0;
 return 1;
}
}
constexpr bool is_prime(unsigned long long n) {
 if (n < 2 || n % 6 % 4 != 1) return (n | 1) == 3;
 if (n < (1 << 30)) return math_internal::miller_rabin<unsigned, math_internal::MP_Mo32, 2, 7, 61>(n);
 if (n < (1ull << 62)) return math_internal::miller_rabin<unsigned long long, math_internal::MP_Mo64, 2, 325, 9375, 28178, 450775, 9780504, 1795265022>(n);
 if (n < (1ull << 63)) return math_internal::miller_rabin<unsigned long long, math_internal::MP_D2B1_1, 2, 325, 9375, 28178, 450775, 9780504, 1795265022>(n);
 return math_internal::miller_rabin<unsigned long long, math_internal::MP_D2B1_2, 2, 325, 9375, 28178, 450775, 9780504, 1795265022>(n);
}
#line 6 "src/FFT/NTT.hpp"
template <class mod_t, size_t LM> mod_t get_inv(int n) {
 static_assert(is_modint_v<mod_t>);
 static const auto m= mod_t::mod();
 static mod_t* dat= new mod_t[LM];
 static int l= 1;
 if (l == 1) dat[l++]= 1;
 for (; l <= n; ++l) dat[l]= dat[m % l] * (m - m / l);
 return dat[n];
}
namespace math_internal {
#define CE constexpr
#define ST static
#define TP template
#define BSF(_, n) __builtin_ctz##_(n)
TP<class mod_t> struct NTT {
#define _DFT(a, b, c, ...) \
 mod_t r, u, *x0, *x1; \
 for (int a= n, b= 1, s, i; a>>= 1; b<<= 1) \
  for (s= 0, r= I, x0= x;; r*= c[BSF(, s)], x0= x1 + p) { \
   for (x1= x0 + (i= p); i--;) __VA_ARGS__; \
   if (++s == e) break; \
  }
 ST inline void dft(int n, mod_t x[]) { _DFT(p, e, r2, x1[i]= x0[i] - (u= r * x1[i]), x0[i]+= u); }
 ST inline void idft(int n, mod_t x[]) {
  _DFT(e, p, ir2, u= x0[i] - x1[i], x0[i]+= x1[i], x1[i]= r * u)
  for (const mod_t iv= I / n; n--;) x[n]*= iv;
 }
#undef _DFT
 ST inline void even_dft(int n, mod_t x[]) {
  for (int i= 0, j= 0; i < n; i+= 2) x[j++]= iv2 * (x[i] + x[i + 1]);
 }
 ST inline void odd_dft(int n, mod_t x[], mod_t r= iv2) {
  for (int i= 0, j= 0;; r*= ir2[BSF(, ++j)])
   if (x[j]= r * (x[i] - x[i + 1]); (i+= 2) == n) break;
 }
 ST inline void dft_doubling(int n, mod_t x[], int i= 0) {
  mod_t k= I, t= rt[BSF(, n << 1)];
  for (copy_n(x, n, x + n), idft(n, x + n); i < n; ++i) x[n + i]*= k, k*= t;
  dft(n, x + n);
 }
protected:
 ST CE u64 md= mod_t::mod();
 static_assert(md & 1);
 static_assert(is_prime(md));
 ST CE u8 E= BSF(ll, md - 1);
 ST CE mod_t w= [](u8 e) {
  for (mod_t r= 2;; r+= 1)
   if (auto s= r.pow((md - 1) / 2); s != 1 && s * s == 1) return r.pow((md - 1) >> e);
  return mod_t();
 }(E);
 static_assert(w != mod_t());
 ST CE mod_t I= 1, iv2= (md + 1) / 2, iw= w.pow((1ULL << E) - 1);
 ST CE auto roots(mod_t w) {
  array<mod_t, E + 1> x= {};
  for (u8 e= E; e; w*= w) x[e--]= w;
  return x[0]= w, x;
 }
 TP<u32 N> ST CE auto ras(const array<mod_t, E + 1>& rt, const array<mod_t, E + 1>& irt, int i= N) {
  array<mod_t, E + 1 - N> x= {};
  for (mod_t ro= 1; i <= E; ro*= irt[i++]) x[i - N]= rt[i] * ro;
  return x;
 }
 ST CE auto rt= roots(w), irt= roots(iw);
 ST CE auto r2= ras<2>(rt, irt), ir2= ras<2>(irt, rt);
};
TP<class T, u8 t, class B> struct NI: public B {
 using B::B;
#define FUNC(op, name, HG, ...) \
 inline void name(__VA_ARGS__) { \
  HG(op, 1); \
  if CE (t > 1) HG(op, 2); \
  if CE (t > 2) HG(op, 3); \
  if CE (t > 3) HG(op, 4); \
  if CE (t > 4) HG(op, 5); \
 }
#define REP for (int i= b; i < e; ++i)
#define DFT(fft, _) B::ntt##_::fft(e - b, this->dt##_ + b)
#define ZEROS(op, _) fill_n(this->dt##_ + b, e - b, typename B::m##_())
#define SET(op, _) copy(x + b, x + e, this->dt##_ + b)
#define SET_S(op, _) this->dt##_[i]= x;
#define SUBST(op, _) copy(r.dt##_ + b, r.dt##_ + e, this->dt##_ + b)
#define ASGN(op, _) REP this->dt##_[i] op##= r.dt##_[i]
#define ASN(nm, op) TP<class C> FUNC(op, nm, ASGN, const NI<T, t, C>& r, int b, int e)
#define BOP(op, _) REP this->dt##_[i]= l.dt##_[i] op r.dt##_[i]
#define OP(nm, op) TP<class C, class D> FUNC(op, nm, BOP, const NI<T, t, C>& l, const NI<T, t, D>& r, int b, int e)
 OP(add, +) OP(dif, -) OP(mul, *) ASN(add, +) ASN(dif, -) ASN(mul, *) FUNC(dft, dft, DFT, int b, int e) FUNC(idft, idft, DFT, int b, int e) FUNC(__, zeros, ZEROS, int b, int e) FUNC(__, set, SET, const T x[], int b, int e) FUNC(__, set, SET_S, int i, T x) TP<class C> FUNC(__, subst, SUBST, const NI<T, t, C>& r, int b, int e) inline void get(T x[], int b, int e) const {
  if CE (t == 1) copy(this->dt1 + b, this->dt1 + e, x + b);
  else REP x[i]= get(i);
 }
#define TMP(_) B::iv##_##1 * (this->dt##_[i] - r1)
 inline T get(int i) const {
  if CE (t > 1) {
   u64 r1= this->dt1[i].val(), r2= (TMP(2)).val();
   T a= 0;
   if CE (t > 2) {
    u64 r3= (TMP(3) - B::iv32 * r2).val();
    if CE (t > 3) {
     u64 r4= (TMP(4) - B::iv42 * r2 - B::iv43 * r3).val();
     if CE (t > 4) a= T(B::m4::mod()) * (TMP(5) - B::iv52 * r2 - B::iv53 * r3 - B::iv54 * r4).val();
     a= (a + r4) * B::m3::mod();
    }
    a= (a + r3) * B::m2::mod();
   }
   return (a + r2) * B::m1::mod() + r1;
  } else return this->dt1[i];
 }
#undef TMP
#undef DFT
#undef ZEROS
#undef SET
#undef SET_S
#undef SUBST
#undef ASGN
#undef ASN
#undef BOP
#undef OP
#undef FUNC
#undef REP
};
#define ARR(_) \
 using m##_= ModInt<M##_>; \
 using ntt##_= NTT<m##_>; \
 m##_* dt##_= new m##_[LM];
#define IV2 ST CE m2 iv21= m2(1) / m1::mod();
#define IV3 ST CE m3 iv32= m3(1) / m2::mod(), iv31= iv32 / m1::mod();
#define IV4 ST CE m4 iv43= m4(1) / m3::mod(), iv42= iv43 / m2::mod(), iv41= iv42 / m1::mod();
#define IV5 ST CE m5 iv54= m5(1) / m4::mod(), iv53= iv54 / m3::mod(), iv52= iv53 / m2::mod(), iv51= iv52 / m1::mod();
TP<u8 t, u64 M1, u32 M2, u32 M3, u32 M4, u32 M5, u32 LM, bool v> struct NB {
 ARR(1)
};
TP<u64 M1, u32 M2, u32 M3, u32 M4, u32 M5, u32 LM> struct NB<2, M1, M2, M3, M4, M5, LM, 0> {
 ARR(1) ARR(2) IV2
};
TP<u64 M1, u32 M2, u32 M3, u32 M4, u32 M5, u32 LM> struct NB<3, M1, M2, M3, M4, M5, LM, 0> {
 ARR(1) ARR(2) ARR(3) IV2 IV3
};
TP<u64 M1, u32 M2, u32 M3, u32 M4, u32 M5, u32 LM> struct NB<4, M1, M2, M3, M4, M5, LM, 0> {
 ARR(1) ARR(2) ARR(3) ARR(4) IV2 IV3 IV4
};
TP<u64 M1, u32 M2, u32 M3, u32 M4, u32 M5, u32 LM> struct NB<5, M1, M2, M3, M4, M5, LM, 0> {
 ARR(1) ARR(2) ARR(3) ARR(4) ARR(5) IV2 IV3 IV4 IV5
};
#undef ARR
#define VC(_) \
 using m##_= ModInt<M##_>; \
 using ntt##_= NTT<m##_>; \
 vector<m##_> bf##_; \
 m##_* dt##_;
#define RS resize
TP<u64 M1, u32 M2, u32 M3, u32 M4, u32 M5, u32 LM> struct NB<1, M1, M2, M3, M4, M5, LM, 1> {
 NB(): dt1(bf1.data()) {}
 void RS(int n) { bf1.RS(n), dt1= bf1.data(); }
 u32 size() const { return bf1.size(); }
 VC(1)
};
TP<u64 M1, u32 M2, u32 M3, u32 M4, u32 M5, u32 LM> struct NB<2, M1, M2, M3, M4, M5, LM, 1> {
 NB(): dt1(bf1.data()), dt2(bf2.data()) {}
 void RS(int n) { bf1.RS(n), dt1= bf1.data(), bf2.RS(n), dt2= bf2.data(); }
 u32 size() const { return bf1.size(); }
 VC(1) VC(2) IV2
};
TP<u64 M1, u32 M2, u32 M3, u32 M4, u32 M5, u32 LM> struct NB<3, M1, M2, M3, M4, M5, LM, 1> {
 NB(): dt1(bf1.data()), dt2(bf2.data()), dt3(bf3.data()) {}
 void RS(int n) { bf1.RS(n), dt1= bf1.data(), bf2.RS(n), dt2= bf2.data(), bf3.RS(n), dt3= bf3.data(); }
 u32 size() const { return bf1.size(); }
 VC(1) VC(2) VC(3) IV2 IV3
};
TP<u64 M1, u32 M2, u32 M3, u32 M4, u32 M5, u32 LM> struct NB<4, M1, M2, M3, M4, M5, LM, 1> {
 NB(): dt1(bf1.data()), dt2(bf2.data()), dt3(bf3.data()), dt4(bf4.data()) {}
 void RS(int n) { bf1.RS(n), dt1= bf1.data(), bf2.RS(n), dt2= bf2.data(), bf3.RS(n), dt3= bf3.data(), bf4.RS(n), dt4= bf4.data(); }
 u32 size() const { return bf1.size(); }
 VC(1) VC(2) VC(3) VC(4) IV2 IV3 IV4
};
TP<u64 M1, u32 M2, u32 M3, u32 M4, u32 M5, u32 LM> struct NB<5, M1, M2, M3, M4, M5, LM, 1> {
 NB(): dt1(bf1.data()), dt2(bf2.data()), dt3(bf3.data()), dt4(bf4.data()), dt5(bf5.data()) {}
 void RS(int n) { bf1.RS(n), dt1= bf1.data(), bf2.RS(n), dt2= bf2.data(), bf3.RS(n), dt3= bf3.data(), bf4.RS(n), dt4= bf4.data(), bf5.RS(n), dt5= bf5.data(); }
 u32 size() const { return bf1.size(); }
 VC(1) VC(2) VC(3) VC(4) VC(5) IV2 IV3 IV4 IV5
};
#undef VC
#undef IV2
#undef IV3
#undef IV4
#undef IV5
TP<class T, u32 LM> CE bool is_nttfriend() {
 if CE (!is_staticmodint_v<T>) return 0;
 else return (T::mod() & is_prime(T::mod())) && LM <= (1ULL << BSF(ll, T::mod() - 1));
}
TP<class T, enable_if_t<is_arithmetic_v<T>, nullptr_t> = nullptr> CE u64 mv() { return numeric_limits<T>::max(); }
TP<class T, enable_if_t<is_staticmodint_v<T>, nullptr_t> = nullptr> CE u64 mv() { return T::mod(); }
TP<class T, u32 LM, u32 M1, u32 M2, u32 M3, u32 M4> CE u8 nt() {
 if CE (!is_nttfriend<T, LM>()) {
  CE u128 m= mv<T>(), mm= m * m;
  if CE (mm <= M1 / LM) return 1;
  else if CE (mm <= u64(M1) * M2 / LM) return 2;
  else if CE (mm <= u128(M1) * M2 * M3 / LM) return 3;
  else if CE (mm <= u128(M1) * M2 * M3 * M4 / LM) return 4;
  else return 5;
 } else return 1;
}
#undef BSF
#undef RS
CE u32 MOD1= 998244353, MOD2= 897581057, MOD3= 880803841, MOD4= 754974721, MOD5= 645922817;
TP<class T, u32 LM> CE u8 nttarr_type= nt<T, LM, MOD1, MOD2, MOD3, MOD4>();
TP<class T, u32 LM> CE u8 nttarr_cat= is_nttfriend<T, LM>() && (mv<T>() > (1 << 30)) ? 0 : nttarr_type<T, LM>;
TP<class T, u32 LM, bool v> using NTTArray= NI<T, nttarr_type<T, LM>, conditional_t<is_nttfriend<T, LM>(), NB<1, mv<T>(), 0, 0, 0, 0, LM, v>, NB<nttarr_type<T, LM>, MOD1, MOD2, MOD3, MOD4, MOD5, LM, v>>>;
#undef CE
#undef ST
#undef TP
}
using math_internal::is_nttfriend, math_internal::nttarr_type, math_internal::nttarr_cat, math_internal::NTT, math_internal::NTTArray;
template <class T, size_t LM, int id= 0> struct GlobalNTTArray {
 static inline NTTArray<T, LM, 0> bf;
};
template <class T, size_t LM, size_t LM2, int id= 0> struct GlobalNTTArray2D {
 static inline NTTArray<T, LM, 0>* bf= new NTTArray<T, LM, 0>[LM2];
};
template <class T, size_t LM, int id= 0> struct GlobalArray {
 static inline T* bf= new T[LM];
};
constexpr unsigned pw2(unsigned n) { return --n, n|= n >> 1, n|= n >> 2, n|= n >> 4, n|= n >> 8, n|= n >> 16, ++n; }
#line 6 "src/FFT/fps_inv.hpp"
namespace math_internal {
template <u32 LM, class mod_t> inline void inv_base(const mod_t p[], int n, mod_t r[], int i= 1, int l= -1) {
 static constexpr int t= nttarr_cat<mod_t, LM>, TH= (int[]){64, 64, 128, 256, 512, 512}[t];
 if (n <= i) return;
 if (l < 0) l= n;
 assert(((n & -n) == n)), assert(i && ((i & -i) == i));
 const mod_t miv= -r[0];
 for (int j, m= min(n, TH); i < m; r[i++]*= miv)
  for (r[i]= mod_t(), j= min(i + 1, l); --j;) r[i]+= r[i - j] * p[j];
 static constexpr int lnR= 2 + (!t), R= (1 << lnR) - 1;
 using GNA1= GlobalNTTArray<mod_t, LM, 1>;
 using GNA2= GlobalNTTArray<mod_t, LM, 2>;
 for (auto gt1= GlobalNTTArray2D<mod_t, LM, R, 1>::bf, gt2= GlobalNTTArray2D<mod_t, LM, R, 2>::bf; i < n;) {
  mod_t* rr= r;
  const mod_t* pp= p;
  const int s= i, e= s << 1, ss= (l - 1) / s;
  for (int k= 0, j; i < n && k < R; ++k, i+= s, pp+= s) {
   if (j= min(e, l - k * s); j > 0) gt2[k].set(pp, 0, j), gt2[k].zeros(j, e), gt2[k].dft(0, e);
   for (gt1[k].set(rr, 0, s), gt1[k].zeros(s, e), gt1[k].dft(0, e), GNA2::bf.mul(gt1[k], gt2[0], 0, e), j= min(k, ss) + 1; --j;) GNA1::bf.mul(gt1[k - j], gt2[j], 0, e), GNA2::bf.add(GNA1::bf, 0, e);
   GNA2::bf.idft(0, e), GNA2::bf.zeros(0, s);
   if constexpr (!is_nttfriend<mod_t, LM>()) GNA2::bf.get(rr, s, e), GNA2::bf.set(rr, s, e);
   for (GNA2::bf.dft(0, e), GNA2::bf.mul(gt1[0], 0, e), GNA2::bf.idft(0, e), GNA2::bf.get(rr, s, e), rr+= j= s; j--;) rr[j]= -rr[j];
  }
 }
}
template <u32 lnR, class mod_t, u32 LM= 1 << 22> void inv_(const mod_t p[], int n, mod_t r[]) {
 static constexpr u32 R= (1 << lnR) - 1, LM2= LM >> (lnR - 1);
 using GNA1= GlobalNTTArray<mod_t, LM2, 1>;
 using GNA2= GlobalNTTArray<mod_t, LM2, 2>;
 auto gt1= GlobalNTTArray2D<mod_t, LM2, R, 1>::bf, gt2= GlobalNTTArray2D<mod_t, LM2, R, 2>::bf;
 assert(n > 0), assert(p[0] != mod_t());
 const int m= pw2(n) >> lnR, m2= m << 1, ed= (n - 1) / m;
 inv_base<LM2>(p, m, r);
 for (int k= 0, l; k < ed; p+= m) {
  for (gt2[k].set(p, 0, l= min(m2, n - m * k)), gt2[k].zeros(l, m2), gt2[k].dft(0, m2), gt1[k].set(r, 0, m), gt1[k].zeros(m, m2), gt1[k].dft(0, m2), GNA2::bf.mul(gt1[k], gt2[0], 0, m2), l= k; l--;) GNA1::bf.mul(gt1[l], gt2[k - l], 0, m2), GNA2::bf.add(GNA1::bf, 0, m2);
  GNA2::bf.idft(0, m2), GNA2::bf.zeros(0, m);
  if constexpr (!is_nttfriend<mod_t, LM>()) GNA2::bf.get(r, m, m2), GNA2::bf.set(r, m, m2);
  for (GNA2::bf.dft(0, m2), GNA2::bf.mul(gt1[0], 0, m2), GNA2::bf.idft(0, m2), GNA2::bf.get(r, m, m + (l= min(m, n - m * ++k))), r+= m; l--;) r[l]= -r[l];
 }
}
template <class mod_t, u32 LM= 1 << 22> vector<mod_t> inv(const vector<mod_t>& p) {
 static constexpr int t= nttarr_cat<mod_t, LM>, TH= (int[]){234, 106, 280, 458, 603, 861}[t];
 mod_t *pp= GlobalArray<mod_t, LM, 1>::bf, *r= GlobalArray<mod_t, LM, 2>::bf;
 const int n= p.size();
 copy_n(p.begin(), n, pp), assert(n > 0), assert(p[0] != mod_t());
 if (const mod_t miv= -(r[0]= mod_t(1) / p[0]); n > TH) {
  const int l= pw2(n), l1= l >> 1, k= (n - l1 - 1) / (l1 >> 3), bl= __builtin_ctz(l1);
  int a= 4;
  if constexpr (!t) a= bl < 8 ? k > 5 ? 1 : 3 : bl < 9 ? k & 1 ? 3 : 4 : bl < 10 ? k & 1 && k > 4 ? 3 : 4 : bl < 11 ? k > 6 ? 3 : 4 : 4;
  else if constexpr (t < 2) a= bl < 7 ? 2 : bl < 9 ? k ? 3 : 4 : k & 1 ? 3 : 4;
  else if constexpr (t < 3) a= bl < 9 ? k > 5 ? 1 : k ? 3 : 4 : k & 1 ? 3 : 4;
  else if constexpr (t < 4) a= bl < 9 ? 1 : bl < 10 ? k > 5 ? 1 : !k ? 4 : k & 2 ? 2 : 3 : k & 1 ? 3 : 4;
  else if constexpr (t < 5) a= bl < 10 ? k & 2 ? 2 : 3 : k & 1 ? 3 : 4;
  else a= bl < 10 ? 1 : bl < 11 ? k > 5 ? 1 : !k ? 4 : k & 2 ? 2 : 3 : k & 1 ? 3 : 4;
  (a < 2 ? inv_<1, mod_t, LM> : a < 3 ? inv_<2, mod_t, LM> : a < 4 ? inv_<3, mod_t, LM> : inv_<4, mod_t, LM>)(pp, n, r);
 } else
  for (int j, i= 1; i < n; r[i++]*= miv)
   for (r[j= i]= mod_t(); j--;) r[i]+= r[j] * pp[i - j];
 return vector(r, r + n);
}
}
using math_internal::inv_base, math_internal::inv;
#line 3 "src/FFT/fps_div.hpp"
namespace math_internal {
template <size_t LM, class mod_t> void div_base(const mod_t p[], int n, const mod_t q[], int l, mod_t r[], const mod_t iv[]) {
 static constexpr int t= nttarr_cat<mod_t, LM>, TH= (int[]){64, 64, 256, 256, 256, 256}[t];
 assert(n > 0), assert(((n & -n) == n)), assert(l > 0);
 const mod_t iv0= iv[0];
 const int m= min(TH, n);
 int i= 0;
 for (copy_n(p, m, r); i < m; r[i++]*= iv0)
  for (int j= min(i + 1, l); --j;) r[i]-= r[i - j] * q[j];
 using GNA1= GlobalNTTArray<mod_t, LM, 1>;
 using GNA2= GlobalNTTArray<mod_t, LM, 2>;
 using GNA3= GlobalNTTArray<mod_t, LM, 3>;
 auto gt1= GlobalNTTArray2D<mod_t, LM, 7, 1>::bf, gt2= GlobalNTTArray2D<mod_t, LM, 7, 2>::bf;
 int skip= (__builtin_ctz(n / i) + 2) % 3 + 1;
 for (int ed= (1 << skip) - 1; i < n; ed= 7) {
  mod_t* rr= r;
  const mod_t *qq= q, *pp= p;
  const int s= i, e= s << 1, ss= (l - 1) / s;
  GNA3::bf.set(iv, 0, s), GNA3::bf.zeros(s, e), GNA3::bf.dft(0, e);
  for (int k= 0, j; i < n && k < ed; ++k, i+= s, qq+= s, pp+= s) {
   if (j= min(e, l - k * s); j > 0) gt2[k].set(qq, 0, j), gt2[k].zeros(j, e), gt2[k].dft(0, e);
   gt1[k].set(rr, 0, s), gt1[k].zeros(s, e), gt1[k].dft(0, e);
   for (GNA2::bf.mul(gt1[k], gt2[0], 0, e), j= min(k, ss) + 1; --j;) GNA1::bf.mul(gt1[k - j], gt2[j], 0, e), GNA2::bf.add(GNA1::bf, 0, e);
   GNA2::bf.idft(0, e), GNA2::bf.zeros(0, s), GNA2::bf.get(rr, s, e);
   for (j= s; j < e; ++j) rr[j]-= pp[j];
   GNA2::bf.set(rr, s, e), GNA2::bf.dft(0, e), GNA2::bf.mul(GNA3::bf, 0, e), GNA2::bf.idft(0, e), GNA2::bf.get(rr, s, e);
   for (rr+= s, j= s; j--;) rr[j]= -rr[j];
  }
 }
}
template <size_t lnR, class mod_t, size_t LM= 1 << 22> void div_(const mod_t p[], int n, const mod_t q[], int l, mod_t r[]) {
 static constexpr size_t R= (1 << lnR) - 1, LM2= LM >> (lnR - 1);
 mod_t* iv= GlobalArray<mod_t, LM2, 2>::bf;
 using GNA1= GlobalNTTArray<mod_t, LM2, 1>;
 using GNA2= GlobalNTTArray<mod_t, LM2, 2>;
 using GNA3= GlobalNTTArray<mod_t, LM2, 3>;
 auto gt1= GlobalNTTArray2D<mod_t, LM2, R, 1>::bf, gt2= GlobalNTTArray2D<mod_t, LM2, R, 2>::bf;
 const int m= pw2(n) >> lnR, m2= m << 1, ed= (n - 1) / m, ss= (l - 1) / m;
 iv[0]= mod_t(1) / q[0], inv_base<LM2>(q, m, iv, 1, l), div_base<LM2>(p, m, q, l, r, iv), GNA3::bf.set(iv, 0, m), GNA3::bf.zeros(m, m2), GNA3::bf.dft(0, m2);
 for (int k= 0, i= m, j, o; k < ed; ++k, i+= m, q+= m, p+= m) {
  if (j= min(m2, l - k * m); j > 0) gt2[k].set(q, 0, j), gt2[k].zeros(j, m2), gt2[k].dft(0, m2);
  for (gt1[k].set(r, 0, m), gt1[k].zeros(m, m2), gt1[k].dft(0, m2), GNA2::bf.mul(gt1[k], gt2[0], 0, m2), j= min(k, ss) + 1; --j;) GNA1::bf.mul(gt1[k - j], gt2[j], 0, m2), GNA2::bf.add(GNA1::bf, 0, m2);
  for (GNA2::bf.idft(0, m2), GNA2::bf.zeros(0, m), GNA2::bf.get(r, m, m + (o= min(m, n - i))), j= o + m; j-- > m;) r[j]-= p[j];
  for (GNA2::bf.set(r, m, m + o), GNA2::bf.dft(0, m2), GNA2::bf.mul(GNA3::bf, 0, m2), GNA2::bf.idft(0, m2), GNA2::bf.get(r, m, m + o), r+= m, j= o; j--;) r[j]= -r[j];
 }
}
template <class mod_t, size_t LM= 1 << 22> vector<mod_t> div(const vector<mod_t>& p, const vector<mod_t>& q) {
 static constexpr int t= nttarr_cat<mod_t, LM>, TH= (int[]){120, 152, 361, 626, 359, 418}[t];
 mod_t *r= GlobalArray<mod_t, LM, 1>::bf, *qq= GlobalArray<mod_t, LM, 2>::bf;
 const int n= p.size(), l= q.size();
 assert(l > 0), assert(q[0] != mod_t(0));
 if (n > TH) {
  div_<3, mod_t, LM>(p.data(), n, q.data(), l, r);
 } else {
  const mod_t iv0= mod_t(1) / q[0];
  copy(p.begin(), p.end(), r), copy(q.begin(), q.end(), qq);
  for (int i= 0; i < n; r[i++]*= iv0)
   for (int j= min(i + 1, l); --j;) r[i]-= r[i - j] * qq[j];
 }
 return vector(r, r + n);
}
}
using math_internal::div;
#line 3 "src/FFT/convolve.hpp"
#include <cmath>
#line 5 "src/FFT/convolve.hpp"
template <class mod_t, size_t LM= 1 << 22> std::vector<mod_t> convolve(const std::vector<mod_t>& p, const std::vector<mod_t>& q) {
 mod_t *pp= GlobalArray<mod_t, LM, 0>::bf, *qq= GlobalArray<mod_t, LM, 1>::bf, *rr= GlobalArray<mod_t, LM, 2>::bf;
 static constexpr int t= nttarr_cat<mod_t, LM>, TH= (int[]){70, 30, 70, 100, 135, 150}[t];
 auto f= [](int l) -> int {
  static constexpr double B[]= {(double[]){8.288, 5.418, 7.070, 9.676, 11.713, 13.374}[t], (double[]){8.252, 6.578, 9.283, 12.810, 13.853, 15.501}[t]};
  return std::round(std::pow(l, 0.535) * B[__builtin_ctz(l) & 1]);
 };
 const int n= p.size(), m= q.size(), sz= n + m - 1;
 if (!n || !m) return std::vector<mod_t>();
 if (std::min(n, m) < TH) {
  std::fill_n(rr, sz, mod_t()), std::copy(p.begin(), p.end(), pp), std::copy(q.begin(), q.end(), qq);
  for (int i= n; i--;)
   for (int j= m; j--;) rr[i + j]+= pp[i] * qq[j];
 } else {
  const int rl= pw2(sz), l= pw2(std::max(n, m)), fl= f(l);
  static constexpr size_t LM2= LM >> 3;
  static constexpr bool b= nttarr_cat<mod_t, LM2> < t;
  if (b || (l + fl < sz && sz <= (rl >> 3) * 5)) {
   using GNA1= GlobalNTTArray<mod_t, LM2, 1>;
   using GNA2= GlobalNTTArray<mod_t, LM2, 2>;
   auto gt1= GlobalNTTArray2D<mod_t, LM2, 16, 1>::bf, gt2= GlobalNTTArray2D<mod_t, LM2, 16, 2>::bf;
   const int l= rl >> 4, l2= l << 1, nn= (n + l - 1) / l, mm= (m + l - 1) / l, ss= nn + mm - 1;
   for (int i= 0, k= 0, s; k < n; ++i, k+= l) gt1[i].set(p.data() + k, 0, s= std::min(l, n - k)), gt1[i].zeros(s, l2), gt1[i].dft(0, l2);
   if (&p != &q)
    for (int i= 0, k= 0, s; k < m; ++i, k+= l) gt2[i].set(q.data() + k, 0, s= std::min(l, m - k)), gt2[i].zeros(s, l2), gt2[i].dft(0, l2);
   else
    for (int i= nn; i--;) gt2[i].subst(gt1[i], 0, l2);
   GNA2::bf.mul(gt1[0], gt2[0], 0, l2), GNA2::bf.idft(0, l2), GNA2::bf.get(rr, 0, l2);
   for (int i= 1, k= l, j, ed; i < ss; ++i, k+= l) {
    for (j= std::max(0, i - nn + 1), ed= std::min(mm - 1, i), GNA2::bf.mul(gt1[i - ed], gt2[ed], 0, l2); j < ed; ++j) GNA1::bf.mul(gt1[i - j], gt2[j], 0, l2), GNA2::bf.add(GNA1::bf, 0, l2);
    for (GNA2::bf.idft(0, l2), GNA2::bf.get(pp, 0, j= std::min(l, sz - k)); j--;) rr[k + j]+= pp[j];
    if (l < sz - k) GNA2::bf.get(rr + k, l, std::min(l2, sz - k));
   }
  } else {
   using GNA1= GlobalNTTArray<mod_t, LM, 1>;
   using GNA2= GlobalNTTArray<mod_t, LM, 2>;
   const int len= sz <= l + fl ? l : rl;
   if (GNA1::bf.set(p.data(), 0, n), GNA1::bf.zeros(n, len), GNA1::bf.dft(0, len); &p != &q) GNA2::bf.set(q.data(), 0, m), GNA2::bf.zeros(m, len), GNA2::bf.dft(0, len), GNA1::bf.mul(GNA2::bf, 0, len);
   else GNA1::bf.mul(GNA1::bf, 0, len);
   if (GNA1::bf.idft(0, len), GNA1::bf.get(rr, 0, std::min(sz, len)); len < sz) {
    std::copy(p.begin() + len - m + 1, p.end(), pp + len - m + 1), std::copy(q.begin() + len - n + 1, q.end(), qq + len - n + 1);
    for (int i= len, j; i < sz; rr[i - len]-= rr[i], ++i)
     for (rr[i]= mod_t(), j= i - m + 1; j < n; ++j) rr[i]+= pp[j] * qq[i - j];
   }
  }
 }
 return std::vector(rr, rr + sz);
}
#line 4 "src/FFT/Polynomial.hpp"
template <class mod_t, std::size_t LM= 1 << 22> class Polynomial: public std::vector<mod_t> {
 using Poly= Polynomial;
 struct Inde;
 struct XP_plus_C {  // x^p+c
  Inde x;
  mod_t c;
  XP_plus_C(const Inde &x_): x(x_) {}
  XP_plus_C(int p_, mod_t c_): x(p_), c(c_) {}
 };
 struct Inde {  // indeterminate
  int p_;
  Inde(int p): p_(p) {}
  Inde(): Inde(1) {}
  Inde operator^(int p) const { return Inde(p_ * p); }
  Inde operator*(const Inde &rhs) const { return Inde(p_ + rhs.p_); }
  int pow() const { return p_; }
  XP_plus_C operator+(mod_t c) { return XP_plus_C(p_, c); }
  XP_plus_C operator-(mod_t c) { return XP_plus_C(p_, -c); }
 };
 using GNA1= GlobalNTTArray<mod_t, LM, 1>;
 using GNA2= GlobalNTTArray<mod_t, LM, 2>;
 using GA= GlobalArray<mod_t, LM, 0>;
 using GAp= GlobalArray<mod_t, LM, 1>;
 using GAq= GlobalArray<mod_t, LM, 2>;
 using GA3= GlobalArray<mod_t, LM, 3>;
 static constexpr int A= 8;
 static constexpr int B= 42;
 std::pair<Poly, Poly> quorem_na(const Poly &q) const {
  int n= deg(), m= q.deg(), qsz= n - m + 1, i= qsz, j;
  std::copy_n(this->begin(), n + 1, GAp::bf), std::copy_n(q.begin(), m + 1, GAq::bf);
  for (mod_t *bf= GAp::bf + n - m, iv= mod_t(1) / GAq::bf[m]; i--; bf--)
   for (GA::bf[i]= bf[j= m] * iv; j--;) bf[j]-= GAq::bf[j] * GA::bf[i];
  Poly rem(GAp::bf, GAp::bf + m);
  return {Poly(GA::bf, GA::bf + qsz), rem.shrink()};
 }
 Poly quo(const Poly &q) const {
  const int n= deg() + 1, m= q.deg() + 1, qsz= n - m + 1, nb= this->size() - n, mb= q.size() - m;
  auto ret= div<mod_t, LM>(Poly(this->rbegin() + nb, this->rbegin() + nb + qsz), Poly(q.rbegin() + mb, q.rbegin() + mb + std::min(qsz, m)));
  return std::reverse(ret.begin(), ret.end()), ret;
 }
 std::pair<Poly, Poly> quorem_ntt(const Poly &q) const {
  const int n= deg(), m= q.deg(), qsz= n - m + 1;
  auto qu= quo(q);
  std::copy(qu.begin(), qu.end(), GA::bf), std::copy_n(this->begin(), n + 1, GAp::bf), std::copy_n(q.begin(), m + 1, GAq::bf);
  const int len= pw2(m), mask= len - 1;
  if (len > qsz) std::fill_n(GA::bf + qsz, len - qsz, mod_t());
  if (len > m + 1) std::fill_n(GAq::bf + m + 1, len - m - 1, mod_t());
  for (int i= qsz; i-- > len;) GA::bf[i & mask]+= GA::bf[i];
  for (int i= n; i >= len; i--) GAp::bf[i & mask]+= GAp::bf[i];
  if (GNA1::bf.set(GA::bf, 0, len); m == len) GAq::bf[0]+= GAq::bf[m];
  GNA2::bf.set(GAq::bf, 0, len), GNA1::bf.dft(0, len), GNA2::bf.dft(0, len), GNA1::bf.mul(GNA2::bf, 0, len), GNA1::bf.idft(0, len), GNA1::bf.get(GAq::bf, 0, m);
  for (int i= m; i--;) GAp::bf[i]-= GAq::bf[i];
  Poly rem(GAp::bf, GAp::bf + m);
  return std::make_pair(qu, rem.shrink());
 }
public:
 using std::vector<mod_t>::vector;
 Polynomial(mod_t a): Polynomial(1, a) {}
 Polynomial(const std::vector<mod_t> &p): Polynomial(p.begin(), p.end()) {}
 Polynomial(const XP_plus_C &xpc): Polynomial(xpc.x.pow() + 1) { (*this)[xpc.x.pow()]= 1, (*this)[0]= xpc.c; }
 static Inde x() { return Inde(); }
 inline int deg() const {
  for (int n= this->size() - 1;; n--)
   if (n < 0 || (*this)[n] != mod_t()) return n;
 }
 inline Poly &shrink() { return this->resize(std::max(deg() + 1, 1)), *this; }
#define ASSIGN(op) \
 Poly &operator op##=(const Poly &r) { \
  const std::size_t n= r.deg() + 1; \
  if (this->size() < n) this->resize(n); \
  for (int i= n; i--;) (*this)[i] op##= r[i]; \
  return shrink(); \
 }
 ASSIGN(+)
 ASSIGN(-)
#undef ASSIGN
 Poly &operator*=(const Poly &r) { return *this= *this * r, *this; }
 Poly &operator/=(const Poly &r) { return *this= *this / r, *this; }
 Poly &operator%=(const Poly &r) { return *this= *this % r, *this; }
 Poly operator-() const { return Poly()-= *this; }
 Poly operator+(const Poly &r) const { return Poly(*this)+= r; }
 Poly operator-(const Poly &r) const { return Poly(*this)-= r; }
 Poly operator*(const Poly &r) const { return convolve<mod_t, LM>(*this, r); }
 Poly operator/(const Poly &r) const {
  const int m= r.deg(), qsz= deg() - m + 1, ln= __builtin_ctz(pw2(qsz));
  assert(m >= 0);
  if (qsz <= 0) return Poly{mod_t()};
  return m + 3 < A * ln + B || qsz <= 64 ? quorem_na(r).first : quo(r);
 }
 std::pair<Poly, Poly> quorem(const Poly &r) const {
  const int n= deg(), m= r.deg(), qsz= n - m + 1, ln= __builtin_ctz(pw2(qsz));
  assert(m >= 0);
  if (qsz <= 0) return {Poly{mod_t()}, Poly(this->begin(), this->begin() + n + 1)};
  return m < A * ln + B || qsz <= 64 ? quorem_na(r) : quorem_ntt(r);
 }
 Poly operator%(const Poly &r) const { return quorem(r).second; }
 Poly &operator+=(const mod_t r) { return (*this)[0]+= r, *this; }
 Poly &operator-=(const mod_t r) { return (*this)[0]-= r, *this; }
 Poly &operator*=(const mod_t r) {
  for (mod_t &c: *this) c*= r;
  return shrink();
 }
 Poly &operator/=(const mod_t r) {
  for (mod_t &c: *this) c/= r;
  return shrink();
 }
 Poly operator+(const mod_t r) { return Poly(*this)+= r; }
 Poly operator-(const mod_t r) { return Poly(*this)-= r; }
 Poly operator*(const mod_t r) { return Poly(*this)*= r; }
 Poly operator/(const mod_t r) { return Poly(*this)/= r; }
 friend Poly operator+(const mod_t l, Poly r) { return r+= l; }
 friend Poly operator-(const mod_t l, Poly r) { return -(r-= l); }
 friend Poly operator*(const mod_t l, Poly r) { return r*= l; }
 mod_t operator()(mod_t c) const {  // eval f(c)
  if (c == mod_t()) return (*this)[0];
  mod_t ret= 0;
  for (int i= deg() + 1; i--;) ret*= c, ret+= (*this)[i];
  return ret;
 }
 Poly operator()(const XP_plus_C &xpc) const {  // f(x^p+c)
  return taylor_shift(xpc.c).scale(xpc.x.pow());
 }
 Poly operator()(const Poly &q) const {  // f(g) mod x^n
  const int n= this->deg() + 1, k= std::ceil(std::sqrt(n));
  std::vector<Poly> pw1(k + 1), pw2(k + 1);
  if (pw1[0]= {1}, pw1[1]= q; q.size() > n) pw1[1].resize(n);
  for (int i= 2; i <= k; ++i)
   if (pw1[i]= pw1[i - 1] * pw1[1]; pw1[i].size() > n) pw1[i].resize(n);
  pw2[0]= {1}, pw2[1]= pw1[k];
  for (int i= 2; i <= k; ++i)
   if (pw2[i]= pw2[i - 1] * pw2[1]; pw2[i].size() > n) pw2[i].resize(n);
  Poly ret(n), f;
  for (int i= 0, j; i <= k; ++i) {
   for (f.assign(n, mod_t()), j= std::min(k, std::max(0, n - k * i)); j--;) {
    mod_t coef= (*this)[k * i + j];
    for (int d= pw1[j].size(); d--;) f[d]+= pw1[j][d] * coef;
   }
   for (f*= pw2[i], j= std::min<int>(n, f.size()); j--;) ret[j]+= f[j];
  }
  return ret;
 }
 Poly &operator*=(const XP_plus_C &xpc) {
  Poly q;
  if (xpc.c != mod_t()) q= *this * xpc.c;
  return this->insert(this->begin(), xpc.x.pow(), mod_t()), *this+= q;
 }
 Poly operator*(const XP_plus_C &xpc) const { return Poly(*this)*= xpc; }
 friend Poly operator*(const XP_plus_C &xpc, const Poly &p) { return p * xpc; }
 Poly scale(int k) const {
  const int n= deg();
  Poly ret(n * k + 1);
  for (int i= 0; i <= n; i++) ret[i * k]+= (*this)[i];
  return ret;
 }
 Poly taylor_shift(mod_t c) const {
  int n= deg(), i= 0;
  if (n < 1 || c == mod_t()) return Poly(*this);
  mod_t cpw= 1, fact= 1;
  for (; i <= n; fact*= ++i) GAp::bf[n - i]= (*this)[i] * fact;
  for (fact= mod_t(1) / fact; i--;) GA3::bf[i]= (fact*= i + 1);
  for (; ++i <= n;) GAq::bf[i]= cpw * GA3::bf[i], cpw*= c;
  auto ret= Poly(GAp::bf, GAp::bf + n + 1) * Poly(GAq::bf, GAq::bf + n + 1);
  for (ret.resize(n + 1), std::reverse(ret.begin(), ret.end()); i--;) ret[i]*= GA3::bf[i];
  return ret;
 }
 friend std::ostream &operator<<(std::ostream &os, const Poly &p) {
  if (p.deg() == -1) return os << 0;
  for (int i= 0, e= p.deg(); i <= e; i++) {
   if (p[i] == mod_t()) continue;
   if (i == 0 || p[i] != mod_t(1)) os << p[i];
   if (i >= 1) os << 'x';
   if (i > 9) os << "^(" << i << ')';
   else if (i > 1) os << '^' << i;
   if (i + 1 <= e) os << " + ";
  }
  return os;
 }
};
#define __POLYNOMIAL Polynomial<mod_t, LM>
#ifdef __FPS_DIVAT
__FPS_DIVAT(__POLYNOMIAL)
#endif
#line 3 "src/FFT/extgcd.hpp"
#include <tuple>
#line 5 "src/FFT/extgcd.hpp"
// ax + by = gcd(a, b)
template <class mod_t, std::size_t _Nm> Polynomial<mod_t, _Nm> extgcd(const Polynomial<mod_t, _Nm> &a, const Polynomial<mod_t, _Nm> &b, Polynomial<mod_t, _Nm> &x, Polynomial<mod_t, _Nm> &y) {
 using Poly= Polynomial<mod_t, _Nm>;
 using PolyMat= std::array<Poly, 4>;
 assert(a.deg() >= 0), assert(b.deg() >= 0);
#define SUF(f, bg, ed) Poly(f.begin() + bg, f.begin() + ed)
#define __COPY_HOGE \
 if (n= p3.deg(), k= 2 * m - n, o= qr.second.size(); o <= m) return R; \
 if (o <= (((n + 1) * 3 + k) >> 2)) return p3= SUF(p3, k, n + 1) / SUF(qr.second, k, o), PolyMat{R[2], R[3], R[0] - p3 * R[2], R[1] - p3 * R[3]}; \
 PolyMat A= self(self, SUF(p3, k, n + 1), SUF(qr.second, k, o)); \
 return {A[0] * R[0] + A[1] * R[2], A[0] * R[1] + A[1] * R[3], A[2] * R[0] + A[3] * R[2], A[2] * R[1] + A[3] * R[3]};
 auto hgcd= [&](auto self, const Poly &p0, const Poly &p1) -> PolyMat {
  int o= p0.deg(), m= (o + 1) / 2, n= p1.deg(), k= (o + m + 1) / 2;
  if (assert(o > n); n < (((o + 1) * 3 + m) >> 2)) {
   Poly tmp= SUF(p0, m, o + 1) / SUF(p1, m, n + 1), p3= p0 - tmp * p1;
   std::pair<Poly, Poly> qr= p1.quorem(p3);
   PolyMat R= {mod_t(1), -tmp, -qr.first, qr.first * tmp + mod_t(1)};
   __COPY_HOGE
  }
  PolyMat R= self(self, SUF(p0, m, o + 1), SUF(p1, m, n + 1));
  Poly p3= R[2] * p0 + R[3] * p1;
  std::pair<Poly, Poly> qr= (R[0] * p0 + R[1] * p1).quorem(p3);
  R[0].swap(R[2]), R[1].swap(R[3]), R[2]-= qr.first * R[0], R[3]-= qr.first * R[1];
  __COPY_HOGE
 };
#undef SUF
#undef __COPY_HOGE
 auto cogcd= [&](auto self, const Poly &p0, const Poly &p1) -> std::pair<Poly, Poly> {
  int o= p0.deg(), m= (o + 1) / 2, n= p1.deg(), k= (o + m + 1) / 2;
  if (assert(o > n); n < k) {
   std::pair<Poly, Poly> t= p0.quorem(p1);
   if (t.second.deg() == -1) return {Poly(), mod_t(1)};
   std::pair<Poly, Poly> qr= p1.quorem(t.second);
   if (qr.second.deg() == -1) return {mod_t(1), -t.first};
   auto A= self(self, t.second, qr.second);
   return A.first-= A.second * qr.first, std::make_pair(A.first, A.second - A.first * t.first);
  }
  PolyMat M= hgcd(hgcd, p0, p1);
  Poly p3= M[2] * p0 + M[3] * p1;
  if (p3.deg() == -1) return {M[0], M[1]};
  std::pair<Poly, Poly> qr= (M[0] * p0 + M[1] * p1).quorem(p3);
  if (qr.second.deg() == -1) return {M[2], M[3]};
  auto A= self(self, p3, qr.second);
  return A.first-= A.second * qr.first, std::make_pair(A.second * M[0] + A.first * M[2], A.second * M[1] + A.first * M[3]);
 };
 if (a.deg() <= b.deg()) {
  std::pair<Poly, Poly> qr= a.quorem(b);
  std::tie(y, x)= cogcd(cogcd, b, qr.second), y-= x * qr.first;
 } else std::tie(x, y)= cogcd(cogcd, a, b);
 return a * x + b * y;
}
#line 8 "test/yosupo/inv_of_Poly.test.cpp"
using namespace std;
signed main() {
 cin.tie(0);
 ios::sync_with_stdio(0);
 using Mint= ModInt<998244353>;
 int N, M;
 cin >> N >> M;
 Polynomial<Mint> f(N), g(M), x, y;
 for (int i= 0; i < N; i++) cin >> f[i];
 for (int i= 0; i < M; i++) cin >> g[i];
 auto d= extgcd(f, g, x, y);
 if (d.deg() != 0) {
  cout << -1 << '\n';
 } else if (x.deg() == -1) {
  cout << 0 << '\n';
 } else {
  cout << x.size() << '\n';
  x/= d[0];
  for (int i= 0, ed= x.size(); i < ed; i++) cout << x[i] << " \n"[i == ed - 1];
 }
 return 0;
}

Test cases

Env	Name	Status	Elapsed	Memory
g++-13	abnormal_random_00	AC	86 ms	254 MB
g++-13	abnormal_random_01	AC	84 ms	254 MB
g++-13	abnormal_random_02	AC	83 ms	254 MB
g++-13	abnormal_random_03	AC	82 ms	254 MB
g++-13	abnormal_random_04	AC	83 ms	254 MB
g++-13	example_00	AC	78 ms	254 MB
g++-13	example_01	AC	81 ms	254 MB
g++-13	example_02	AC	79 ms	254 MB
g++-13	max_random_00	AC	661 ms	257 MB
g++-13	max_random_01	AC	662 ms	257 MB
g++-13	max_random_02	AC	660 ms	257 MB
g++-13	max_random_03	AC	662 ms	257 MB
g++-13	max_random_04	AC	662 ms	257 MB
g++-13	random_00	AC	192 ms	255 MB
g++-13	random_01	AC	123 ms	255 MB
g++-13	random_02	AC	163 ms	255 MB
g++-13	random_03	AC	356 ms	256 MB
g++-13	random_04	AC	236 ms	255 MB
clang++-18	abnormal_random_00	AC	83 ms	254 MB
clang++-18	abnormal_random_01	AC	84 ms	254 MB
clang++-18	abnormal_random_02	AC	82 ms	254 MB
clang++-18	abnormal_random_03	AC	81 ms	254 MB
clang++-18	abnormal_random_04	AC	81 ms	254 MB
clang++-18	example_00	AC	80 ms	254 MB
clang++-18	example_01	AC	79 ms	254 MB
clang++-18	example_02	AC	79 ms	254 MB
clang++-18	max_random_00	AC	515 ms	257 MB
clang++-18	max_random_01	AC	514 ms	257 MB
clang++-18	max_random_02	AC	512 ms	257 MB
clang++-18	max_random_03	AC	516 ms	257 MB
clang++-18	max_random_04	AC	515 ms	257 MB
clang++-18	random_00	AC	168 ms	255 MB
clang++-18	random_01	AC	113 ms	255 MB
clang++-18	random_02	AC	143 ms	255 MB
clang++-18	random_03	AC	284 ms	256 MB
clang++-18	random_04	AC	203 ms	256 MB